byterover-cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +6 -81
  2. package/dist/agent/core/domain/llm/index.d.ts +1 -1
  3. package/dist/agent/core/domain/llm/index.js +1 -1
  4. package/dist/agent/core/domain/llm/registry.d.ts +8 -0
  5. package/dist/agent/core/domain/llm/registry.js +34 -0
  6. package/dist/agent/core/domain/sandbox/types.d.ts +2 -0
  7. package/dist/agent/core/domain/tools/constants.d.ts +3 -0
  8. package/dist/agent/core/domain/tools/constants.js +3 -0
  9. package/dist/agent/core/interfaces/cipher-services.d.ts +2 -4
  10. package/dist/agent/core/interfaces/i-cipher-agent.d.ts +9 -1
  11. package/dist/agent/core/interfaces/i-sandbox-service.d.ts +8 -0
  12. package/dist/agent/core/interfaces/i-tool-provider.d.ts +10 -0
  13. package/dist/agent/core/interfaces/i-tool-scheduler.d.ts +9 -0
  14. package/dist/agent/infra/agent/agent-schemas.d.ts +0 -9
  15. package/dist/agent/infra/agent/agent-schemas.js +0 -3
  16. package/dist/agent/infra/agent/cipher-agent.d.ts +25 -1
  17. package/dist/agent/infra/agent/cipher-agent.js +138 -11
  18. package/dist/agent/infra/agent/provider-update-config.d.ts +0 -2
  19. package/dist/agent/infra/agent/service-initializer.d.ts +2 -6
  20. package/dist/agent/infra/agent/service-initializer.js +45 -38
  21. package/dist/agent/infra/blob/blob-storage-factory.d.ts +2 -2
  22. package/dist/agent/infra/blob/blob-storage-factory.js +4 -4
  23. package/dist/agent/infra/blob/file-blob-storage.d.ts +96 -0
  24. package/dist/agent/infra/blob/file-blob-storage.js +454 -0
  25. package/dist/agent/infra/blob/index.d.ts +2 -3
  26. package/dist/agent/infra/blob/index.js +4 -6
  27. package/dist/agent/infra/llm/agent-llm-service.d.ts +3 -0
  28. package/dist/agent/infra/llm/agent-llm-service.js +34 -52
  29. package/dist/agent/infra/llm/context/compression/compression-helpers.d.ts +35 -0
  30. package/dist/agent/infra/llm/context/compression/compression-helpers.js +124 -0
  31. package/dist/agent/infra/llm/context/compression/escalated-compression.d.ts +62 -0
  32. package/dist/agent/infra/llm/context/compression/escalated-compression.js +144 -0
  33. package/dist/agent/infra/llm/context/compression/index.d.ts +3 -0
  34. package/dist/agent/infra/llm/context/compression/index.js +3 -0
  35. package/dist/agent/infra/llm/context/compression/reactive-overflow.d.ts +0 -27
  36. package/dist/agent/infra/llm/context/compression/reactive-overflow.js +5 -122
  37. package/dist/agent/infra/llm/context/context-manager.d.ts +20 -1
  38. package/dist/agent/infra/llm/context/context-manager.js +37 -7
  39. package/dist/agent/infra/llm/providers/index.js +0 -2
  40. package/dist/agent/infra/llm/providers/types.d.ts +1 -5
  41. package/dist/agent/infra/map/agentic-map-service.d.ts +97 -0
  42. package/dist/agent/infra/map/agentic-map-service.js +309 -0
  43. package/dist/agent/infra/map/context-tree-store.d.ts +94 -0
  44. package/dist/agent/infra/map/context-tree-store.js +278 -0
  45. package/dist/agent/infra/map/index.d.ts +4 -0
  46. package/dist/agent/infra/map/index.js +4 -0
  47. package/dist/agent/infra/map/llm-map-memory.d.ts +59 -0
  48. package/dist/agent/infra/map/llm-map-memory.js +187 -0
  49. package/dist/agent/infra/map/llm-map-service.d.ts +36 -0
  50. package/dist/agent/infra/map/llm-map-service.js +118 -0
  51. package/dist/agent/infra/map/map-shared.d.ts +140 -0
  52. package/dist/agent/infra/map/map-shared.js +325 -0
  53. package/dist/agent/infra/map/worker-pool.d.ts +45 -0
  54. package/dist/agent/infra/map/worker-pool.js +73 -0
  55. package/dist/agent/infra/sandbox/curation-helpers.d.ts +62 -0
  56. package/dist/agent/infra/sandbox/curation-helpers.js +219 -0
  57. package/dist/agent/infra/sandbox/sandbox-service.d.ts +12 -0
  58. package/dist/agent/infra/sandbox/sandbox-service.js +39 -7
  59. package/dist/agent/infra/sandbox/tools-sdk.d.ts +48 -1
  60. package/dist/agent/infra/sandbox/tools-sdk.js +52 -1
  61. package/dist/agent/infra/session/session-manager.d.ts +8 -1
  62. package/dist/agent/infra/session/session-manager.js +24 -4
  63. package/dist/agent/infra/storage/file-key-storage.d.ts +142 -0
  64. package/dist/agent/infra/storage/file-key-storage.js +572 -0
  65. package/dist/agent/infra/storage/granular-history-storage.d.ts +1 -1
  66. package/dist/agent/infra/storage/granular-history-storage.js +1 -1
  67. package/dist/agent/infra/system-prompt/contributors/context-tree-structure-contributor.d.ts +4 -0
  68. package/dist/agent/infra/system-prompt/contributors/context-tree-structure-contributor.js +42 -14
  69. package/dist/agent/infra/system-prompt/contributors/map-selection-contributor.d.ts +16 -0
  70. package/dist/agent/infra/system-prompt/contributors/map-selection-contributor.js +47 -0
  71. package/dist/agent/infra/tools/core-tool-scheduler.js +3 -1
  72. package/dist/agent/infra/tools/implementations/agentic-map-tool.d.ts +35 -0
  73. package/dist/agent/infra/tools/implementations/agentic-map-tool.js +156 -0
  74. package/dist/agent/infra/tools/implementations/code-exec-tool.js +1 -0
  75. package/dist/agent/infra/tools/implementations/curate-tool.d.ts +9 -9
  76. package/dist/agent/infra/tools/implementations/expand-knowledge-tool.d.ts +18 -0
  77. package/dist/agent/infra/tools/implementations/expand-knowledge-tool.js +43 -0
  78. package/dist/agent/infra/tools/implementations/llm-map-tool.d.ts +24 -0
  79. package/dist/agent/infra/tools/implementations/llm-map-tool.js +87 -0
  80. package/dist/agent/infra/tools/implementations/memory-symbol-tree.d.ts +28 -1
  81. package/dist/agent/infra/tools/implementations/memory-symbol-tree.js +27 -3
  82. package/dist/agent/infra/tools/implementations/search-knowledge-service.d.ts +1 -0
  83. package/dist/agent/infra/tools/implementations/search-knowledge-service.js +83 -12
  84. package/dist/agent/infra/tools/implementations/search-knowledge-tool.js +2 -2
  85. package/dist/agent/infra/tools/tool-manager.js +6 -0
  86. package/dist/agent/infra/tools/tool-provider.d.ts +12 -0
  87. package/dist/agent/infra/tools/tool-provider.js +78 -0
  88. package/dist/agent/infra/tools/tool-registry.d.ts +14 -0
  89. package/dist/agent/infra/tools/tool-registry.js +32 -0
  90. package/dist/agent/resources/prompts/system-prompt.yml +48 -74
  91. package/dist/agent/resources/tools/expand_knowledge.txt +20 -0
  92. package/dist/oclif/commands/curate/index.js +1 -2
  93. package/dist/oclif/commands/main.js +1 -0
  94. package/dist/oclif/commands/providers/connect.d.ts +1 -3
  95. package/dist/oclif/commands/providers/connect.js +7 -29
  96. package/dist/oclif/commands/query.js +1 -2
  97. package/dist/server/constants.d.ts +7 -0
  98. package/dist/server/constants.js +8 -0
  99. package/dist/server/core/domain/entities/provider-registry.js +1 -15
  100. package/dist/server/core/domain/knowledge/memory-scoring.js +1 -1
  101. package/dist/server/core/domain/knowledge/summary-types.d.ts +126 -0
  102. package/dist/server/core/domain/knowledge/summary-types.js +7 -0
  103. package/dist/server/core/domain/transport/schemas.d.ts +0 -4
  104. package/dist/server/core/interfaces/context-tree/i-context-tree-archive-service.d.ts +30 -0
  105. package/dist/server/core/interfaces/context-tree/i-context-tree-archive-service.js +1 -0
  106. package/dist/server/core/interfaces/context-tree/i-context-tree-manifest-service.d.ts +30 -0
  107. package/dist/server/core/interfaces/context-tree/i-context-tree-manifest-service.js +1 -0
  108. package/dist/server/core/interfaces/context-tree/i-context-tree-summary-service.d.ts +29 -0
  109. package/dist/server/core/interfaces/context-tree/i-context-tree-summary-service.js +1 -0
  110. package/dist/server/infra/cogit/context-tree-to-push-context-mapper.js +10 -3
  111. package/dist/server/infra/connectors/skill/skill-connector.d.ts +4 -0
  112. package/dist/server/infra/connectors/skill/skill-connector.js +4 -0
  113. package/dist/server/infra/context-tree/children-hash.d.ts +20 -0
  114. package/dist/server/infra/context-tree/children-hash.js +22 -0
  115. package/dist/server/infra/context-tree/derived-artifact.d.ts +28 -0
  116. package/dist/server/infra/context-tree/derived-artifact.js +48 -0
  117. package/dist/server/infra/context-tree/file-context-tree-archive-service.d.ts +37 -0
  118. package/dist/server/infra/context-tree/file-context-tree-archive-service.js +219 -0
  119. package/dist/server/infra/context-tree/file-context-tree-manifest-service.d.ts +50 -0
  120. package/dist/server/infra/context-tree/file-context-tree-manifest-service.js +278 -0
  121. package/dist/server/infra/context-tree/file-context-tree-merger.js +4 -0
  122. package/dist/server/infra/context-tree/file-context-tree-snapshot-service.js +12 -4
  123. package/dist/server/infra/context-tree/file-context-tree-summary-service.d.ts +44 -0
  124. package/dist/server/infra/context-tree/file-context-tree-summary-service.js +313 -0
  125. package/dist/server/infra/context-tree/file-context-tree-writer-service.js +5 -0
  126. package/dist/server/infra/context-tree/prompts/summary-generation.d.ts +22 -0
  127. package/dist/server/infra/context-tree/prompts/summary-generation.js +45 -0
  128. package/dist/server/infra/context-tree/snapshot-diff.d.ts +19 -0
  129. package/dist/server/infra/context-tree/snapshot-diff.js +39 -0
  130. package/dist/server/infra/context-tree/summary-frontmatter.d.ts +24 -0
  131. package/dist/server/infra/context-tree/summary-frontmatter.js +111 -0
  132. package/dist/server/infra/daemon/agent-process.js +2 -14
  133. package/dist/server/infra/executor/curate-executor.d.ts +1 -0
  134. package/dist/server/infra/executor/curate-executor.js +82 -34
  135. package/dist/server/infra/executor/folder-pack-executor.js +1 -1
  136. package/dist/server/infra/executor/pre-compaction/compaction-escalation.d.ts +6 -0
  137. package/dist/server/infra/executor/pre-compaction/compaction-escalation.js +6 -0
  138. package/dist/server/infra/executor/pre-compaction/index.d.ts +3 -0
  139. package/dist/server/infra/executor/pre-compaction/index.js +1 -0
  140. package/dist/server/infra/executor/pre-compaction/pre-compaction-service.d.ts +59 -0
  141. package/dist/server/infra/executor/pre-compaction/pre-compaction-service.js +124 -0
  142. package/dist/server/infra/executor/pre-compaction/prompts.d.ts +24 -0
  143. package/dist/server/infra/executor/pre-compaction/prompts.js +47 -0
  144. package/dist/server/infra/executor/query-executor.d.ts +3 -0
  145. package/dist/server/infra/executor/query-executor.js +39 -4
  146. package/dist/server/infra/http/authenticated-http-client.js +4 -0
  147. package/dist/server/infra/http/provider-model-fetcher-registry.js +1 -5
  148. package/dist/server/infra/http/provider-model-fetchers.d.ts +0 -14
  149. package/dist/server/infra/http/provider-model-fetchers.js +0 -132
  150. package/dist/server/infra/provider/provider-config-resolver.js +0 -55
  151. package/dist/server/utils/curate-result-parser.d.ts +4 -4
  152. package/dist/shared/constants/curation.d.ts +6 -0
  153. package/dist/shared/constants/curation.js +6 -0
  154. package/dist/shared/utils/escalation-utils.d.ts +59 -0
  155. package/dist/shared/utils/escalation-utils.js +141 -0
  156. package/dist/tui/components/command-input.js +1 -1
  157. package/dist/tui/components/inline-prompts/inline-confirm.js +6 -1
  158. package/dist/tui/features/commands/definitions/exit.d.ts +2 -0
  159. package/dist/tui/features/commands/definitions/exit.js +9 -0
  160. package/dist/tui/features/commands/definitions/index.js +3 -0
  161. package/dist/tui/features/exit/components/exit-flow.d.ts +10 -0
  162. package/dist/tui/features/exit/components/exit-flow.js +19 -0
  163. package/dist/tui/features/provider/components/provider-flow.js +1 -21
  164. package/oclif.manifest.json +100 -109
  165. package/package.json +11 -4
  166. package/dist/agent/infra/blob/migrations.d.ts +0 -63
  167. package/dist/agent/infra/blob/migrations.js +0 -148
  168. package/dist/agent/infra/blob/sqlite-blob-storage.d.ts +0 -82
  169. package/dist/agent/infra/blob/sqlite-blob-storage.js +0 -307
  170. package/dist/agent/infra/llm/providers/google-vertex.d.ts +0 -15
  171. package/dist/agent/infra/llm/providers/google-vertex.js +0 -36
  172. package/dist/agent/infra/storage/blob-history-storage.d.ts +0 -81
  173. package/dist/agent/infra/storage/blob-history-storage.js +0 -193
  174. package/dist/agent/infra/storage/dual-format-history-storage.d.ts +0 -83
  175. package/dist/agent/infra/storage/dual-format-history-storage.js +0 -165
  176. package/dist/agent/infra/storage/sqlite-key-storage.d.ts +0 -113
  177. package/dist/agent/infra/storage/sqlite-key-storage.js +0 -438
  178. package/dist/server/infra/provider/vertex-ai-utils.d.ts +0 -10
  179. package/dist/server/infra/provider/vertex-ai-utils.js +0 -28
  180. package/dist/tui/features/provider/components/credential-path-dialog.d.ts +0 -30
  181. package/dist/tui/features/provider/components/credential-path-dialog.js +0 -85
@@ -0,0 +1,73 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ // ── Worker Pool ──────────────────────────────────────────────────────────────
3
+ /**
4
+ * In-memory parallel worker pool for map operations.
5
+ *
6
+ * N workers run in parallel via Promise.all(). Each worker claims items by
7
+ * incrementing a shared index counter (safe because JS is single-threaded
8
+ * for synchronous code — no cross-process races).
9
+ *
10
+ * Replaces the previous FileMapStore-backed implementation which used atomic
11
+ * file renames for item claiming. That pattern was ported from VoltCode's
12
+ * multi-process PostgreSQL architecture but is unnecessary for byterover-cli's
13
+ * single-process execution model.
14
+ *
15
+ * @returns Summary of the map run including a results Map (index → result)
16
+ */
17
+ export async function runMapWorkerPool(options) {
18
+ const { abortSignal, concurrency, items, onProgress, processItem } = options;
19
+ const mapId = `map-${Date.now()}-${randomUUID().slice(0, 8)}`;
20
+ const total = items.length;
21
+ const results = new Map();
22
+ let succeededCount = 0;
23
+ let failedCount = 0;
24
+ let runningCount = 0;
25
+ let nextIndex = 0;
26
+ function emitProgress() {
27
+ onProgress?.({
28
+ failed: failedCount,
29
+ mapId,
30
+ running: runningCount,
31
+ succeeded: succeededCount,
32
+ total,
33
+ });
34
+ }
35
+ /**
36
+ * Single worker: claims items by incrementing the shared index until
37
+ * the queue is exhausted or the abort signal fires.
38
+ */
39
+ async function runWorker() {
40
+ while (!abortSignal?.aborted) {
41
+ const idx = nextIndex++;
42
+ if (idx >= items.length) {
43
+ break;
44
+ }
45
+ runningCount++;
46
+ emitProgress();
47
+ try {
48
+ // eslint-disable-next-line no-await-in-loop
49
+ const result = await processItem(idx, items[idx]);
50
+ results.set(idx, result);
51
+ succeededCount++;
52
+ }
53
+ catch {
54
+ // Store null placeholder so output JSONL maintains 1:1 line mapping with input
55
+ results.set(idx, null);
56
+ failedCount++;
57
+ }
58
+ runningCount--;
59
+ emitProgress();
60
+ }
61
+ }
62
+ // Launch worker pool — N workers run in parallel
63
+ const workerCount = Math.min(concurrency, total || concurrency);
64
+ const workers = Array.from({ length: workerCount }, () => runWorker());
65
+ await Promise.all(workers);
66
+ return {
67
+ failed: failedCount,
68
+ mapId,
69
+ results,
70
+ succeeded: succeededCount,
71
+ total,
72
+ };
73
+ }
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Curation REPL Library — Pre-built helpers for curation workflow.
3
+ *
4
+ * Injected into the sandbox as `tools.curation.*` so the LLM calls
5
+ * these functions instead of generating identical infrastructure code
6
+ * (chunking loops, metadata inspection, deduplication) every curation run.
7
+ *
8
+ * All functions operate on values passed in, not variable names.
9
+ * - recon, chunk, detectMessageBoundaries, groupBySubject, dedup: stateless (no mutation, no I/O)
10
+ * - recordProgress: intentionally mutating (pushes entry into history object)
11
+ */
12
/** Character count below which chunking is skipped; derived from the shared curation constant. */
export declare const SINGLE_PASS_CHAR_THRESHOLD = 20000;
/** Valid categories — mirrors CurateFact.category from i-curate-service.ts:51 */
export type CurationCategory =
    | 'convention'
    | 'environment'
    | 'other'
    | 'personal'
    | 'preference'
    | 'project'
    | 'team';
/** Runtime set holding the category names listed in `CurationCategory`. */
export declare const VALID_CATEGORIES: Set<string>;
/** One extracted fact; only `statement` is required. */
export interface CurationFact {
    statement: string;
    subject?: string;
    category?: CurationCategory;
}
/** Overview returned by `recon`. */
export interface ReconResult {
    /** Size figures measured on the context text. */
    meta: {
        charCount: number;
        lineCount: number;
        messageCount: number;
    };
    /** Titles already recorded per domain, plus the processed-entry counter. */
    history: {
        domains: Record<string, string[]>;
        totalProcessed: number;
    };
    /** Leading slice of the context. */
    headPreview: string;
    /** Trailing slice of the context. */
    tailPreview: string;
    suggestedMode: 'chunked' | 'single-pass';
    suggestedChunkCount: number;
}
/** Result of `chunk`: the pieces and the `[start, end)` offsets they were cut from. */
export interface ChunkResult {
    chunks: string[];
    boundaries: {
        start: number;
        end: number;
    }[];
    totalChunks: number;
}
/** Position of one `[USER]:` / `[ASSISTANT]:` marker found in a context string. */
export interface MessageBoundary {
    /** Zero-based ordinal of the marker among all markers found. */
    index: number;
    /** Character offset of the marker's opening bracket. */
    offset: number;
    /** Lower-cased role name taken from the marker. */
    role: string;
}
/** Summarise a context string and the curation history in a single call. */
export declare function recon(context: string, meta: Record<string, unknown>, history: Record<string, unknown>): ReconResult;
/** Split a context string into chunks; `size` and `overlap` are character counts. */
export declare function chunk(context: string, options?: {
    size?: number;
    overlap?: number;
}): ChunkResult;
/** Locate every message marker in a context string. */
export declare function detectMessageBoundaries(context: string): MessageBoundary[];
/** Bucket facts by subject, falling back to category and then to `'uncategorized'`. */
export declare function groupBySubject(facts: CurationFact[]): Record<string, CurationFact[]>;
/** Drop facts whose statement word-overlap with an earlier kept fact reaches `threshold`. */
export declare function dedup(facts: CurationFact[], threshold?: number): CurationFact[];
/** Append `entry` to `history.entries` and increment `history.totalProcessed` (mutates `history`). */
export declare function recordProgress(history: Record<string, unknown>, entry: {
    title: string;
    domain: string;
    keyFacts: string[];
}): void;
@@ -0,0 +1,219 @@
1
+ /**
2
+ * Curation REPL Library — Pre-built helpers for curation workflow.
3
+ *
4
+ * Injected into the sandbox as `tools.curation.*` so the LLM calls
5
+ * these functions instead of generating identical infrastructure code
6
+ * (chunking loops, metadata inspection, deduplication) every curation run.
7
+ *
8
+ * All functions operate on values passed in, not variable names.
9
+ * - recon, chunk, detectMessageBoundaries, groupBySubject, dedup: stateless (no mutation, no I/O)
10
+ * - recordProgress: intentionally mutating (pushes entry into history object)
11
+ */
12
+ import { CURATION_CHAR_THRESHOLD } from '../../../shared/constants/curation.js';
13
+ // ---------------------------------------------------------------------------
14
+ // Types
15
+ // ---------------------------------------------------------------------------
16
/** Character count under which `recon` suggests single-pass mode; aliases the shared curation constant. */
export const SINGLE_PASS_CHAR_THRESHOLD = CURATION_CHAR_THRESHOLD;
/** Category names accepted on a curation fact. */
export const VALID_CATEGORIES = new Set([
    'convention',
    'environment',
    'other',
    'personal',
    'preference',
    'project',
    'team',
]);
21
+ // ---------------------------------------------------------------------------
22
+ // recon — combines Steps 0-2 into one call
23
+ // ---------------------------------------------------------------------------
24
/**
 * Summarise a curation input and its history in one call.
 *
 * Measures the context (characters, lines, `[USER]:` / `[ASSISTANT]:` markers
 * preceded by a blank line), lists the titles already recorded per domain,
 * and suggests whether to process the text in one pass or in chunks.
 *
 * History data comes from sandbox variables, so it is validated rather than
 * trusted: a missing history, a non-array `entries`, non-object entries and a
 * non-numeric `totalProcessed` are all tolerated. Domain names are collected
 * in a Map so that names such as `constructor` or `__proto__` cannot collide
 * with members inherited from Object.prototype.
 *
 * @param context - Text to inspect; a non-string value is treated as empty text.
 * @param meta - Accepted for interface compatibility; not read by this function.
 * @param history - Curation history; `entries` and `totalProcessed` are read.
 * @returns Previews (first 3000 / last 1000 characters), size figures,
 *   per-domain titles, the suggested mode and a chunk-count estimate based on
 *   8000 characters per chunk.
 */
export function recon(context, meta, history) {
    const text = typeof context === 'string' ? context : '';
    const charCount = text.length;
    const lineCount = text.split('\n').length;
    const messageCount = (text.match(/\n\n\[(USER|ASSISTANT)\]:/g) || []).length;
    // Summarize history domains
    const rawEntries = history?.entries;
    const histEntries = Array.isArray(rawEntries) ? rawEntries : [];
    const titlesByDomain = new Map();
    for (const entry of histEntries) {
        if (entry === null || typeof entry !== 'object') {
            continue;
        }
        const domain = String(entry.domain ?? 'unknown');
        let titles = titlesByDomain.get(domain);
        if (!titles) {
            titles = [];
            titlesByDomain.set(domain, titles);
        }
        if (entry.title) {
            titles.push(entry.title);
        }
    }
    // Object.fromEntries defines own data properties, so even a "__proto__"
    // domain becomes a regular key instead of replacing the prototype.
    const domains = Object.fromEntries(titlesByDomain);
    const rawTotal = history?.totalProcessed;
    const totalProcessed = typeof rawTotal === 'number' && Number.isFinite(rawTotal) ? rawTotal : 0;
    const suggestedChunkCount = Math.ceil(charCount / 8000);
    const suggestedMode = charCount < SINGLE_PASS_CHAR_THRESHOLD ? 'single-pass' : 'chunked';
    return {
        headPreview: text.slice(0, 3000),
        history: { domains, totalProcessed },
        meta: { charCount, lineCount, messageCount },
        suggestedChunkCount,
        suggestedMode,
        tailPreview: text.slice(-1000),
    };
}
53
+ // ---------------------------------------------------------------------------
54
+ // chunk — intelligent boundary-aware text splitting
55
+ // ---------------------------------------------------------------------------
56
const CODE_FENCE_REGEX = /^```/;
/** Chunk size used when `options.size` is missing or not a positive number. */
const DEFAULT_CHUNK_SIZE = 8000;
/** Overlap used when `options.overlap` is not supplied. */
const DEFAULT_CHUNK_OVERLAP = 200;
/**
 * Split text into chunks of roughly `size` characters, preferring to cut at
 * natural boundaries, with `overlap` characters repeated between neighbours.
 *
 * @param context - Text to split. Empty or missing text yields no chunks.
 * @param options - Optional settings:
 *   - `size` (default 8000): target chunk length. A value that is not a
 *     finite number >= 1 falls back to the default, because a zero or
 *     negative size can never make progress.
 *   - `overlap` (default 200): characters of the previous chunk repeated at
 *     the start of the next. Negative or non-finite values are treated as 0,
 *     because a negative overlap would skip text between chunks. An overlap
 *     that would not move the cursor forward is ignored for that step.
 * @returns The chunks, the `[start, end)` offsets they were cut from, and the
 *   chunk count. The last chunk always ends at the end of the text and no
 *   chunk is wholly contained in its predecessor.
 */
export function chunk(context, options) {
    const requestedSize = options?.size;
    const chunkSize = Number.isFinite(requestedSize) && requestedSize >= 1
        ? Math.floor(requestedSize)
        : DEFAULT_CHUNK_SIZE;
    const requestedOverlap = options?.overlap ?? DEFAULT_CHUNK_OVERLAP;
    const overlap = Number.isFinite(requestedOverlap) && requestedOverlap > 0
        ? Math.floor(requestedOverlap)
        : 0;
    if (!context || context.length === 0) {
        return { boundaries: [], chunks: [], totalChunks: 0 };
    }
    if (context.length <= chunkSize) {
        return {
            boundaries: [{ end: context.length, start: 0 }],
            chunks: [context],
            totalChunks: 1,
        };
    }
    const chunks = [];
    const boundaries = [];
    let offset = 0;
    while (offset < context.length) {
        let end = Math.min(offset + chunkSize, context.length);
        // If not at the end of the string, try to find a good boundary
        if (end < context.length) {
            end = findChunkBoundary(context, offset, end);
        }
        chunks.push(context.slice(offset, end));
        boundaries.push({ end, start: offset });
        // The text is fully covered. Stepping back by the overlap here would
        // emit a tail chunk that only repeats the end of this one.
        if (end >= context.length) {
            break;
        }
        // Advance with overlap (but never go backwards). `end` is always
        // greater than `offset`, so the cursor strictly increases.
        const nextOffset = end - overlap;
        offset = nextOffset > offset ? nextOffset : end;
    }
    return { boundaries, chunks, totalChunks: chunks.length };
}
/**
 * Pick the cut position for the chunk starting at `offset` whose hard limit is `end`.
 *
 * If the chunk would stop inside an unclosed ``` code fence and the closing
 * fence starts within 20% beyond the limit, the cut is moved to just after
 * the closing fence line (so the result may exceed `end`). Otherwise the
 * second half of the chunk is searched, in priority order, for the last
 * paragraph break (\n\n), the last [USER]:/[ASSISTANT]: marker, and the last
 * line break (\n). With none of these the chunk is cut at `end`, which can
 * still fall inside a code fence.
 *
 * @returns A position strictly greater than `offset`.
 */
function findChunkBoundary(context, offset, end) {
    const span = end - offset;
    const searchStart = Math.max(offset + Math.floor(span * 0.5), offset);
    const region = context.slice(searchStart, end);
    // An odd number of fence lines means the chunk ends inside a code block
    if (countCodeFences(context.slice(offset, end)) % 2 !== 0) {
        const closingFence = context.indexOf('```', end);
        if (closingFence !== -1 && closingFence - offset <= span * 1.2) {
            // Extend to include closing fence + newline
            const afterFence = context.indexOf('\n', closingFence);
            return afterFence === -1 ? closingFence + 3 : afterFence + 1;
        }
    }
    // Try paragraph boundary (\n\n)
    const paraBreak = region.lastIndexOf('\n\n');
    if (paraBreak !== -1) {
        return searchStart + paraBreak + 2;
    }
    // Try message boundary ([USER]: or [ASSISTANT]:) — keep the last one found
    let lastMarkerIndex = -1;
    for (const marker of region.matchAll(/\n\[(USER|ASSISTANT)\]:/g)) {
        lastMarkerIndex = marker.index;
    }
    if (lastMarkerIndex !== -1) {
        // +1 keeps the newline in this chunk so the next one starts at "["
        return searchStart + lastMarkerIndex + 1;
    }
    // Try line boundary (\n)
    const lineBreak = region.lastIndexOf('\n');
    if (lineBreak !== -1) {
        return searchStart + lineBreak + 1;
    }
    // Hard cut — guarantees forward progress
    return end;
}
/** Count the lines of `text` that, once trimmed, start with a ``` fence marker. */
function countCodeFences(text) {
    return text.split('\n').filter((line) => CODE_FENCE_REGEX.test(line.trim())).length;
}
142
+ // ---------------------------------------------------------------------------
143
+ // detectMessageBoundaries
144
+ // ---------------------------------------------------------------------------
145
const MESSAGE_BOUNDARY_REGEX = /\n\[(USER|ASSISTANT)\]:/g;
/**
 * List every `[USER]:` / `[ASSISTANT]:` marker that directly follows a newline.
 *
 * @param context - Text to scan.
 * @returns One record per marker, in order of appearance: its ordinal, the
 *   offset of the opening bracket, and the role in lower case.
 */
export function detectMessageBoundaries(context) {
    const found = [];
    // The pattern is global and shared, so rewind it before scanning
    MESSAGE_BOUNDARY_REGEX.lastIndex = 0;
    for (let hit = MESSAGE_BOUNDARY_REGEX.exec(context); hit !== null; hit = MESSAGE_BOUNDARY_REGEX.exec(context)) {
        found.push({
            index: found.length,
            offset: hit.index + 1, // the match starts on the newline; step past it
            role: hit[1].toLowerCase(),
        });
    }
    return found;
}
160
+ // ---------------------------------------------------------------------------
161
+ // groupBySubject
162
+ // ---------------------------------------------------------------------------
163
/**
 * Bucket facts by subject.
 *
 * The bucket key is the fact's `subject`, falling back to its `category` and
 * finally to `'uncategorized'`. Buckets are collected in a Map and converted
 * to a plain object at the end, so keys that match members inherited from
 * Object.prototype (`constructor`, `toString`, `__proto__`, …) are handled
 * like any other subject instead of throwing.
 *
 * @param facts - Facts to group; the array and its items are not modified.
 * @returns Plain object mapping each key to its facts in input order.
 */
export function groupBySubject(facts) {
    const buckets = new Map();
    for (const fact of facts) {
        const key = String(fact.subject ?? fact.category ?? 'uncategorized');
        const bucket = buckets.get(key);
        if (bucket) {
            bucket.push(fact);
        }
        else {
            buckets.set(key, [fact]);
        }
    }
    // Object.fromEntries defines own data properties, so "__proto__" becomes a
    // normal key rather than replacing the result's prototype.
    return Object.fromEntries(buckets);
}
174
+ // ---------------------------------------------------------------------------
175
+ // dedup — word-overlap Jaccard similarity
176
+ // ---------------------------------------------------------------------------
177
+ export function dedup(facts, threshold = 0.85) {
178
+ if (facts.length <= 1) {
179
+ return facts;
180
+ }
181
+ const tokenized = facts.map((f) => tokenize(f.statement));
182
+ const keep = Array.from({ length: facts.length }, () => true);
183
+ for (let i = 0; i < facts.length; i++) {
184
+ if (!keep[i])
185
+ continue;
186
+ for (let j = i + 1; j < facts.length; j++) {
187
+ if (!keep[j])
188
+ continue;
189
+ if (jaccardSimilarity(tokenized[i], tokenized[j]) >= threshold) {
190
+ keep[j] = false;
191
+ }
192
+ }
193
+ }
194
+ return facts.filter((_, i) => keep[i]);
195
+ }
196
+ function tokenize(text) {
197
+ return new Set(text.toLowerCase().split(/\s+/).filter(Boolean));
198
+ }
199
+ function jaccardSimilarity(a, b) {
200
+ if (a.size === 0 && b.size === 0)
201
+ return 1;
202
+ let intersection = 0;
203
+ for (const word of a) {
204
+ if (b.has(word))
205
+ intersection++;
206
+ }
207
+ const union = a.size + b.size - intersection;
208
+ return union === 0 ? 0 : intersection / union;
209
+ }
210
+ // ---------------------------------------------------------------------------
211
+ // recordProgress — intentionally mutating
212
+ // ---------------------------------------------------------------------------
213
+ export function recordProgress(history, entry) {
214
+ const entries = (history.entries ?? []);
215
+ entries.push(entry);
216
+ history.entries = entries;
217
+ const current = (history.totalProcessed ?? 0);
218
+ history.totalProcessed = current + 1;
219
+ }
@@ -1,5 +1,6 @@
1
1
  import type { EnvironmentContext } from '../../core/domain/environment/types.js';
2
2
  import type { REPLResult, SandboxConfig } from '../../core/domain/sandbox/types.js';
3
+ import type { IContentGenerator } from '../../core/interfaces/i-content-generator.js';
3
4
  import type { ICurateService } from '../../core/interfaces/i-curate-service.js';
4
5
  import type { IFileSystem } from '../../core/interfaces/i-file-system.js';
5
6
  import type { ISandboxService } from '../../core/interfaces/i-sandbox-service.js';
@@ -12,6 +13,8 @@ import type { ISearchKnowledgeService } from './tools-sdk.js';
12
13
  export declare class SandboxService implements ISandboxService {
13
14
  /** Collector wrapping curateService — captures curate() results per executeCode() call */
14
15
  private collector?;
16
+ /** Content generator for parallel LLM operations (mapExtract) */
17
+ private contentGenerator?;
15
18
  /** Curate service for Tools SDK */
16
19
  private curateService?;
17
20
  /** Environment context for sandbox injection */
@@ -20,6 +23,8 @@ export declare class SandboxService implements ISandboxService {
20
23
  private fileSystem?;
21
24
  /** Variables buffered before sandbox creation, keyed by sessionId */
22
25
  private pendingVariables;
26
+ /** Command type used to build each sandbox's ToolsSDK, keyed by sessionId */
27
+ private sandboxCommandTypes;
23
28
  /** Map of agent sessionId to LocalSandbox instance */
24
29
  private sandboxes;
25
30
  /** Search knowledge service for Tools SDK */
@@ -53,6 +58,13 @@ export declare class SandboxService implements ISandboxService {
53
58
  * @returns Execution result
54
59
  */
55
60
  executeCode(code: string, sessionId: string, config?: SandboxConfig): Promise<REPLResult>;
61
+ /**
62
+ * Set the content generator for parallel LLM operations (mapExtract).
63
+ * When set, new sandboxes will have access to `tools.curation.mapExtract()`.
64
+ *
65
+ * @param contentGenerator - Content generator instance
66
+ */
67
+ setContentGenerator(contentGenerator: IContentGenerator): void;
56
68
  /**
57
69
  * Set the curate service for Tools SDK injection.
58
70
  * When set, new sandboxes will have access to curate operations via `tools.curate()`.
@@ -8,6 +8,8 @@ import { createToolsSDK } from './tools-sdk.js';
8
8
  export class SandboxService {
9
9
  /** Collector wrapping curateService — captures curate() results per executeCode() call */
10
10
  collector;
11
+ /** Content generator for parallel LLM operations (mapExtract) */
12
+ contentGenerator;
11
13
  /** Curate service for Tools SDK */
12
14
  curateService;
13
15
  /** Environment context for sandbox injection */
@@ -16,6 +18,8 @@ export class SandboxService {
16
18
  fileSystem;
17
19
  /** Variables buffered before sandbox creation, keyed by sessionId */
18
20
  pendingVariables = new Map();
21
+ /** Command type used to build each sandbox's ToolsSDK, keyed by sessionId */
22
+ sandboxCommandTypes = new Map();
19
23
  /** Map of agent sessionId to LocalSandbox instance */
20
24
  sandboxes = new Map();
21
25
  /** Search knowledge service for Tools SDK */
@@ -27,6 +31,7 @@ export class SandboxService {
27
31
  */
28
32
  async cleanup() {
29
33
  this.sandboxes.clear();
34
+ this.sandboxCommandTypes.clear();
30
35
  this.pendingVariables.clear();
31
36
  }
32
37
  /**
@@ -36,6 +41,7 @@ export class SandboxService {
36
41
  */
37
42
  async clearSession(sessionId) {
38
43
  this.sandboxes.delete(sessionId);
44
+ this.sandboxCommandTypes.delete(sessionId);
39
45
  this.pendingVariables.delete(sessionId);
40
46
  }
41
47
  /**
@@ -66,7 +72,22 @@ export class SandboxService {
66
72
  async executeCode(code, sessionId, config) {
67
73
  // Get or create sandbox for this agent session
68
74
  let sandbox = this.sandboxes.get(sessionId);
69
- if (!sandbox) {
75
+ if (sandbox) {
76
+ // Hot-swap ToolsSDK if commandType changed (security: enforce read-only on transition)
77
+ const previousCommandType = this.sandboxCommandTypes.get(sessionId);
78
+ if (config?.commandType !== previousCommandType) {
79
+ const newToolsSDK = this.buildToolsSDK(sessionId, config?.commandType);
80
+ if (newToolsSDK) {
81
+ sandbox.updateContext({ tools: newToolsSDK });
82
+ }
83
+ this.sandboxCommandTypes.set(sessionId, config?.commandType);
84
+ }
85
+ // Update context if provided
86
+ if (config?.contextPayload) {
87
+ sandbox.updateContext({ context: config.contextPayload });
88
+ }
89
+ }
90
+ else {
70
91
  // First execution for this session - create new sandbox
71
92
  const initialContext = {};
72
93
  if (config?.contextPayload) {
@@ -79,17 +100,14 @@ export class SandboxService {
79
100
  this.pendingVariables.delete(sessionId);
80
101
  }
81
102
  // Build per-session ToolsSDK (includes agentQuery bound to this sessionId)
82
- const sessionToolsSDK = this.buildToolsSDK(sessionId);
103
+ const sessionToolsSDK = this.buildToolsSDK(sessionId, config?.commandType);
83
104
  sandbox = new LocalSandbox({
84
105
  environmentContext: this.environmentContext,
85
106
  initialContext,
86
107
  toolsSDK: sessionToolsSDK,
87
108
  });
88
109
  this.sandboxes.set(sessionId, sandbox);
89
- }
90
- else if (config?.contextPayload) {
91
- // Update context if provided
92
- sandbox.updateContext({ context: config.contextPayload });
110
+ this.sandboxCommandTypes.set(sessionId, config?.commandType);
93
111
  }
94
112
  if (this.collector) {
95
113
  const { curateResults, result } = await this.collector.collect(() => sandbox.execute(code, config));
@@ -97,6 +115,16 @@ export class SandboxService {
97
115
  }
98
116
  return sandbox.execute(code, config);
99
117
  }
118
+ /**
119
+ * Set the content generator for parallel LLM operations (mapExtract).
120
+ * When set, new sandboxes will have access to `tools.curation.mapExtract()`.
121
+ *
122
+ * @param contentGenerator - Content generator instance
123
+ */
124
+ setContentGenerator(contentGenerator) {
125
+ this.contentGenerator = contentGenerator;
126
+ this.invalidateSandboxes();
127
+ }
100
128
  /**
101
129
  * Set the curate service for Tools SDK injection.
102
130
  * When set, new sandboxes will have access to curate operations via `tools.curate()`.
@@ -118,6 +146,7 @@ export class SandboxService {
118
146
  this.environmentContext = environmentContext;
119
147
  // Clear existing sandboxes so new ones get the updated environment
120
148
  this.sandboxes.clear();
149
+ this.sandboxCommandTypes.clear();
121
150
  }
122
151
  /**
123
152
  * Set the file system service for Tools SDK injection.
@@ -176,11 +205,13 @@ export class SandboxService {
176
205
  * Build a Tools SDK instance for a specific session.
177
206
  * Includes `agentQuery` bound to the session's ID for sub-agent delegation.
178
207
  */
179
- buildToolsSDK(sessionId) {
208
+ buildToolsSDK(sessionId, commandType) {
180
209
  if (!this.fileSystem) {
181
210
  return undefined;
182
211
  }
183
212
  return createToolsSDK({
213
+ commandType,
214
+ contentGenerator: this.contentGenerator,
184
215
  curateService: this.curateService,
185
216
  fileSystem: this.fileSystem,
186
217
  parentSessionId: sessionId,
@@ -196,6 +227,7 @@ export class SandboxService {
196
227
  invalidateSandboxes() {
197
228
  if (this.fileSystem) {
198
229
  this.sandboxes.clear();
230
+ this.sandboxCommandTypes.clear();
199
231
  }
200
232
  }
201
233
  }
@@ -1,8 +1,10 @@
1
1
  import type { FileContent, GlobResult, ListDirectoryResult, SearchResult, WriteResult } from '../../core/domain/file-system/types.js';
2
+ import type { IContentGenerator } from '../../core/interfaces/i-content-generator.js';
2
3
  import type { CurateOperation, CurateOptions, CurateResult, DetectDomainsInput, DetectDomainsResult, ICurateService } from '../../core/interfaces/i-curate-service.js';
3
4
  import type { IFileSystem } from '../../core/interfaces/i-file-system.js';
4
5
  import type { ISandboxService } from '../../core/interfaces/i-sandbox-service.js';
5
6
  import type { SessionManager } from '../session/session-manager.js';
7
+ import { type ChunkResult, type CurationFact, type MessageBoundary, type ReconResult } from './curation-helpers.js';
6
8
  /**
7
9
  * Options for glob operation in ToolsSDK.
8
10
  */
@@ -67,6 +69,8 @@ export interface SearchKnowledgeOptions {
67
69
  export interface SearchKnowledgeResult {
68
70
  message: string;
69
71
  results: Array<{
72
+ /** For archive stubs: path to full content (for expand_knowledge tool) */
73
+ archiveFullPath?: string;
70
74
  /** Number of other memories that reference this one */
71
75
  backlinkCount?: number;
72
76
  excerpt: string;
@@ -74,7 +78,7 @@ export interface SearchKnowledgeResult {
74
78
  /** Top backlink source paths (max 3) */
75
79
  relatedPaths?: string[];
76
80
  score: number;
77
- /** Symbol kind: 'domain' | 'topic' | 'subtopic' | 'context' */
81
+ /** Symbol kind: 'domain' | 'topic' | 'subtopic' | 'context' | 'archive_stub' */
78
82
  symbolKind?: string;
79
83
  /** Resolved hierarchical path in the symbol tree */
80
84
  symbolPath?: string;
@@ -100,6 +104,7 @@ export interface ToolsSDK {
100
104
  * Only the final response string flows back.
101
105
  * @param prompt - Prompt for the sub-agent
102
106
  * @param options - Optional limits
107
+ * @param options.contextData - Optional key/value context injected into child session sandbox
103
108
  * @param options.maxIterations - Maximum agentic iterations (default: 5)
104
109
  * @returns Promise resolving to the sub-agent's final response
105
110
  */
@@ -115,6 +120,44 @@ export interface ToolsSDK {
115
120
  * @returns Promise resolving to curate result with applied operations and summary
116
121
  */
117
122
  curate(operations: CurateOperation[], options?: CurateOptions): Promise<CurateResult>;
123
+ /**
124
+ * Pre-built curation helpers — reduces LLM iteration overhead.
125
+ * Stateless functions except recordProgress (intentionally mutating).
126
+ */
127
+ readonly curation: {
128
+ /** Intelligent boundary-aware text splitting */
129
+ chunk(context: string, options?: {
130
+ overlap?: number;
131
+ size?: number;
132
+ }): ChunkResult;
133
+ /** Remove near-duplicate facts using Jaccard word-overlap similarity */
134
+ dedup(facts: CurationFact[], threshold?: number): CurationFact[];
135
+ /** Find [USER]: and [ASSISTANT]: markers with offsets */
136
+ detectMessageBoundaries(context: string): MessageBoundary[];
137
+ /** Group facts by subject, with fallback to category */
138
+ groupBySubject(facts: CurationFact[]): Record<string, CurationFact[]>;
139
+ /** Parallel LLM extraction over chunked context. Curate mode only. */
140
+ mapExtract(context: string, options: {
141
+ chunkSize?: number;
142
+ concurrency?: number;
143
+ maxContextTokens?: number;
144
+ prompt: string;
145
+ taskId?: string;
146
+ }): Promise<{
147
+ facts: CurationFact[];
148
+ failed: number;
149
+ succeeded: number;
150
+ total: number;
151
+ }>;
152
+ /** Combine Steps 0-2 into one call: metadata + history + preview + mode recommendation */
153
+ recon(context: string, meta: Record<string, unknown>, history: Record<string, unknown>): ReconResult;
154
+ /** Push entry into history and increment totalProcessed (intentionally mutating) */
155
+ recordProgress(history: Record<string, unknown>, entry: {
156
+ domain: string;
157
+ keyFacts: string[];
158
+ title: string;
159
+ }): void;
160
+ };
118
161
  /**
119
162
  * Detect and validate domains from input data.
120
163
  * Use this to analyze text and categorize it into knowledge domains.
@@ -170,6 +213,10 @@ export interface ToolsSDK {
170
213
  * Options for creating a Tools SDK instance.
171
214
  */
172
215
  export interface CreateToolsSDKOptions {
216
+ /** Command type — when 'query', mutating APIs (curate, writeFile) are disabled */
217
+ commandType?: string;
218
+ /** Content generator for parallel LLM operations (mapExtract) */
219
+ contentGenerator?: IContentGenerator;
173
220
  /** Curate service for knowledge curation */
174
221
  curateService?: ICurateService;
175
222
  /** File system service for file operations */