@openanonymity/nanomem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +194 -0
  2. package/package.json +85 -0
  3. package/src/backends/BaseStorage.js +177 -0
  4. package/src/backends/filesystem.js +177 -0
  5. package/src/backends/indexeddb.js +208 -0
  6. package/src/backends/ram.js +113 -0
  7. package/src/backends/schema.js +42 -0
  8. package/src/bullets/bulletIndex.js +125 -0
  9. package/src/bullets/compaction.js +109 -0
  10. package/src/bullets/index.js +16 -0
  11. package/src/bullets/normalize.js +241 -0
  12. package/src/bullets/parser.js +199 -0
  13. package/src/bullets/scoring.js +53 -0
  14. package/src/cli/auth.js +323 -0
  15. package/src/cli/commands.js +411 -0
  16. package/src/cli/config.js +120 -0
  17. package/src/cli/diff.js +68 -0
  18. package/src/cli/help.js +84 -0
  19. package/src/cli/output.js +269 -0
  20. package/src/cli/spinner.js +54 -0
  21. package/src/cli.js +178 -0
  22. package/src/engine/compactor.js +247 -0
  23. package/src/engine/executors.js +152 -0
  24. package/src/engine/ingester.js +229 -0
  25. package/src/engine/retriever.js +414 -0
  26. package/src/engine/toolLoop.js +176 -0
  27. package/src/imports/chatgpt.js +160 -0
  28. package/src/imports/index.js +14 -0
  29. package/src/imports/markdown.js +104 -0
  30. package/src/imports/oaFastchat.js +124 -0
  31. package/src/index.js +199 -0
  32. package/src/llm/anthropic.js +264 -0
  33. package/src/llm/openai.js +179 -0
  34. package/src/prompt_sets/conversation/ingestion.js +51 -0
  35. package/src/prompt_sets/document/ingestion.js +43 -0
  36. package/src/prompt_sets/index.js +31 -0
  37. package/src/types.js +382 -0
  38. package/src/utils/portability.js +174 -0
  39. package/types/backends/BaseStorage.d.ts +42 -0
  40. package/types/backends/filesystem.d.ts +11 -0
  41. package/types/backends/indexeddb.d.ts +12 -0
  42. package/types/backends/ram.d.ts +8 -0
  43. package/types/backends/schema.d.ts +14 -0
  44. package/types/bullets/bulletIndex.d.ts +47 -0
  45. package/types/bullets/compaction.d.ts +10 -0
  46. package/types/bullets/index.d.ts +36 -0
  47. package/types/bullets/normalize.d.ts +95 -0
  48. package/types/bullets/parser.d.ts +31 -0
  49. package/types/bullets/scoring.d.ts +12 -0
  50. package/types/engine/compactor.d.ts +27 -0
  51. package/types/engine/executors.d.ts +46 -0
  52. package/types/engine/ingester.d.ts +29 -0
  53. package/types/engine/retriever.d.ts +50 -0
  54. package/types/engine/toolLoop.d.ts +9 -0
  55. package/types/imports/chatgpt.d.ts +14 -0
  56. package/types/imports/index.d.ts +3 -0
  57. package/types/imports/markdown.d.ts +31 -0
  58. package/types/imports/oaFastchat.d.ts +30 -0
  59. package/types/index.d.ts +21 -0
  60. package/types/llm/anthropic.d.ts +16 -0
  61. package/types/llm/openai.d.ts +16 -0
  62. package/types/prompt_sets/conversation/ingestion.d.ts +7 -0
  63. package/types/prompt_sets/document/ingestion.d.ts +7 -0
  64. package/types/prompt_sets/index.d.ts +11 -0
  65. package/types/types.d.ts +293 -0
  66. package/types/utils/portability.d.ts +33 -0
@@ -0,0 +1,414 @@
1
+ /**
2
+ * MemoryRetriever — Read path for agentic memory.
3
+ *
4
+ * Uses tool-calling via the agentic loop to let the LLM search, read,
5
+ * and assemble relevant memory context. Falls back to brute-force text
6
+ * search if the LLM call fails.
7
+ */
8
+ /** @import { LLMClient, Message, ProgressEvent, RetrievalResult, StorageBackend, ToolDefinition } from '../types.js' */
9
+ import { runAgenticToolLoop } from './toolLoop.js';
10
+ import { createRetrievalExecutors } from './executors.js';
11
+ import {
12
+ normalizeFactText,
13
+ parseBullets,
14
+ renderBullet,
15
+ scoreBullet,
16
+ tokenizeQuery
17
+ } from '../bullets/index.js';
18
+
19
+ const MAX_FILES_TO_LOAD = 8;
20
+ const MAX_TOTAL_CONTEXT_CHARS = 4000;
21
+ const MAX_SNIPPETS = 18;
22
+ const MAX_RECENT_CONTEXT_CHARS = 2000;
23
+
24
+ /** @type {ToolDefinition[]} */
25
+ const RETRIEVAL_TOOLS = [
26
+ {
27
+ type: 'function',
28
+ function: {
29
+ name: 'list_directory',
30
+ description: 'List all files and subdirectories in a directory. Use this to discover all files in a domain (e.g. "health" to see all health condition files).',
31
+ parameters: {
32
+ type: 'object',
33
+ properties: {
34
+ dir_path: { type: 'string', description: 'Directory path (e.g. "health", "personal", "work"). Use empty string for root.' }
35
+ },
36
+ required: ['dir_path']
37
+ }
38
+ }
39
+ },
40
+ {
41
+ type: 'function',
42
+ function: {
43
+ name: 'retrieve_file',
44
+ description: 'Search memory files by keyword. Returns paths of files whose content or path matches the query. Use read_file instead if you already know the file path.',
45
+ parameters: {
46
+ type: 'object',
47
+ properties: {
48
+ query: { type: 'string', description: 'Keyword to search for in file contents (e.g. "cooking", "Stanford", "project")' }
49
+ },
50
+ required: ['query']
51
+ }
52
+ }
53
+ },
54
+ {
55
+ type: 'function',
56
+ function: {
57
+ name: 'read_file',
58
+ description: 'Read the content of a memory file by its path.',
59
+ parameters: {
60
+ type: 'object',
61
+ properties: {
62
+ path: { type: 'string', description: 'File path to read (e.g. personal/about.md)' }
63
+ },
64
+ required: ['path']
65
+ }
66
+ }
67
+ },
68
+ {
69
+ type: 'function',
70
+ function: {
71
+ name: 'assemble_context',
72
+ description: 'Synthesize and return the final answer to the user\'s query based on what you read. Do NOT paste raw file content — write a clear, direct answer in plain prose. You MUST call this when done, even if nothing relevant was found (pass an empty string).',
73
+ parameters: {
74
+ type: 'object',
75
+ properties: {
76
+ content: { type: 'string', description: 'A synthesized, human-readable answer to the query derived from the memory files. Write prose, not raw bullet dumps. If nothing relevant was found, pass an empty string.' }
77
+ },
78
+ required: ['content']
79
+ }
80
+ }
81
+ }
82
+ ];
83
+
84
+ const RETRIEVAL_SYSTEM_PROMPT = `You are a memory retrieval assistant. Your job is to find and assemble relevant personal context from the user's memory files to help answer their query.
85
+
86
+ You have access to a memory filesystem. The index below shows all available files:
87
+
88
+ \`\`\`
89
+ {INDEX}
90
+ \`\`\`
91
+
92
+ Instructions:
93
+ 1. Look at the index above. If you can already see relevant file paths, use read_file directly to read them.
94
+ 2. Use retrieve_file only when you need to search by keyword (e.g. "cooking", "Stanford") — it searches file contents, not paths.
95
+ 3. Use list_directory to see ALL files in a directory when the query relates to a broad domain (e.g. list "health" for any medicine/health query).
96
+ 4. Read at most ${MAX_FILES_TO_LOAD} files.
97
+ 5. You MUST always finish by calling assemble_context — write a direct, synthesized answer in plain prose based on what you read. Do NOT paste raw bullet lists or file content. If the query is historical or comparative, reason over the facts and answer accordingly.
98
+ 6. If nothing is relevant, call assemble_context with an empty string.
99
+
100
+ IMPORTANT — Domain-exhaustive retrieval:
101
+ - When a query touches a domain (health, work, personal), prefer completeness over selectivity within that domain. File descriptions may be incomplete.
102
+ - For family-related queries: check personal/family.md AND any health files about family members.
103
+
104
+ When recent conversation context is provided alongside the query, use it to resolve references like "that", "the same", "what we discussed", etc. The conversation shows what the user has been talking about recently.
105
+
106
+ Only include content that genuinely helps answer this specific query. Do not include unrelated files from other domains.`;
107
+
108
+
109
+ class MemoryRetriever {
110
+ constructor({ backend, bulletIndex, llmClient, model, onProgress, onModelText }) {
111
+ this._backend = backend;
112
+ this._bulletIndex = bulletIndex;
113
+ this._llmClient = llmClient;
114
+ this._model = model;
115
+ this._onProgress = onProgress || null;
116
+ this._onModelText = onModelText || null;
117
+ }
118
+
119
+ /**
120
+ * Retrieve relevant memory context for a user query.
121
+ *
122
+ * @param {string} query the user's message text
123
+ * @param {string} [conversationText] current session text for reference resolution
124
+ * @returns {Promise<RetrievalResult | null>}
125
+ */
126
+ async retrieveForQuery(query, conversationText) {
127
+ if (!query || !query.trim()) return null;
128
+
129
+ const onProgress = this._onProgress;
130
+ const onModelText = this._onModelText;
131
+
132
+ onProgress?.({ stage: 'init', message: 'Reading memory index...' });
133
+ await this._backend.init();
134
+ const index = await this._backend.getTree();
135
+
136
+ if (!index || await this._isMemoryEmpty(index)) {
137
+ return null;
138
+ }
139
+
140
+ let result;
141
+ try {
142
+ onProgress?.({ stage: 'retrieval', message: 'Selecting relevant memory files...' });
143
+ result = await this._toolCallingRetrieval(query, index, onProgress, conversationText, onModelText);
144
+ } catch (err) {
145
+ const message = err instanceof Error ? err.message : String(err);
146
+ onProgress?.({ stage: 'fallback', message: `LLM unavailable (${message}) — falling back to keyword search. Results may be less accurate.` });
147
+ result = await this._textSearchFallbackWithLoad(query, onProgress, conversationText);
148
+ }
149
+
150
+ // Post-filter assembled context to remove facts already in the conversation
151
+ if (result?.assembledContext && conversationText) {
152
+ result.assembledContext = this._filterRedundantContext(result.assembledContext, conversationText);
153
+ }
154
+
155
+ return result;
156
+ }
157
+
158
+ async _toolCallingRetrieval(query, index, onProgress, conversationText, onModelText) {
159
+ const systemPrompt = RETRIEVAL_SYSTEM_PROMPT
160
+ .replace('{INDEX}', index);
161
+ const toolExecutors = createRetrievalExecutors(this._backend);
162
+
163
+ const recentContext = this._buildRecentContext(conversationText);
164
+ const userContent = recentContext
165
+ ? `Recent conversation:\n\`\`\`\n${recentContext}\n\`\`\`\n\nCurrent query: ${query}`
166
+ : query;
167
+
168
+ const { terminalToolResult, toolCallLog } = await runAgenticToolLoop({
169
+ llmClient: this._llmClient,
170
+ model: this._model,
171
+ tools: RETRIEVAL_TOOLS,
172
+ toolExecutors,
173
+ messages: [
174
+ { role: 'system', content: systemPrompt },
175
+ { role: 'user', content: userContent }
176
+ ],
177
+ terminalTool: 'assemble_context',
178
+ maxIterations: 8,
179
+ maxOutputTokens: 4000,
180
+ temperature: 0,
181
+ onToolCall: (name, args, result) => {
182
+ onProgress?.({ stage: 'tool_call', message: `Tool: ${name}`, tool: name, args, result });
183
+ },
184
+ onModelText,
185
+ onReasoning: (chunk, iteration) => {
186
+ onProgress?.({ stage: 'reasoning', message: chunk, iteration });
187
+ }
188
+ });
189
+
190
+ const files = [];
191
+ const seenPaths = new Set();
192
+ for (const entry of toolCallLog) {
193
+ if (entry.name === 'read_file' && entry.args?.path && entry.result) {
194
+ const path = entry.args.path;
195
+ if (seenPaths.has(path)) continue;
196
+ try {
197
+ const parsed = JSON.parse(entry.result);
198
+ if (parsed.error) continue;
199
+ } catch { /* not JSON, it's file content */ }
200
+ seenPaths.add(path);
201
+ files.push({ path, content: entry.result });
202
+ }
203
+ }
204
+
205
+ const assembledContext = terminalToolResult?.arguments?.content || null;
206
+ const paths = files.map(f => f.path);
207
+
208
+ if (files.length === 0 && !assembledContext) return null;
209
+
210
+ const snippetContext = await this._buildSnippetContext(paths, query, conversationText);
211
+
212
+ onProgress?.({
213
+ stage: 'complete',
214
+ message: `Retrieved ${files.length} memory file${files.length === 1 ? '' : 's'}.`,
215
+ paths
216
+ });
217
+
218
+ return { files, paths, assembledContext: assembledContext || snippetContext };
219
+ }
220
+
221
+ async _textSearchFallbackWithLoad(query, onProgress, conversationText) {
222
+ const paths = await this._textSearchFallback(query);
223
+ if (!paths || paths.length === 0) return null;
224
+
225
+ const MAX_PER_FILE_CHARS = 1500;
226
+ const files = [];
227
+ let total = 0;
228
+ for (const path of paths.slice(0, MAX_FILES_TO_LOAD)) {
229
+ onProgress?.({ stage: 'loading', message: `Loading ${path}...`, path });
230
+ const raw = await this._backend.read(path);
231
+ if (!raw) continue;
232
+ const content = raw.length > MAX_PER_FILE_CHARS
233
+ ? raw.slice(0, MAX_PER_FILE_CHARS) + '...(truncated)'
234
+ : raw;
235
+ if (total + content.length > MAX_TOTAL_CONTEXT_CHARS) break;
236
+ files.push({ path, content });
237
+ total += content.length;
238
+ }
239
+
240
+ if (files.length === 0) return null;
241
+
242
+ const assembled = await this._buildSnippetContext(files.map(f => f.path), query, conversationText);
243
+
244
+ onProgress?.({
245
+ stage: 'complete',
246
+ message: `Retrieved ${files.length} memory file${files.length === 1 ? '' : 's'}.`,
247
+ paths: files.map(f => f.path)
248
+ });
249
+
250
+ return { files, paths: files.map(f => f.path), assembledContext: assembled };
251
+ }
252
+
253
+ async _textSearchFallback(query) {
254
+ const words = query.split(/\s+/).filter(w => w.length > 3).slice(0, 3);
255
+ const allPaths = new Set();
256
+
257
+ for (const word of words) {
258
+ const results = await this._backend.search(word);
259
+ for (const r of results) {
260
+ allPaths.add(r.path);
261
+ }
262
+ }
263
+
264
+ return [...allPaths].slice(0, MAX_FILES_TO_LOAD);
265
+ }
266
+
267
+ async _buildSnippetContext(paths, query, conversationText) {
268
+ const queryTerms = tokenizeQuery(query);
269
+ let candidates = [];
270
+ const convWords = conversationText
271
+ ? new Set(normalizeFactText(conversationText).split(/\s+/).filter(w => w.length >= 3))
272
+ : null;
273
+
274
+ await this._bulletIndex.init();
275
+ const indexed = this._bulletIndex.getBulletsForPaths(paths);
276
+
277
+ if (indexed.length === 0) {
278
+ for (const path of paths) {
279
+ await this._bulletIndex.refreshPath(path);
280
+ }
281
+ }
282
+
283
+ const indexedAfterRefresh = this._bulletIndex.getBulletsForPaths(paths);
284
+ for (const item of indexedAfterRefresh) {
285
+ const score = scoreBullet(item.bullet, queryTerms);
286
+ candidates.push({
287
+ path: item.path,
288
+ score,
289
+ text: renderBullet(item.bullet),
290
+ updatedAt: item.bullet.updatedAt || '',
291
+ fileUpdatedAt: item.fileUpdatedAt || 0
292
+ });
293
+ }
294
+
295
+ // Legacy fallback if a path is still not indexable.
296
+ if (candidates.length === 0) {
297
+ for (const path of paths) {
298
+ const raw = await this._backend.read(path);
299
+ if (!raw) continue;
300
+ const bullets = parseBullets(raw);
301
+ if (bullets.length > 0) {
302
+ for (const bullet of bullets) {
303
+ const score = scoreBullet(bullet, queryTerms);
304
+ candidates.push({ path, score, text: renderBullet(bullet), updatedAt: bullet.updatedAt || '' });
305
+ }
306
+ continue;
307
+ }
308
+ for (const snippet of this._scoreRawLines(raw, queryTerms)) {
309
+ candidates.push({ path, score: snippet.score, text: `- ${snippet.text}`, updatedAt: '' });
310
+ }
311
+ }
312
+ }
313
+
314
+ // Filter out bullets already present in the current conversation
315
+ if (convWords && convWords.size > 0) {
316
+ candidates = candidates.filter(c => {
317
+ const factWords = normalizeFactText(c.text).split(/\s+/).filter(w => w.length >= 3);
318
+ if (factWords.length < 2) return true;
319
+ const matchCount = factWords.filter(w => convWords.has(w)).length;
320
+ return matchCount / factWords.length < 0.8;
321
+ });
322
+ }
323
+
324
+ if (candidates.length === 0) return null;
325
+
326
+ candidates.sort((a, b) => {
327
+ if (b.score !== a.score) return b.score - a.score;
328
+ return String(b.updatedAt || '').localeCompare(String(a.updatedAt || ''));
329
+ });
330
+
331
+ const selected = candidates.slice(0, MAX_SNIPPETS);
332
+ const grouped = new Map();
333
+ for (const item of selected) {
334
+ const list = grouped.get(item.path) || [];
335
+ list.push(item.text);
336
+ grouped.set(item.path, list);
337
+ }
338
+
339
+ let total = 0;
340
+ const sections = [];
341
+ for (const [path, lines] of grouped.entries()) {
342
+ const section = `### ${path}\n${lines.join('\n')}`;
343
+ if (total + section.length > MAX_TOTAL_CONTEXT_CHARS) break;
344
+ sections.push(section);
345
+ total += section.length;
346
+ }
347
+
348
+ return sections.join('\n\n').trim() || null;
349
+ }
350
+
351
+ _filterRedundantContext(assembledContext, conversationText) {
352
+ const convWords = new Set(
353
+ normalizeFactText(conversationText).split(/\s+/).filter(w => w.length >= 3)
354
+ );
355
+ if (convWords.size === 0) return assembledContext;
356
+
357
+ const lines = assembledContext.split('\n');
358
+ const filtered = lines.filter(line => {
359
+ const trimmed = line.trim();
360
+ if (!trimmed || trimmed.startsWith('#') || !trimmed.startsWith('-')) return true;
361
+ const factWords = normalizeFactText(trimmed).split(/\s+/).filter(w => w.length >= 3);
362
+ if (factWords.length < 2) return true;
363
+ const matchCount = factWords.filter(w => convWords.has(w)).length;
364
+ return matchCount / factWords.length < 0.8;
365
+ });
366
+
367
+ const result = filtered.join('\n').trim();
368
+ return result || null;
369
+ }
370
+
371
+ _scoreRawLines(content, queryTerms) {
372
+ const lines = String(content || '')
373
+ .split('\n')
374
+ .map((line) => line.trim())
375
+ .filter(Boolean)
376
+ .filter((line) => !line.startsWith('#'));
377
+ if (lines.length === 0) return [];
378
+
379
+ const snippets = lines.map((line) => {
380
+ const lower = line.toLowerCase();
381
+ let score = 0;
382
+ for (const term of queryTerms) {
383
+ if (lower.includes(term)) score += 1;
384
+ }
385
+ return { text: line, score };
386
+ });
387
+
388
+ snippets.sort((a, b) => b.score - a.score);
389
+ return snippets.slice(0, 5);
390
+ }
391
+
392
+ _buildRecentContext(conversationText) {
393
+ if (!conversationText || conversationText.length < 20) return null;
394
+ if (conversationText.length <= MAX_RECENT_CONTEXT_CHARS) {
395
+ const hasMultipleTurns = /\n/.test(conversationText.trim());
396
+ return hasMultipleTurns ? conversationText : null;
397
+ }
398
+ let tail = conversationText.slice(-MAX_RECENT_CONTEXT_CHARS);
399
+ const firstNewline = tail.indexOf('\n');
400
+ if (firstNewline > 0 && firstNewline < 200) {
401
+ tail = tail.slice(firstNewline + 1);
402
+ }
403
+ return tail.trim() || null;
404
+ }
405
+
406
+ async _isMemoryEmpty(index) {
407
+ const all = await this._backend.exportAll();
408
+ const realFiles = all.filter(f => !f.path.endsWith('_tree.md'));
409
+ if (realFiles.length === 0) return true;
410
+ return !realFiles.some(f => (f.itemCount || 0) > 0);
411
+ }
412
+ }
413
+
414
+ export { MemoryRetriever };
@@ -0,0 +1,176 @@
1
+ /**
2
+ * AgenticToolLoop — Backend-agnostic agentic tool-calling loop.
3
+ *
4
+ * Sends messages to an LLM with OpenAI-format tool definitions, executes
5
+ * tool calls locally, and loops until the LLM stops calling tools or a
6
+ * terminal tool is invoked. When an onReasoning callback is provided,
7
+ * uses streaming to surface reasoning tokens in real time; otherwise
8
+ * uses non-streaming requests for reliable tool call parsing.
9
+ */
10
+ /** @import { ToolLoopOptions, ToolLoopResult, ChatCompletionResponse, ToolCall, LLMMessage } from '../types.js' */
11
+
12
+ const DEFAULT_MAX_ITERATIONS = 10;
13
+ const DEFAULT_MAX_OUTPUT_TOKENS = 500;
14
+ const DEFAULT_TEMPERATURE = 0;
15
+
16
+ /**
17
+ * Run an agentic tool-calling loop.
18
+ *
19
+ * @param {ToolLoopOptions} options
20
+ * @returns {Promise<ToolLoopResult>}
21
+ */
22
+ export async function runAgenticToolLoop(options) {
23
+ const {
24
+ llmClient,
25
+ model,
26
+ tools,
27
+ toolExecutors,
28
+ messages: initialMessages,
29
+ terminalTool = null,
30
+ maxIterations = DEFAULT_MAX_ITERATIONS,
31
+ maxOutputTokens = DEFAULT_MAX_OUTPUT_TOKENS,
32
+ temperature = DEFAULT_TEMPERATURE,
33
+ onToolCall = null,
34
+ onModelText = null,
35
+ onReasoning = null,
36
+ signal = null
37
+ } = options;
38
+
39
+ const messages = [...initialMessages];
40
+ const toolCallLog = [];
41
+ let textResponse = '';
42
+ let terminalToolResult = null;
43
+ let iterations = 0;
44
+
45
+ // Stream when onReasoning is provided (surfaces reasoning tokens in real time).
46
+ // Otherwise use non-streaming createChatCompletion for reliable tool call parsing.
47
+ const useStreaming = !!onReasoning && !!llmClient.streamChatCompletion;
48
+
49
+ for (let i = 0; i < maxIterations; i++) {
50
+ if (signal?.aborted) break;
51
+ iterations++;
52
+
53
+ const requestPayload = {
54
+ model,
55
+ messages: messages.map(m => ({
56
+ role: m.role,
57
+ content: m.content,
58
+ ...(m.tool_calls ? { tool_calls: m.tool_calls } : {}),
59
+ ...(m.tool_call_id ? { tool_call_id: m.tool_call_id } : {})
60
+ })),
61
+ tools,
62
+ max_tokens: maxOutputTokens,
63
+ temperature
64
+ };
65
+
66
+ let response;
67
+ let iterationText = '';
68
+
69
+ if (useStreaming) {
70
+ response = await llmClient.streamChatCompletion({
71
+ ...requestPayload,
72
+ onDelta: (d) => { iterationText += d; },
73
+ onReasoning: (d) => { onReasoning(d, iterations); }
74
+ });
75
+ } else {
76
+ response = await llmClient.createChatCompletion(requestPayload);
77
+ }
78
+
79
+ // If output was truncated and no tool calls came through, retry once with 2× tokens.
80
+ if (response.finish_reason === 'length' && (response.tool_calls || []).length === 0) {
81
+ const retryTokens = requestPayload.max_tokens * 2;
82
+ const retryPayload = { ...requestPayload, max_tokens: retryTokens };
83
+ iterationText = '';
84
+ response = useStreaming
85
+ ? await llmClient.streamChatCompletion({ ...retryPayload, onDelta: (d) => { iterationText += d; }, onReasoning: (d) => { onReasoning(d, iterations); } })
86
+ : await llmClient.createChatCompletion(retryPayload);
87
+ }
88
+
89
+ const responseToolCalls = response.tool_calls || [];
90
+ const responseText = iterationText || response.content || '';
91
+
92
+ // Forward model text to caller (even alongside tool calls)
93
+ if (responseText && onModelText) {
94
+ onModelText(responseText, iterations);
95
+ }
96
+
97
+ // No tool calls → LLM is done, return text response
98
+ if (responseToolCalls.length === 0) {
99
+ textResponse = responseText;
100
+ break;
101
+ }
102
+
103
+ // Append assistant message with tool_calls to conversation
104
+ messages.push({
105
+ role: 'assistant',
106
+ content: responseText || null,
107
+ tool_calls: responseToolCalls
108
+ });
109
+
110
+ // Execute each tool call
111
+ let hitTerminal = false;
112
+ for (const tc of responseToolCalls) {
113
+ const toolName = tc.function?.name || '';
114
+ let args;
115
+ try {
116
+ args = typeof tc.function?.arguments === 'string'
117
+ ? JSON.parse(tc.function.arguments)
118
+ : (tc.function?.arguments || {});
119
+ } catch {
120
+ args = {};
121
+ }
122
+
123
+ const toolCallId = tc.id || '';
124
+
125
+ // Check for terminal tool
126
+ if (terminalTool && toolName === terminalTool) {
127
+ terminalToolResult = { name: toolName, arguments: args };
128
+ toolCallLog.push({ name: toolName, args, result: '[terminal]', toolCallId });
129
+ onToolCall?.(toolName, args, '[terminal]');
130
+
131
+ // Still need to append tool result so conversation is valid
132
+ messages.push({
133
+ role: 'tool',
134
+ content: JSON.stringify({ acknowledged: true }),
135
+ tool_call_id: toolCallId
136
+ });
137
+ hitTerminal = true;
138
+ break;
139
+ }
140
+
141
+ // Execute the tool
142
+ let result;
143
+ const executor = toolExecutors[toolName];
144
+ if (!executor) {
145
+ result = JSON.stringify({ error: `Unknown tool: ${toolName}` });
146
+ } else {
147
+ try {
148
+ result = await executor(args);
149
+ } catch (err) {
150
+ const message = err instanceof Error ? err.message : String(err);
151
+ result = JSON.stringify({ error: `Tool error: ${message}` });
152
+ }
153
+ }
154
+
155
+ toolCallLog.push({ name: toolName, args, result, toolCallId });
156
+ onToolCall?.(toolName, args, result);
157
+
158
+ // Append tool result
159
+ messages.push({
160
+ role: 'tool',
161
+ content: typeof result === 'string' ? result : JSON.stringify(result),
162
+ tool_call_id: toolCallId
163
+ });
164
+ }
165
+
166
+ if (hitTerminal) break;
167
+ }
168
+
169
+ return {
170
+ textResponse,
171
+ terminalToolResult,
172
+ messages,
173
+ iterations,
174
+ toolCallLog
175
+ };
176
+ }