@dotsetlabs/dotclaw 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/.env.example +12 -0
  2. package/README.md +5 -2
  3. package/config-examples/runtime.json +46 -5
  4. package/config-examples/tool-budgets.json +1 -1
  5. package/config-examples/tool-policy.json +1 -1
  6. package/container/Dockerfile +5 -1
  7. package/container/agent-runner/package.json +1 -1
  8. package/container/agent-runner/src/agent-config.ts +67 -17
  9. package/container/agent-runner/src/container-protocol.ts +6 -0
  10. package/container/agent-runner/src/daemon.ts +18 -5
  11. package/container/agent-runner/src/index.ts +442 -243
  12. package/container/agent-runner/src/ipc.ts +76 -1
  13. package/container/agent-runner/src/mcp-registry.ts +11 -0
  14. package/container/agent-runner/src/memory.ts +145 -3
  15. package/container/agent-runner/src/process-registry.ts +257 -0
  16. package/container/agent-runner/src/system-prompt.ts +337 -0
  17. package/container/agent-runner/src/tools.ts +382 -29
  18. package/container/agent-runner/src/tts.ts +42 -0
  19. package/dist/agent-context.d.ts +1 -0
  20. package/dist/agent-context.d.ts.map +1 -1
  21. package/dist/agent-context.js +6 -3
  22. package/dist/agent-context.js.map +1 -1
  23. package/dist/agent-execution.d.ts +1 -0
  24. package/dist/agent-execution.d.ts.map +1 -1
  25. package/dist/agent-execution.js +11 -4
  26. package/dist/agent-execution.js.map +1 -1
  27. package/dist/container-protocol.d.ts +8 -0
  28. package/dist/container-protocol.d.ts.map +1 -1
  29. package/dist/container-runner.d.ts.map +1 -1
  30. package/dist/container-runner.js +44 -8
  31. package/dist/container-runner.js.map +1 -1
  32. package/dist/error-messages.d.ts.map +1 -1
  33. package/dist/error-messages.js +22 -5
  34. package/dist/error-messages.js.map +1 -1
  35. package/dist/index.js +53 -6
  36. package/dist/index.js.map +1 -1
  37. package/dist/ipc-dispatcher.d.ts.map +1 -1
  38. package/dist/ipc-dispatcher.js +336 -6
  39. package/dist/ipc-dispatcher.js.map +1 -1
  40. package/dist/memory-recall.d.ts +1 -0
  41. package/dist/memory-recall.d.ts.map +1 -1
  42. package/dist/memory-recall.js +3 -0
  43. package/dist/memory-recall.js.map +1 -1
  44. package/dist/memory-store.d.ts.map +1 -1
  45. package/dist/memory-store.js +5 -3
  46. package/dist/memory-store.js.map +1 -1
  47. package/dist/message-pipeline.d.ts.map +1 -1
  48. package/dist/message-pipeline.js +53 -12
  49. package/dist/message-pipeline.js.map +1 -1
  50. package/dist/model-registry.d.ts +15 -0
  51. package/dist/model-registry.d.ts.map +1 -1
  52. package/dist/model-registry.js +56 -12
  53. package/dist/model-registry.js.map +1 -1
  54. package/dist/providers/telegram/telegram-provider.d.ts +1 -0
  55. package/dist/providers/telegram/telegram-provider.d.ts.map +1 -1
  56. package/dist/providers/telegram/telegram-provider.js +14 -0
  57. package/dist/providers/telegram/telegram-provider.js.map +1 -1
  58. package/dist/request-router.d.ts +0 -1
  59. package/dist/request-router.d.ts.map +1 -1
  60. package/dist/request-router.js +18 -6
  61. package/dist/request-router.js.map +1 -1
  62. package/dist/runtime-config.d.ts +14 -0
  63. package/dist/runtime-config.d.ts.map +1 -1
  64. package/dist/runtime-config.js +64 -16
  65. package/dist/runtime-config.js.map +1 -1
  66. package/dist/task-scheduler.d.ts.map +1 -1
  67. package/dist/task-scheduler.js +3 -5
  68. package/dist/task-scheduler.js.map +1 -1
  69. package/dist/tool-budgets.js +1 -1
  70. package/dist/tool-budgets.js.map +1 -1
  71. package/dist/tool-policy.d.ts.map +1 -1
  72. package/dist/tool-policy.js +13 -3
  73. package/dist/tool-policy.js.map +1 -1
  74. package/dist/webhook.d.ts +14 -0
  75. package/dist/webhook.d.ts.map +1 -0
  76. package/dist/webhook.js +169 -0
  77. package/dist/webhook.js.map +1 -0
  78. package/package.json +3 -2
@@ -6,8 +6,8 @@
6
6
  import fs from 'fs';
7
7
  import path from 'path';
8
8
  import { fileURLToPath } from 'url';
9
- import { OpenRouter, stepCountIs } from '@openrouter/sdk';
10
- import { createTools, discoverMcpTools, ToolCallRecord } from './tools.js';
9
+ import { OpenRouter } from '@openrouter/sdk';
10
+ import { createTools, discoverMcpTools, ToolCallRecord, type ToolResultRecord } from './tools.js';
11
11
  import { createIpcHandlers } from './ipc.js';
12
12
  import { loadAgentConfig } from './agent-config.js';
13
13
  import { OUTPUT_START_MARKER, OUTPUT_END_MARKER, type ContainerInput, type ContainerOutput } from './container-protocol.js';
@@ -19,15 +19,21 @@ import {
19
19
  shouldCompact,
20
20
  archiveConversation,
21
21
  buildSummaryPrompt,
22
+ buildMultiPartSummaryPrompt,
23
+ splitMessagesByTokenShare,
22
24
  parseSummaryResponse,
23
25
  retrieveRelevantMemories,
24
26
  saveMemoryState,
25
27
  writeHistory,
28
+ estimateTokens,
29
+ pruneContextMessages,
30
+ limitHistoryTurns,
26
31
  MemoryConfig,
27
32
  Message
28
33
  } from './memory.js';
29
34
  import { loadPromptPackWithCanary, formatPromptPack, PromptPack } from './prompt-packs.js';
30
- import { buildSkillCatalog, formatSkillCatalog, type SkillCatalog } from './skill-loader.js';
35
+ import { buildSkillCatalog, type SkillCatalog } from './skill-loader.js';
36
+ import { buildSystemPrompt } from './system-prompt.js';
31
37
 
32
38
  type OpenRouterResult = ReturnType<OpenRouter['callModel']>;
33
39
 
@@ -50,6 +56,51 @@ const PROMPT_PACKS_MAX_CHARS = agent.promptPacks.maxChars;
50
56
  const PROMPT_PACKS_MAX_DEMOS = agent.promptPacks.maxDemos;
51
57
  const PROMPT_PACKS_CANARY_RATE = agent.promptPacks.canaryRate;
52
58
 
59
+ // ── Model cooldown tracking ──────────────────────────────────────────
60
+ // After a model fails, put it in cooldown. 429 → 60s, 5xx/timeout → 300s.
61
+ const modelCooldowns = new Map<string, number>(); // model → cooldown-until epoch ms
62
+
63
+ function isModelInCooldown(model: string): boolean {
64
+ const until = modelCooldowns.get(model);
65
+ if (!until) return false;
66
+ if (Date.now() >= until) {
67
+ modelCooldowns.delete(model);
68
+ return false;
69
+ }
70
+ return true;
71
+ }
72
+
73
+ function cooldownModel(model: string, err: unknown): void {
74
+ const msg = err instanceof Error ? err.message : String(err);
75
+ const lower = msg.toLowerCase();
76
+ let durationMs = 300_000; // default: 5 min for 5xx/timeout
77
+ if (/429|rate.?limit/.test(lower)) {
78
+ durationMs = 60_000; // 1 min for rate limits
79
+ }
80
+ modelCooldowns.set(model, Date.now() + durationMs);
81
+ log(`Model ${model} in cooldown for ${durationMs / 1000}s`);
82
+ }
83
+
84
+ // ── Reply tag parsing ────────────────────────────────────────────────
85
+ // Parse [[reply_to_current]] and [[reply_to:<id>]] tags from agent output.
86
+ export function parseReplyTags(text: string): { cleanText: string; replyToId?: string } {
87
+ if (!text) return { cleanText: text };
88
+ const replyCurrentMatch = text.match(/\[\[reply_to_current\]\]/);
89
+ const replyIdMatch = text.match(/\[\[reply_to:(\d+)\]\]/);
90
+ let replyToId: string | undefined;
91
+ let cleanText = text;
92
+
93
+ if (replyIdMatch) {
94
+ replyToId = replyIdMatch[1];
95
+ cleanText = cleanText.replace(/\[\[reply_to:\d+\]\]/g, '').trim();
96
+ } else if (replyCurrentMatch) {
97
+ replyToId = '__current__'; // sentinel — host resolves to the triggering message
98
+ cleanText = cleanText.replace(/\[\[reply_to_current\]\]/g, '').trim();
99
+ }
100
+
101
+ return { cleanText, replyToId };
102
+ }
103
+
53
104
  let cachedOpenRouter: OpenRouter | null = null;
54
105
  let cachedOpenRouterKey = '';
55
106
  let cachedOpenRouterOptions = '';
@@ -119,6 +170,34 @@ async function getResponseText(result: OpenRouterResult, context: string): Promi
119
170
  return { text: '' };
120
171
  }
121
172
 
173
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
174
+ function extractTextFromApiResponse(response: any): string {
175
+ if (response?.outputText) return response.outputText;
176
+ for (const item of response?.output || []) {
177
+ if (item?.type === 'message') {
178
+ for (const part of item.content || []) {
179
+ if (part?.type === 'output_text' && part.text) return part.text;
180
+ }
181
+ }
182
+ }
183
+ return '';
184
+ }
185
+
186
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
187
+ function extractFunctionCalls(response: any): Array<{ id: string; name: string; arguments: any }> {
188
+ const calls: Array<{ id: string; name: string; arguments: unknown }> = [];
189
+ for (const item of response?.output || []) {
190
+ if (item?.type === 'function_call') {
191
+ let args = item.arguments;
192
+ if (typeof args === 'string') {
193
+ try { args = JSON.parse(args); } catch { /* keep as string */ }
194
+ }
195
+ calls.push({ id: item.callId, name: item.name, arguments: args });
196
+ }
197
+ }
198
+ return calls;
199
+ }
200
+
122
201
  function writeOutput(output: ContainerOutput): void {
123
202
  console.log(OUTPUT_START_MARKER);
124
203
  console.log(JSON.stringify(output));
@@ -232,6 +311,34 @@ function getConfig(config: ReturnType<typeof loadAgentConfig>): MemoryConfig & {
232
311
  };
233
312
  }
234
313
 
314
+ function resolveModelLimits(
315
+ input: ContainerInput,
316
+ configDefaults: { maxContextTokens: number; maxOutputTokens: number; compactionTriggerTokens: number; maxContextMessageTokens: number }
317
+ ) {
318
+ const caps = input.modelCapabilities;
319
+
320
+ // Context: use model capability, fall back to config
321
+ const contextLength = caps?.context_length || configDefaults.maxContextTokens;
322
+
323
+ // Output tokens: only set when explicitly configured by user.
324
+ // DO NOT use caps.max_completion_tokens — for reasoning models, maxOutputTokens covers
325
+ // both reasoning tokens AND visible text. Setting it to the model's max causes the model
326
+ // to allocate the entire budget to reasoning with 0 left for visible output.
327
+ let maxOutputTokens: number | undefined;
328
+ if (input.modelMaxOutputTokens && Number.isFinite(input.modelMaxOutputTokens)) {
329
+ maxOutputTokens = input.modelMaxOutputTokens; // Explicit cost-control override
330
+ }
331
+ // else: undefined — omit from callModel(), let the API decide token budgeting
332
+
333
+ // Derive other limits from context length
334
+ const outputReserve = maxOutputTokens || Math.floor(contextLength * 0.25);
335
+ const maxContextTokens = contextLength;
336
+ const compactionTriggerTokens = Math.max(1000, contextLength - outputReserve);
337
+ const maxContextMessageTokens = Math.max(1000, Math.floor(contextLength * 0.03));
338
+
339
+ return { maxContextTokens, maxOutputTokens, compactionTriggerTokens, maxContextMessageTokens };
340
+ }
341
+
235
342
  function getOpenRouterOptions(config: ReturnType<typeof loadAgentConfig>) {
236
343
  const timeoutMs = config.agent.openrouter.timeoutMs;
237
344
  const retryEnabled = config.agent.openrouter.retry;
@@ -294,9 +401,7 @@ function estimateMessagesTokens(messages: Message[], tokensPerChar: number, toke
294
401
  return total;
295
402
  }
296
403
 
297
- const MEMORY_SUMMARY_MAX_CHARS = 2000;
298
-
299
- function buildSystemInstructions(params: {
404
+ function buildInstructions(params: {
300
405
  assistantName: string;
301
406
  groupNotes?: string | null;
302
407
  globalNotes?: string | null;
@@ -322,185 +427,40 @@ function buildSystemInstructions(params: {
322
427
  memoryPolicyPack?: PromptPack | null;
323
428
  memoryRecallPack?: PromptPack | null;
324
429
  maxToolSteps?: number;
430
+ trimLevel?: number;
325
431
  }): string {
326
- const toolGuidance = [
327
- 'Key tool rules:',
328
- '- User attachments arrive in /workspace/group/inbox/ (see <attachment> tags). Process with Read/Bash/Python.',
329
- '- To send media from the web: download_url → send_photo/send_file/send_audio.',
330
- '- Charts/plots: matplotlib → savefig → send_photo. Graphviz → dot -Tpng → send_photo.',
331
- '- Voice messages are auto-transcribed (<transcript> in <attachment>). Reply with normal text — the host auto-converts to voice.',
332
- '- GitHub CLI (`gh`) is available if GH_TOKEN is set.',
333
- '- plugin__* and mcp_ext__* tools may be available if configured.'
334
- ].join('\n');
335
-
336
- const browserAutomation = agentConfig.agent.browser.enabled ? [
337
- 'Browser Tool: actions: navigate, snapshot, click, fill, screenshot, extract, evaluate, close.',
338
- 'Use snapshot with interactive=true for clickable refs (@e1, @e2). Screenshots → /workspace/group/screenshots/.'
339
- ].join('\n') : '';
340
-
341
- const hasAnyMemory = params.memorySummary || params.memoryFacts.length > 0 ||
342
- params.longTermRecall.length > 0 || params.userProfile;
343
-
344
- const memorySummary = params.memorySummary
345
- ? params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS)
346
- : '';
347
- const memoryFacts = params.memoryFacts.length > 0
348
- ? params.memoryFacts.map(fact => `- ${fact}`).join('\n')
349
- : '';
350
- const sessionRecall = params.sessionRecall.length > 0
351
- ? params.sessionRecall.map(item => `- ${item}`).join('\n')
352
- : '';
353
- const longTermRecall = params.longTermRecall.length > 0
354
- ? params.longTermRecall.map(item => `- ${item}`).join('\n')
355
- : '';
356
- const userProfile = params.userProfile || '';
357
- const memoryStats = params.memoryStats
358
- ? `Total: ${params.memoryStats.total}, User: ${params.memoryStats.user}, Group: ${params.memoryStats.group}, Global: ${params.memoryStats.global}`
359
- : '';
360
-
361
- const availableGroups = params.availableGroups && params.availableGroups.length > 0
362
- ? params.availableGroups
363
- .map(group => `- ${group.name} (chat ${group.jid}, last: ${group.lastActivity})`)
364
- .join('\n')
365
- : '';
366
-
367
- const groupNotes = params.groupNotes ? `Group notes:\n${params.groupNotes}` : '';
368
- const globalNotes = params.globalNotes ? `Global notes:\n${params.globalNotes}` : '';
369
- const skillNotes = params.skillCatalog ? formatSkillCatalog(params.skillCatalog) : '';
370
-
371
- const toolReliability = params.toolReliability && params.toolReliability.length > 0
372
- ? params.toolReliability
373
- .sort((a, b) => a.success_rate - b.success_rate)
374
- .slice(0, 20)
375
- .map(tool => {
376
- const pct = `${Math.round(tool.success_rate * 100)}%`;
377
- const avg = Number.isFinite(tool.avg_duration_ms) ? `${Math.round(tool.avg_duration_ms!)}ms` : 'n/a';
378
- return `- ${tool.name}: success ${pct} over ${tool.count} calls (avg ${avg})`;
379
- })
380
- .join('\n')
381
- : '';
382
-
383
- const behaviorNotes: string[] = [];
384
- const responseStyle = typeof params.behaviorConfig?.response_style === 'string'
385
- ? String(params.behaviorConfig.response_style)
386
- : '';
387
- if (responseStyle === 'concise') {
388
- behaviorNotes.push('Keep responses short and to the point.');
389
- } else if (responseStyle === 'detailed') {
390
- behaviorNotes.push('Give detailed, step-by-step responses when helpful.');
391
- }
392
- const toolBias = typeof params.behaviorConfig?.tool_calling_bias === 'number'
393
- ? Number(params.behaviorConfig.tool_calling_bias)
394
- : null;
395
- if (toolBias !== null && toolBias < 0.4) {
396
- behaviorNotes.push('Ask before using tools unless the intent is obvious.');
397
- } else if (toolBias !== null && toolBias > 0.6) {
398
- behaviorNotes.push('Use tools proactively when they add accuracy or save time.');
399
- }
400
- const cautionBias = typeof params.behaviorConfig?.caution_bias === 'number'
401
- ? Number(params.behaviorConfig.caution_bias)
402
- : null;
403
- if (cautionBias !== null && cautionBias > 0.6) {
404
- behaviorNotes.push('Double-check uncertain facts and flag limitations.');
405
- }
406
-
407
- const timezoneNote = params.timezone
408
- ? `Timezone: ${params.timezone}. Use this timezone when interpreting or presenting timestamps unless the user specifies another.`
409
- : '';
410
-
411
- const hostPlatformNote = params.hostPlatform
412
- ? (params.hostPlatform.startsWith('linux')
413
- ? `Host platform: ${params.hostPlatform} (matches container).`
414
- : `You are running inside a Linux container, but the user's host machine is ${params.hostPlatform}. Packages with platform-specific native binaries (e.g. esbuild, swc, sharp) installed here won't work on the host. When you create projects with dependencies, delete node_modules before finishing and tell the user to run the install command on their machine.`)
415
- : '';
416
-
417
- const scheduledNote = params.isScheduledTask
418
- ? `You are running as a scheduled task${params.taskId ? ` (task id: ${params.taskId})` : ''}. If you need to communicate, use \`mcp__dotclaw__send_message\`.`
419
- : '';
420
-
421
- const fmtPack = (label: string, pack: PromptPack | null | undefined) =>
422
- pack ? formatPromptPack({ label, pack, maxDemos: PROMPT_PACKS_MAX_DEMOS, maxChars: PROMPT_PACKS_MAX_CHARS }) : '';
423
-
424
- const PROMPT_PACKS_TOTAL_BUDGET = PROMPT_PACKS_MAX_CHARS * 3;
425
- const allPackBlocks: string[] = [];
426
- {
427
- const packEntries: Array<[string, PromptPack | null | undefined]> = [
428
- ['Tool Calling Guidelines', params.toolCallingPack],
429
- ['Tool Outcome Guidelines', params.toolOutcomePack],
430
- ['Task Extraction Guidelines', params.taskExtractionPack],
431
- ['Response Quality Guidelines', params.responseQualityPack],
432
- ['Memory Policy Guidelines', params.memoryPolicyPack],
433
- ['Memory Recall Guidelines', params.memoryRecallPack],
434
- ];
435
- let totalChars = 0;
436
- for (const [label, pack] of packEntries) {
437
- const block = fmtPack(label, pack);
438
- if (!block) continue;
439
- if (totalChars + block.length > PROMPT_PACKS_TOTAL_BUDGET) break;
440
- allPackBlocks.push(block);
441
- totalChars += block.length;
442
- }
443
- }
444
- const taskExtractionBlock = allPackBlocks.find(b => b.includes('Task Extraction')) || '';
445
- const responseQualityBlock = allPackBlocks.find(b => b.includes('Response Quality')) || '';
446
- const toolCallingBlock = allPackBlocks.find(b => b.includes('Tool Calling')) || '';
447
- const toolOutcomeBlock = allPackBlocks.find(b => b.includes('Tool Outcome')) || '';
448
- const memoryPolicyBlock = allPackBlocks.find(b => b.includes('Memory Policy')) || '';
449
- const memoryRecallBlock = allPackBlocks.find(b => b.includes('Memory Recall')) || '';
450
-
451
- const memorySections: string[] = [];
452
- {
453
- if (hasAnyMemory) {
454
- if (memorySummary) {
455
- memorySections.push('Long-term memory summary:', memorySummary);
456
- }
457
- if (memoryFacts) {
458
- memorySections.push('Long-term facts:', memoryFacts);
459
- }
460
- if (userProfile) {
461
- memorySections.push('User profile (if available):', userProfile);
462
- }
463
- if (longTermRecall) {
464
- memorySections.push('What you remember about the user (long-term):', longTermRecall);
465
- }
466
- if (memoryStats) {
467
- memorySections.push('Memory stats:', memoryStats);
468
- }
469
- } else {
470
- memorySections.push('No long-term memory available yet.');
471
- }
472
- }
473
-
474
- // Session recall is always included (local context from current conversation)
475
- if (sessionRecall) {
476
- memorySections.push('Recent conversation context:', sessionRecall);
477
- }
478
-
479
- return [
480
- `You are ${params.assistantName}, a personal assistant running inside DotClaw.${params.messagingPlatform ? ` You are currently connected via ${params.messagingPlatform}.` : ''}`,
481
- hostPlatformNote,
482
- scheduledNote,
483
- toolGuidance,
484
- browserAutomation,
485
- groupNotes,
486
- globalNotes,
487
- skillNotes,
488
- timezoneNote,
489
- toolCallingBlock,
490
- toolOutcomeBlock,
491
- taskExtractionBlock,
492
- responseQualityBlock,
493
- memoryPolicyBlock,
494
- memoryRecallBlock,
495
- ...memorySections,
496
- availableGroups ? `Available groups (main group only):\n${availableGroups}` : '',
497
- toolReliability ? `Tool reliability (recent):\n${toolReliability}` : '',
498
- behaviorNotes.length > 0 ? `Behavior notes:\n${behaviorNotes.join('\n')}` : '',
499
- params.maxToolSteps
500
- ? `You have a budget of ${params.maxToolSteps} tool steps per request. If a task is large, break your work into phases and always finish with a text summary of what you accomplished — never end on a tool call without a response.`
501
- : '',
502
- 'Be concise and helpful. When you use tools, summarize what happened rather than dumping raw output.'
503
- ].filter(Boolean).join('\n\n');
432
+ return buildSystemPrompt({
433
+ mode: 'full',
434
+ assistantName: params.assistantName,
435
+ messagingPlatform: params.messagingPlatform,
436
+ hostPlatform: params.hostPlatform,
437
+ timezone: params.timezone,
438
+ isScheduledTask: params.isScheduledTask,
439
+ taskId: params.taskId,
440
+ groupNotes: params.groupNotes,
441
+ globalNotes: params.globalNotes,
442
+ skillCatalog: params.skillCatalog,
443
+ memorySummary: params.memorySummary,
444
+ memoryFacts: params.memoryFacts,
445
+ sessionRecall: params.sessionRecall,
446
+ longTermRecall: params.longTermRecall,
447
+ userProfile: params.userProfile,
448
+ memoryStats: params.memoryStats,
449
+ availableGroups: params.availableGroups,
450
+ toolReliability: params.toolReliability,
451
+ behaviorConfig: params.behaviorConfig,
452
+ taskExtractionPack: params.taskExtractionPack,
453
+ responseQualityPack: params.responseQualityPack,
454
+ toolCallingPack: params.toolCallingPack,
455
+ toolOutcomePack: params.toolOutcomePack,
456
+ memoryPolicyPack: params.memoryPolicyPack,
457
+ memoryRecallPack: params.memoryRecallPack,
458
+ maxToolSteps: params.maxToolSteps,
459
+ browserEnabled: agentConfig.agent.browser.enabled,
460
+ promptPacksMaxChars: PROMPT_PACKS_MAX_CHARS,
461
+ promptPacksMaxDemos: PROMPT_PACKS_MAX_DEMOS,
462
+ trimLevel: params.trimLevel,
463
+ });
504
464
  }
505
465
 
506
466
  function loadAvailableGroups(): Array<{ jid: string; name: string; lastActivity: string; isRegistered: boolean }> {
@@ -728,14 +688,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
728
688
  const memoryModel = agent.models.memory;
729
689
  const assistantName = agent.assistantName;
730
690
  const config = getConfig(agentConfig);
731
- if (input.modelContextTokens && Number.isFinite(input.modelContextTokens)) {
732
- config.maxContextTokens = Math.min(config.maxContextTokens, input.modelContextTokens);
733
- const compactionTarget = input.modelContextTokens - config.maxOutputTokens;
734
- config.compactionTriggerTokens = Math.max(1000, Math.min(config.compactionTriggerTokens, compactionTarget));
735
- }
736
- if (input.modelMaxOutputTokens && Number.isFinite(input.modelMaxOutputTokens)) {
737
- config.maxOutputTokens = input.modelMaxOutputTokens;
738
- }
691
+ const limits = resolveModelLimits(input, {
692
+ maxContextTokens: config.maxContextTokens,
693
+ maxOutputTokens: config.maxOutputTokens,
694
+ compactionTriggerTokens: config.compactionTriggerTokens,
695
+ maxContextMessageTokens: agent.context.maxContextMessageTokens,
696
+ });
697
+ config.maxContextTokens = limits.maxContextTokens;
698
+ config.compactionTriggerTokens = limits.compactionTriggerTokens;
699
+ const resolvedMaxOutputTokens = limits.maxOutputTokens; // may be undefined
700
+ const resolvedMaxContextMessageTokens = limits.maxContextMessageTokens;
739
701
  if (input.modelTemperature && Number.isFinite(input.modelTemperature)) {
740
702
  config.temperature = input.modelTemperature;
741
703
  }
@@ -749,7 +711,6 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
749
711
  const memoryExtractionMaxOutputTokens = agent.memory.extraction.maxOutputTokens;
750
712
  const memoryExtractScheduled = agent.memory.extractScheduled;
751
713
  const memoryArchiveSync = agent.memory.archiveSync;
752
- const maxContextMessageTokens = agent.context.maxContextMessageTokens;
753
714
 
754
715
  const openrouter = getCachedOpenRouter(apiKey, openrouterOptions);
755
716
  const tokenEstimate = resolveTokenEstimate(input, agentConfig);
@@ -763,6 +724,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
763
724
 
764
725
  const { ctx: sessionCtx, isNew } = createSessionContext(SESSION_ROOT, input.sessionId);
765
726
  const toolCalls: ToolCallRecord[] = [];
727
+ const toolOutputs: ToolResultRecord[] = [];
766
728
  let memoryItemsUpserted = 0;
767
729
  let memoryItemsExtracted = 0;
768
730
  const timings: { memory_extraction_ms?: number; tool_ms?: number } = {};
@@ -779,6 +741,9 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
779
741
  onToolCall: (call) => {
780
742
  toolCalls.push(call);
781
743
  },
744
+ onToolResult: (record) => {
745
+ toolOutputs.push(record);
746
+ },
782
747
  policy: input.toolPolicy
783
748
  });
784
749
 
@@ -812,6 +777,27 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
812
777
  }
813
778
  }
814
779
 
780
+ // Build schema-only tools (no execute functions) for SDK — prevents the SDK from
781
+ // auto-executing tools in its internal loop, which drops conversation context in
782
+ // follow-up API calls (makeFollowupRequest only sends model output + tool results,
783
+ // losing the original user messages). We run the tool loop ourselves instead.
784
+ const schemaTools = tools.map(t => {
785
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars, @typescript-eslint/no-explicit-any
786
+ const { execute, ...rest } = t.function as any;
787
+ return { type: t.type, function: rest };
788
+ }) as typeof tools;
789
+
790
+ // Map tool names → original execute functions (with policy/callback wrappers intact)
791
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
792
+ const toolExecutors = new Map<string, (args: any) => Promise<any>>();
793
+ for (const t of tools) {
794
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
795
+ const fn = (t.function as any).execute;
796
+ if (typeof fn === 'function') {
797
+ toolExecutors.set(t.function.name, fn);
798
+ }
799
+ }
800
+
815
801
  if (process.env.DOTCLAW_SELF_CHECK === '1') {
816
802
  try {
817
803
  const details = await runSelfCheck({ model });
@@ -858,8 +844,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
858
844
  appendHistory(sessionCtx, 'user', prompt);
859
845
  let history = loadHistory(sessionCtx);
860
846
 
847
+ if (agent.context.maxHistoryTurns > 0) {
848
+ history = limitHistoryTurns(history, agent.context.maxHistoryTurns);
849
+ }
850
+
851
+ // Dynamic context budget: if recentContextTokens is 0 (auto), allocate 60% of context window
852
+ const effectiveRecentTokens = config.recentContextTokens > 0
853
+ ? config.recentContextTokens
854
+ : Math.floor(config.maxContextTokens * 0.6);
861
855
  const tokenRatio = tokenEstimate.tokensPerChar > 0 ? (0.25 / tokenEstimate.tokensPerChar) : 1;
862
- const adjustedRecentTokens = Math.max(1000, Math.floor(config.recentContextTokens * tokenRatio));
856
+ const adjustedRecentTokens = Math.max(1000, Math.floor(effectiveRecentTokens * tokenRatio));
863
857
 
864
858
  const totalTokens = history.reduce(
865
859
  (sum, message) => sum + estimateTokensForModel(message.content, tokenEstimate.tokensPerChar) + tokenEstimate.tokensPerMessage,
@@ -871,14 +865,69 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
871
865
  log(`Compacting history: ${totalTokens} tokens`);
872
866
  archiveConversation(history, sessionCtx.state.summary || null, GROUP_DIR);
873
867
 
874
- const summaryUpdate = await updateMemorySummary({
875
- openrouter,
876
- model: summaryModel,
877
- existingSummary: sessionCtx.state.summary,
878
- existingFacts: sessionCtx.state.facts,
879
- newMessages: olderMessages,
880
- maxOutputTokens: config.summaryMaxOutputTokens
881
- });
868
+ // Multi-part compaction: split older messages into chunks, summarize each
869
+ const olderTokens = olderMessages.reduce(
870
+ (sum, m) => sum + estimateTokens(m.content), 0
871
+ );
872
+ const MULTI_PART_THRESHOLD = 40_000; // Use multi-part for large histories
873
+ const numParts = olderTokens > MULTI_PART_THRESHOLD ? Math.min(3, Math.ceil(olderTokens / MULTI_PART_THRESHOLD)) : 1;
874
+
875
+ let summaryUpdate: { summary: string; facts: string[] } | null = null;
876
+
877
+ if (numParts > 1) {
878
+ log(`Multi-part compaction: ${numParts} parts`);
879
+ const chunks = splitMessagesByTokenShare(olderMessages, numParts);
880
+ const partSummaries: string[] = [];
881
+ const mergedFacts: string[] = [...sessionCtx.state.facts];
882
+
883
+ for (let i = 0; i < chunks.length; i++) {
884
+ const partPrompt = buildMultiPartSummaryPrompt(
885
+ sessionCtx.state.summary,
886
+ mergedFacts,
887
+ chunks[i],
888
+ i,
889
+ chunks.length,
890
+ partSummaries
891
+ );
892
+ const partResult = openrouter.callModel({
893
+ model: summaryModel,
894
+ instructions: partPrompt.instructions,
895
+ input: partPrompt.input,
896
+ maxOutputTokens: config.summaryMaxOutputTokens,
897
+ temperature: 0.1,
898
+ reasoning: { effort: 'low' as const }
899
+ });
900
+ const { text: partText } = await getResponseText(partResult, `summary_part_${i}`);
901
+ const parsed = parseSummaryResponse(partText);
902
+ if (parsed) {
903
+ partSummaries.push(parsed.summary);
904
+ // Merge facts, deduplicating by content
905
+ const existingSet = new Set(mergedFacts.map(f => f.toLowerCase()));
906
+ for (const fact of parsed.facts) {
907
+ if (!existingSet.has(fact.toLowerCase())) {
908
+ mergedFacts.push(fact);
909
+ existingSet.add(fact.toLowerCase());
910
+ }
911
+ }
912
+ }
913
+ }
914
+
915
+ if (partSummaries.length > 0) {
916
+ summaryUpdate = {
917
+ summary: partSummaries.join(' '),
918
+ facts: mergedFacts
919
+ };
920
+ }
921
+ } else {
922
+ summaryUpdate = await updateMemorySummary({
923
+ openrouter,
924
+ model: summaryModel,
925
+ existingSummary: sessionCtx.state.summary,
926
+ existingFacts: sessionCtx.state.facts,
927
+ newMessages: olderMessages,
928
+ maxOutputTokens: config.summaryMaxOutputTokens
929
+ });
930
+ }
882
931
 
883
932
  if (summaryUpdate) {
884
933
  sessionCtx.state.summary = summaryUpdate.summary;
@@ -980,7 +1029,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
980
1029
  if (memoryPolicyResult) promptPackVersions['memory-policy'] = memoryPolicyResult.pack.version;
981
1030
  if (memoryRecallResult) promptPackVersions['memory-recall'] = memoryRecallResult.pack.version;
982
1031
 
983
- const buildInstructions = () => buildSystemInstructions({
1032
+ const resolveInstructions = (trimLevel = 0) => buildInstructions({
984
1033
  assistantName,
985
1034
  groupNotes: claudeNotes.group,
986
1035
  globalNotes: claudeNotes.global,
@@ -1005,16 +1054,35 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1005
1054
  toolOutcomePack: toolOutcomeResult?.pack || null,
1006
1055
  memoryPolicyPack: memoryPolicyResult?.pack || null,
1007
1056
  memoryRecallPack: memoryRecallResult?.pack || null,
1008
- maxToolSteps
1057
+ maxToolSteps,
1058
+ trimLevel
1009
1059
  });
1010
1060
 
1011
1061
  const buildContext = () => {
1012
- const resolvedInstructions = buildInstructions();
1013
- const resolvedInstructionTokens = estimateTokensForModel(resolvedInstructions, tokenEstimate.tokensPerChar);
1014
- const resolvedMaxContext = Math.max(config.maxContextTokens - config.maxOutputTokens - resolvedInstructionTokens, 2000);
1062
+ // System prompt budget: 25% of context window
1063
+ const maxSystemPromptTokens = Math.floor(config.maxContextTokens * 0.25);
1064
+ const MAX_TRIM_LEVEL = 4;
1065
+
1066
+ let resolvedInstructions = '';
1067
+ let resolvedInstructionTokens = 0;
1068
+ let trimLevel = 0;
1069
+
1070
+ // Progressive trimming loop: build prompt, check size, trim if needed
1071
+ for (trimLevel = 0; trimLevel <= MAX_TRIM_LEVEL; trimLevel++) {
1072
+ resolvedInstructions = resolveInstructions(trimLevel);
1073
+ resolvedInstructionTokens = estimateTokensForModel(resolvedInstructions, tokenEstimate.tokensPerChar);
1074
+ if (resolvedInstructionTokens <= maxSystemPromptTokens || trimLevel === MAX_TRIM_LEVEL) {
1075
+ break;
1076
+ }
1077
+ log(`System prompt ${resolvedInstructionTokens} tokens exceeds budget ${maxSystemPromptTokens}, trimming (level ${trimLevel + 1})`);
1078
+ }
1079
+
1080
+ const outputReserve = resolvedMaxOutputTokens || Math.floor(config.maxContextTokens * 0.25);
1081
+ const resolvedMaxContext = Math.max(config.maxContextTokens - outputReserve - resolvedInstructionTokens, 2000);
1015
1082
  const resolvedAdjusted = Math.max(1000, Math.floor(resolvedMaxContext * tokenRatio));
1016
1083
  let { recentMessages: contextMessages } = splitRecentHistory(recentMessages, resolvedAdjusted, 6);
1017
- contextMessages = clampContextMessages(contextMessages, tokenEstimate.tokensPerChar, maxContextMessageTokens);
1084
+ contextMessages = clampContextMessages(contextMessages, tokenEstimate.tokensPerChar, resolvedMaxContextMessageTokens);
1085
+ contextMessages = pruneContextMessages(contextMessages, agent.context.contextPruning);
1018
1086
  return {
1019
1087
  instructions: resolvedInstructions,
1020
1088
  instructionsTokens: resolvedInstructionTokens,
@@ -1065,61 +1133,171 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1065
1133
  let lastError: unknown = null;
1066
1134
  for (let attempt = 0; attempt < modelChain.length; attempt++) {
1067
1135
  currentModel = modelChain[attempt];
1136
+ // Skip models in cooldown (unless it's the last option)
1137
+ if (isModelInCooldown(currentModel) && attempt < modelChain.length - 1) {
1138
+ log(`Skipping ${currentModel} (in cooldown)`);
1139
+ continue;
1140
+ }
1068
1141
  if (attempt > 0) log(`Fallback ${attempt}: trying ${currentModel}`);
1069
1142
 
1070
1143
  try {
1071
1144
  log(`Starting OpenRouter call (${currentModel})...`);
1072
1145
  const startedAt = Date.now();
1073
- const result = openrouter.callModel({
1146
+ // ── Custom tool execution loop ──────────────────────────────────
1147
+ // The SDK's built-in tool loop (executeToolsIfNeeded) drops conversation
1148
+ // context in follow-up API calls — it only sends [function_calls, function_call_outputs]
1149
+ // without the original user messages or previousResponseId. This causes models to
1150
+ // produce empty text after tools that return minimal results (e.g. sequential-thinking).
1151
+ // We use schema-only tools (no execute functions) so the SDK returns tool calls
1152
+ // without auto-executing, then run the loop ourselves with full context.
1153
+
1154
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1155
+ let conversationInput: any[] = [...contextInput];
1156
+ let step = 0;
1157
+ let streamSeq = 0;
1158
+
1159
+ // Helper to write a stream chunk
1160
+ const writeStreamChunk = (text: string) => {
1161
+ if (!input.streamDir) return;
1162
+ streamSeq++;
1163
+ const chunkFile = path.join(input.streamDir, `chunk_${String(streamSeq).padStart(6, '0')}.txt`);
1164
+ const tmpFile = chunkFile + '.tmp';
1165
+ try {
1166
+ fs.writeFileSync(tmpFile, text);
1167
+ fs.renameSync(tmpFile, chunkFile);
1168
+ } catch (writeErr) {
1169
+ log(`Stream write error at seq ${streamSeq}: ${writeErr instanceof Error ? writeErr.message : String(writeErr)}`);
1170
+ }
1171
+ };
1172
+
1173
+ // Helper to finalize streaming
1174
+ const finalizeStream = () => {
1175
+ if (!input.streamDir) return;
1176
+ try {
1177
+ if (!fs.existsSync(path.join(input.streamDir, 'done'))) {
1178
+ fs.writeFileSync(path.join(input.streamDir, 'done'), '');
1179
+ }
1180
+ } catch { /* ignore */ }
1181
+ };
1182
+
1183
+ // Initial call — uses streaming for real-time delivery
1184
+ const initialResult = openrouter.callModel({
1074
1185
  model: currentModel,
1075
1186
  instructions: resolvedInstructions,
1076
- input: contextInput,
1077
- tools,
1078
- stopWhen: stepCountIs(maxToolSteps),
1079
- maxOutputTokens: config.maxOutputTokens,
1187
+ input: conversationInput,
1188
+ tools: schemaTools,
1189
+ maxOutputTokens: resolvedMaxOutputTokens,
1080
1190
  temperature: config.temperature,
1081
1191
  reasoning: resolvedReasoning
1082
1192
  });
1083
1193
 
1084
- // Stream text chunks to IPC if streamDir is provided
1194
+ // Stream text from initial response
1085
1195
  if (input.streamDir) {
1086
- let seq = 0;
1087
1196
  try {
1088
1197
  fs.mkdirSync(input.streamDir, { recursive: true });
1089
- for await (const delta of result.getTextStream()) {
1090
- seq++;
1091
- const chunkFile = path.join(input.streamDir, `chunk_${String(seq).padStart(6, '0')}.txt`);
1092
- const tmpFile = chunkFile + '.tmp';
1093
- fs.writeFileSync(tmpFile, delta);
1094
- fs.renameSync(tmpFile, chunkFile);
1198
+ for await (const delta of initialResult.getTextStream()) {
1199
+ writeStreamChunk(delta);
1095
1200
  }
1096
- fs.writeFileSync(path.join(input.streamDir, 'done'), '');
1097
1201
  } catch (streamErr) {
1098
1202
  log(`Stream error: ${streamErr instanceof Error ? streamErr.message : String(streamErr)}`);
1099
1203
  try { fs.writeFileSync(path.join(input.streamDir, 'error'), streamErr instanceof Error ? streamErr.message : String(streamErr)); } catch { /* ignore */ }
1100
1204
  }
1101
1205
  }
1102
1206
 
1103
- latencyMs = Date.now() - startedAt;
1207
+ // Get initial response (no auto-execution since schemaTools have no execute fns)
1208
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1209
+ let lastResponse: any;
1210
+ try {
1211
+ lastResponse = await initialResult.getResponse();
1212
+ } catch (err) {
1213
+ const message = err instanceof Error ? err.message : String(err);
1214
+ log(`Initial getResponse failed: ${message}`);
1215
+ finalizeStream();
1216
+ throw err;
1217
+ }
1104
1218
 
1105
- const completionResult = await getResponseText(result, 'completion');
1106
- responseText = completionResult.text;
1219
+ responseText = extractTextFromApiResponse(lastResponse);
1220
+ let pendingCalls = extractFunctionCalls(lastResponse);
1221
+
1222
+ // Tool execution loop — execute tools ourselves, include full context in follow-ups
1223
+ while (pendingCalls.length > 0 && step < maxToolSteps) {
1224
+ log(`Step ${step}: executing ${pendingCalls.length} tool call(s): ${pendingCalls.map(c => c.name).join(', ')}`);
1225
+
1226
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1227
+ const toolResults: any[] = [];
1228
+ for (const fc of pendingCalls) {
1229
+ const executor = toolExecutors.get(fc.name);
1230
+ if (!executor) {
1231
+ log(`Unknown tool: ${fc.name}`);
1232
+ toolResults.push({
1233
+ type: 'function_call_output',
1234
+ callId: fc.id,
1235
+ output: JSON.stringify({ error: `Unknown tool: ${fc.name}` })
1236
+ });
1237
+ step++;
1238
+ continue;
1239
+ }
1107
1240
 
1108
- const toolCallsFromModel = await result.getToolCalls();
1109
- if (toolCallsFromModel.length > 0) {
1110
- log(`Model made ${toolCallsFromModel.length} tool call(s): ${toolCallsFromModel.map(t => t.name).join(', ')}`);
1111
- }
1112
- if (!responseText || !responseText.trim()) {
1113
- if (completionResult.error) {
1114
- log(`Tool execution failed: ${completionResult.error}`);
1115
- responseText = `Something went wrong while processing your request: ${completionResult.error}. Please try again.`;
1116
- } else if (toolCallsFromModel.length > 0) {
1117
- responseText = 'I started running tool calls but did not get a final response. If you want me to continue, please ask a narrower subtask or say "continue".';
1118
- } else {
1119
- log(`Warning: Model returned empty/whitespace response. tool calls: ${toolCallsFromModel.length}`);
1241
+ try {
1242
+ // Calling the wrapped execute fires onToolCall/onToolResult callbacks
1243
+ const result = await executor(fc.arguments);
1244
+ toolResults.push({
1245
+ type: 'function_call_output',
1246
+ callId: fc.id,
1247
+ output: JSON.stringify(result)
1248
+ });
1249
+ } catch (err) {
1250
+ const error = err instanceof Error ? err.message : String(err);
1251
+ toolResults.push({
1252
+ type: 'function_call_output',
1253
+ callId: fc.id,
1254
+ output: JSON.stringify({ error })
1255
+ });
1256
+ }
1257
+ step++;
1120
1258
  }
1259
+
1260
+ // Build follow-up input with FULL conversation context:
1261
+ // original messages + model output + tool results (accumulated each round)
1262
+ conversationInput = [...conversationInput, ...lastResponse.output, ...toolResults];
1263
+
1264
+ // Follow-up call with complete context — model sees the full conversation
1265
+ const followupResult = openrouter.callModel({
1266
+ model: currentModel,
1267
+ instructions: resolvedInstructions,
1268
+ input: conversationInput,
1269
+ tools: schemaTools,
1270
+ maxOutputTokens: resolvedMaxOutputTokens,
1271
+ temperature: config.temperature,
1272
+ reasoning: resolvedReasoning
1273
+ });
1274
+
1275
+ try {
1276
+ lastResponse = await followupResult.getResponse();
1277
+ } catch (err) {
1278
+ const message = err instanceof Error ? err.message : String(err);
1279
+ log(`Follow-up getResponse failed at step ${step}: ${message}`);
1280
+ break;
1281
+ }
1282
+
1283
+ const followupText = extractTextFromApiResponse(lastResponse);
1284
+ if (followupText) {
1285
+ responseText = followupText;
1286
+ writeStreamChunk(followupText);
1287
+ }
1288
+
1289
+ pendingCalls = extractFunctionCalls(lastResponse);
1290
+ }
1291
+
1292
+ finalizeStream();
1293
+ latencyMs = Date.now() - startedAt;
1294
+
1295
+ if (responseText && responseText.trim()) {
1296
+ log(`Model returned text response (${responseText.length} chars, ${step} tool steps)`);
1297
+ } else if (toolCalls.length > 0) {
1298
+ log(`Warning: Model returned empty response after ${toolCalls.length} tool call(s) and ${step} steps`);
1121
1299
  } else {
1122
- log(`Model returned text response (${responseText.length} chars)`);
1300
+ log(`Warning: Model returned empty/whitespace response`);
1123
1301
  }
1124
1302
 
1125
1303
  completionTokens = estimateTokensForModel(responseText || '', tokenEstimate.tokensPerChar);
@@ -1128,9 +1306,12 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1128
1306
  break; // Success
1129
1307
  } catch (err) {
1130
1308
  lastError = err;
1131
- if (classifyError(err) && attempt < modelChain.length - 1) {
1132
- log(`${currentModel} failed (${classifyError(err)}): ${err instanceof Error ? err.message : err}`);
1133
- continue;
1309
+ if (classifyError(err)) {
1310
+ cooldownModel(currentModel, err);
1311
+ if (attempt < modelChain.length - 1) {
1312
+ log(`${currentModel} failed (${classifyError(err)}): ${err instanceof Error ? err.message : err}`);
1313
+ continue;
1314
+ }
1134
1315
  }
1135
1316
  throw err; // Non-retryable or last model — propagate
1136
1317
  }
@@ -1162,6 +1343,14 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1162
1343
  };
1163
1344
  }
1164
1345
 
1346
+ // Parse reply tags from response before saving to history
1347
+ let replyToId: string | undefined;
1348
+ if (responseText) {
1349
+ const parsed = parseReplyTags(responseText);
1350
+ responseText = parsed.cleanText;
1351
+ replyToId = parsed.replyToId;
1352
+ }
1353
+
1165
1354
  appendHistory(sessionCtx, 'assistant', responseText || '');
1166
1355
  history = loadHistory(sessionCtx);
1167
1356
 
@@ -1221,7 +1410,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1221
1410
  if (memoryExtractionEnabled && isDaemon && (!input.isScheduledTask || memoryExtractScheduled)) {
1222
1411
  // Fire-and-forget in daemon mode; skip entirely in ephemeral mode
1223
1412
  void runMemoryExtraction().catch((err) => {
1224
- log(`Memory extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1413
+ const errMsg = err instanceof Error ? err.message : String(err);
1414
+ log(`Memory extraction failed: ${errMsg}`);
1415
+ // Write error to IPC status file so host can detect the failure
1416
+ try {
1417
+ const statusPath = path.join(IPC_DIR, 'memory_extraction_error.json');
1418
+ fs.writeFileSync(statusPath, JSON.stringify({
1419
+ error: errMsg,
1420
+ timestamp: new Date().toISOString(),
1421
+ }));
1422
+ } catch { /* best-effort status write */ }
1225
1423
  });
1226
1424
  }
1227
1425
 
@@ -1255,7 +1453,8 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1255
1453
  memory_items_extracted: memoryItemsExtracted,
1256
1454
  timings: Object.keys(timings).length > 0 ? timings : undefined,
1257
1455
  tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
1258
- latency_ms: latencyMs
1456
+ latency_ms: latencyMs,
1457
+ replyToId
1259
1458
  };
1260
1459
  }
1261
1460