@dotsetlabs/dotclaw 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/.env.example +12 -0
  2. package/README.md +5 -2
  3. package/config-examples/runtime.json +46 -5
  4. package/config-examples/tool-budgets.json +1 -1
  5. package/config-examples/tool-policy.json +1 -1
  6. package/container/Dockerfile +5 -1
  7. package/container/agent-runner/package.json +1 -1
  8. package/container/agent-runner/src/agent-config.ts +67 -17
  9. package/container/agent-runner/src/container-protocol.ts +6 -0
  10. package/container/agent-runner/src/daemon.ts +18 -5
  11. package/container/agent-runner/src/index.ts +442 -243
  12. package/container/agent-runner/src/ipc.ts +76 -1
  13. package/container/agent-runner/src/mcp-registry.ts +11 -0
  14. package/container/agent-runner/src/memory.ts +145 -3
  15. package/container/agent-runner/src/process-registry.ts +257 -0
  16. package/container/agent-runner/src/system-prompt.ts +337 -0
  17. package/container/agent-runner/src/tools.ts +382 -29
  18. package/container/agent-runner/src/tts.ts +42 -0
  19. package/dist/agent-context.d.ts +1 -0
  20. package/dist/agent-context.d.ts.map +1 -1
  21. package/dist/agent-context.js +6 -3
  22. package/dist/agent-context.js.map +1 -1
  23. package/dist/agent-execution.d.ts +1 -0
  24. package/dist/agent-execution.d.ts.map +1 -1
  25. package/dist/agent-execution.js +11 -4
  26. package/dist/agent-execution.js.map +1 -1
  27. package/dist/container-protocol.d.ts +8 -0
  28. package/dist/container-protocol.d.ts.map +1 -1
  29. package/dist/container-runner.d.ts.map +1 -1
  30. package/dist/container-runner.js +44 -8
  31. package/dist/container-runner.js.map +1 -1
  32. package/dist/error-messages.d.ts.map +1 -1
  33. package/dist/error-messages.js +22 -5
  34. package/dist/error-messages.js.map +1 -1
  35. package/dist/index.js +53 -6
  36. package/dist/index.js.map +1 -1
  37. package/dist/ipc-dispatcher.d.ts.map +1 -1
  38. package/dist/ipc-dispatcher.js +336 -6
  39. package/dist/ipc-dispatcher.js.map +1 -1
  40. package/dist/memory-recall.d.ts +1 -0
  41. package/dist/memory-recall.d.ts.map +1 -1
  42. package/dist/memory-recall.js +3 -0
  43. package/dist/memory-recall.js.map +1 -1
  44. package/dist/memory-store.d.ts.map +1 -1
  45. package/dist/memory-store.js +5 -3
  46. package/dist/memory-store.js.map +1 -1
  47. package/dist/message-pipeline.d.ts.map +1 -1
  48. package/dist/message-pipeline.js +53 -12
  49. package/dist/message-pipeline.js.map +1 -1
  50. package/dist/model-registry.d.ts +15 -0
  51. package/dist/model-registry.d.ts.map +1 -1
  52. package/dist/model-registry.js +56 -12
  53. package/dist/model-registry.js.map +1 -1
  54. package/dist/providers/telegram/telegram-provider.d.ts +1 -0
  55. package/dist/providers/telegram/telegram-provider.d.ts.map +1 -1
  56. package/dist/providers/telegram/telegram-provider.js +14 -0
  57. package/dist/providers/telegram/telegram-provider.js.map +1 -1
  58. package/dist/request-router.d.ts +0 -1
  59. package/dist/request-router.d.ts.map +1 -1
  60. package/dist/request-router.js +18 -6
  61. package/dist/request-router.js.map +1 -1
  62. package/dist/runtime-config.d.ts +14 -0
  63. package/dist/runtime-config.d.ts.map +1 -1
  64. package/dist/runtime-config.js +64 -16
  65. package/dist/runtime-config.js.map +1 -1
  66. package/dist/task-scheduler.d.ts.map +1 -1
  67. package/dist/task-scheduler.js +3 -5
  68. package/dist/task-scheduler.js.map +1 -1
  69. package/dist/tool-budgets.js +1 -1
  70. package/dist/tool-budgets.js.map +1 -1
  71. package/dist/tool-policy.d.ts.map +1 -1
  72. package/dist/tool-policy.js +13 -3
  73. package/dist/tool-policy.js.map +1 -1
  74. package/dist/webhook.d.ts +14 -0
  75. package/dist/webhook.d.ts.map +1 -0
  76. package/dist/webhook.js +169 -0
  77. package/dist/webhook.js.map +1 -0
  78. package/package.json +3 -2
@@ -6,8 +6,8 @@
6
6
  import fs from 'fs';
7
7
  import path from 'path';
8
8
  import { fileURLToPath } from 'url';
9
- import { OpenRouter, stepCountIs } from '@openrouter/sdk';
10
- import { createTools, discoverMcpTools, ToolCallRecord } from './tools.js';
9
+ import { OpenRouter } from '@openrouter/sdk';
10
+ import { createTools, discoverMcpTools, ToolCallRecord, type ToolResultRecord } from './tools.js';
11
11
  import { createIpcHandlers } from './ipc.js';
12
12
  import { loadAgentConfig } from './agent-config.js';
13
13
  import { OUTPUT_START_MARKER, OUTPUT_END_MARKER, type ContainerInput, type ContainerOutput } from './container-protocol.js';
@@ -19,15 +19,21 @@ import {
19
19
  shouldCompact,
20
20
  archiveConversation,
21
21
  buildSummaryPrompt,
22
+ buildMultiPartSummaryPrompt,
23
+ splitMessagesByTokenShare,
22
24
  parseSummaryResponse,
23
25
  retrieveRelevantMemories,
24
26
  saveMemoryState,
25
27
  writeHistory,
28
+ estimateTokens,
29
+ pruneContextMessages,
30
+ limitHistoryTurns,
26
31
  MemoryConfig,
27
32
  Message
28
33
  } from './memory.js';
29
34
  import { loadPromptPackWithCanary, formatPromptPack, PromptPack } from './prompt-packs.js';
30
- import { buildSkillCatalog, formatSkillCatalog, type SkillCatalog } from './skill-loader.js';
35
+ import { buildSkillCatalog, type SkillCatalog } from './skill-loader.js';
36
+ import { buildSystemPrompt } from './system-prompt.js';
31
37
 
32
38
  type OpenRouterResult = ReturnType<OpenRouter['callModel']>;
33
39
 
@@ -50,6 +56,51 @@ const PROMPT_PACKS_MAX_CHARS = agent.promptPacks.maxChars;
50
56
  const PROMPT_PACKS_MAX_DEMOS = agent.promptPacks.maxDemos;
51
57
  const PROMPT_PACKS_CANARY_RATE = agent.promptPacks.canaryRate;
52
58
 
59
+ // ── Model cooldown tracking ──────────────────────────────────────────
60
+ // After a model fails, put it in cooldown. 429 → 60s, 5xx/timeout → 300s.
61
+ const modelCooldowns = new Map<string, number>(); // model → cooldown-until epoch ms
62
+
63
+ function isModelInCooldown(model: string): boolean {
64
+ const until = modelCooldowns.get(model);
65
+ if (!until) return false;
66
+ if (Date.now() >= until) {
67
+ modelCooldowns.delete(model);
68
+ return false;
69
+ }
70
+ return true;
71
+ }
72
+
73
+ function cooldownModel(model: string, err: unknown): void {
74
+ const msg = err instanceof Error ? err.message : String(err);
75
+ const lower = msg.toLowerCase();
76
+ let durationMs = 300_000; // default: 5 min for 5xx/timeout
77
+ if (/429|rate.?limit/.test(lower)) {
78
+ durationMs = 60_000; // 1 min for rate limits
79
+ }
80
+ modelCooldowns.set(model, Date.now() + durationMs);
81
+ log(`Model ${model} in cooldown for ${durationMs / 1000}s`);
82
+ }
83
+
84
+ // ── Reply tag parsing ────────────────────────────────────────────────
85
+ // Parse [[reply_to_current]] and [[reply_to:<id>]] tags from agent output.
86
+ export function parseReplyTags(text: string): { cleanText: string; replyToId?: string } {
87
+ if (!text) return { cleanText: text };
88
+ const replyCurrentMatch = text.match(/\[\[reply_to_current\]\]/);
89
+ const replyIdMatch = text.match(/\[\[reply_to:(\d+)\]\]/);
90
+ let replyToId: string | undefined;
91
+ let cleanText = text;
92
+
93
+ if (replyIdMatch) {
94
+ replyToId = replyIdMatch[1];
95
+ cleanText = cleanText.replace(/\[\[reply_to:\d+\]\]/g, '').trim();
96
+ } else if (replyCurrentMatch) {
97
+ replyToId = '__current__'; // sentinel — host resolves to the triggering message
98
+ cleanText = cleanText.replace(/\[\[reply_to_current\]\]/g, '').trim();
99
+ }
100
+
101
+ return { cleanText, replyToId };
102
+ }
103
+
53
104
  let cachedOpenRouter: OpenRouter | null = null;
54
105
  let cachedOpenRouterKey = '';
55
106
  let cachedOpenRouterOptions = '';
@@ -119,6 +170,34 @@ async function getResponseText(result: OpenRouterResult, context: string): Promi
119
170
  return { text: '' };
120
171
  }
121
172
 
173
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
174
+ function extractTextFromApiResponse(response: any): string {
175
+ if (response?.outputText) return response.outputText;
176
+ for (const item of response?.output || []) {
177
+ if (item?.type === 'message') {
178
+ for (const part of item.content || []) {
179
+ if (part?.type === 'output_text' && part.text) return part.text;
180
+ }
181
+ }
182
+ }
183
+ return '';
184
+ }
185
+
186
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
187
+ function extractFunctionCalls(response: any): Array<{ id: string; name: string; arguments: any }> {
188
+ const calls: Array<{ id: string; name: string; arguments: unknown }> = [];
189
+ for (const item of response?.output || []) {
190
+ if (item?.type === 'function_call') {
191
+ let args = item.arguments;
192
+ if (typeof args === 'string') {
193
+ try { args = JSON.parse(args); } catch { /* keep as string */ }
194
+ }
195
+ calls.push({ id: item.callId, name: item.name, arguments: args });
196
+ }
197
+ }
198
+ return calls;
199
+ }
200
+
122
201
  function writeOutput(output: ContainerOutput): void {
123
202
  console.log(OUTPUT_START_MARKER);
124
203
  console.log(JSON.stringify(output));
@@ -232,6 +311,34 @@ function getConfig(config: ReturnType<typeof loadAgentConfig>): MemoryConfig & {
232
311
  };
233
312
  }
234
313
 
314
+ function resolveModelLimits(
315
+ input: ContainerInput,
316
+ configDefaults: { maxContextTokens: number; maxOutputTokens: number; compactionTriggerTokens: number; maxContextMessageTokens: number }
317
+ ) {
318
+ const caps = input.modelCapabilities;
319
+
320
+ // Context: use model capability, fall back to config
321
+ const contextLength = caps?.context_length || configDefaults.maxContextTokens;
322
+
323
+ // Output tokens: only set when explicitly configured by user.
324
+ // DO NOT use caps.max_completion_tokens — for reasoning models, maxOutputTokens covers
325
+ // both reasoning tokens AND visible text. Setting it to the model's max causes the model
326
+ // to allocate the entire budget to reasoning with 0 left for visible output.
327
+ let maxOutputTokens: number | undefined;
328
+ if (input.modelMaxOutputTokens && Number.isFinite(input.modelMaxOutputTokens)) {
329
+ maxOutputTokens = input.modelMaxOutputTokens; // Explicit cost-control override
330
+ }
331
+ // else: undefined — omit from callModel(), let the API decide token budgeting
332
+
333
+ // Derive other limits from context length
334
+ const outputReserve = maxOutputTokens || Math.floor(contextLength * 0.25);
335
+ const maxContextTokens = contextLength;
336
+ const compactionTriggerTokens = Math.max(1000, contextLength - outputReserve);
337
+ const maxContextMessageTokens = Math.max(1000, Math.floor(contextLength * 0.03));
338
+
339
+ return { maxContextTokens, maxOutputTokens, compactionTriggerTokens, maxContextMessageTokens };
340
+ }
341
+
235
342
  function getOpenRouterOptions(config: ReturnType<typeof loadAgentConfig>) {
236
343
  const timeoutMs = config.agent.openrouter.timeoutMs;
237
344
  const retryEnabled = config.agent.openrouter.retry;
@@ -294,9 +401,7 @@ function estimateMessagesTokens(messages: Message[], tokensPerChar: number, toke
294
401
  return total;
295
402
  }
296
403
 
297
- const MEMORY_SUMMARY_MAX_CHARS = 2000;
298
-
299
- function buildSystemInstructions(params: {
404
+ function buildInstructions(params: {
300
405
  assistantName: string;
301
406
  groupNotes?: string | null;
302
407
  globalNotes?: string | null;
@@ -322,185 +427,40 @@ function buildSystemInstructions(params: {
322
427
  memoryPolicyPack?: PromptPack | null;
323
428
  memoryRecallPack?: PromptPack | null;
324
429
  maxToolSteps?: number;
430
+ trimLevel?: number;
325
431
  }): string {
326
- const toolGuidance = [
327
- 'Key tool rules:',
328
- '- User attachments arrive in /workspace/group/inbox/ (see <attachment> tags). Process with Read/Bash/Python.',
329
- '- To send media from the web: download_url → send_photo/send_file/send_audio.',
330
- '- Charts/plots: matplotlib → savefig → send_photo. Graphviz → dot -Tpng → send_photo.',
331
- '- Voice messages are auto-transcribed (<transcript> in <attachment>). Reply with normal text — the host auto-converts to voice.',
332
- '- GitHub CLI (`gh`) is available if GH_TOKEN is set.',
333
- '- plugin__* and mcp_ext__* tools may be available if configured.'
334
- ].join('\n');
335
-
336
- const browserAutomation = agentConfig.agent.browser.enabled ? [
337
- 'Browser Tool: actions: navigate, snapshot, click, fill, screenshot, extract, evaluate, close.',
338
- 'Use snapshot with interactive=true for clickable refs (@e1, @e2). Screenshots → /workspace/group/screenshots/.'
339
- ].join('\n') : '';
340
-
341
- const hasAnyMemory = params.memorySummary || params.memoryFacts.length > 0 ||
342
- params.longTermRecall.length > 0 || params.userProfile;
343
-
344
- const memorySummary = params.memorySummary
345
- ? params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS)
346
- : '';
347
- const memoryFacts = params.memoryFacts.length > 0
348
- ? params.memoryFacts.map(fact => `- ${fact}`).join('\n')
349
- : '';
350
- const sessionRecall = params.sessionRecall.length > 0
351
- ? params.sessionRecall.map(item => `- ${item}`).join('\n')
352
- : '';
353
- const longTermRecall = params.longTermRecall.length > 0
354
- ? params.longTermRecall.map(item => `- ${item}`).join('\n')
355
- : '';
356
- const userProfile = params.userProfile || '';
357
- const memoryStats = params.memoryStats
358
- ? `Total: ${params.memoryStats.total}, User: ${params.memoryStats.user}, Group: ${params.memoryStats.group}, Global: ${params.memoryStats.global}`
359
- : '';
360
-
361
- const availableGroups = params.availableGroups && params.availableGroups.length > 0
362
- ? params.availableGroups
363
- .map(group => `- ${group.name} (chat ${group.jid}, last: ${group.lastActivity})`)
364
- .join('\n')
365
- : '';
366
-
367
- const groupNotes = params.groupNotes ? `Group notes:\n${params.groupNotes}` : '';
368
- const globalNotes = params.globalNotes ? `Global notes:\n${params.globalNotes}` : '';
369
- const skillNotes = params.skillCatalog ? formatSkillCatalog(params.skillCatalog) : '';
370
-
371
- const toolReliability = params.toolReliability && params.toolReliability.length > 0
372
- ? params.toolReliability
373
- .sort((a, b) => a.success_rate - b.success_rate)
374
- .slice(0, 20)
375
- .map(tool => {
376
- const pct = `${Math.round(tool.success_rate * 100)}%`;
377
- const avg = Number.isFinite(tool.avg_duration_ms) ? `${Math.round(tool.avg_duration_ms!)}ms` : 'n/a';
378
- return `- ${tool.name}: success ${pct} over ${tool.count} calls (avg ${avg})`;
379
- })
380
- .join('\n')
381
- : '';
382
-
383
- const behaviorNotes: string[] = [];
384
- const responseStyle = typeof params.behaviorConfig?.response_style === 'string'
385
- ? String(params.behaviorConfig.response_style)
386
- : '';
387
- if (responseStyle === 'concise') {
388
- behaviorNotes.push('Keep responses short and to the point.');
389
- } else if (responseStyle === 'detailed') {
390
- behaviorNotes.push('Give detailed, step-by-step responses when helpful.');
391
- }
392
- const toolBias = typeof params.behaviorConfig?.tool_calling_bias === 'number'
393
- ? Number(params.behaviorConfig.tool_calling_bias)
394
- : null;
395
- if (toolBias !== null && toolBias < 0.4) {
396
- behaviorNotes.push('Ask before using tools unless the intent is obvious.');
397
- } else if (toolBias !== null && toolBias > 0.6) {
398
- behaviorNotes.push('Use tools proactively when they add accuracy or save time.');
399
- }
400
- const cautionBias = typeof params.behaviorConfig?.caution_bias === 'number'
401
- ? Number(params.behaviorConfig.caution_bias)
402
- : null;
403
- if (cautionBias !== null && cautionBias > 0.6) {
404
- behaviorNotes.push('Double-check uncertain facts and flag limitations.');
405
- }
406
-
407
- const timezoneNote = params.timezone
408
- ? `Timezone: ${params.timezone}. Use this timezone when interpreting or presenting timestamps unless the user specifies another.`
409
- : '';
410
-
411
- const hostPlatformNote = params.hostPlatform
412
- ? (params.hostPlatform.startsWith('linux')
413
- ? `Host platform: ${params.hostPlatform} (matches container).`
414
- : `You are running inside a Linux container, but the user's host machine is ${params.hostPlatform}. Packages with platform-specific native binaries (e.g. esbuild, swc, sharp) installed here won't work on the host. When you create projects with dependencies, delete node_modules before finishing and tell the user to run the install command on their machine.`)
415
- : '';
416
-
417
- const scheduledNote = params.isScheduledTask
418
- ? `You are running as a scheduled task${params.taskId ? ` (task id: ${params.taskId})` : ''}. If you need to communicate, use \`mcp__dotclaw__send_message\`.`
419
- : '';
420
-
421
- const fmtPack = (label: string, pack: PromptPack | null | undefined) =>
422
- pack ? formatPromptPack({ label, pack, maxDemos: PROMPT_PACKS_MAX_DEMOS, maxChars: PROMPT_PACKS_MAX_CHARS }) : '';
423
-
424
- const PROMPT_PACKS_TOTAL_BUDGET = PROMPT_PACKS_MAX_CHARS * 3;
425
- const allPackBlocks: string[] = [];
426
- {
427
- const packEntries: Array<[string, PromptPack | null | undefined]> = [
428
- ['Tool Calling Guidelines', params.toolCallingPack],
429
- ['Tool Outcome Guidelines', params.toolOutcomePack],
430
- ['Task Extraction Guidelines', params.taskExtractionPack],
431
- ['Response Quality Guidelines', params.responseQualityPack],
432
- ['Memory Policy Guidelines', params.memoryPolicyPack],
433
- ['Memory Recall Guidelines', params.memoryRecallPack],
434
- ];
435
- let totalChars = 0;
436
- for (const [label, pack] of packEntries) {
437
- const block = fmtPack(label, pack);
438
- if (!block) continue;
439
- if (totalChars + block.length > PROMPT_PACKS_TOTAL_BUDGET) break;
440
- allPackBlocks.push(block);
441
- totalChars += block.length;
442
- }
443
- }
444
- const taskExtractionBlock = allPackBlocks.find(b => b.includes('Task Extraction')) || '';
445
- const responseQualityBlock = allPackBlocks.find(b => b.includes('Response Quality')) || '';
446
- const toolCallingBlock = allPackBlocks.find(b => b.includes('Tool Calling')) || '';
447
- const toolOutcomeBlock = allPackBlocks.find(b => b.includes('Tool Outcome')) || '';
448
- const memoryPolicyBlock = allPackBlocks.find(b => b.includes('Memory Policy')) || '';
449
- const memoryRecallBlock = allPackBlocks.find(b => b.includes('Memory Recall')) || '';
450
-
451
- const memorySections: string[] = [];
452
- {
453
- if (hasAnyMemory) {
454
- if (memorySummary) {
455
- memorySections.push('Long-term memory summary:', memorySummary);
456
- }
457
- if (memoryFacts) {
458
- memorySections.push('Long-term facts:', memoryFacts);
459
- }
460
- if (userProfile) {
461
- memorySections.push('User profile (if available):', userProfile);
462
- }
463
- if (longTermRecall) {
464
- memorySections.push('What you remember about the user (long-term):', longTermRecall);
465
- }
466
- if (memoryStats) {
467
- memorySections.push('Memory stats:', memoryStats);
468
- }
469
- } else {
470
- memorySections.push('No long-term memory available yet.');
471
- }
472
- }
473
-
474
- // Session recall is always included (local context from current conversation)
475
- if (sessionRecall) {
476
- memorySections.push('Recent conversation context:', sessionRecall);
477
- }
478
-
479
- return [
480
- `You are ${params.assistantName}, a personal assistant running inside DotClaw.${params.messagingPlatform ? ` You are currently connected via ${params.messagingPlatform}.` : ''}`,
481
- hostPlatformNote,
482
- scheduledNote,
483
- toolGuidance,
484
- browserAutomation,
485
- groupNotes,
486
- globalNotes,
487
- skillNotes,
488
- timezoneNote,
489
- toolCallingBlock,
490
- toolOutcomeBlock,
491
- taskExtractionBlock,
492
- responseQualityBlock,
493
- memoryPolicyBlock,
494
- memoryRecallBlock,
495
- ...memorySections,
496
- availableGroups ? `Available groups (main group only):\n${availableGroups}` : '',
497
- toolReliability ? `Tool reliability (recent):\n${toolReliability}` : '',
498
- behaviorNotes.length > 0 ? `Behavior notes:\n${behaviorNotes.join('\n')}` : '',
499
- params.maxToolSteps
500
- ? `You have a budget of ${params.maxToolSteps} tool steps per request. If a task is large, break your work into phases and always finish with a text summary of what you accomplished — never end on a tool call without a response.`
501
- : '',
502
- 'Be concise and helpful. When you use tools, summarize what happened rather than dumping raw output.'
503
- ].filter(Boolean).join('\n\n');
432
+ return buildSystemPrompt({
433
+ mode: 'full',
434
+ assistantName: params.assistantName,
435
+ messagingPlatform: params.messagingPlatform,
436
+ hostPlatform: params.hostPlatform,
437
+ timezone: params.timezone,
438
+ isScheduledTask: params.isScheduledTask,
439
+ taskId: params.taskId,
440
+ groupNotes: params.groupNotes,
441
+ globalNotes: params.globalNotes,
442
+ skillCatalog: params.skillCatalog,
443
+ memorySummary: params.memorySummary,
444
+ memoryFacts: params.memoryFacts,
445
+ sessionRecall: params.sessionRecall,
446
+ longTermRecall: params.longTermRecall,
447
+ userProfile: params.userProfile,
448
+ memoryStats: params.memoryStats,
449
+ availableGroups: params.availableGroups,
450
+ toolReliability: params.toolReliability,
451
+ behaviorConfig: params.behaviorConfig,
452
+ taskExtractionPack: params.taskExtractionPack,
453
+ responseQualityPack: params.responseQualityPack,
454
+ toolCallingPack: params.toolCallingPack,
455
+ toolOutcomePack: params.toolOutcomePack,
456
+ memoryPolicyPack: params.memoryPolicyPack,
457
+ memoryRecallPack: params.memoryRecallPack,
458
+ maxToolSteps: params.maxToolSteps,
459
+ browserEnabled: agentConfig.agent.browser.enabled,
460
+ promptPacksMaxChars: PROMPT_PACKS_MAX_CHARS,
461
+ promptPacksMaxDemos: PROMPT_PACKS_MAX_DEMOS,
462
+ trimLevel: params.trimLevel,
463
+ });
504
464
  }
505
465
 
506
466
  function loadAvailableGroups(): Array<{ jid: string; name: string; lastActivity: string; isRegistered: boolean }> {
@@ -728,14 +688,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
728
688
  const memoryModel = agent.models.memory;
729
689
  const assistantName = agent.assistantName;
730
690
  const config = getConfig(agentConfig);
731
- if (input.modelContextTokens && Number.isFinite(input.modelContextTokens)) {
732
- config.maxContextTokens = Math.min(config.maxContextTokens, input.modelContextTokens);
733
- const compactionTarget = input.modelContextTokens - config.maxOutputTokens;
734
- config.compactionTriggerTokens = Math.max(1000, Math.min(config.compactionTriggerTokens, compactionTarget));
735
- }
736
- if (input.modelMaxOutputTokens && Number.isFinite(input.modelMaxOutputTokens)) {
737
- config.maxOutputTokens = input.modelMaxOutputTokens;
738
- }
691
+ const limits = resolveModelLimits(input, {
692
+ maxContextTokens: config.maxContextTokens,
693
+ maxOutputTokens: config.maxOutputTokens,
694
+ compactionTriggerTokens: config.compactionTriggerTokens,
695
+ maxContextMessageTokens: agent.context.maxContextMessageTokens,
696
+ });
697
+ config.maxContextTokens = limits.maxContextTokens;
698
+ config.compactionTriggerTokens = limits.compactionTriggerTokens;
699
+ const resolvedMaxOutputTokens = limits.maxOutputTokens; // may be undefined
700
+ const resolvedMaxContextMessageTokens = limits.maxContextMessageTokens;
739
701
  if (input.modelTemperature && Number.isFinite(input.modelTemperature)) {
740
702
  config.temperature = input.modelTemperature;
741
703
  }
@@ -749,7 +711,6 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
749
711
  const memoryExtractionMaxOutputTokens = agent.memory.extraction.maxOutputTokens;
750
712
  const memoryExtractScheduled = agent.memory.extractScheduled;
751
713
  const memoryArchiveSync = agent.memory.archiveSync;
752
- const maxContextMessageTokens = agent.context.maxContextMessageTokens;
753
714
 
754
715
  const openrouter = getCachedOpenRouter(apiKey, openrouterOptions);
755
716
  const tokenEstimate = resolveTokenEstimate(input, agentConfig);
@@ -763,6 +724,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
763
724
 
764
725
  const { ctx: sessionCtx, isNew } = createSessionContext(SESSION_ROOT, input.sessionId);
765
726
  const toolCalls: ToolCallRecord[] = [];
727
+ const toolOutputs: ToolResultRecord[] = [];
766
728
  let memoryItemsUpserted = 0;
767
729
  let memoryItemsExtracted = 0;
768
730
  const timings: { memory_extraction_ms?: number; tool_ms?: number } = {};
@@ -779,6 +741,9 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
779
741
  onToolCall: (call) => {
780
742
  toolCalls.push(call);
781
743
  },
744
+ onToolResult: (record) => {
745
+ toolOutputs.push(record);
746
+ },
782
747
  policy: input.toolPolicy
783
748
  });
784
749
 
@@ -812,6 +777,27 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
812
777
  }
813
778
  }
814
779
 
780
+ // Build schema-only tools (no execute functions) for SDK — prevents the SDK from
781
+ // auto-executing tools in its internal loop, which drops conversation context in
782
+ // follow-up API calls (makeFollowupRequest only sends model output + tool results,
783
+ // losing the original user messages). We run the tool loop ourselves instead.
784
+ const schemaTools = tools.map(t => {
785
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars, @typescript-eslint/no-explicit-any
786
+ const { execute, ...rest } = t.function as any;
787
+ return { type: t.type, function: rest };
788
+ }) as typeof tools;
789
+
790
+ // Map tool names → original execute functions (with policy/callback wrappers intact)
791
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
792
+ const toolExecutors = new Map<string, (args: any) => Promise<any>>();
793
+ for (const t of tools) {
794
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
795
+ const fn = (t.function as any).execute;
796
+ if (typeof fn === 'function') {
797
+ toolExecutors.set(t.function.name, fn);
798
+ }
799
+ }
800
+
815
801
  if (process.env.DOTCLAW_SELF_CHECK === '1') {
816
802
  try {
817
803
  const details = await runSelfCheck({ model });
@@ -858,8 +844,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
858
844
  appendHistory(sessionCtx, 'user', prompt);
859
845
  let history = loadHistory(sessionCtx);
860
846
 
847
+ if (agent.context.maxHistoryTurns > 0) {
848
+ history = limitHistoryTurns(history, agent.context.maxHistoryTurns);
849
+ }
850
+
851
+ // Dynamic context budget: if recentContextTokens is 0 (auto), allocate 60% of context window
852
+ const effectiveRecentTokens = config.recentContextTokens > 0
853
+ ? config.recentContextTokens
854
+ : Math.floor(config.maxContextTokens * 0.6);
861
855
  const tokenRatio = tokenEstimate.tokensPerChar > 0 ? (0.25 / tokenEstimate.tokensPerChar) : 1;
862
- const adjustedRecentTokens = Math.max(1000, Math.floor(config.recentContextTokens * tokenRatio));
856
+ const adjustedRecentTokens = Math.max(1000, Math.floor(effectiveRecentTokens * tokenRatio));
863
857
 
864
858
  const totalTokens = history.reduce(
865
859
  (sum, message) => sum + estimateTokensForModel(message.content, tokenEstimate.tokensPerChar) + tokenEstimate.tokensPerMessage,
@@ -871,14 +865,69 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
871
865
  log(`Compacting history: ${totalTokens} tokens`);
872
866
  archiveConversation(history, sessionCtx.state.summary || null, GROUP_DIR);
873
867
 
874
- const summaryUpdate = await updateMemorySummary({
875
- openrouter,
876
- model: summaryModel,
877
- existingSummary: sessionCtx.state.summary,
878
- existingFacts: sessionCtx.state.facts,
879
- newMessages: olderMessages,
880
- maxOutputTokens: config.summaryMaxOutputTokens
881
- });
868
+ // Multi-part compaction: split older messages into chunks, summarize each
869
+ const olderTokens = olderMessages.reduce(
870
+ (sum, m) => sum + estimateTokens(m.content), 0
871
+ );
872
+ const MULTI_PART_THRESHOLD = 40_000; // Use multi-part for large histories
873
+ const numParts = olderTokens > MULTI_PART_THRESHOLD ? Math.min(3, Math.ceil(olderTokens / MULTI_PART_THRESHOLD)) : 1;
874
+
875
+ let summaryUpdate: { summary: string; facts: string[] } | null = null;
876
+
877
+ if (numParts > 1) {
878
+ log(`Multi-part compaction: ${numParts} parts`);
879
+ const chunks = splitMessagesByTokenShare(olderMessages, numParts);
880
+ const partSummaries: string[] = [];
881
+ const mergedFacts: string[] = [...sessionCtx.state.facts];
882
+
883
+ for (let i = 0; i < chunks.length; i++) {
884
+ const partPrompt = buildMultiPartSummaryPrompt(
885
+ sessionCtx.state.summary,
886
+ mergedFacts,
887
+ chunks[i],
888
+ i,
889
+ chunks.length,
890
+ partSummaries
891
+ );
892
+ const partResult = openrouter.callModel({
893
+ model: summaryModel,
894
+ instructions: partPrompt.instructions,
895
+ input: partPrompt.input,
896
+ maxOutputTokens: config.summaryMaxOutputTokens,
897
+ temperature: 0.1,
898
+ reasoning: { effort: 'low' as const }
899
+ });
900
+ const { text: partText } = await getResponseText(partResult, `summary_part_${i}`);
901
+ const parsed = parseSummaryResponse(partText);
902
+ if (parsed) {
903
+ partSummaries.push(parsed.summary);
904
+ // Merge facts, deduplicating by content
905
+ const existingSet = new Set(mergedFacts.map(f => f.toLowerCase()));
906
+ for (const fact of parsed.facts) {
907
+ if (!existingSet.has(fact.toLowerCase())) {
908
+ mergedFacts.push(fact);
909
+ existingSet.add(fact.toLowerCase());
910
+ }
911
+ }
912
+ }
913
+ }
914
+
915
+ if (partSummaries.length > 0) {
916
+ summaryUpdate = {
917
+ summary: partSummaries.join(' '),
918
+ facts: mergedFacts
919
+ };
920
+ }
921
+ } else {
922
+ summaryUpdate = await updateMemorySummary({
923
+ openrouter,
924
+ model: summaryModel,
925
+ existingSummary: sessionCtx.state.summary,
926
+ existingFacts: sessionCtx.state.facts,
927
+ newMessages: olderMessages,
928
+ maxOutputTokens: config.summaryMaxOutputTokens
929
+ });
930
+ }
882
931
 
883
932
  if (summaryUpdate) {
884
933
  sessionCtx.state.summary = summaryUpdate.summary;
@@ -980,7 +1029,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
980
1029
  if (memoryPolicyResult) promptPackVersions['memory-policy'] = memoryPolicyResult.pack.version;
981
1030
  if (memoryRecallResult) promptPackVersions['memory-recall'] = memoryRecallResult.pack.version;
982
1031
 
983
- const buildInstructions = () => buildSystemInstructions({
1032
+ const resolveInstructions = (trimLevel = 0) => buildInstructions({
984
1033
  assistantName,
985
1034
  groupNotes: claudeNotes.group,
986
1035
  globalNotes: claudeNotes.global,
@@ -1005,16 +1054,35 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1005
1054
  toolOutcomePack: toolOutcomeResult?.pack || null,
1006
1055
  memoryPolicyPack: memoryPolicyResult?.pack || null,
1007
1056
  memoryRecallPack: memoryRecallResult?.pack || null,
1008
- maxToolSteps
1057
+ maxToolSteps,
1058
+ trimLevel
1009
1059
  });
1010
1060
 
1011
1061
  const buildContext = () => {
1012
- const resolvedInstructions = buildInstructions();
1013
- const resolvedInstructionTokens = estimateTokensForModel(resolvedInstructions, tokenEstimate.tokensPerChar);
1014
- const resolvedMaxContext = Math.max(config.maxContextTokens - config.maxOutputTokens - resolvedInstructionTokens, 2000);
1062
+ // System prompt budget: 25% of context window
1063
+ const maxSystemPromptTokens = Math.floor(config.maxContextTokens * 0.25);
1064
+ const MAX_TRIM_LEVEL = 4;
1065
+
1066
+ let resolvedInstructions = '';
1067
+ let resolvedInstructionTokens = 0;
1068
+ let trimLevel = 0;
1069
+
1070
+ // Progressive trimming loop: build prompt, check size, trim if needed
1071
+ for (trimLevel = 0; trimLevel <= MAX_TRIM_LEVEL; trimLevel++) {
1072
+ resolvedInstructions = resolveInstructions(trimLevel);
1073
+ resolvedInstructionTokens = estimateTokensForModel(resolvedInstructions, tokenEstimate.tokensPerChar);
1074
+ if (resolvedInstructionTokens <= maxSystemPromptTokens || trimLevel === MAX_TRIM_LEVEL) {
1075
+ break;
1076
+ }
1077
+ log(`System prompt ${resolvedInstructionTokens} tokens exceeds budget ${maxSystemPromptTokens}, trimming (level ${trimLevel + 1})`);
1078
+ }
1079
+
1080
+ const outputReserve = resolvedMaxOutputTokens || Math.floor(config.maxContextTokens * 0.25);
1081
+ const resolvedMaxContext = Math.max(config.maxContextTokens - outputReserve - resolvedInstructionTokens, 2000);
1015
1082
  const resolvedAdjusted = Math.max(1000, Math.floor(resolvedMaxContext * tokenRatio));
1016
1083
  let { recentMessages: contextMessages } = splitRecentHistory(recentMessages, resolvedAdjusted, 6);
1017
- contextMessages = clampContextMessages(contextMessages, tokenEstimate.tokensPerChar, maxContextMessageTokens);
1084
+ contextMessages = clampContextMessages(contextMessages, tokenEstimate.tokensPerChar, resolvedMaxContextMessageTokens);
1085
+ contextMessages = pruneContextMessages(contextMessages, agent.context.contextPruning);
1018
1086
  return {
1019
1087
  instructions: resolvedInstructions,
1020
1088
  instructionsTokens: resolvedInstructionTokens,
@@ -1065,61 +1133,171 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1065
1133
  let lastError: unknown = null;
1066
1134
  for (let attempt = 0; attempt < modelChain.length; attempt++) {
1067
1135
  currentModel = modelChain[attempt];
1136
+ // Skip models in cooldown (unless it's the last option)
1137
+ if (isModelInCooldown(currentModel) && attempt < modelChain.length - 1) {
1138
+ log(`Skipping ${currentModel} (in cooldown)`);
1139
+ continue;
1140
+ }
1068
1141
  if (attempt > 0) log(`Fallback ${attempt}: trying ${currentModel}`);
1069
1142
 
1070
1143
  try {
1071
1144
  log(`Starting OpenRouter call (${currentModel})...`);
1072
1145
  const startedAt = Date.now();
1073
- const result = openrouter.callModel({
1146
+ // ── Custom tool execution loop ──────────────────────────────────
1147
+ // The SDK's built-in tool loop (executeToolsIfNeeded) drops conversation
1148
+ // context in follow-up API calls — it only sends [function_calls, function_call_outputs]
1149
+ // without the original user messages or previousResponseId. This causes models to
1150
+ // produce empty text after tools that return minimal results (e.g. sequential-thinking).
1151
+ // We use schema-only tools (no execute functions) so the SDK returns tool calls
1152
+ // without auto-executing, then run the loop ourselves with full context.
1153
+
1154
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1155
+ let conversationInput: any[] = [...contextInput];
1156
+ let step = 0;
1157
+ let streamSeq = 0;
1158
+
1159
+ // Helper to write a stream chunk
1160
+ const writeStreamChunk = (text: string) => {
1161
+ if (!input.streamDir) return;
1162
+ streamSeq++;
1163
+ const chunkFile = path.join(input.streamDir, `chunk_${String(streamSeq).padStart(6, '0')}.txt`);
1164
+ const tmpFile = chunkFile + '.tmp';
1165
+ try {
1166
+ fs.writeFileSync(tmpFile, text);
1167
+ fs.renameSync(tmpFile, chunkFile);
1168
+ } catch (writeErr) {
1169
+ log(`Stream write error at seq ${streamSeq}: ${writeErr instanceof Error ? writeErr.message : String(writeErr)}`);
1170
+ }
1171
+ };
1172
+
1173
+ // Helper to finalize streaming
1174
+ const finalizeStream = () => {
1175
+ if (!input.streamDir) return;
1176
+ try {
1177
+ if (!fs.existsSync(path.join(input.streamDir, 'done'))) {
1178
+ fs.writeFileSync(path.join(input.streamDir, 'done'), '');
1179
+ }
1180
+ } catch { /* ignore */ }
1181
+ };
1182
+
1183
+ // Initial call — uses streaming for real-time delivery
1184
+ const initialResult = openrouter.callModel({
1074
1185
  model: currentModel,
1075
1186
  instructions: resolvedInstructions,
1076
- input: contextInput,
1077
- tools,
1078
- stopWhen: stepCountIs(maxToolSteps),
1079
- maxOutputTokens: config.maxOutputTokens,
1187
+ input: conversationInput,
1188
+ tools: schemaTools,
1189
+ maxOutputTokens: resolvedMaxOutputTokens,
1080
1190
  temperature: config.temperature,
1081
1191
  reasoning: resolvedReasoning
1082
1192
  });
1083
1193
 
1084
- // Stream text chunks to IPC if streamDir is provided
1194
+ // Stream text from initial response
1085
1195
  if (input.streamDir) {
1086
- let seq = 0;
1087
1196
  try {
1088
1197
  fs.mkdirSync(input.streamDir, { recursive: true });
1089
- for await (const delta of result.getTextStream()) {
1090
- seq++;
1091
- const chunkFile = path.join(input.streamDir, `chunk_${String(seq).padStart(6, '0')}.txt`);
1092
- const tmpFile = chunkFile + '.tmp';
1093
- fs.writeFileSync(tmpFile, delta);
1094
- fs.renameSync(tmpFile, chunkFile);
1198
+ for await (const delta of initialResult.getTextStream()) {
1199
+ writeStreamChunk(delta);
1095
1200
  }
1096
- fs.writeFileSync(path.join(input.streamDir, 'done'), '');
1097
1201
  } catch (streamErr) {
1098
1202
  log(`Stream error: ${streamErr instanceof Error ? streamErr.message : String(streamErr)}`);
1099
1203
  try { fs.writeFileSync(path.join(input.streamDir, 'error'), streamErr instanceof Error ? streamErr.message : String(streamErr)); } catch { /* ignore */ }
1100
1204
  }
1101
1205
  }
1102
1206
 
1103
- latencyMs = Date.now() - startedAt;
1207
+ // Get initial response (no auto-execution since schemaTools have no execute fns)
1208
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1209
+ let lastResponse: any;
1210
+ try {
1211
+ lastResponse = await initialResult.getResponse();
1212
+ } catch (err) {
1213
+ const message = err instanceof Error ? err.message : String(err);
1214
+ log(`Initial getResponse failed: ${message}`);
1215
+ finalizeStream();
1216
+ throw err;
1217
+ }
1104
1218
 
1105
- const completionResult = await getResponseText(result, 'completion');
1106
- responseText = completionResult.text;
1219
+ responseText = extractTextFromApiResponse(lastResponse);
1220
+ let pendingCalls = extractFunctionCalls(lastResponse);
1221
+
1222
+ // Tool execution loop — execute tools ourselves, include full context in follow-ups
1223
+ while (pendingCalls.length > 0 && step < maxToolSteps) {
1224
+ log(`Step ${step}: executing ${pendingCalls.length} tool call(s): ${pendingCalls.map(c => c.name).join(', ')}`);
1225
+
1226
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1227
+ const toolResults: any[] = [];
1228
+ for (const fc of pendingCalls) {
1229
+ const executor = toolExecutors.get(fc.name);
1230
+ if (!executor) {
1231
+ log(`Unknown tool: ${fc.name}`);
1232
+ toolResults.push({
1233
+ type: 'function_call_output',
1234
+ callId: fc.id,
1235
+ output: JSON.stringify({ error: `Unknown tool: ${fc.name}` })
1236
+ });
1237
+ step++;
1238
+ continue;
1239
+ }
1107
1240
 
1108
- const toolCallsFromModel = await result.getToolCalls();
1109
- if (toolCallsFromModel.length > 0) {
1110
- log(`Model made ${toolCallsFromModel.length} tool call(s): ${toolCallsFromModel.map(t => t.name).join(', ')}`);
1111
- }
1112
- if (!responseText || !responseText.trim()) {
1113
- if (completionResult.error) {
1114
- log(`Tool execution failed: ${completionResult.error}`);
1115
- responseText = `Something went wrong while processing your request: ${completionResult.error}. Please try again.`;
1116
- } else if (toolCallsFromModel.length > 0) {
1117
- responseText = 'I started running tool calls but did not get a final response. If you want me to continue, please ask a narrower subtask or say "continue".';
1118
- } else {
1119
- log(`Warning: Model returned empty/whitespace response. tool calls: ${toolCallsFromModel.length}`);
1241
+ try {
1242
+ // Calling the wrapped execute fires onToolCall/onToolResult callbacks
1243
+ const result = await executor(fc.arguments);
1244
+ toolResults.push({
1245
+ type: 'function_call_output',
1246
+ callId: fc.id,
1247
+ output: JSON.stringify(result)
1248
+ });
1249
+ } catch (err) {
1250
+ const error = err instanceof Error ? err.message : String(err);
1251
+ toolResults.push({
1252
+ type: 'function_call_output',
1253
+ callId: fc.id,
1254
+ output: JSON.stringify({ error })
1255
+ });
1256
+ }
1257
+ step++;
1120
1258
  }
1259
+
1260
+ // Build follow-up input with FULL conversation context:
1261
+ // original messages + model output + tool results (accumulated each round)
1262
+ conversationInput = [...conversationInput, ...lastResponse.output, ...toolResults];
1263
+
1264
+ // Follow-up call with complete context — model sees the full conversation
1265
+ const followupResult = openrouter.callModel({
1266
+ model: currentModel,
1267
+ instructions: resolvedInstructions,
1268
+ input: conversationInput,
1269
+ tools: schemaTools,
1270
+ maxOutputTokens: resolvedMaxOutputTokens,
1271
+ temperature: config.temperature,
1272
+ reasoning: resolvedReasoning
1273
+ });
1274
+
1275
+ try {
1276
+ lastResponse = await followupResult.getResponse();
1277
+ } catch (err) {
1278
+ const message = err instanceof Error ? err.message : String(err);
1279
+ log(`Follow-up getResponse failed at step ${step}: ${message}`);
1280
+ break;
1281
+ }
1282
+
1283
+ const followupText = extractTextFromApiResponse(lastResponse);
1284
+ if (followupText) {
1285
+ responseText = followupText;
1286
+ writeStreamChunk(followupText);
1287
+ }
1288
+
1289
+ pendingCalls = extractFunctionCalls(lastResponse);
1290
+ }
1291
+
1292
+ finalizeStream();
1293
+ latencyMs = Date.now() - startedAt;
1294
+
1295
+ if (responseText && responseText.trim()) {
1296
+ log(`Model returned text response (${responseText.length} chars, ${step} tool steps)`);
1297
+ } else if (toolCalls.length > 0) {
1298
+ log(`Warning: Model returned empty response after ${toolCalls.length} tool call(s) and ${step} steps`);
1121
1299
  } else {
1122
- log(`Model returned text response (${responseText.length} chars)`);
1300
+ log(`Warning: Model returned empty/whitespace response`);
1123
1301
  }
1124
1302
 
1125
1303
  completionTokens = estimateTokensForModel(responseText || '', tokenEstimate.tokensPerChar);
@@ -1128,9 +1306,12 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1128
1306
  break; // Success
1129
1307
  } catch (err) {
1130
1308
  lastError = err;
1131
- if (classifyError(err) && attempt < modelChain.length - 1) {
1132
- log(`${currentModel} failed (${classifyError(err)}): ${err instanceof Error ? err.message : err}`);
1133
- continue;
1309
+ if (classifyError(err)) {
1310
+ cooldownModel(currentModel, err);
1311
+ if (attempt < modelChain.length - 1) {
1312
+ log(`${currentModel} failed (${classifyError(err)}): ${err instanceof Error ? err.message : err}`);
1313
+ continue;
1314
+ }
1134
1315
  }
1135
1316
  throw err; // Non-retryable or last model — propagate
1136
1317
  }
@@ -1162,6 +1343,14 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1162
1343
  };
1163
1344
  }
1164
1345
 
1346
+ // Parse reply tags from response before saving to history
1347
+ let replyToId: string | undefined;
1348
+ if (responseText) {
1349
+ const parsed = parseReplyTags(responseText);
1350
+ responseText = parsed.cleanText;
1351
+ replyToId = parsed.replyToId;
1352
+ }
1353
+
1165
1354
  appendHistory(sessionCtx, 'assistant', responseText || '');
1166
1355
  history = loadHistory(sessionCtx);
1167
1356
 
@@ -1221,7 +1410,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1221
1410
  if (memoryExtractionEnabled && isDaemon && (!input.isScheduledTask || memoryExtractScheduled)) {
1222
1411
  // Fire-and-forget in daemon mode; skip entirely in ephemeral mode
1223
1412
  void runMemoryExtraction().catch((err) => {
1224
- log(`Memory extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1413
+ const errMsg = err instanceof Error ? err.message : String(err);
1414
+ log(`Memory extraction failed: ${errMsg}`);
1415
+ // Write error to IPC status file so host can detect the failure
1416
+ try {
1417
+ const statusPath = path.join(IPC_DIR, 'memory_extraction_error.json');
1418
+ fs.writeFileSync(statusPath, JSON.stringify({
1419
+ error: errMsg,
1420
+ timestamp: new Date().toISOString(),
1421
+ }));
1422
+ } catch { /* best-effort status write */ }
1225
1423
  });
1226
1424
  }
1227
1425
 
@@ -1255,7 +1453,8 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1255
1453
  memory_items_extracted: memoryItemsExtracted,
1256
1454
  timings: Object.keys(timings).length > 0 ? timings : undefined,
1257
1455
  tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
1258
- latency_ms: latencyMs
1456
+ latency_ms: latencyMs,
1457
+ replyToId
1259
1458
  };
1260
1459
  }
1261
1460