@dotsetlabs/dotclaw 1.9.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/.env.example +6 -0
  2. package/README.md +13 -8
  3. package/config-examples/groups/global/CLAUDE.md +6 -14
  4. package/config-examples/groups/main/CLAUDE.md +8 -39
  5. package/config-examples/runtime.json +16 -122
  6. package/config-examples/tool-policy.json +2 -15
  7. package/container/agent-runner/package-lock.json +258 -0
  8. package/container/agent-runner/package.json +2 -1
  9. package/container/agent-runner/src/agent-config.ts +62 -47
  10. package/container/agent-runner/src/browser.ts +180 -0
  11. package/container/agent-runner/src/container-protocol.ts +4 -9
  12. package/container/agent-runner/src/id.ts +3 -2
  13. package/container/agent-runner/src/index.ts +331 -846
  14. package/container/agent-runner/src/ipc.ts +3 -33
  15. package/container/agent-runner/src/mcp-client.ts +222 -0
  16. package/container/agent-runner/src/mcp-registry.ts +163 -0
  17. package/container/agent-runner/src/skill-loader.ts +375 -0
  18. package/container/agent-runner/src/tools.ts +154 -184
  19. package/container/agent-runner/src/tts.ts +61 -0
  20. package/dist/admin-commands.d.ts.map +1 -1
  21. package/dist/admin-commands.js +12 -0
  22. package/dist/admin-commands.js.map +1 -1
  23. package/dist/agent-execution.d.ts +5 -9
  24. package/dist/agent-execution.d.ts.map +1 -1
  25. package/dist/agent-execution.js +32 -20
  26. package/dist/agent-execution.js.map +1 -1
  27. package/dist/cli.js +61 -16
  28. package/dist/cli.js.map +1 -1
  29. package/dist/config.d.ts +1 -4
  30. package/dist/config.d.ts.map +1 -1
  31. package/dist/config.js +2 -5
  32. package/dist/config.js.map +1 -1
  33. package/dist/container-protocol.d.ts +4 -9
  34. package/dist/container-protocol.d.ts.map +1 -1
  35. package/dist/container-runner.d.ts.map +1 -1
  36. package/dist/container-runner.js +3 -8
  37. package/dist/container-runner.js.map +1 -1
  38. package/dist/dashboard.d.ts +5 -6
  39. package/dist/dashboard.d.ts.map +1 -1
  40. package/dist/dashboard.js +12 -60
  41. package/dist/dashboard.js.map +1 -1
  42. package/dist/db.d.ts +1 -59
  43. package/dist/db.d.ts.map +1 -1
  44. package/dist/db.js +41 -262
  45. package/dist/db.js.map +1 -1
  46. package/dist/error-messages.d.ts.map +1 -1
  47. package/dist/error-messages.js +5 -1
  48. package/dist/error-messages.js.map +1 -1
  49. package/dist/hooks.d.ts +7 -0
  50. package/dist/hooks.d.ts.map +1 -0
  51. package/dist/hooks.js +93 -0
  52. package/dist/hooks.js.map +1 -0
  53. package/dist/id.d.ts.map +1 -1
  54. package/dist/id.js +2 -1
  55. package/dist/id.js.map +1 -1
  56. package/dist/index.js +673 -2790
  57. package/dist/index.js.map +1 -1
  58. package/dist/ipc-dispatcher.d.ts +26 -0
  59. package/dist/ipc-dispatcher.d.ts.map +1 -0
  60. package/dist/ipc-dispatcher.js +861 -0
  61. package/dist/ipc-dispatcher.js.map +1 -0
  62. package/dist/local-embeddings.d.ts +7 -0
  63. package/dist/local-embeddings.d.ts.map +1 -0
  64. package/dist/local-embeddings.js +60 -0
  65. package/dist/local-embeddings.js.map +1 -0
  66. package/dist/maintenance.d.ts.map +1 -1
  67. package/dist/maintenance.js +3 -7
  68. package/dist/maintenance.js.map +1 -1
  69. package/dist/memory-embeddings.d.ts +1 -1
  70. package/dist/memory-embeddings.d.ts.map +1 -1
  71. package/dist/memory-embeddings.js +59 -31
  72. package/dist/memory-embeddings.js.map +1 -1
  73. package/dist/memory-store.d.ts +0 -10
  74. package/dist/memory-store.d.ts.map +1 -1
  75. package/dist/memory-store.js +11 -27
  76. package/dist/memory-store.js.map +1 -1
  77. package/dist/message-pipeline.d.ts +47 -0
  78. package/dist/message-pipeline.d.ts.map +1 -0
  79. package/dist/message-pipeline.js +652 -0
  80. package/dist/message-pipeline.js.map +1 -0
  81. package/dist/metrics.d.ts +7 -10
  82. package/dist/metrics.d.ts.map +1 -1
  83. package/dist/metrics.js +2 -33
  84. package/dist/metrics.js.map +1 -1
  85. package/dist/model-registry.d.ts +0 -14
  86. package/dist/model-registry.d.ts.map +1 -1
  87. package/dist/model-registry.js +0 -36
  88. package/dist/model-registry.js.map +1 -1
  89. package/dist/paths.d.ts.map +1 -1
  90. package/dist/paths.js +2 -0
  91. package/dist/paths.js.map +1 -1
  92. package/dist/providers/discord/discord-format.d.ts +16 -0
  93. package/dist/providers/discord/discord-format.d.ts.map +1 -0
  94. package/dist/providers/discord/discord-format.js +153 -0
  95. package/dist/providers/discord/discord-format.js.map +1 -0
  96. package/dist/providers/discord/discord-provider.d.ts +50 -0
  97. package/dist/providers/discord/discord-provider.d.ts.map +1 -0
  98. package/dist/providers/discord/discord-provider.js +607 -0
  99. package/dist/providers/discord/discord-provider.js.map +1 -0
  100. package/dist/providers/discord/index.d.ts +4 -0
  101. package/dist/providers/discord/index.d.ts.map +1 -0
  102. package/dist/providers/discord/index.js +3 -0
  103. package/dist/providers/discord/index.js.map +1 -0
  104. package/dist/providers/registry.d.ts +14 -0
  105. package/dist/providers/registry.d.ts.map +1 -0
  106. package/dist/providers/registry.js +49 -0
  107. package/dist/providers/registry.js.map +1 -0
  108. package/dist/providers/telegram/index.d.ts +4 -0
  109. package/dist/providers/telegram/index.d.ts.map +1 -0
  110. package/dist/providers/telegram/index.js +3 -0
  111. package/dist/providers/telegram/index.js.map +1 -0
  112. package/dist/providers/telegram/telegram-format.d.ts +3 -0
  113. package/dist/providers/telegram/telegram-format.d.ts.map +1 -0
  114. package/dist/providers/telegram/telegram-format.js +215 -0
  115. package/dist/providers/telegram/telegram-format.js.map +1 -0
  116. package/dist/providers/telegram/telegram-provider.d.ts +51 -0
  117. package/dist/providers/telegram/telegram-provider.d.ts.map +1 -0
  118. package/dist/providers/telegram/telegram-provider.js +824 -0
  119. package/dist/providers/telegram/telegram-provider.js.map +1 -0
  120. package/dist/providers/types.d.ts +107 -0
  121. package/dist/providers/types.d.ts.map +1 -0
  122. package/dist/providers/types.js +2 -0
  123. package/dist/providers/types.js.map +1 -0
  124. package/dist/request-router.d.ts +9 -31
  125. package/dist/request-router.d.ts.map +1 -1
  126. package/dist/request-router.js +12 -142
  127. package/dist/request-router.js.map +1 -1
  128. package/dist/runtime-config.d.ts +79 -101
  129. package/dist/runtime-config.d.ts.map +1 -1
  130. package/dist/runtime-config.js +140 -208
  131. package/dist/runtime-config.js.map +1 -1
  132. package/dist/skill-manager.d.ts +39 -0
  133. package/dist/skill-manager.d.ts.map +1 -0
  134. package/dist/skill-manager.js +286 -0
  135. package/dist/skill-manager.js.map +1 -0
  136. package/dist/streaming.d.ts +58 -0
  137. package/dist/streaming.d.ts.map +1 -0
  138. package/dist/streaming.js +196 -0
  139. package/dist/streaming.js.map +1 -0
  140. package/dist/task-scheduler.d.ts.map +1 -1
  141. package/dist/task-scheduler.js +11 -45
  142. package/dist/task-scheduler.js.map +1 -1
  143. package/dist/tool-policy.d.ts.map +1 -1
  144. package/dist/tool-policy.js +13 -5
  145. package/dist/tool-policy.js.map +1 -1
  146. package/dist/transcription.d.ts +8 -0
  147. package/dist/transcription.d.ts.map +1 -0
  148. package/dist/transcription.js +174 -0
  149. package/dist/transcription.js.map +1 -0
  150. package/dist/types.d.ts +2 -50
  151. package/dist/types.d.ts.map +1 -1
  152. package/package.json +15 -4
  153. package/scripts/bootstrap.js +40 -4
  154. package/scripts/configure.js +129 -7
  155. package/scripts/doctor.js +30 -4
  156. package/scripts/init.js +13 -6
  157. package/scripts/install.sh +1 -1
  158. package/config-examples/plugin-http.json +0 -18
  159. package/container/skills/agent-browser.md +0 -159
  160. package/dist/background-job-classifier.d.ts +0 -20
  161. package/dist/background-job-classifier.d.ts.map +0 -1
  162. package/dist/background-job-classifier.js +0 -145
  163. package/dist/background-job-classifier.js.map +0 -1
  164. package/dist/background-jobs.d.ts +0 -56
  165. package/dist/background-jobs.d.ts.map +0 -1
  166. package/dist/background-jobs.js +0 -550
  167. package/dist/background-jobs.js.map +0 -1
  168. package/dist/planner-probe.d.ts +0 -14
  169. package/dist/planner-probe.d.ts.map +0 -1
  170. package/dist/planner-probe.js +0 -97
  171. package/dist/planner-probe.js.map +0 -1
@@ -7,7 +7,7 @@ import fs from 'fs';
7
7
  import path from 'path';
8
8
  import { fileURLToPath } from 'url';
9
9
  import { OpenRouter, stepCountIs } from '@openrouter/sdk';
10
- import { createTools, ToolCallRecord } from './tools.js';
10
+ import { createTools, discoverMcpTools, ToolCallRecord } from './tools.js';
11
11
  import { createIpcHandlers } from './ipc.js';
12
12
  import { loadAgentConfig } from './agent-config.js';
13
13
  import { OUTPUT_START_MARKER, OUTPUT_END_MARKER, type ContainerInput, type ContainerOutput } from './container-protocol.js';
@@ -27,6 +27,7 @@ import {
27
27
  Message
28
28
  } from './memory.js';
29
29
  import { loadPromptPackWithCanary, formatPromptPack, PromptPack } from './prompt-packs.js';
30
+ import { buildSkillCatalog, formatSkillCatalog, type SkillCatalog } from './skill-loader.js';
30
31
 
31
32
  type OpenRouterResult = ReturnType<OpenRouter['callModel']>;
32
33
 
@@ -40,9 +41,6 @@ const AVAILABLE_GROUPS_PATH = '/workspace/ipc/available_groups.json';
40
41
  const GROUP_CLAUDE_PATH = path.join(GROUP_DIR, 'CLAUDE.md');
41
42
  const GLOBAL_CLAUDE_PATH = path.join(GLOBAL_DIR, 'CLAUDE.md');
42
43
  const CLAUDE_NOTES_MAX_CHARS = 4000;
43
- const SKILL_NOTES_MAX_FILES = 16;
44
- const SKILL_NOTES_MAX_CHARS = 3000;
45
- const SKILL_NOTES_TOTAL_MAX_CHARS = 18_000;
46
44
 
47
45
  const agentConfig = loadAgentConfig();
48
46
  const agent = agentConfig.agent;
@@ -61,10 +59,31 @@ function getCachedOpenRouter(apiKey: string, options: ReturnType<typeof getOpenR
61
59
  if (cachedOpenRouter && cachedOpenRouterKey === apiKey && cachedOpenRouterOptions === optionsKey) {
62
60
  return cachedOpenRouter;
63
61
  }
64
- cachedOpenRouter = new OpenRouter({
62
+ const client = new OpenRouter({
65
63
  apiKey,
66
64
  ...options
67
65
  });
66
+
67
+ // The SDK accepts httpReferer/xTitle in the constructor but never injects
68
+ // them as HTTP headers in the Responses API path (betaResponsesSend).
69
+ // Wrap callModel to inject them on every request.
70
+ const { httpReferer, xTitle } = options;
71
+ if (httpReferer || xTitle) {
72
+ const extraHeaders: Record<string, string> = {};
73
+ if (httpReferer) extraHeaders['HTTP-Referer'] = httpReferer;
74
+ if (xTitle) extraHeaders['X-Title'] = xTitle;
75
+
76
+ const originalCallModel = client.callModel.bind(client);
77
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
78
+ client.callModel = (request: any, opts?: any) => {
79
+ return originalCallModel(request, {
80
+ ...opts,
81
+ headers: { ...extraHeaders, ...(opts?.headers as Record<string, string>) }
82
+ });
83
+ };
84
+ }
85
+
86
+ cachedOpenRouter = client;
68
87
  cachedOpenRouterKey = apiKey;
69
88
  cachedOpenRouterOptions = optionsKey;
70
89
  return cachedOpenRouter;
@@ -74,168 +93,30 @@ function log(message: string): void {
74
93
  console.error(`[agent-runner] ${message}`);
75
94
  }
76
95
 
77
- // ── Response extraction pipeline ─────────────────────────────────────
78
- // OpenRouter SDK v0.3.x returns raw response IDs (gen-*, resp-*, etc.) instead
79
- // of text for fast reasoning models (GPT-5-mini/nano). Reasoning tokens consume
80
- // the output budget, leaving nothing for actual text. This multi-layer pipeline
81
- // works around that:
82
- // 1. isLikelyResponseId — detect leaked IDs so we never surface them
83
- // 2. extractTextFromRawResponse — walk raw response fields ourselves
84
- // 3. getTextWithFallback — try SDK getText(), fall back to raw extraction
85
- // 4. chatCompletionsFallback — retry via /chat/completions when all else fails
86
- // Remove this pipeline once the SDK reliably returns text for reasoning models.
87
-
88
- const RESPONSE_ID_PREFIXES = ['gen-', 'resp-', 'resp_', 'chatcmpl-', 'msg_'];
89
-
90
- function isLikelyResponseId(value: string): boolean {
91
- const trimmed = value.trim();
92
- if (!trimmed || trimmed.includes(' ') || trimmed.includes('\n')) return false;
93
- return RESPONSE_ID_PREFIXES.some(prefix => trimmed.startsWith(prefix));
96
+ function classifyError(err: unknown): 'retryable' | null {
97
+ const msg = err instanceof Error ? err.message : String(err);
98
+ const lower = msg.toLowerCase();
99
+ if (/429|rate.?limit/.test(lower)) return 'retryable';
100
+ if (/\b5\d{2}\b/.test(msg) || /server error|bad gateway|unavailable/.test(lower)) return 'retryable';
101
+ if (/timeout|timed out|deadline/.test(lower)) return 'retryable';
102
+ if (/model.?not.?available|no endpoints|provider error/.test(lower)) return 'retryable';
103
+ return null;
94
104
  }
95
105
 
96
- function isValidText(value: unknown): value is string {
97
- return typeof value === 'string' && value.trim().length > 0 && !isLikelyResponseId(value);
98
- }
99
-
100
- function extractTextFromRawResponse(response: unknown): string {
101
- if (!response || typeof response !== 'object') return '';
102
- const record = response as Record<string, unknown>;
103
-
104
- // 1. SDK-parsed camelCase field
105
- if (isValidText(record.outputText)) return record.outputText;
106
-
107
- // 2. Raw API snake_case field
108
- if (isValidText(record.output_text)) return record.output_text;
109
-
110
- // 3. Walk response.output[] for message/output_text items
111
- if (Array.isArray(record.output)) {
112
- const parts: string[] = [];
113
- for (const item of record.output) {
114
- if (!item || typeof item !== 'object') continue;
115
- const typed = item as { type?: string; content?: unknown; text?: string };
116
- if (typed.type === 'message' && Array.isArray(typed.content)) {
117
- for (const part of typed.content as Array<{ type?: string; text?: string }>) {
118
- if (part?.type === 'output_text' && isValidText(part.text)) {
119
- parts.push(part.text);
120
- }
121
- }
122
- } else if (typed.type === 'output_text' && isValidText(typed.text)) {
123
- parts.push(typed.text);
124
- }
125
- }
126
- const joined = parts.join('');
127
- if (joined.trim()) return joined;
128
- }
129
-
130
- // 4. OpenAI chat completions compat
131
- if (Array.isArray(record.choices) && record.choices.length > 0) {
132
- const choice = record.choices[0] as { message?: { content?: unknown } } | null | undefined;
133
- if (choice?.message && isValidText(choice.message.content)) {
134
- return choice.message.content;
135
- }
136
- }
137
-
138
- return '';
139
- }
106
+ // ── Response text extraction ─────────────────────────────────────────
140
107
 
141
- async function getTextWithFallback(result: OpenRouterResult, context: string): Promise<string> {
142
- // 1. Try the SDK's proper getText() first — this handles tool execution and
143
- // extracts text from the final response via the SDK's own logic.
108
+ async function getResponseText(result: OpenRouterResult, context: string): Promise<{ text: string; error?: string }> {
144
109
  try {
145
110
  const text = await result.getText();
146
- if (isValidText(text)) {
147
- return text;
148
- }
149
- if (text && isLikelyResponseId(text)) {
150
- log(`Ignored response id from getText (${context}): ${String(text).slice(0, 60)}`);
111
+ if (typeof text === 'string' && text.trim()) {
112
+ return { text };
151
113
  }
152
114
  } catch (err) {
153
- log(`getText failed (${context}): ${err instanceof Error ? err.message : String(err)}`);
115
+ const message = err instanceof Error ? err.message : String(err);
116
+ log(`getText failed (${context}): ${message}`);
117
+ return { text: '', error: message };
154
118
  }
155
-
156
- // 2. Fall back to raw response extraction — walk known fields ourselves
157
- try {
158
- const response = await result.getResponse();
159
- const fallbackText = extractTextFromRawResponse(response);
160
- if (fallbackText) {
161
- log(`Recovered text from raw response (${context})`);
162
- return fallbackText;
163
- }
164
- const r = response as Record<string, unknown>;
165
- const outputLen = Array.isArray(r.output) ? (r.output as unknown[]).length : 0;
166
- log(`No text in raw response (${context}): id=${String(r.id ?? 'none').slice(0, 40)} status=${String(r.status ?? '?')} outputs=${outputLen}`);
167
- } catch (err) {
168
- log(`Raw response extraction failed (${context}): ${err instanceof Error ? err.message : String(err)}`);
169
- }
170
-
171
- // 3. Never return a response ID
172
- return '';
173
- }
174
-
175
- /**
176
- * Direct Chat Completions API fallback.
177
- * When the Responses API returns a gen-ID instead of text (common with fast
178
- * models like gpt-5-nano/mini via OpenRouter), retry using the standard
179
- * /chat/completions endpoint which reliably returns text content.
180
- */
181
- async function chatCompletionsFallback(params: {
182
- model: string;
183
- instructions: string;
184
- messages: Array<{ role: string; content: string }>;
185
- maxOutputTokens: number;
186
- temperature: number;
187
- }): Promise<string> {
188
- const apiKey = process.env.OPENROUTER_API_KEY;
189
- if (!apiKey) return '';
190
-
191
- const headers: Record<string, string> = {
192
- 'Authorization': `Bearer ${apiKey}`,
193
- 'Content-Type': 'application/json'
194
- };
195
- if (agent.openrouter.siteUrl) {
196
- headers['HTTP-Referer'] = agent.openrouter.siteUrl;
197
- }
198
- if (agent.openrouter.siteName) {
199
- headers['X-Title'] = agent.openrouter.siteName;
200
- }
201
-
202
- const chatMessages = [
203
- { role: 'system', content: params.instructions },
204
- ...params.messages
205
- ];
206
-
207
- log(`Chat Completions fallback: model=${params.model}, messages=${chatMessages.length}`);
208
- const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
209
- method: 'POST',
210
- headers,
211
- body: JSON.stringify({
212
- model: params.model,
213
- messages: chatMessages,
214
- max_completion_tokens: params.maxOutputTokens,
215
- temperature: params.temperature,
216
- reasoning_effort: 'low'
217
- }),
218
- signal: AbortSignal.timeout(agent.openrouter.timeoutMs)
219
- });
220
-
221
- const bodyText = await response.text();
222
- if (!response.ok) {
223
- log(`Chat Completions fallback HTTP ${response.status}: ${bodyText.slice(0, 300)}`);
224
- return '';
225
- }
226
-
227
- try {
228
- const data = JSON.parse(bodyText);
229
- const content = data?.choices?.[0]?.message?.content;
230
- if (isValidText(content)) {
231
- log(`Chat Completions fallback recovered text (${String(content).length} chars)`);
232
- return content;
233
- }
234
- log(`Chat Completions fallback returned no text: ${JSON.stringify(data).slice(0, 300)}`);
235
- } catch (err) {
236
- log(`Chat Completions fallback parse error: ${err instanceof Error ? err.message : String(err)}`);
237
- }
238
- return '';
119
+ return { text: '' };
239
120
  }
240
121
 
241
122
  function writeOutput(output: ContainerOutput): void {
@@ -351,62 +232,6 @@ function getConfig(config: ReturnType<typeof loadAgentConfig>): MemoryConfig & {
351
232
  };
352
233
  }
353
234
 
354
- function buildPlannerPrompt(messages: Message[]): { instructions: string; input: string } {
355
- const transcript = messages.map(msg => `${msg.role.toUpperCase()}: ${msg.content}`).join('\n\n');
356
- const instructions = [
357
- 'You are a planning module for a personal assistant.',
358
- 'Given the conversation, produce a concise plan in JSON.',
359
- 'Return JSON only with keys:',
360
- '- steps: array of short action steps',
361
- '- tools: array of tool names you expect to use (if any)',
362
- '- risks: array of potential pitfalls or missing info',
363
- '- questions: array of clarifying questions (if any)',
364
- 'Keep each array short. Use empty arrays if not needed.'
365
- ].join('\n');
366
- const input = `Conversation:\n${transcript}`;
367
- return { instructions, input };
368
- }
369
-
370
- function parsePlannerResponse(text: string): { steps: string[]; tools: string[]; risks: string[]; questions: string[] } | null {
371
- const trimmed = text.trim();
372
- let jsonText = trimmed;
373
- const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i);
374
- if (fenceMatch) {
375
- jsonText = fenceMatch[1].trim();
376
- }
377
- try {
378
- const parsed = JSON.parse(jsonText) as Record<string, unknown>;
379
- const steps = Array.isArray(parsed.steps) ? parsed.steps.filter(item => typeof item === 'string') : [];
380
- const tools = Array.isArray(parsed.tools) ? parsed.tools.filter(item => typeof item === 'string') : [];
381
- const risks = Array.isArray(parsed.risks) ? parsed.risks.filter(item => typeof item === 'string') : [];
382
- const questions = Array.isArray(parsed.questions) ? parsed.questions.filter(item => typeof item === 'string') : [];
383
- return { steps, tools, risks, questions };
384
- } catch {
385
- return null;
386
- }
387
- }
388
-
389
- function formatPlanBlock(plan: { steps: string[]; tools: string[]; risks: string[]; questions: string[] }): string {
390
- const lines: string[] = ['Planned approach (planner):'];
391
- if (plan.steps.length > 0) {
392
- lines.push('Steps:');
393
- for (const step of plan.steps) lines.push(`- ${step}`);
394
- }
395
- if (plan.tools.length > 0) {
396
- lines.push('Tools:');
397
- for (const tool of plan.tools) lines.push(`- ${tool}`);
398
- }
399
- if (plan.risks.length > 0) {
400
- lines.push('Risks:');
401
- for (const risk of plan.risks) lines.push(`- ${risk}`);
402
- }
403
- if (plan.questions.length > 0) {
404
- lines.push('Questions:');
405
- for (const question of plan.questions) lines.push(`- ${question}`);
406
- }
407
- return lines.join('\n');
408
- }
409
-
410
235
  function getOpenRouterOptions(config: ReturnType<typeof loadAgentConfig>) {
411
236
  const timeoutMs = config.agent.openrouter.timeoutMs;
412
237
  const retryEnabled = config.agent.openrouter.retry;
@@ -469,11 +294,13 @@ function estimateMessagesTokens(messages: Message[], tokensPerChar: number, toke
469
294
  return total;
470
295
  }
471
296
 
297
+ const MEMORY_SUMMARY_MAX_CHARS = 2000;
298
+
472
299
  function buildSystemInstructions(params: {
473
300
  assistantName: string;
474
301
  groupNotes?: string | null;
475
302
  globalNotes?: string | null;
476
- skillNotes?: SkillNote[];
303
+ skillCatalog?: SkillCatalog | null;
477
304
  memorySummary: string;
478
305
  memoryFacts: string[];
479
306
  sessionRecall: string[];
@@ -484,161 +311,74 @@ function buildSystemInstructions(params: {
484
311
  toolReliability?: Array<{ name: string; success_rate: number; count: number; avg_duration_ms: number | null }>;
485
312
  behaviorConfig?: Record<string, unknown>;
486
313
  isScheduledTask: boolean;
487
- isBackgroundTask: boolean;
488
314
  taskId?: string;
489
- isBackgroundJob: boolean;
490
- jobId?: string;
491
315
  timezone?: string;
492
316
  hostPlatform?: string;
493
- planBlock?: string;
317
+ messagingPlatform?: string;
494
318
  taskExtractionPack?: PromptPack | null;
495
319
  responseQualityPack?: PromptPack | null;
496
320
  toolCallingPack?: PromptPack | null;
497
321
  toolOutcomePack?: PromptPack | null;
498
322
  memoryPolicyPack?: PromptPack | null;
499
323
  memoryRecallPack?: PromptPack | null;
324
+ maxToolSteps?: number;
500
325
  }): string {
501
- const toolsDoc = [
502
- 'Tools available (use with care):',
503
- '- `Bash`: run shell commands in `/workspace/group`.',
504
- '- `Read`, `Write`, `Edit`, `Glob`, `Grep`: filesystem operations within mounted paths.',
505
- '- `WebSearch`: Brave Search API (requires `BRAVE_SEARCH_API_KEY`).',
506
- '- `WebFetch`: fetch URLs (limit payload sizes).',
507
- '- `GitClone`: clone git repositories into the workspace.',
508
- '- `NpmInstall`: install npm dependencies in the workspace.',
509
- '- `mcp__dotclaw__send_message`: send Telegram messages.',
510
- '- `mcp__dotclaw__send_file`: send a file/document.',
511
- '- `mcp__dotclaw__send_photo`: send a photo with compression.',
512
- '- `mcp__dotclaw__send_voice`: send a voice message (.ogg format).',
513
- '- `mcp__dotclaw__send_audio`: send an audio file (mp3, m4a, etc.).',
514
- '- `mcp__dotclaw__send_location`: send a map pin (latitude/longitude).',
515
- '- `mcp__dotclaw__send_contact`: send a contact card (phone + name).',
516
- '- `mcp__dotclaw__send_poll`: create a Telegram poll.',
517
- '- `mcp__dotclaw__send_buttons`: send a message with inline keyboard buttons.',
518
- '- `mcp__dotclaw__edit_message`: edit a previously sent message.',
519
- '- `mcp__dotclaw__delete_message`: delete a message.',
520
- '- `mcp__dotclaw__download_url`: download a URL to the workspace as a file.',
521
- '- To send media from the web: (1) download with `mcp__dotclaw__download_url` or `curl`/`wget` via Bash, (2) send with `mcp__dotclaw__send_photo`/`send_file`/`send_voice`/`send_audio`. This is a quick foreground task — do NOT use spawn_job for it.',
522
- '- Users may send photos, documents, voice messages, and videos. These are downloaded to `/workspace/group/inbox/` and referenced as `<attachment>` tags in messages. Process them with Read/Bash/Python tools. Use ffmpeg for audio/video transcoding (e.g. voice messages must be .ogg Opus for send_voice).',
523
- '- GitHub CLI (`gh`) is available. If GH_TOKEN is set, you can clone repos, create PRs, manage issues, etc. Use `gh auth status` to check authentication.',
524
- '- `mcp__dotclaw__schedule_task`: schedule tasks (set `timezone` for locale-specific schedules).',
525
- '- `mcp__dotclaw__run_task`: run a scheduled task immediately.',
526
- '- `mcp__dotclaw__list_tasks`, `mcp__dotclaw__pause_task`, `mcp__dotclaw__resume_task`, `mcp__dotclaw__cancel_task`.',
527
- '- `mcp__dotclaw__update_task`: update a task (state, prompt, schedule, status).',
528
- '- `mcp__dotclaw__spawn_job`: start a background job.',
529
- '- `mcp__dotclaw__job_status`, `mcp__dotclaw__list_jobs`, `mcp__dotclaw__cancel_job`.',
530
- '- `mcp__dotclaw__job_update`: log job progress or notify the user.',
531
- 'Rule: Use `mcp__dotclaw__spawn_job` ONLY for tasks that genuinely take more than ~2 minutes (cloning large repos, multi-page web research, complex coding projects). Everything else — downloading files, sending media, quick lookups, data analysis, format conversions, chart generation, scheduling reminders, web searches — should be done directly in the foreground. When in doubt, do it in the foreground.',
532
- 'When you DO spawn a background job, keep your reply to the user minimal — e.g. "Working on it, I\'ll send the results when done." Do not include the job ID, bullet-point plans, next steps, or status monitoring offers. The user will receive the result automatically.',
533
- '- `mcp__dotclaw__register_group`: manage chat registrations.',
534
- '- `mcp__dotclaw__remove_group`, `mcp__dotclaw__list_groups`: manage registered groups.',
535
- '- `mcp__dotclaw__set_model`: change the active model.',
536
- '- `mcp__dotclaw__memory_upsert`: store durable memories.',
537
- '- `mcp__dotclaw__memory_search`, `mcp__dotclaw__memory_list`, `mcp__dotclaw__memory_forget`, `mcp__dotclaw__memory_stats`.',
538
- '- `plugin__*`: dynamically loaded plugin tools (if present and allowed by policy).'
539
- ].join('\n');
540
- const browserAutomation = [
541
- 'Browser automation (via Bash):',
542
- '- Use `agent-browser open <url>` then `agent-browser snapshot -i`.',
543
- '- Interact with refs using `agent-browser click @e1`, `fill @e2 "text"`.',
544
- '- Capture evidence with `agent-browser screenshot`.'
326
+ const toolGuidance = [
327
+ 'Key tool rules:',
328
+ '- User attachments arrive in /workspace/group/inbox/ (see <attachment> tags). Process with Read/Bash/Python.',
329
+ '- To send media from the web: download_url → send_photo/send_file/send_audio.',
330
+ '- Charts/plots: matplotlib → savefig → send_photo. Graphviz → dot -Tpng → send_photo.',
331
+ '- Voice messages are auto-transcribed (<transcript> in <attachment>). Reply with normal text — the host auto-converts to voice.',
332
+ '- GitHub CLI (`gh`) is available if GH_TOKEN is set.',
333
+ '- plugin__* and mcp_ext__* tools may be available if configured.'
545
334
  ].join('\n');
546
335
 
547
- const commonWorkflows = [
548
- 'Common workflows (do all of these in the foreground — act immediately, never spawn_job):',
549
- '',
550
- 'Sending media from the web:',
551
- ' download_url (or curl/wget) → send_photo / send_file / send_audio.',
552
- '',
553
- 'Charts & plots:',
554
- ' Python: matplotlib/pandas .plot() → plt.savefig("/workspace/group/chart.png") → send_photo.',
555
- ' Graphviz: write .dot file → Bash `dot -Tpng diagram.dot -o diagram.png` → send_photo.',
556
- ' Always save to a file and send — never try to "display" inline.',
557
- '',
558
- 'Processing user attachments:',
559
- ' Files arrive in /workspace/group/inbox/. The path is in the <attachment> tag.',
560
- ' Spreadsheets (.xlsx/.csv): `pd.read_excel()` or `pd.read_csv()` → analyze → respond.',
561
- ' JSON: Read tool or `json.load()` → analyze → respond.',
562
- ' Images: Python Pillow for processing, or describe what you see if relevant.',
563
- ' PDFs: Python `PyPDF2` (install at runtime if needed), or `pdftotext` via Bash.',
564
- ' Archives (.zip/.tar): `unzip -l` or `tar -tf` to list, extract as needed.',
565
- ' Unknown types: use `file` command to identify, then process accordingly.',
566
- '',
567
- 'Creating & delivering files:',
568
- ' When the user asks you to create a file (report, CSV, spreadsheet, script, etc.):',
569
- ' Write/Python to create the file → send_file to deliver it. Do not paste large file content as a message.',
570
- ' For Excel: `openpyxl` or `pd.to_excel()`. For CSV: `pd.to_csv()` or Write tool.',
571
- '',
572
- 'Format conversions:',
573
- ' Images: Python Pillow `Image.open().save("out.png")` → send_file.',
574
- ' Audio/Video: `ffmpeg -i input.ext output.ext` via Bash → send_file / send_audio.',
575
- ' Documents: use appropriate Python libraries or CLI tools → send_file.',
576
- '',
577
- 'Voice messages:',
578
- ' Received: arrives as .ogg in inbox. No built-in speech-to-text — acknowledge this to the user.',
579
- ' Sending: create/obtain audio → `ffmpeg -i input.mp3 -c:a libopus output.ogg` → send_voice.',
580
- '',
581
- 'Quick lookups (one tool call, immediate response):',
582
- ' Time zones: `python3 -c "from datetime import datetime; from zoneinfo import ZoneInfo; ..."`',
583
- ' Math/conversions: Python one-liner.',
584
- ' Unit conversions, currency, percentages: Python one-liner.',
585
- '',
586
- 'Web research:',
587
- ' Simple question: WebSearch → summarize in send_message.',
588
- ' Summarize a URL: WebFetch → summarize in send_message.',
589
- ' Deep research (many sources): this is the one case that may warrant spawn_job.',
590
- '',
591
- 'Reminders & scheduling:',
592
- ' "Remind me at 5pm": one schedule_task call with a cron expression. Done.',
593
- ' "Every Monday at 9am": one schedule_task call with cron. Done.',
594
- ' Do not overthink scheduling — it is a single tool call.',
595
- '',
596
- 'Diagrams & visualizations:',
597
- ' Flowcharts/graphs: write Graphviz .dot → `dot -Tpng` → send_photo.',
598
- ' Data visualizations: matplotlib/pandas → savefig → send_photo.',
599
- ' Tables: use `tabulate` for markdown/ASCII tables in messages, or create an image for complex tables.'
600
- ].join('\n');
336
+ const browserAutomation = agentConfig.agent.browser.enabled ? [
337
+ 'Browser Tool: actions: navigate, snapshot, click, fill, screenshot, extract, evaluate, close.',
338
+ 'Use snapshot with interactive=true for clickable refs (@e1, @e2). Screenshots → /workspace/group/screenshots/.'
339
+ ].join('\n') : '';
340
+
341
+ const hasAnyMemory = params.memorySummary || params.memoryFacts.length > 0 ||
342
+ params.longTermRecall.length > 0 || params.userProfile;
601
343
 
602
- const memorySummary = params.memorySummary ? params.memorySummary : 'None yet.';
344
+ const memorySummary = params.memorySummary
345
+ ? params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS)
346
+ : '';
603
347
  const memoryFacts = params.memoryFacts.length > 0
604
348
  ? params.memoryFacts.map(fact => `- ${fact}`).join('\n')
605
- : 'None yet.';
349
+ : '';
606
350
  const sessionRecall = params.sessionRecall.length > 0
607
351
  ? params.sessionRecall.map(item => `- ${item}`).join('\n')
608
- : 'None.';
609
-
352
+ : '';
610
353
  const longTermRecall = params.longTermRecall.length > 0
611
354
  ? params.longTermRecall.map(item => `- ${item}`).join('\n')
612
- : 'None.';
613
-
614
- const userProfile = params.userProfile
615
- ? params.userProfile
616
- : 'None.';
617
-
355
+ : '';
356
+ const userProfile = params.userProfile || '';
618
357
  const memoryStats = params.memoryStats
619
358
  ? `Total: ${params.memoryStats.total}, User: ${params.memoryStats.user}, Group: ${params.memoryStats.group}, Global: ${params.memoryStats.global}`
620
- : 'Unknown.';
359
+ : '';
621
360
 
622
361
  const availableGroups = params.availableGroups && params.availableGroups.length > 0
623
362
  ? params.availableGroups
624
363
  .map(group => `- ${group.name} (chat ${group.jid}, last: ${group.lastActivity})`)
625
364
  .join('\n')
626
- : 'None.';
365
+ : '';
627
366
 
628
367
  const groupNotes = params.groupNotes ? `Group notes:\n${params.groupNotes}` : '';
629
368
  const globalNotes = params.globalNotes ? `Global notes:\n${params.globalNotes}` : '';
630
- const skillNotes = formatSkillNotes(params.skillNotes || []);
369
+ const skillNotes = params.skillCatalog ? formatSkillCatalog(params.skillCatalog) : '';
631
370
 
632
371
  const toolReliability = params.toolReliability && params.toolReliability.length > 0
633
372
  ? params.toolReliability
634
- .sort((a, b) => b.success_rate - a.success_rate)
373
+ .sort((a, b) => a.success_rate - b.success_rate)
374
+ .slice(0, 20)
635
375
  .map(tool => {
636
376
  const pct = `${Math.round(tool.success_rate * 100)}%`;
637
377
  const avg = Number.isFinite(tool.avg_duration_ms) ? `${Math.round(tool.avg_duration_ms!)}ms` : 'n/a';
638
378
  return `- ${tool.name}: success ${pct} over ${tool.count} calls (avg ${avg})`;
639
379
  })
640
380
  .join('\n')
641
- : 'No recent tool reliability data.';
381
+ : '';
642
382
 
643
383
  const behaviorNotes: string[] = [];
644
384
  const responseStyle = typeof params.behaviorConfig?.response_style === 'string'
@@ -677,64 +417,88 @@ function buildSystemInstructions(params: {
677
417
  const scheduledNote = params.isScheduledTask
678
418
  ? `You are running as a scheduled task${params.taskId ? ` (task id: ${params.taskId})` : ''}. If you need to communicate, use \`mcp__dotclaw__send_message\`.`
679
419
  : '';
680
- const backgroundNote = params.isBackgroundTask
681
- ? 'You are running in the background for a user request. Focus on completing the task and return a complete response without asking follow-up questions unless strictly necessary.'
682
- : '';
683
- const jobNote = params.isBackgroundJob
684
- ? `You are running as a background job${params.jobId ? ` (job id: ${params.jobId})` : ''}. Complete the task silently and return the result. Do NOT call \`mcp__dotclaw__job_update\` for routine progress — only for critical blockers or required user decisions. Do NOT send messages to the chat about your progress. Just do the work and return the final result. The system will deliver your result to the user automatically.`
685
- : '';
686
- const jobArtifactsNote = params.isBackgroundJob && params.jobId
687
- ? `Job artifacts directory: /workspace/group/jobs/${params.jobId}`
688
- : '';
689
420
 
690
421
  const fmtPack = (label: string, pack: PromptPack | null | undefined) =>
691
422
  pack ? formatPromptPack({ label, pack, maxDemos: PROMPT_PACKS_MAX_DEMOS, maxChars: PROMPT_PACKS_MAX_CHARS }) : '';
692
423
 
693
- const taskExtractionBlock = fmtPack('Task Extraction Guidelines', params.taskExtractionPack);
694
- const responseQualityBlock = fmtPack('Response Quality Guidelines', params.responseQualityPack);
695
- const toolCallingBlock = fmtPack('Tool Calling Guidelines', params.toolCallingPack);
696
- const toolOutcomeBlock = fmtPack('Tool Outcome Guidelines', params.toolOutcomePack);
697
- const memoryPolicyBlock = fmtPack('Memory Policy Guidelines', params.memoryPolicyPack);
698
- const memoryRecallBlock = fmtPack('Memory Recall Guidelines', params.memoryRecallPack);
424
+ const PROMPT_PACKS_TOTAL_BUDGET = PROMPT_PACKS_MAX_CHARS * 3;
425
+ const allPackBlocks: string[] = [];
426
+ {
427
+ const packEntries: Array<[string, PromptPack | null | undefined]> = [
428
+ ['Tool Calling Guidelines', params.toolCallingPack],
429
+ ['Tool Outcome Guidelines', params.toolOutcomePack],
430
+ ['Task Extraction Guidelines', params.taskExtractionPack],
431
+ ['Response Quality Guidelines', params.responseQualityPack],
432
+ ['Memory Policy Guidelines', params.memoryPolicyPack],
433
+ ['Memory Recall Guidelines', params.memoryRecallPack],
434
+ ];
435
+ let totalChars = 0;
436
+ for (const [label, pack] of packEntries) {
437
+ const block = fmtPack(label, pack);
438
+ if (!block) continue;
439
+ if (totalChars + block.length > PROMPT_PACKS_TOTAL_BUDGET) break;
440
+ allPackBlocks.push(block);
441
+ totalChars += block.length;
442
+ }
443
+ }
444
+ const taskExtractionBlock = allPackBlocks.find(b => b.includes('Task Extraction')) || '';
445
+ const responseQualityBlock = allPackBlocks.find(b => b.includes('Response Quality')) || '';
446
+ const toolCallingBlock = allPackBlocks.find(b => b.includes('Tool Calling')) || '';
447
+ const toolOutcomeBlock = allPackBlocks.find(b => b.includes('Tool Outcome')) || '';
448
+ const memoryPolicyBlock = allPackBlocks.find(b => b.includes('Memory Policy')) || '';
449
+ const memoryRecallBlock = allPackBlocks.find(b => b.includes('Memory Recall')) || '';
450
+
451
+ const memorySections: string[] = [];
452
+ {
453
+ if (hasAnyMemory) {
454
+ if (memorySummary) {
455
+ memorySections.push('Long-term memory summary:', memorySummary);
456
+ }
457
+ if (memoryFacts) {
458
+ memorySections.push('Long-term facts:', memoryFacts);
459
+ }
460
+ if (userProfile) {
461
+ memorySections.push('User profile (if available):', userProfile);
462
+ }
463
+ if (longTermRecall) {
464
+ memorySections.push('What you remember about the user (long-term):', longTermRecall);
465
+ }
466
+ if (memoryStats) {
467
+ memorySections.push('Memory stats:', memoryStats);
468
+ }
469
+ } else {
470
+ memorySections.push('No long-term memory available yet.');
471
+ }
472
+ }
473
+
474
+ // Session recall is always included (local context from current conversation)
475
+ if (sessionRecall) {
476
+ memorySections.push('Recent conversation context:', sessionRecall);
477
+ }
699
478
 
700
479
  return [
701
- `You are ${params.assistantName}, a personal assistant running inside DotClaw.`,
480
+ `You are ${params.assistantName}, a personal assistant running inside DotClaw.${params.messagingPlatform ? ` You are currently connected via ${params.messagingPlatform}.` : ''}`,
702
481
  hostPlatformNote,
703
482
  scheduledNote,
704
- backgroundNote,
705
- jobNote,
706
- jobArtifactsNote,
707
- toolsDoc,
483
+ toolGuidance,
708
484
  browserAutomation,
709
- commonWorkflows,
710
485
  groupNotes,
711
486
  globalNotes,
712
487
  skillNotes,
713
488
  timezoneNote,
714
- params.planBlock || '',
715
489
  toolCallingBlock,
716
490
  toolOutcomeBlock,
717
491
  taskExtractionBlock,
718
492
  responseQualityBlock,
719
493
  memoryPolicyBlock,
720
494
  memoryRecallBlock,
721
- 'Long-term memory summary:',
722
- memorySummary,
723
- 'Long-term facts:',
724
- memoryFacts,
725
- 'User profile (if available):',
726
- userProfile,
727
- 'What you remember about the user (long-term):',
728
- longTermRecall,
729
- 'Recent conversation context:',
730
- sessionRecall,
731
- 'Memory stats:',
732
- memoryStats,
733
- 'Available groups (main group only):',
734
- availableGroups,
735
- 'Tool reliability (recent):',
736
- toolReliability,
495
+ ...memorySections,
496
+ availableGroups ? `Available groups (main group only):\n${availableGroups}` : '',
497
+ toolReliability ? `Tool reliability (recent):\n${toolReliability}` : '',
737
498
  behaviorNotes.length > 0 ? `Behavior notes:\n${behaviorNotes.join('\n')}` : '',
499
+ params.maxToolSteps
500
+ ? `You have a budget of ${params.maxToolSteps} tool steps per request. If a task is large, break your work into phases and always finish with a text summary of what you accomplished — never end on a tool call without a response.`
501
+ : '',
738
502
  'Be concise and helpful. When you use tools, summarize what happened rather than dumping raw output.'
739
503
  ].filter(Boolean).join('\n\n');
740
504
  }
@@ -770,122 +534,6 @@ function loadClaudeNotes(): { group: string | null; global: string | null } {
770
534
  };
771
535
  }
772
536
 
773
- export type SkillNote = {
774
- scope: 'group' | 'global';
775
- path: string;
776
- content: string;
777
- };
778
-
779
- function collectSkillFiles(rootDir: string, maxFiles: number): string[] {
780
- const files: string[] = [];
781
- const seen = new Set<string>();
782
- const addFile = (filePath: string) => {
783
- const normalized = path.resolve(filePath);
784
- if (seen.has(normalized)) return;
785
- if (!fs.existsSync(normalized)) return;
786
- let stat: fs.Stats;
787
- try {
788
- stat = fs.statSync(normalized);
789
- } catch {
790
- return;
791
- }
792
- if (!stat.isFile()) return;
793
- if (!normalized.toLowerCase().endsWith('.md')) return;
794
- seen.add(normalized);
795
- files.push(normalized);
796
- };
797
-
798
- addFile(path.join(rootDir, 'SKILL.md'));
799
-
800
- const skillsDir = path.join(rootDir, 'skills');
801
- if (fs.existsSync(skillsDir)) {
802
- const stack = [skillsDir];
803
- while (stack.length > 0 && files.length < maxFiles) {
804
- const current = stack.pop();
805
- if (!current) continue;
806
- let entries: fs.Dirent[];
807
- try {
808
- entries = fs.readdirSync(current, { withFileTypes: true });
809
- } catch {
810
- continue;
811
- }
812
- entries.sort((a, b) => a.name.localeCompare(b.name));
813
- for (const entry of entries) {
814
- const nextPath = path.join(current, entry.name);
815
- if (entry.isSymbolicLink()) continue;
816
- if (entry.isDirectory()) {
817
- stack.push(nextPath);
818
- continue;
819
- }
820
- if (entry.isFile()) {
821
- addFile(nextPath);
822
- }
823
- if (files.length >= maxFiles) break;
824
- }
825
- }
826
- }
827
-
828
- files.sort((a, b) => a.localeCompare(b));
829
- return files.slice(0, maxFiles);
830
- }
831
-
832
- export function loadSkillNotesFromRoots(params: {
833
- groupDir: string;
834
- globalDir: string;
835
- maxFiles?: number;
836
- maxCharsPerFile?: number;
837
- maxTotalChars?: number;
838
- }): SkillNote[] {
839
- const maxFiles = Number.isFinite(params.maxFiles) ? Math.max(1, Math.floor(params.maxFiles!)) : SKILL_NOTES_MAX_FILES;
840
- const maxCharsPerFile = Number.isFinite(params.maxCharsPerFile)
841
- ? Math.max(200, Math.floor(params.maxCharsPerFile!))
842
- : SKILL_NOTES_MAX_CHARS;
843
- const maxTotalChars = Number.isFinite(params.maxTotalChars)
844
- ? Math.max(maxCharsPerFile, Math.floor(params.maxTotalChars!))
845
- : SKILL_NOTES_TOTAL_MAX_CHARS;
846
-
847
- const notes: SkillNote[] = [];
848
- let consumedChars = 0;
849
-
850
- const appendScopeNotes = (scope: 'group' | 'global', rootDir: string) => {
851
- const skillFiles = collectSkillFiles(rootDir, maxFiles);
852
- for (const filePath of skillFiles) {
853
- if (notes.length >= maxFiles) break;
854
- if (consumedChars >= maxTotalChars) break;
855
- const content = readTextFileLimited(filePath, maxCharsPerFile);
856
- if (!content) continue;
857
- const remaining = maxTotalChars - consumedChars;
858
- const truncated = content.length > remaining
859
- ? `${content.slice(0, remaining)}\n\n[Truncated for total skill budget]`
860
- : content;
861
- const relativePath = path.relative(rootDir, filePath).split(path.sep).join('/');
862
- notes.push({
863
- scope,
864
- path: relativePath || path.basename(filePath),
865
- content: truncated
866
- });
867
- consumedChars += truncated.length;
868
- if (consumedChars >= maxTotalChars) break;
869
- }
870
- };
871
-
872
- appendScopeNotes('group', params.groupDir);
873
- appendScopeNotes('global', params.globalDir);
874
- return notes;
875
- }
876
-
877
- function formatSkillNotes(notes: SkillNote[]): string {
878
- if (!notes || notes.length === 0) return '';
879
- const lines: string[] = [
880
- 'Skill instructions (loaded from SKILL.md / skills/*.md):',
881
- 'When a task matches a skill, follow that skill workflow first and keep output concise.'
882
- ];
883
- for (const note of notes) {
884
- lines.push(`[${note.scope}] ${note.path}`);
885
- lines.push(note.content);
886
- }
887
- return lines.join('\n\n');
888
- }
889
537
 
890
538
  function extractQueryFromPrompt(prompt: string): string {
891
539
  if (!prompt) return '';
@@ -906,6 +554,47 @@ function decodeXml(value: string): string {
906
554
  .replace(/&amp;/g, '&');
907
555
  }
908
556
 
557
+ // ── Image/Vision support ──────────────────────────────────────────────
558
+
559
+ const MAX_IMAGE_BYTES = 5 * 1024 * 1024; // 5MB per image
560
+ const MAX_TOTAL_IMAGE_BYTES = 20 * 1024 * 1024; // 20MB total across all images
561
+ const IMAGE_MIME_TYPES = new Set(['image/jpeg', 'image/png', 'image/gif', 'image/webp']);
562
+
563
+ function loadImageAttachments(attachments?: ContainerInput['attachments']): Array<{
564
+ type: 'image_url';
565
+ image_url: { url: string };
566
+ }> {
567
+ if (!attachments) return [];
568
+ const images: Array<{ type: 'image_url'; image_url: { url: string } }> = [];
569
+ let totalBytes = 0;
570
+ for (const att of attachments) {
571
+ if (att.type !== 'photo') continue;
572
+ const mime = att.mime_type || 'image/jpeg';
573
+ if (!IMAGE_MIME_TYPES.has(mime)) continue;
574
+ try {
575
+ const stat = fs.statSync(att.path);
576
+ if (stat.size > MAX_IMAGE_BYTES) {
577
+ log(`Skipping image ${att.path}: ${stat.size} bytes exceeds ${MAX_IMAGE_BYTES}`);
578
+ continue;
579
+ }
580
+ if (totalBytes + stat.size > MAX_TOTAL_IMAGE_BYTES) {
581
+ log(`Skipping image ${att.path}: cumulative size would exceed ${MAX_TOTAL_IMAGE_BYTES}`);
582
+ break;
583
+ }
584
+ const data = fs.readFileSync(att.path);
585
+ totalBytes += data.length;
586
+ const b64 = data.toString('base64');
587
+ images.push({
588
+ type: 'image_url',
589
+ image_url: { url: `data:${mime};base64,${b64}` }
590
+ });
591
+ } catch (err) {
592
+ log(`Failed to load image ${att.path}: ${err instanceof Error ? err.message : err}`);
593
+ }
594
+ }
595
+ return images;
596
+ }
597
+
909
598
  function messagesToOpenRouter(messages: Message[]) {
910
599
  return messages.map(message => ({
911
600
  role: message.role,
@@ -948,7 +637,7 @@ async function updateMemorySummary(params: {
948
637
  temperature: 0.1,
949
638
  reasoning: { effort: 'low' as const }
950
639
  });
951
- const text = await getTextWithFallback(result, 'summary');
640
+ const { text } = await getResponseText(result, 'summary');
952
641
  return parseSummaryResponse(text);
953
642
  }
954
643
 
@@ -1022,122 +711,6 @@ function parseMemoryExtraction(text: string): Array<Record<string, unknown>> {
1022
711
  }
1023
712
  }
1024
713
 
1025
- type ResponseValidation = {
1026
- verdict: 'pass' | 'fail';
1027
- issues: string[];
1028
- missing: string[];
1029
- };
1030
-
1031
- function buildResponseValidationPrompt(params: { userPrompt: string; response: string }): { instructions: string; input: string } {
1032
- const instructions = [
1033
- 'You are a strict response quality checker.',
1034
- 'Given a user request and an assistant response, decide if the response fully addresses the request.',
1035
- 'Fail if the response is empty, generic, deflects, promises work without results, or ignores any explicit questions.',
1036
- 'Pass only if the response directly answers all parts with concrete, relevant content.',
1037
- 'Return JSON only with keys: verdict ("pass"|"fail"), issues (array of strings), missing (array of strings).'
1038
- ].join('\n');
1039
-
1040
- const input = [
1041
- 'User request:',
1042
- params.userPrompt,
1043
- '',
1044
- 'Assistant response:',
1045
- params.response
1046
- ].join('\n');
1047
-
1048
- return { instructions, input };
1049
- }
1050
-
1051
- function parseResponseValidation(text: string): ResponseValidation | null {
1052
- const trimmed = text.trim();
1053
- let jsonText = trimmed;
1054
- const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i);
1055
- if (fenceMatch) {
1056
- jsonText = fenceMatch[1].trim();
1057
- }
1058
- try {
1059
- const parsed = JSON.parse(jsonText);
1060
- const verdict = parsed?.verdict;
1061
- if (verdict !== 'pass' && verdict !== 'fail') return null;
1062
- const issues = Array.isArray(parsed?.issues)
1063
- ? parsed.issues.filter((issue: unknown) => typeof issue === 'string')
1064
- : [];
1065
- const missing = Array.isArray(parsed?.missing)
1066
- ? parsed.missing.filter((item: unknown) => typeof item === 'string')
1067
- : [];
1068
- return { verdict, issues, missing };
1069
- } catch {
1070
- return null;
1071
- }
1072
- }
1073
-
1074
- async function validateResponseQuality(params: {
1075
- openrouter: OpenRouter;
1076
- model: string;
1077
- userPrompt: string;
1078
- response: string;
1079
- maxOutputTokens: number;
1080
- temperature: number;
1081
- }): Promise<ResponseValidation | null> {
1082
- const prompt = buildResponseValidationPrompt({
1083
- userPrompt: params.userPrompt,
1084
- response: params.response
1085
- });
1086
- const result = await params.openrouter.callModel({
1087
- model: params.model,
1088
- instructions: prompt.instructions,
1089
- input: prompt.input,
1090
- maxOutputTokens: params.maxOutputTokens,
1091
- temperature: params.temperature,
1092
- reasoning: { effort: 'low' as const }
1093
- });
1094
- const text = await getTextWithFallback(result, 'response_validation');
1095
- return parseResponseValidation(text);
1096
- }
1097
-
1098
- function buildRetryGuidance(validation: ResponseValidation | null): string {
1099
- const issues = validation?.issues || [];
1100
- const missing = validation?.missing || [];
1101
- const points = [...issues, ...missing].filter(Boolean).slice(0, 8);
1102
- const details = points.length > 0
1103
- ? points.map(item => `- ${item}`).join('\n')
1104
- : '- The previous response did not fully address the request.';
1105
- return [
1106
- 'IMPORTANT: Your previous response did not fully answer the user request.',
1107
- 'Provide a direct, complete answer now. Do not mention this retry.',
1108
- 'Issues to fix:',
1109
- details
1110
- ].join('\n');
1111
- }
1112
-
1113
- function buildPlannerTrigger(pattern: string | undefined): RegExp | null {
1114
- if (!pattern) return null;
1115
- try {
1116
- return new RegExp(pattern, 'i');
1117
- } catch {
1118
- return null;
1119
- }
1120
- }
1121
-
1122
- function shouldRunPlanner(params: {
1123
- enabled: boolean;
1124
- mode: string;
1125
- prompt: string;
1126
- tokensPerChar: number;
1127
- minTokens: number;
1128
- trigger: RegExp | null;
1129
- }): boolean {
1130
- if (!params.enabled) return false;
1131
- const mode = params.mode.toLowerCase();
1132
- if (mode === 'always') return true;
1133
- if (mode === 'off') return false;
1134
-
1135
- const estimatedTokens = estimateTokensForModel(params.prompt, params.tokensPerChar);
1136
- if (params.minTokens > 0 && estimatedTokens >= params.minTokens) return true;
1137
- if (params.trigger && params.trigger.test(params.prompt)) return true;
1138
- return false;
1139
- }
1140
-
1141
714
  export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutput> {
1142
715
  log(`Received input for group: ${input.groupFolder}`);
1143
716
 
@@ -1161,7 +734,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1161
734
  config.compactionTriggerTokens = Math.max(1000, Math.min(config.compactionTriggerTokens, compactionTarget));
1162
735
  }
1163
736
  if (input.modelMaxOutputTokens && Number.isFinite(input.modelMaxOutputTokens)) {
1164
- config.maxOutputTokens = Math.min(config.maxOutputTokens, input.modelMaxOutputTokens);
737
+ config.maxOutputTokens = input.modelMaxOutputTokens;
1165
738
  }
1166
739
  if (input.modelTemperature && Number.isFinite(input.modelTemperature)) {
1167
740
  config.temperature = input.modelTemperature;
@@ -1170,46 +743,29 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1170
743
  const maxToolSteps = Number.isFinite(input.maxToolSteps)
1171
744
  ? Math.max(1, Math.floor(input.maxToolSteps as number))
1172
745
  : agent.tools.maxToolSteps;
1173
- const memoryExtractionEnabled = agent.memory.extraction.enabled && !input.disableMemoryExtraction;
746
+ const memoryExtractionEnabled = agent.memory.extraction.enabled;
1174
747
  const isDaemon = process.env.DOTCLAW_DAEMON === '1';
1175
- const memoryExtractionAsync = agent.memory.extraction.async;
1176
748
  const memoryExtractionMaxMessages = agent.memory.extraction.maxMessages;
1177
749
  const memoryExtractionMaxOutputTokens = agent.memory.extraction.maxOutputTokens;
1178
750
  const memoryExtractScheduled = agent.memory.extractScheduled;
1179
751
  const memoryArchiveSync = agent.memory.archiveSync;
1180
- const plannerEnabled = agent.planner.enabled && !input.disablePlanner;
1181
- const plannerMode = String(agent.planner.mode || 'auto').toLowerCase();
1182
- const plannerMinTokens = agent.planner.minTokens;
1183
- const plannerTrigger = buildPlannerTrigger(agent.planner.triggerRegex);
1184
- const plannerModel = agent.models.planner;
1185
- const plannerMaxOutputTokens = agent.planner.maxOutputTokens;
1186
- const plannerTemperature = agent.planner.temperature;
1187
- const responseValidateEnabled = agent.responseValidation.enabled && !input.disableResponseValidation;
1188
- const responseValidateModel = agent.models.responseValidation;
1189
- const responseValidateMaxOutputTokens = agent.responseValidation.maxOutputTokens;
1190
- const responseValidateTemperature = agent.responseValidation.temperature;
1191
- const responseValidateMaxRetries = Number.isFinite(input.responseValidationMaxRetries)
1192
- ? Math.max(0, Math.floor(input.responseValidationMaxRetries as number))
1193
- : agent.responseValidation.maxRetries;
1194
- const responseValidateAllowToolCalls = agent.responseValidation.allowToolCalls;
1195
- const responseValidateMinPromptTokens = agent.responseValidation.minPromptTokens || 0;
1196
- const responseValidateMinResponseTokens = agent.responseValidation.minResponseTokens || 0;
1197
752
  const maxContextMessageTokens = agent.context.maxContextMessageTokens;
1198
753
 
1199
754
  const openrouter = getCachedOpenRouter(apiKey, openrouterOptions);
1200
755
  const tokenEstimate = resolveTokenEstimate(input, agentConfig);
1201
756
  const availableGroups = loadAvailableGroups();
1202
757
  const claudeNotes = loadClaudeNotes();
1203
- const skillNotes = loadSkillNotesFromRoots({
758
+ const skillCatalog = buildSkillCatalog({
1204
759
  groupDir: GROUP_DIR,
1205
- globalDir: GLOBAL_DIR
760
+ globalDir: GLOBAL_DIR,
761
+ maxSkills: agent.skills.maxSkills
1206
762
  });
1207
763
 
1208
764
  const { ctx: sessionCtx, isNew } = createSessionContext(SESSION_ROOT, input.sessionId);
1209
765
  const toolCalls: ToolCallRecord[] = [];
1210
766
  let memoryItemsUpserted = 0;
1211
767
  let memoryItemsExtracted = 0;
1212
- const timings: { planner_ms?: number; response_validation_ms?: number; memory_extraction_ms?: number; tool_ms?: number } = {};
768
+ const timings: { memory_extraction_ms?: number; tool_ms?: number } = {};
1213
769
  const ipc = createIpcHandlers({
1214
770
  chatJid: input.chatJid,
1215
771
  groupFolder: input.groupFolder,
@@ -1223,13 +779,39 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1223
779
  onToolCall: (call) => {
1224
780
  toolCalls.push(call);
1225
781
  },
1226
- policy: input.toolPolicy,
1227
- jobProgress: {
1228
- jobId: input.jobId,
1229
- enabled: Boolean(input.isBackgroundJob)
1230
- }
782
+ policy: input.toolPolicy
1231
783
  });
1232
784
 
785
+ // Discover MCP external tools if enabled
786
+ let mcpCleanup: (() => Promise<void>) | null = null;
787
+ if (agent.mcp.enabled && agent.mcp.servers.length > 0) {
788
+ try {
789
+ // Build a minimal wrapExecute for MCP tools (policy + logging handled by createTools wrapExecute pattern)
790
+ const wrapMcp = <TInput, TOutput>(name: string, execute: (args: TInput) => Promise<TOutput>) => {
791
+ return async (args: TInput): Promise<TOutput> => {
792
+ const start = Date.now();
793
+ try {
794
+ const result = await execute(args);
795
+ toolCalls.push({ name, ok: true, duration_ms: Date.now() - start });
796
+ return result;
797
+ } catch (err) {
798
+ const error = err instanceof Error ? err.message : String(err);
799
+ toolCalls.push({ name, ok: false, duration_ms: Date.now() - start, error });
800
+ throw err;
801
+ }
802
+ };
803
+ };
804
+ const mcp = await discoverMcpTools(agent, wrapMcp);
805
+ tools.push(...mcp.tools);
806
+ mcpCleanup = mcp.cleanup;
807
+ if (mcp.tools.length > 0) {
808
+ log(`MCP: discovered ${mcp.tools.length} external tools`);
809
+ }
810
+ } catch (err) {
811
+ log(`MCP discovery failed: ${err instanceof Error ? err.message : String(err)}`);
812
+ }
813
+ }
814
+
1233
815
  if (process.env.DOTCLAW_SELF_CHECK === '1') {
1234
816
  try {
1235
817
  const details = await runSelfCheck({ model });
@@ -1250,6 +832,14 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1250
832
  }
1251
833
  }
1252
834
 
835
+ // Resolve reasoning effort: input override > agent config > 'low'
836
+ const VALID_EFFORTS = ['off', 'low', 'medium', 'high'] as const;
837
+ const rawEffort = input.reasoningEffort || agent.reasoning?.effort || 'low';
838
+ const reasoningEffort = VALID_EFFORTS.includes(rawEffort as typeof VALID_EFFORTS[number]) ? rawEffort : 'low';
839
+ const resolvedReasoning = reasoningEffort === 'off'
840
+ ? undefined
841
+ : { effort: reasoningEffort as 'low' | 'medium' | 'high' };
842
+
1253
843
  let prompt = input.prompt;
1254
844
  if (input.isScheduledTask) {
1255
845
  prompt = `[SCHEDULED TASK - You are running automatically, not in response to a user message. Use mcp__dotclaw__send_message if needed to communicate with the user.]\n\n${input.prompt}`;
@@ -1390,11 +980,11 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1390
980
  if (memoryPolicyResult) promptPackVersions['memory-policy'] = memoryPolicyResult.pack.version;
1391
981
  if (memoryRecallResult) promptPackVersions['memory-recall'] = memoryRecallResult.pack.version;
1392
982
 
1393
- const buildInstructions = (planBlockValue: string) => buildSystemInstructions({
983
+ const buildInstructions = () => buildSystemInstructions({
1394
984
  assistantName,
1395
985
  groupNotes: claudeNotes.group,
1396
986
  globalNotes: claudeNotes.global,
1397
- skillNotes,
987
+ skillCatalog,
1398
988
  memorySummary: sessionCtx.state.summary,
1399
989
  memoryFacts: sessionCtx.state.facts,
1400
990
  sessionRecall,
@@ -1405,73 +995,21 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1405
995
  toolReliability: input.toolReliability,
1406
996
  behaviorConfig: input.behaviorConfig,
1407
997
  isScheduledTask: !!input.isScheduledTask,
1408
- isBackgroundTask: !!input.isBackgroundTask,
1409
998
  taskId: input.taskId,
1410
- isBackgroundJob: !!input.isBackgroundJob,
1411
- jobId: input.jobId,
1412
999
  timezone: typeof input.timezone === 'string' ? input.timezone : undefined,
1413
1000
  hostPlatform: typeof input.hostPlatform === 'string' ? input.hostPlatform : undefined,
1414
- planBlock: planBlockValue,
1001
+ messagingPlatform: input.chatJid?.includes(':') ? input.chatJid.split(':')[0] : undefined,
1415
1002
  taskExtractionPack: taskPackResult?.pack || null,
1416
1003
  responseQualityPack: responseQualityResult?.pack || null,
1417
1004
  toolCallingPack: toolCallingResult?.pack || null,
1418
1005
  toolOutcomePack: toolOutcomeResult?.pack || null,
1419
1006
  memoryPolicyPack: memoryPolicyResult?.pack || null,
1420
- memoryRecallPack: memoryRecallResult?.pack || null
1007
+ memoryRecallPack: memoryRecallResult?.pack || null,
1008
+ maxToolSteps
1421
1009
  });
1422
1010
 
1423
- let planBlock = '';
1424
- let instructions = buildInstructions(planBlock);
1425
- let instructionsTokens = estimateTokensForModel(instructions, tokenEstimate.tokensPerChar);
1426
- let maxContextTokens = Math.max(config.maxContextTokens - config.maxOutputTokens - instructionsTokens, 2000);
1427
- let adjustedContextTokens = Math.max(1000, Math.floor(maxContextTokens * tokenRatio));
1428
- let { recentMessages: plannerContextMessages } = splitRecentHistory(recentMessages, adjustedContextTokens, 6);
1429
- plannerContextMessages = clampContextMessages(plannerContextMessages, tokenEstimate.tokensPerChar, maxContextMessageTokens);
1430
-
1431
- if (shouldRunPlanner({
1432
- enabled: plannerEnabled,
1433
- mode: plannerMode,
1434
- prompt,
1435
- tokensPerChar: tokenEstimate.tokensPerChar,
1436
- minTokens: plannerMinTokens,
1437
- trigger: plannerTrigger
1438
- })) {
1439
- try {
1440
- const plannerStartedAt = Date.now();
1441
- const plannerPrompt = buildPlannerPrompt(plannerContextMessages);
1442
- const plannerResult = await openrouter.callModel({
1443
- model: plannerModel,
1444
- instructions: plannerPrompt.instructions,
1445
- input: plannerPrompt.input,
1446
- maxOutputTokens: plannerMaxOutputTokens,
1447
- temperature: plannerTemperature,
1448
- reasoning: { effort: 'low' as const }
1449
- });
1450
- const plannerText = await getTextWithFallback(plannerResult, 'planner');
1451
- const plan = parsePlannerResponse(plannerText);
1452
- if (plan) {
1453
- planBlock = formatPlanBlock(plan);
1454
- }
1455
- timings.planner_ms = Date.now() - plannerStartedAt;
1456
- } catch (err) {
1457
- log(`Planner failed: ${err instanceof Error ? err.message : String(err)}`);
1458
- }
1459
- }
1460
-
1461
- if (planBlock) {
1462
- instructions = buildInstructions(planBlock);
1463
- instructionsTokens = estimateTokensForModel(instructions, tokenEstimate.tokensPerChar);
1464
- maxContextTokens = Math.max(config.maxContextTokens - config.maxOutputTokens - instructionsTokens, 2000);
1465
- adjustedContextTokens = Math.max(1000, Math.floor(maxContextTokens * tokenRatio));
1466
- ({ recentMessages: plannerContextMessages } = splitRecentHistory(recentMessages, adjustedContextTokens, 6));
1467
- plannerContextMessages = clampContextMessages(plannerContextMessages, tokenEstimate.tokensPerChar, maxContextMessageTokens);
1468
- }
1469
-
1470
- const buildContext = (extraInstruction?: string) => {
1471
- let resolvedInstructions = buildInstructions(planBlock);
1472
- if (extraInstruction) {
1473
- resolvedInstructions = `${resolvedInstructions}\n\n${extraInstruction}`;
1474
- }
1011
+ const buildContext = () => {
1012
+ const resolvedInstructions = buildInstructions();
1475
1013
  const resolvedInstructionTokens = estimateTokensForModel(resolvedInstructions, tokenEstimate.tokensPerChar);
1476
1014
  const resolvedMaxContext = Math.max(config.maxContextTokens - config.maxOutputTokens - resolvedInstructionTokens, 2000);
1477
1015
  const resolvedAdjusted = Math.max(1000, Math.floor(resolvedMaxContext * tokenRatio));
@@ -1487,17 +1025,13 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1487
1025
  let responseText = '';
1488
1026
  let completionTokens = 0;
1489
1027
  let promptTokens = 0;
1490
- let modelToolCalls: Array<{ name: string }> = [];
1491
-
1492
1028
  let latencyMs: number | undefined;
1493
- const runCompletion = async (extraInstruction?: string): Promise<{
1494
- responseText: string;
1495
- completionTokens: number;
1496
- promptTokens: number;
1497
- latencyMs?: number;
1498
- modelToolCalls: Array<{ name: string }>;
1499
- }> => {
1500
- const { instructions: resolvedInstructions, instructionsTokens: resolvedInstructionTokens, contextMessages } = buildContext(extraInstruction);
1029
+
1030
+ const modelChain = [model, ...(input.modelFallbacks || [])].slice(0, 3);
1031
+ let currentModel = model;
1032
+
1033
+ try {
1034
+ const { instructions: resolvedInstructions, instructionsTokens: resolvedInstructionTokens, contextMessages } = buildContext();
1501
1035
  const resolvedPromptTokens = resolvedInstructionTokens
1502
1036
  + estimateMessagesTokens(contextMessages, tokenEstimate.tokensPerChar, tokenEstimate.tokensPerMessage)
1503
1037
  + tokenEstimate.tokensPerRequest;
@@ -1512,127 +1046,107 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1512
1046
  }
1513
1047
  }
1514
1048
 
1515
- log('Starting OpenRouter call...');
1516
- const startedAt = Date.now();
1517
- const callParams = {
1518
- model,
1519
- instructions: resolvedInstructions,
1520
- input: messagesToOpenRouter(contextMessages),
1521
- tools,
1522
- stopWhen: stepCountIs(maxToolSteps),
1523
- maxOutputTokens: config.maxOutputTokens,
1524
- temperature: config.temperature,
1525
- reasoning: { effort: 'low' as const }
1526
- };
1527
- const result = await openrouter.callModel(callParams);
1528
- const localLatencyMs = Date.now() - startedAt;
1529
-
1530
- // Get the complete response text via the SDK's proper getText() path
1531
- let localResponseText = await getTextWithFallback(result, 'completion');
1532
-
1533
- const toolCallsFromModel = await result.getToolCalls();
1534
- if (toolCallsFromModel.length > 0) {
1535
- log(`Model made ${toolCallsFromModel.length} tool call(s): ${toolCallsFromModel.map(t => t.name).join(', ')}`);
1049
+ const contextInput = messagesToOpenRouter(contextMessages);
1050
+
1051
+ // Inject vision content into the last user message if images are present
1052
+ const imageContent = loadImageAttachments(input.attachments);
1053
+ if (imageContent.length > 0 && contextInput.length > 0) {
1054
+ const lastMsg = contextInput[contextInput.length - 1];
1055
+ if (lastMsg.role === 'user') {
1056
+ // Convert string content to multi-modal content array
1057
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1058
+ (lastMsg as any).content = [
1059
+ { type: 'text', text: typeof lastMsg.content === 'string' ? lastMsg.content : '' },
1060
+ ...imageContent
1061
+ ];
1062
+ }
1536
1063
  }
1537
1064
 
1538
- if (!localResponseText || !localResponseText.trim()) {
1539
- if (toolCallsFromModel.length > 0) {
1540
- localResponseText = 'I started running tool calls but did not get a final response. If you want me to continue, please ask a narrower subtask or say "continue".';
1541
- } else {
1542
- // Responses API likely returned a gen-ID; retry with Chat Completions API
1543
- try {
1544
- localResponseText = await chatCompletionsFallback({
1545
- model,
1546
- instructions: resolvedInstructions,
1547
- messages: messagesToOpenRouter(contextMessages),
1548
- maxOutputTokens: config.maxOutputTokens,
1549
- temperature: config.temperature
1550
- });
1551
- } catch (err) {
1552
- log(`Chat Completions fallback error: ${err instanceof Error ? err.message : String(err)}`);
1065
+ let lastError: unknown = null;
1066
+ for (let attempt = 0; attempt < modelChain.length; attempt++) {
1067
+ currentModel = modelChain[attempt];
1068
+ if (attempt > 0) log(`Fallback ${attempt}: trying ${currentModel}`);
1069
+
1070
+ try {
1071
+ log(`Starting OpenRouter call (${currentModel})...`);
1072
+ const startedAt = Date.now();
1073
+ const result = openrouter.callModel({
1074
+ model: currentModel,
1075
+ instructions: resolvedInstructions,
1076
+ input: contextInput,
1077
+ tools,
1078
+ stopWhen: stepCountIs(maxToolSteps),
1079
+ maxOutputTokens: config.maxOutputTokens,
1080
+ temperature: config.temperature,
1081
+ reasoning: resolvedReasoning
1082
+ });
1083
+
1084
+ // Stream text chunks to IPC if streamDir is provided
1085
+ if (input.streamDir) {
1086
+ let seq = 0;
1087
+ try {
1088
+ fs.mkdirSync(input.streamDir, { recursive: true });
1089
+ for await (const delta of result.getTextStream()) {
1090
+ seq++;
1091
+ const chunkFile = path.join(input.streamDir, `chunk_${String(seq).padStart(6, '0')}.txt`);
1092
+ const tmpFile = chunkFile + '.tmp';
1093
+ fs.writeFileSync(tmpFile, delta);
1094
+ fs.renameSync(tmpFile, chunkFile);
1095
+ }
1096
+ fs.writeFileSync(path.join(input.streamDir, 'done'), '');
1097
+ } catch (streamErr) {
1098
+ log(`Stream error: ${streamErr instanceof Error ? streamErr.message : String(streamErr)}`);
1099
+ try { fs.writeFileSync(path.join(input.streamDir, 'error'), streamErr instanceof Error ? streamErr.message : String(streamErr)); } catch { /* ignore */ }
1100
+ }
1553
1101
  }
1554
- }
1555
- if (!localResponseText || !localResponseText.trim()) {
1556
- log(`Warning: Model returned empty/whitespace response after all fallbacks. tool calls: ${toolCallsFromModel.length}`);
1557
- }
1558
- } else {
1559
- log(`Model returned text response (${localResponseText.length} chars)`);
1560
- }
1561
1102
 
1562
- const localCompletionTokens = estimateTokensForModel(localResponseText || '', tokenEstimate.tokensPerChar);
1563
- return {
1564
- responseText: localResponseText,
1565
- completionTokens: localCompletionTokens,
1566
- promptTokens: resolvedPromptTokens,
1567
- latencyMs: localLatencyMs,
1568
- modelToolCalls: toolCallsFromModel
1569
- };
1570
- };
1103
+ latencyMs = Date.now() - startedAt;
1571
1104
 
1572
- try {
1573
- const firstAttempt = await runCompletion();
1574
- responseText = firstAttempt.responseText;
1575
- completionTokens = firstAttempt.completionTokens;
1576
- promptTokens = firstAttempt.promptTokens;
1577
- latencyMs = firstAttempt.latencyMs;
1578
- modelToolCalls = firstAttempt.modelToolCalls;
1579
-
1580
- const shouldValidate = responseValidateEnabled
1581
- && promptTokens >= responseValidateMinPromptTokens
1582
- && completionTokens >= responseValidateMinResponseTokens
1583
- && (responseValidateAllowToolCalls || modelToolCalls.length === 0);
1584
- if (shouldValidate) {
1585
- const MAX_VALIDATION_ITERATIONS = 5;
1586
- let retriesLeft = responseValidateMaxRetries;
1587
- for (let _validationIter = 0; _validationIter < MAX_VALIDATION_ITERATIONS; _validationIter++) {
1588
- if (!responseValidateAllowToolCalls && modelToolCalls.length > 0) {
1589
- break;
1105
+ const completionResult = await getResponseText(result, 'completion');
1106
+ responseText = completionResult.text;
1107
+
1108
+ const toolCallsFromModel = await result.getToolCalls();
1109
+ if (toolCallsFromModel.length > 0) {
1110
+ log(`Model made ${toolCallsFromModel.length} tool call(s): ${toolCallsFromModel.map(t => t.name).join(', ')}`);
1590
1111
  }
1591
- let validationResult: ResponseValidation | null = null;
1592
1112
  if (!responseText || !responseText.trim()) {
1593
- validationResult = { verdict: 'fail', issues: ['Response was empty.'], missing: [] };
1594
- } else {
1595
- try {
1596
- const validationStartedAt = Date.now();
1597
- validationResult = await validateResponseQuality({
1598
- openrouter,
1599
- model: responseValidateModel,
1600
- userPrompt: query,
1601
- response: responseText,
1602
- maxOutputTokens: responseValidateMaxOutputTokens,
1603
- temperature: responseValidateTemperature
1604
- });
1605
- timings.response_validation_ms = (timings.response_validation_ms ?? 0) + (Date.now() - validationStartedAt);
1606
- } catch (err) {
1607
- log(`Response validation failed: ${err instanceof Error ? err.message : String(err)}`);
1113
+ if (completionResult.error) {
1114
+ log(`Tool execution failed: ${completionResult.error}`);
1115
+ responseText = `Something went wrong while processing your request: ${completionResult.error}. Please try again.`;
1116
+ } else if (toolCallsFromModel.length > 0) {
1117
+ responseText = 'I started running tool calls but did not get a final response. If you want me to continue, please ask a narrower subtask or say "continue".';
1118
+ } else {
1119
+ log(`Warning: Model returned empty/whitespace response. tool calls: ${toolCallsFromModel.length}`);
1608
1120
  }
1121
+ } else {
1122
+ log(`Model returned text response (${responseText.length} chars)`);
1609
1123
  }
1610
- if (!validationResult || validationResult.verdict === 'pass') {
1611
- break;
1612
- }
1613
- if (retriesLeft <= 0) {
1614
- break;
1124
+
1125
+ completionTokens = estimateTokensForModel(responseText || '', tokenEstimate.tokensPerChar);
1126
+ promptTokens = resolvedPromptTokens;
1127
+ lastError = null;
1128
+ break; // Success
1129
+ } catch (err) {
1130
+ lastError = err;
1131
+ if (classifyError(err) && attempt < modelChain.length - 1) {
1132
+ log(`${currentModel} failed (${classifyError(err)}): ${err instanceof Error ? err.message : err}`);
1133
+ continue;
1615
1134
  }
1616
- retriesLeft -= 1;
1617
- log(`Response validation failed; retrying (${retriesLeft} retries left)`);
1618
- const retryGuidance = buildRetryGuidance(validationResult);
1619
- const retryAttempt = await runCompletion(retryGuidance);
1620
- responseText = retryAttempt.responseText;
1621
- completionTokens = retryAttempt.completionTokens;
1622
- promptTokens = retryAttempt.promptTokens;
1623
- latencyMs = retryAttempt.latencyMs;
1624
- modelToolCalls = retryAttempt.modelToolCalls;
1135
+ throw err; // Non-retryable or last model — propagate
1625
1136
  }
1626
1137
  }
1138
+
1139
+ if (lastError) throw lastError;
1627
1140
  } catch (err) {
1628
1141
  const errorMessage = err instanceof Error ? err.message : String(err);
1629
- log(`Agent error: ${errorMessage}`);
1142
+ const allFailed = modelChain.length > 1 ? `All models failed. Last error: ${errorMessage}` : errorMessage;
1143
+ log(`Agent error: ${allFailed}`);
1630
1144
  return {
1631
1145
  status: 'error',
1632
1146
  result: null,
1633
1147
  newSessionId: isNew ? sessionCtx.sessionId : undefined,
1634
- error: errorMessage,
1635
- model,
1148
+ error: allFailed,
1149
+ model: currentModel,
1636
1150
  prompt_pack_versions: Object.keys(promptPackVersions).length > 0 ? promptPackVersions : undefined,
1637
1151
  memory_summary: sessionCtx.state.summary,
1638
1152
  memory_facts: sessionCtx.state.facts,
@@ -1649,25 +1163,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1649
1163
  }
1650
1164
 
1651
1165
  appendHistory(sessionCtx, 'assistant', responseText || '');
1652
-
1653
1166
  history = loadHistory(sessionCtx);
1654
- const newMessages = history.filter(m => m.seq > sessionCtx.state.lastSummarySeq);
1655
- if (newMessages.length >= config.summaryUpdateEveryMessages) {
1656
- const summaryUpdate = await updateMemorySummary({
1657
- openrouter,
1658
- model: summaryModel,
1659
- existingSummary: sessionCtx.state.summary,
1660
- existingFacts: sessionCtx.state.facts,
1661
- newMessages,
1662
- maxOutputTokens: config.summaryMaxOutputTokens
1663
- });
1664
- if (summaryUpdate) {
1665
- sessionCtx.state.summary = summaryUpdate.summary;
1666
- sessionCtx.state.facts = summaryUpdate.facts;
1667
- sessionCtx.state.lastSummarySeq = newMessages[newMessages.length - 1].seq;
1668
- saveMemoryState(sessionCtx);
1669
- }
1670
- }
1671
1167
 
1672
1168
  const runMemoryExtraction = async () => {
1673
1169
  const extractionMessages = history.slice(-memoryExtractionMaxMessages);
@@ -1680,7 +1176,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1680
1176
  messages: extractionMessages,
1681
1177
  memoryPolicyPack: memoryPolicyResult?.pack || null
1682
1178
  });
1683
- const extractionResult = await openrouter.callModel({
1179
+ const extractionResult = openrouter.callModel({
1684
1180
  model: memoryModel,
1685
1181
  instructions: extractionPrompt.instructions,
1686
1182
  input: extractionPrompt.input,
@@ -1688,7 +1184,7 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1688
1184
  temperature: 0.1,
1689
1185
  reasoning: { effort: 'low' as const }
1690
1186
  });
1691
- const extractionText = await getTextWithFallback(extractionResult, 'memory_extraction');
1187
+ const { text: extractionText } = await getResponseText(extractionResult, 'memory_extraction');
1692
1188
  const extractedItems = parseMemoryExtraction(extractionText);
1693
1189
  if (extractedItems.length === 0) return;
1694
1190
 
@@ -1722,27 +1218,11 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1722
1218
  timings.memory_extraction_ms = (timings.memory_extraction_ms ?? 0) + (Date.now() - extractionStartedAt);
1723
1219
  };
1724
1220
 
1725
- if (memoryExtractionEnabled && (!input.isScheduledTask || memoryExtractScheduled)) {
1726
- const runMemoryExtractionWithRetry = async (maxRetries = 2): Promise<void> => {
1727
- for (let attempt = 0; attempt <= maxRetries; attempt++) {
1728
- try {
1729
- await runMemoryExtraction();
1730
- return;
1731
- } catch (err) {
1732
- log(`Memory extraction attempt ${attempt + 1} failed: ${err instanceof Error ? err.message : String(err)}`);
1733
- if (attempt < maxRetries) {
1734
- await new Promise(r => setTimeout(r, 1000 * Math.pow(2, attempt)));
1735
- }
1736
- }
1737
- }
1738
- log('Memory extraction failed after all retries');
1739
- };
1740
-
1741
- if (memoryExtractionAsync && isDaemon) {
1742
- void runMemoryExtractionWithRetry().catch(() => {});
1743
- } else {
1744
- await runMemoryExtractionWithRetry();
1745
- }
1221
+ if (memoryExtractionEnabled && isDaemon && (!input.isScheduledTask || memoryExtractScheduled)) {
1222
+ // Fire-and-forget in daemon mode; skip entirely in ephemeral mode
1223
+ void runMemoryExtraction().catch((err) => {
1224
+ log(`Memory extraction failed: ${err instanceof Error ? err.message : String(err)}`);
1225
+ });
1746
1226
  }
1747
1227
 
1748
1228
  // Normalize empty/whitespace-only responses to null
@@ -1754,11 +1234,16 @@ export async function runAgentOnce(input: ContainerInput): Promise<ContainerOutp
1754
1234
  }
1755
1235
  }
1756
1236
 
1237
+ // Cleanup MCP connections
1238
+ if (mcpCleanup) {
1239
+ try { await mcpCleanup(); } catch { /* ignore cleanup errors */ }
1240
+ }
1241
+
1757
1242
  return {
1758
1243
  status: 'success',
1759
1244
  result: finalResult,
1760
1245
  newSessionId: isNew ? sessionCtx.sessionId : undefined,
1761
- model,
1246
+ model: currentModel,
1762
1247
  prompt_pack_versions: Object.keys(promptPackVersions).length > 0 ? promptPackVersions : undefined,
1763
1248
  memory_summary: sessionCtx.state.summary,
1764
1249
  memory_facts: sessionCtx.state.facts,