@dotsetlabs/dotclaw 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/config-examples/runtime.json +29 -3
  2. package/container/agent-runner/src/agent-config.ts +19 -3
  3. package/container/agent-runner/src/container-protocol.ts +11 -0
  4. package/container/agent-runner/src/context-overflow-recovery.ts +39 -0
  5. package/container/agent-runner/src/index.ts +744 -123
  6. package/container/agent-runner/src/memory.ts +18 -68
  7. package/container/agent-runner/src/system-prompt.ts +36 -34
  8. package/container/agent-runner/src/tool-loop-policy.ts +724 -0
  9. package/container/agent-runner/src/tools.ts +211 -8
  10. package/dist/agent-context.d.ts +1 -0
  11. package/dist/agent-context.d.ts.map +1 -1
  12. package/dist/agent-context.js +21 -9
  13. package/dist/agent-context.js.map +1 -1
  14. package/dist/agent-execution.d.ts +2 -0
  15. package/dist/agent-execution.d.ts.map +1 -1
  16. package/dist/agent-execution.js +164 -15
  17. package/dist/agent-execution.js.map +1 -1
  18. package/dist/agent-semaphore.d.ts +24 -1
  19. package/dist/agent-semaphore.d.ts.map +1 -1
  20. package/dist/agent-semaphore.js +109 -20
  21. package/dist/agent-semaphore.js.map +1 -1
  22. package/dist/cli.js +3 -11
  23. package/dist/cli.js.map +1 -1
  24. package/dist/config.d.ts +2 -0
  25. package/dist/config.d.ts.map +1 -1
  26. package/dist/config.js +2 -0
  27. package/dist/config.js.map +1 -1
  28. package/dist/container-protocol.d.ts +22 -0
  29. package/dist/container-protocol.d.ts.map +1 -1
  30. package/dist/container-protocol.js.map +1 -1
  31. package/dist/container-runner.d.ts +7 -0
  32. package/dist/container-runner.d.ts.map +1 -1
  33. package/dist/container-runner.js +417 -143
  34. package/dist/container-runner.js.map +1 -1
  35. package/dist/db.d.ts.map +1 -1
  36. package/dist/db.js +46 -12
  37. package/dist/db.js.map +1 -1
  38. package/dist/failover-policy.d.ts +41 -0
  39. package/dist/failover-policy.d.ts.map +1 -0
  40. package/dist/failover-policy.js +261 -0
  41. package/dist/failover-policy.js.map +1 -0
  42. package/dist/index.js +1 -0
  43. package/dist/index.js.map +1 -1
  44. package/dist/ipc-dispatcher.d.ts.map +1 -1
  45. package/dist/ipc-dispatcher.js +27 -43
  46. package/dist/ipc-dispatcher.js.map +1 -1
  47. package/dist/mcp-config.d.ts +22 -0
  48. package/dist/mcp-config.d.ts.map +1 -0
  49. package/dist/mcp-config.js +94 -0
  50. package/dist/mcp-config.js.map +1 -0
  51. package/dist/memory-backend.d.ts +27 -0
  52. package/dist/memory-backend.d.ts.map +1 -0
  53. package/dist/memory-backend.js +112 -0
  54. package/dist/memory-backend.js.map +1 -0
  55. package/dist/memory-recall.d.ts.map +1 -1
  56. package/dist/memory-recall.js +135 -22
  57. package/dist/memory-recall.js.map +1 -1
  58. package/dist/memory-store.d.ts +1 -0
  59. package/dist/memory-store.d.ts.map +1 -1
  60. package/dist/memory-store.js +55 -7
  61. package/dist/memory-store.js.map +1 -1
  62. package/dist/message-pipeline.d.ts +24 -0
  63. package/dist/message-pipeline.d.ts.map +1 -1
  64. package/dist/message-pipeline.js +131 -27
  65. package/dist/message-pipeline.js.map +1 -1
  66. package/dist/metrics.d.ts +1 -0
  67. package/dist/metrics.d.ts.map +1 -1
  68. package/dist/metrics.js +9 -0
  69. package/dist/metrics.js.map +1 -1
  70. package/dist/recall-policy.d.ts +12 -0
  71. package/dist/recall-policy.d.ts.map +1 -0
  72. package/dist/recall-policy.js +89 -0
  73. package/dist/recall-policy.js.map +1 -0
  74. package/dist/runtime-config.d.ts +33 -0
  75. package/dist/runtime-config.d.ts.map +1 -1
  76. package/dist/runtime-config.js +111 -11
  77. package/dist/runtime-config.js.map +1 -1
  78. package/dist/streaming.d.ts.map +1 -1
  79. package/dist/streaming.js +125 -33
  80. package/dist/streaming.js.map +1 -1
  81. package/dist/task-scheduler.d.ts.map +1 -1
  82. package/dist/task-scheduler.js +27 -10
  83. package/dist/task-scheduler.js.map +1 -1
  84. package/dist/tool-policy.d.ts.map +1 -1
  85. package/dist/tool-policy.js +26 -4
  86. package/dist/tool-policy.js.map +1 -1
  87. package/dist/trace-writer.d.ts +12 -0
  88. package/dist/trace-writer.d.ts.map +1 -1
  89. package/dist/trace-writer.js.map +1 -1
  90. package/dist/turn-hygiene.d.ts +14 -0
  91. package/dist/turn-hygiene.d.ts.map +1 -0
  92. package/dist/turn-hygiene.js +214 -0
  93. package/dist/turn-hygiene.js.map +1 -0
  94. package/dist/webhook.d.ts.map +1 -1
  95. package/dist/webhook.js +1 -0
  96. package/dist/webhook.js.map +1 -1
  97. package/package.json +15 -1
  98. package/scripts/benchmark-baseline.js +365 -0
  99. package/scripts/benchmark-harness.js +1413 -0
  100. package/scripts/benchmark-scenarios.js +301 -0
  101. package/scripts/canary-suite.js +123 -0
  102. package/scripts/generate-controlled-traces.js +230 -0
  103. package/scripts/release-slo-check.js +214 -0
  104. package/scripts/run-live-canary.js +339 -0
@@ -371,71 +371,6 @@ export function parseSummaryResponse(text: string): { summary: string; facts: st
371
371
  }
372
372
  }
373
373
 
374
- function tokenize(text: string): string[] {
375
- return (text.toLowerCase().match(/[a-z0-9]+/g) || []).filter(token => token.length > 1);
376
- }
377
-
378
- function scoreCandidate(candidate: string, queryTokens: string[], weight: number): number {
379
- const candidateTokens = tokenize(candidate);
380
- if (candidateTokens.length === 0 || queryTokens.length === 0) return 0;
381
- const tokenSet = new Set(candidateTokens);
382
- let overlap = 0;
383
- for (const token of queryTokens) {
384
- if (tokenSet.has(token)) overlap += 1;
385
- }
386
- if (overlap === 0) return 0;
387
- return (overlap / Math.sqrt(candidateTokens.length)) * weight;
388
- }
389
-
390
- export function retrieveRelevantMemories(params: {
391
- query: string;
392
- summary: string;
393
- facts: string[];
394
- olderMessages: Message[];
395
- config: MemoryConfig;
396
- }): string[] {
397
- const queryTokens = tokenize(params.query);
398
- if (queryTokens.length === 0) return [];
399
-
400
- const candidates: Array<{ text: string; score: number }> = [];
401
-
402
- if (params.summary) {
403
- const summaryLines = params.summary.split('\n').map(line => line.trim()).filter(Boolean);
404
- for (const line of summaryLines) {
405
- const score = scoreCandidate(line, queryTokens, 1.4);
406
- if (score > 0) candidates.push({ text: line, score });
407
- }
408
- }
409
-
410
- for (const fact of params.facts) {
411
- const score = scoreCandidate(fact, queryTokens, 2.0);
412
- if (score > 0) candidates.push({ text: fact, score });
413
- }
414
-
415
- for (const msg of params.olderMessages.slice(-50)) {
416
- const snippet = msg.content.length > 300 ? `${msg.content.slice(0, 300)}...` : msg.content;
417
- const score = scoreCandidate(snippet, queryTokens, 1.0);
418
- if (score > 0) candidates.push({ text: snippet, score });
419
- }
420
-
421
- candidates.sort((a, b) => b.score - a.score);
422
-
423
- // Quality gate: filter out low-scoring candidates to prevent noise injection
424
- const MIN_SESSION_RECALL_SCORE = 0.5;
425
- const filtered = candidates.filter(c => c.score >= MIN_SESSION_RECALL_SCORE);
426
-
427
- const results: string[] = [];
428
- let tokens = 0;
429
- for (const candidate of filtered) {
430
- if (results.length >= params.config.memoryMaxResults) break;
431
- const nextTokens = estimateTokens(candidate.text);
432
- if (tokens + nextTokens > params.config.memoryMaxTokens) break;
433
- results.push(candidate.text);
434
- tokens += nextTokens;
435
- }
436
- return results;
437
- }
438
-
439
374
  export interface ContextPruningConfig {
440
375
  softTrimMaxChars: number;
441
376
  softTrimHeadChars: number;
@@ -475,10 +410,25 @@ export function pruneContextMessages(
475
410
  }
476
411
 
477
412
  /**
478
- * Limit conversation history to the last N messages.
413
+ * Limit conversation history by counting user turns (not total messages).
414
+ * maxTurns=40 means keep the last 40 user messages plus all their associated
415
+ * assistant replies — roughly 80 messages total.
479
416
  * Preserves chronological order.
480
417
  */
481
418
  export function limitHistoryTurns(messages: Message[], maxTurns: number): Message[] {
482
- if (maxTurns <= 0 || messages.length <= maxTurns) return messages;
483
- return messages.slice(-maxTurns);
419
+ if (maxTurns <= 0) return messages;
420
+ // Count user turns from the end
421
+ let userTurnsSeen = 0;
422
+ let cutoff = 0;
423
+ for (let i = messages.length - 1; i >= 0; i--) {
424
+ if (messages[i].role === 'user') {
425
+ userTurnsSeen++;
426
+ if (userTurnsSeen > maxTurns) {
427
+ cutoff = i + 1;
428
+ break;
429
+ }
430
+ }
431
+ }
432
+ if (cutoff === 0) return messages;
433
+ return messages.slice(cutoff);
484
434
  }
@@ -110,7 +110,9 @@ function buildScheduledSection(params: SystemPromptParams): string {
110
110
 
111
111
  function buildResponseGuidanceSection(): string {
112
112
  return [
113
- '- Always answer the user\'s question directly before reaching for tools.',
113
+ '- Answer directly when the request can be completed from conversation context without external state.',
114
+ '- When the request requires file/system/network actions or fresh state, execute tools first before finalizing.',
115
+ '- Never claim an action happened unless corresponding tool calls succeeded in this turn.',
114
116
  '- If the user asks about your previous actions (e.g., "did you use X tool?"), reflect on the conversation history — do not re-execute the task.',
115
117
  '- If the user asks a simple factual question, answer from your knowledge — do not call tools unless you need to verify or act.',
116
118
  '- When you have genuinely nothing to say, respond with ONLY: NO_REPLY (your entire message must be just this token, nothing else).'
@@ -128,6 +130,7 @@ function buildToolCallStyleSection(): string {
128
130
  function buildToolGuidanceSection(params: SystemPromptParams): string {
129
131
  const lines = [
130
132
  'Key tool rules:',
133
+ '- Never claim file/system/web actions succeeded unless tool calls in this turn confirm them.',
131
134
  '- User attachments arrive in /workspace/group/inbox/ (see <attachment> tags). Process with Read/Bash/Python.',
132
135
  '- To send media from the web: download_url → send_photo/send_file/send_audio.',
133
136
  '- Charts/plots: matplotlib → savefig → send_photo. Graphviz → dot -Tpng → send_photo.',
@@ -160,37 +163,30 @@ function buildToolGuidanceSection(params: SystemPromptParams): string {
160
163
 
161
164
  function buildMemorySection(params: SystemPromptParams): string {
162
165
  const parts: string[] = [];
163
- const hasAny = params.memorySummary || params.memoryFacts.length > 0 ||
164
- params.longTermRecall.length > 0 || params.userProfile;
165
-
166
- if (hasAny) {
167
- parts.push('The following memories may or may not be relevant to the current conversation. Use them only if they directly answer the user\'s question.');
168
- if (params.memorySummary) {
169
- parts.push('Long-term memory summary:');
170
- parts.push(params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS));
171
- }
172
- if (params.memoryFacts.length > 0) {
173
- parts.push('Long-term facts:');
174
- parts.push(params.memoryFacts.map(f => `- ${f}`).join('\n'));
175
- }
176
- if (params.userProfile) {
177
- parts.push('User profile:');
178
- parts.push(params.userProfile);
179
- }
180
- if (params.longTermRecall.length > 0) {
181
- parts.push('What you remember about the user (long-term):');
182
- parts.push(params.longTermRecall.map(item => `- ${item}`).join('\n'));
183
- }
184
- if (params.memoryStats) {
185
- parts.push(`Memory stats: Total: ${params.memoryStats.total}, User: ${params.memoryStats.user}, Group: ${params.memoryStats.group}, Global: ${params.memoryStats.global}`);
186
- }
187
- } else {
188
- parts.push('No long-term memory available yet.');
166
+
167
+ // Session-level context: summary and facts from the current conversation.
168
+ // These are essential for understanding the current thread.
169
+ if (params.memorySummary) {
170
+ parts.push('Conversation summary (this session):');
171
+ parts.push(params.memorySummary.slice(0, MEMORY_SUMMARY_MAX_CHARS));
172
+ }
173
+ if (params.memoryFacts.length > 0) {
174
+ parts.push('Key facts (this session):');
175
+ parts.push(params.memoryFacts.map(f => `- ${f}`).join('\n'));
176
+ }
177
+
178
+ // User profile stays pre-injected — identity and preferences should always be available.
179
+ if (params.userProfile) {
180
+ parts.push('User profile:');
181
+ parts.push(params.userProfile);
189
182
  }
190
183
 
191
- if (params.sessionRecall.length > 0) {
192
- parts.push('Recent conversation context:');
193
- parts.push(params.sessionRecall.map(item => `- ${item}`).join('\n'));
184
+ // Long-term memory is now tool-based: agent searches on demand instead of pre-injection.
185
+ // This prevents context bloat from irrelevant memories and lets the agent decide what's needed.
186
+ parts.push('Long-term memory: Use the mcp__dotclaw__memory_search tool to recall information from past conversations, stored preferences, notes, and knowledge. Search BEFORE answering questions about prior decisions, dates, people, projects, or anything you don\'t see in the conversation above.');
187
+
188
+ if (params.memoryStats && params.memoryStats.total > 0) {
189
+ parts.push(`Memory store: ${params.memoryStats.total} entries available (search with mcp__dotclaw__memory_search).`);
194
190
  }
195
191
 
196
192
  return parts.join('\n');
@@ -292,8 +288,14 @@ export function buildSystemPrompt(params: SystemPromptParams): string {
292
288
  const toolReliability = trimLevel >= 2 ? '' : (
293
289
  params.toolReliability && params.toolReliability.length > 0
294
290
  ? params.toolReliability
295
- .sort((a, b) => a.success_rate - b.success_rate)
296
- .slice(0, 20)
291
+ .filter(t => t.count >= 5 && (t.success_rate < 0.98 || (Number.isFinite(t.avg_duration_ms) && (t.avg_duration_ms || 0) > 2500)))
292
+ .sort((a, b) => {
293
+ if (a.success_rate !== b.success_rate) return a.success_rate - b.success_rate;
294
+ const aDur = Number.isFinite(a.avg_duration_ms) ? (a.avg_duration_ms || 0) : 0;
295
+ const bDur = Number.isFinite(b.avg_duration_ms) ? (b.avg_duration_ms || 0) : 0;
296
+ return bDur - aDur;
297
+ })
298
+ .slice(0, 8)
297
299
  .map(t => {
298
300
  const pct = `${Math.round(t.success_rate * 100)}%`;
299
301
  const avg = Number.isFinite(t.avg_duration_ms) ? `${Math.round(t.avg_duration_ms!)}ms` : 'n/a';
@@ -306,9 +308,9 @@ export function buildSystemPrompt(params: SystemPromptParams): string {
306
308
  // Trim level 1+: drop prompt packs
307
309
  const packBlocks = trimLevel >= 1 ? [] : buildPromptPackSections(params);
308
310
 
309
- // Trim level 3+: reduce memory section (drop session recall, limit long-term recall)
311
+ // Trim level 3+: reduce memory section (drop summary to save space)
310
312
  const memoryParams = trimLevel >= 3
311
- ? { ...params, sessionRecall: [], longTermRecall: params.longTermRecall.slice(0, 2) }
313
+ ? { ...params, memorySummary: params.memorySummary ? params.memorySummary.slice(0, 500) : '', memoryFacts: params.memoryFacts.slice(0, 5) }
312
314
  : params;
313
315
 
314
316
  const sections = [