@dotsetlabs/dotclaw 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/.env.example +9 -10
  2. package/README.md +8 -4
  3. package/config-examples/runtime.json +34 -8
  4. package/config-examples/tool-policy.json +12 -2
  5. package/container/agent-runner/package-lock.json +2 -2
  6. package/container/agent-runner/package.json +1 -1
  7. package/container/agent-runner/src/agent-config.ts +19 -3
  8. package/container/agent-runner/src/container-protocol.ts +11 -0
  9. package/container/agent-runner/src/context-overflow-recovery.ts +39 -0
  10. package/container/agent-runner/src/index.ts +603 -165
  11. package/container/agent-runner/src/openrouter-input.ts +159 -0
  12. package/container/agent-runner/src/system-prompt.ts +13 -3
  13. package/container/agent-runner/src/tool-loop-policy.ts +741 -0
  14. package/container/agent-runner/src/tools.ts +211 -8
  15. package/dist/agent-context.d.ts +1 -0
  16. package/dist/agent-context.d.ts.map +1 -1
  17. package/dist/agent-context.js +21 -9
  18. package/dist/agent-context.js.map +1 -1
  19. package/dist/agent-execution.d.ts +2 -0
  20. package/dist/agent-execution.d.ts.map +1 -1
  21. package/dist/agent-execution.js +164 -15
  22. package/dist/agent-execution.js.map +1 -1
  23. package/dist/agent-semaphore.d.ts +24 -1
  24. package/dist/agent-semaphore.d.ts.map +1 -1
  25. package/dist/agent-semaphore.js +109 -20
  26. package/dist/agent-semaphore.js.map +1 -1
  27. package/dist/cli.js +3 -11
  28. package/dist/cli.js.map +1 -1
  29. package/dist/config.d.ts +2 -0
  30. package/dist/config.d.ts.map +1 -1
  31. package/dist/config.js +2 -0
  32. package/dist/config.js.map +1 -1
  33. package/dist/container-protocol.d.ts +22 -0
  34. package/dist/container-protocol.d.ts.map +1 -1
  35. package/dist/container-protocol.js.map +1 -1
  36. package/dist/container-runner.d.ts +7 -0
  37. package/dist/container-runner.d.ts.map +1 -1
  38. package/dist/container-runner.js +417 -143
  39. package/dist/container-runner.js.map +1 -1
  40. package/dist/db.d.ts.map +1 -1
  41. package/dist/db.js +46 -12
  42. package/dist/db.js.map +1 -1
  43. package/dist/error-messages.d.ts.map +1 -1
  44. package/dist/error-messages.js +18 -4
  45. package/dist/error-messages.js.map +1 -1
  46. package/dist/failover-policy.d.ts +41 -0
  47. package/dist/failover-policy.d.ts.map +1 -0
  48. package/dist/failover-policy.js +261 -0
  49. package/dist/failover-policy.js.map +1 -0
  50. package/dist/index.js +1 -0
  51. package/dist/index.js.map +1 -1
  52. package/dist/ipc-dispatcher.d.ts.map +1 -1
  53. package/dist/ipc-dispatcher.js +27 -43
  54. package/dist/ipc-dispatcher.js.map +1 -1
  55. package/dist/mcp-config.d.ts +22 -0
  56. package/dist/mcp-config.d.ts.map +1 -0
  57. package/dist/mcp-config.js +94 -0
  58. package/dist/mcp-config.js.map +1 -0
  59. package/dist/memory-backend.d.ts +27 -0
  60. package/dist/memory-backend.d.ts.map +1 -0
  61. package/dist/memory-backend.js +112 -0
  62. package/dist/memory-backend.js.map +1 -0
  63. package/dist/memory-recall.d.ts.map +1 -1
  64. package/dist/memory-recall.js +135 -22
  65. package/dist/memory-recall.js.map +1 -1
  66. package/dist/memory-store.d.ts +1 -0
  67. package/dist/memory-store.d.ts.map +1 -1
  68. package/dist/memory-store.js +55 -7
  69. package/dist/memory-store.js.map +1 -1
  70. package/dist/message-pipeline.d.ts +24 -0
  71. package/dist/message-pipeline.d.ts.map +1 -1
  72. package/dist/message-pipeline.js +131 -27
  73. package/dist/message-pipeline.js.map +1 -1
  74. package/dist/metrics.d.ts +1 -0
  75. package/dist/metrics.d.ts.map +1 -1
  76. package/dist/metrics.js +9 -0
  77. package/dist/metrics.js.map +1 -1
  78. package/dist/providers/discord/discord-provider.d.ts.map +1 -1
  79. package/dist/providers/discord/discord-provider.js +72 -4
  80. package/dist/providers/discord/discord-provider.js.map +1 -1
  81. package/dist/providers/telegram/telegram-provider.d.ts.map +1 -1
  82. package/dist/providers/telegram/telegram-provider.js +65 -3
  83. package/dist/providers/telegram/telegram-provider.js.map +1 -1
  84. package/dist/recall-policy.d.ts +12 -0
  85. package/dist/recall-policy.d.ts.map +1 -0
  86. package/dist/recall-policy.js +89 -0
  87. package/dist/recall-policy.js.map +1 -0
  88. package/dist/runtime-config.d.ts +33 -0
  89. package/dist/runtime-config.d.ts.map +1 -1
  90. package/dist/runtime-config.js +109 -9
  91. package/dist/runtime-config.js.map +1 -1
  92. package/dist/streaming.d.ts.map +1 -1
  93. package/dist/streaming.js +125 -33
  94. package/dist/streaming.js.map +1 -1
  95. package/dist/task-scheduler.d.ts.map +1 -1
  96. package/dist/task-scheduler.js +4 -2
  97. package/dist/task-scheduler.js.map +1 -1
  98. package/dist/tool-policy.d.ts.map +1 -1
  99. package/dist/tool-policy.js +26 -4
  100. package/dist/tool-policy.js.map +1 -1
  101. package/dist/trace-writer.d.ts +12 -0
  102. package/dist/trace-writer.d.ts.map +1 -1
  103. package/dist/trace-writer.js.map +1 -1
  104. package/dist/turn-hygiene.d.ts +14 -0
  105. package/dist/turn-hygiene.d.ts.map +1 -0
  106. package/dist/turn-hygiene.js +214 -0
  107. package/dist/turn-hygiene.js.map +1 -0
  108. package/dist/webhook.d.ts.map +1 -1
  109. package/dist/webhook.js +1 -0
  110. package/dist/webhook.js.map +1 -1
  111. package/package.json +15 -1
  112. package/scripts/benchmark-baseline.js +365 -0
  113. package/scripts/benchmark-harness.js +1413 -0
  114. package/scripts/benchmark-scenarios.js +301 -0
  115. package/scripts/canary-suite.js +123 -0
  116. package/scripts/generate-controlled-traces.js +230 -0
  117. package/scripts/release-slo-check.js +214 -0
  118. package/scripts/run-live-canary.js +339 -0
@@ -0,0 +1,159 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+
4
+ import type { ContainerInput } from './container-protocol.js';
5
+
6
/** Per-image size cap in bytes (10 MiB). */
const MAX_IMAGE_BYTES = 10 * 2 ** 20;

/** Combined size cap in bytes across all images of one input (20 MiB). */
const MAX_TOTAL_IMAGE_BYTES = 20 * 2 ** 20;

/** MIME types accepted as image input. */
const IMAGE_MIME_TYPES = new Set<string>([
  'image/jpeg',
  'image/png',
  'image/gif',
  'image/webp'
]);
9
+
10
/** A plain-text segment of multi-part message content. */
export interface OpenRouterInputTextPart {
  type: 'input_text';
  text: string;
}

/** An image segment of multi-part message content. */
export interface OpenRouterInputImagePart {
  type: 'input_image';
  detail: 'auto';
  /** Image location as a URL string (a `data:` URL carries the bytes inline). */
  imageUrl: string;
}

/** Any segment allowed inside multi-part content, discriminated by `type`. */
export type OpenRouterUserContentPart =
  | OpenRouterInputTextPart
  | OpenRouterInputImagePart;

/** One conversation turn: either plain text or a list of content parts. */
export interface OpenRouterInputMessage {
  role: 'user' | 'assistant';
  content: string | Array<OpenRouterUserContentPart>;
}
27
+
28
/**
 * Guesses an image MIME type from the extension of a file name.
 *
 * @param fileName - File name or path; may be absent.
 * @returns The MIME type for `.jpg`/`.jpeg`/`.png`/`.gif`/`.webp`
 *   (matched case-insensitively), or `null` for anything else.
 */
function inferImageMimeFromName(fileName?: string): string | null {
  if (typeof fileName !== 'string' || fileName === '') return null;

  switch (path.extname(fileName).toLowerCase()) {
    case '.jpg':
    case '.jpeg':
      return 'image/jpeg';
    case '.png':
      return 'image/png';
    case '.gif':
      return 'image/gif';
    case '.webp':
      return 'image/webp';
    default:
      // Covers both "no extension" and "unsupported extension".
      return null;
  }
}
38
+
39
/**
 * Resolves an attachment's MIME type to one of the supported image types.
 *
 * A non-blank string `input` is authoritative: it is lower-cased and anything
 * from the first `;` onward (parameters such as `charset`) is discarded. Only
 * when `input` is not a string, or is blank after that cleanup, is the type
 * inferred from the extension of `fallbackName`.
 *
 * @param input - Declared MIME type; any non-string value is ignored.
 * @param fallbackName - File name used for extension-based inference.
 * @returns A member of `IMAGE_MIME_TYPES`, or `null` when no supported type
 *   could be determined.
 */
function normalizeMimeType(input: unknown, fallbackName?: string): string | null {
  let resolved: string | null = null;

  if (typeof input === 'string') {
    const lowered = input.toLowerCase();
    const paramStart = lowered.indexOf(';');
    resolved = (paramStart === -1 ? lowered : lowered.slice(0, paramStart)).trim();
  }

  if (!resolved) {
    resolved = inferImageMimeFromName(fallbackName);
  }

  if (resolved && IMAGE_MIME_TYPES.has(resolved)) return resolved;
  return null;
}
47
+
48
/**
 * Converts any value to a string without ever throwing.
 *
 * Resolution order:
 * 1. `JSON.stringify(value)` when it succeeds and yields a string.
 * 2. `String(value)` — used when JSON serialization throws (circular
 *    structures, BigInt, a throwing `toJSON`) or yields `undefined`
 *    (`undefined`, functions, symbols).
 * 3. `Object.prototype.toString.call(value)` — used when `String(value)` itself
 *    throws, e.g. for null-prototype objects or a throwing `toString`.
 * 4. The literal `'[unserializable]'` if even that fails (e.g. a revoked Proxy).
 *
 * @param value - Anything.
 * @returns A string representation of `value`.
 */
function jsonStringifySafe(value: unknown): string {
  try {
    const serialized = JSON.stringify(value);
    if (typeof serialized === 'string') return serialized;
  } catch {
    // Not JSON-serializable; fall through to string coercion.
  }

  try {
    return String(value);
  } catch {
    // No usable toString/valueOf; fall through to the tag-based form.
  }

  try {
    return Object.prototype.toString.call(value);
  } catch {
    return '[unserializable]';
  }
}
56
+
57
/**
 * Flattens arbitrary message content into a single text string.
 *
 * - Strings are returned unchanged; `null`/`undefined` become `''`.
 * - Objects yield the first non-blank string found in `text`, `content`,
 *   `output`, `refusal` (checked in that order).
 * - Arrays yield the newline-joined text of their elements: object elements
 *   are read as above, and non-blank plain-string elements are included as-is
 *   (previously they were silently dropped). Elements with no text are skipped.
 * - Arrays/objects with no extractable text fall back to `jsonStringifySafe`.
 * - Any other primitive is converted with `String()`.
 *
 * @param content - Message content of unknown shape.
 * @returns The extracted text; extracted values are not trimmed.
 */
export function coerceInputContentToText(content: unknown): string {
  if (typeof content === 'string') return content;
  if (content == null) return '';

  // Field precedence matters: the first non-blank match wins.
  const textFields = ['text', 'content', 'output', 'refusal'] as const;
  const collectFromRecord = (record: Record<string, unknown>): string | null => {
    for (const field of textFields) {
      const candidate = record[field];
      if (typeof candidate === 'string' && candidate.trim()) return candidate;
    }
    return null;
  };

  if (Array.isArray(content)) {
    const parts: string[] = [];
    for (const part of content) {
      if (typeof part === 'string') {
        // Plain string elements carry text too; skip only blank ones.
        if (part.trim()) parts.push(part);
      } else if (part && typeof part === 'object') {
        const text = collectFromRecord(part as Record<string, unknown>);
        if (text !== null) parts.push(text);
      }
    }
    if (parts.length > 0) return parts.join('\n');
    return jsonStringifySafe(content);
  }

  if (typeof content === 'object') {
    // collectFromRecord never returns an empty string, so `??` is sufficient.
    return collectFromRecord(content as Record<string, unknown>) ?? jsonStringifySafe(content);
  }

  return String(content);
}
89
+
90
/**
 * Converts chat messages into input messages with text-only content.
 *
 * Each message keeps its role; its content is flattened to a string via
 * `coerceInputContentToText`. The input array is not modified.
 *
 * @param messages - Messages whose content may have any shape.
 * @returns A new array with one text-content message per input message.
 */
export function messagesToOpenRouterInput(
  messages: Array<{ role: 'user' | 'assistant'; content: unknown }>
): OpenRouterInputMessage[] {
  const toInputMessage = (
    message: { role: 'user' | 'assistant'; content: unknown }
  ): OpenRouterInputMessage => {
    const role = message.role;
    const content = coerceInputContentToText(message.content);
    return { role, content };
  };

  return messages.map((message) => toInputMessage(message));
}
98
+
99
/**
 * Reads photo attachments from disk and encodes them as base64 `data:` URL
 * image parts.
 *
 * An attachment is considered only if its `type` is `'photo'`, it has a
 * non-empty string `path`, and `normalizeMimeType` resolves a supported image
 * type from its `mime_type` / `file_name`. It is then skipped (with a log
 * line) when it is not a regular file, is larger than `MAX_IMAGE_BYTES`, or
 * would push the running total past `MAX_TOTAL_IMAGE_BYTES`. Skipping one
 * attachment never stops processing of the ones after it, so a later, smaller
 * image can still use the remaining budget.
 *
 * Both limits are checked against the `stat` size first (so oversized files
 * are not read) and again against the bytes actually read, because the file
 * can change between the two calls.
 *
 * Filesystem errors are caught per attachment, logged, and do not abort the
 * loop.
 *
 * @param attachments - Attachments to consider; non-array or empty yields `[]`.
 * @param options - Optional `log` callback that receives skip/failure messages.
 * @returns Image parts in attachment order, for the attachments that loaded.
 */
export function loadImageAttachmentsForInput(
  attachments?: ContainerInput['attachments'],
  options?: { log?: (message: string) => void }
): OpenRouterInputImagePart[] {
  if (!Array.isArray(attachments) || attachments.length === 0) return [];

  const log = options?.log;
  const images: OpenRouterInputImagePart[] = [];
  let totalBytes = 0;

  // Logs and reports whether `size` bytes would violate either limit.
  const exceedsLimits = (filePath: string, size: number): boolean => {
    if (size > MAX_IMAGE_BYTES) {
      log?.(`Skipping image ${filePath}: ${size} bytes exceeds ${MAX_IMAGE_BYTES}`);
      return true;
    }
    if (totalBytes + size > MAX_TOTAL_IMAGE_BYTES) {
      log?.(`Skipping image ${filePath}: cumulative size would exceed ${MAX_TOTAL_IMAGE_BYTES}`);
      return true;
    }
    return false;
  };

  for (const attachment of attachments) {
    if (!attachment || attachment.type !== 'photo' || typeof attachment.path !== 'string' || !attachment.path) {
      continue;
    }

    const mime = normalizeMimeType(attachment.mime_type, attachment.file_name);
    if (!mime) continue;

    try {
      const stat = fs.statSync(attachment.path);
      if (!stat.isFile()) {
        // Directories would throw on read; FIFOs/devices could block.
        log?.(`Skipping image ${attachment.path}: not a regular file`);
        continue;
      }
      // Cheap pre-check so clearly oversized files are never read into memory.
      if (exceedsLimits(attachment.path, stat.size)) continue;

      const data = fs.readFileSync(attachment.path);
      // Authoritative check: these are the bytes that will actually be sent.
      if (exceedsLimits(attachment.path, data.length)) continue;

      totalBytes += data.length;
      images.push({
        type: 'input_image',
        detail: 'auto',
        imageUrl: `data:${mime};base64,${data.toString('base64')}`
      });
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      log?.(`Failed to load image ${attachment.path}: ${detail}`);
    }
  }

  return images;
}
143
+
144
/**
 * Attaches image parts to the final message of `contextInput`, in place.
 *
 * Nothing happens when `contextInput` is empty or not an array, when
 * `imageParts` is empty or not an array, or when the final message is not a
 * `'user'` message.
 *
 * - String content becomes `[ { type: 'input_text', text }, ...imageParts ]`.
 * - Content that is already a parts array keeps its existing parts and gets
 *   `imageParts` appended. It is deliberately not flattened to text: that
 *   would discard image parts already present, and for an array without any
 *   text part would serialize those parts (base64 payload included) into the
 *   text field.
 *
 * @param contextInput - Conversation messages; the last one may be mutated.
 * @param imageParts - Image parts to attach.
 */
export function injectImagesIntoContextInput(
  contextInput: OpenRouterInputMessage[],
  imageParts: OpenRouterInputImagePart[]
): void {
  if (!Array.isArray(contextInput) || contextInput.length === 0) return;
  if (!Array.isArray(imageParts) || imageParts.length === 0) return;

  const lastMessage = contextInput[contextInput.length - 1];
  if (!lastMessage || lastMessage.role !== 'user') return;

  const existing = lastMessage.content;
  if (Array.isArray(existing)) {
    // Already multi-part: preserve what is there and add the new images.
    lastMessage.content = [...existing, ...imageParts];
    return;
  }

  lastMessage.content = [
    {
      type: 'input_text',
      text: coerceInputContentToText(existing)
    },
    ...imageParts
  ];
}
@@ -110,7 +110,9 @@ function buildScheduledSection(params: SystemPromptParams): string {
110
110
 
111
111
  function buildResponseGuidanceSection(): string {
112
112
  return [
113
- '- Always answer the user\'s question directly before reaching for tools.',
113
+ '- Answer directly when the request can be completed from conversation context without external state.',
114
+ '- When the request requires file/system/network actions or fresh state, execute tools first before finalizing.',
115
+ '- Never claim an action happened unless corresponding tool calls succeeded in this turn.',
114
116
  '- If the user asks about your previous actions (e.g., "did you use X tool?"), reflect on the conversation history — do not re-execute the task.',
115
117
  '- If the user asks a simple factual question, answer from your knowledge — do not call tools unless you need to verify or act.',
116
118
  '- When you have genuinely nothing to say, respond with ONLY: NO_REPLY (your entire message must be just this token, nothing else).'
@@ -128,6 +130,8 @@ function buildToolCallStyleSection(): string {
128
130
  function buildToolGuidanceSection(params: SystemPromptParams): string {
129
131
  const lines = [
130
132
  'Key tool rules:',
133
+ '- Never claim file/system/web actions succeeded unless tool calls in this turn confirm them.',
134
+ '- For large file edits, avoid one giant Write payload; split into smaller Write/Edit steps and verify with Read.',
131
135
  '- User attachments arrive in /workspace/group/inbox/ (see <attachment> tags). Process with Read/Bash/Python.',
132
136
  '- To send media from the web: download_url → send_photo/send_file/send_audio.',
133
137
  '- Charts/plots: matplotlib → savefig → send_photo. Graphviz → dot -Tpng → send_photo.',
@@ -285,8 +289,14 @@ export function buildSystemPrompt(params: SystemPromptParams): string {
285
289
  const toolReliability = trimLevel >= 2 ? '' : (
286
290
  params.toolReliability && params.toolReliability.length > 0
287
291
  ? params.toolReliability
288
- .sort((a, b) => a.success_rate - b.success_rate)
289
- .slice(0, 20)
292
+ .filter(t => t.count >= 5 && (t.success_rate < 0.98 || (Number.isFinite(t.avg_duration_ms) && (t.avg_duration_ms || 0) > 2500)))
293
+ .sort((a, b) => {
294
+ if (a.success_rate !== b.success_rate) return a.success_rate - b.success_rate;
295
+ const aDur = Number.isFinite(a.avg_duration_ms) ? (a.avg_duration_ms || 0) : 0;
296
+ const bDur = Number.isFinite(b.avg_duration_ms) ? (b.avg_duration_ms || 0) : 0;
297
+ return bDur - aDur;
298
+ })
299
+ .slice(0, 8)
290
300
  .map(t => {
291
301
  const pct = `${Math.round(t.success_rate * 100)}%`;
292
302
  const avg = Number.isFinite(t.avg_duration_ms) ? `${Math.round(t.avg_duration_ms!)}ms` : 'n/a';