@dotsetlabs/dotclaw 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/.env.example +9 -10
  2. package/README.md +8 -4
  3. package/config-examples/runtime.json +34 -8
  4. package/config-examples/tool-policy.json +12 -2
  5. package/container/agent-runner/package-lock.json +2 -2
  6. package/container/agent-runner/package.json +1 -1
  7. package/container/agent-runner/src/agent-config.ts +19 -3
  8. package/container/agent-runner/src/container-protocol.ts +11 -0
  9. package/container/agent-runner/src/context-overflow-recovery.ts +39 -0
  10. package/container/agent-runner/src/index.ts +603 -165
  11. package/container/agent-runner/src/openrouter-input.ts +159 -0
  12. package/container/agent-runner/src/system-prompt.ts +13 -3
  13. package/container/agent-runner/src/tool-loop-policy.ts +741 -0
  14. package/container/agent-runner/src/tools.ts +211 -8
  15. package/dist/agent-context.d.ts +1 -0
  16. package/dist/agent-context.d.ts.map +1 -1
  17. package/dist/agent-context.js +21 -9
  18. package/dist/agent-context.js.map +1 -1
  19. package/dist/agent-execution.d.ts +2 -0
  20. package/dist/agent-execution.d.ts.map +1 -1
  21. package/dist/agent-execution.js +164 -15
  22. package/dist/agent-execution.js.map +1 -1
  23. package/dist/agent-semaphore.d.ts +24 -1
  24. package/dist/agent-semaphore.d.ts.map +1 -1
  25. package/dist/agent-semaphore.js +109 -20
  26. package/dist/agent-semaphore.js.map +1 -1
  27. package/dist/cli.js +3 -11
  28. package/dist/cli.js.map +1 -1
  29. package/dist/config.d.ts +2 -0
  30. package/dist/config.d.ts.map +1 -1
  31. package/dist/config.js +2 -0
  32. package/dist/config.js.map +1 -1
  33. package/dist/container-protocol.d.ts +22 -0
  34. package/dist/container-protocol.d.ts.map +1 -1
  35. package/dist/container-protocol.js.map +1 -1
  36. package/dist/container-runner.d.ts +7 -0
  37. package/dist/container-runner.d.ts.map +1 -1
  38. package/dist/container-runner.js +417 -143
  39. package/dist/container-runner.js.map +1 -1
  40. package/dist/db.d.ts.map +1 -1
  41. package/dist/db.js +46 -12
  42. package/dist/db.js.map +1 -1
  43. package/dist/error-messages.d.ts.map +1 -1
  44. package/dist/error-messages.js +18 -4
  45. package/dist/error-messages.js.map +1 -1
  46. package/dist/failover-policy.d.ts +41 -0
  47. package/dist/failover-policy.d.ts.map +1 -0
  48. package/dist/failover-policy.js +261 -0
  49. package/dist/failover-policy.js.map +1 -0
  50. package/dist/index.js +1 -0
  51. package/dist/index.js.map +1 -1
  52. package/dist/ipc-dispatcher.d.ts.map +1 -1
  53. package/dist/ipc-dispatcher.js +27 -43
  54. package/dist/ipc-dispatcher.js.map +1 -1
  55. package/dist/mcp-config.d.ts +22 -0
  56. package/dist/mcp-config.d.ts.map +1 -0
  57. package/dist/mcp-config.js +94 -0
  58. package/dist/mcp-config.js.map +1 -0
  59. package/dist/memory-backend.d.ts +27 -0
  60. package/dist/memory-backend.d.ts.map +1 -0
  61. package/dist/memory-backend.js +112 -0
  62. package/dist/memory-backend.js.map +1 -0
  63. package/dist/memory-recall.d.ts.map +1 -1
  64. package/dist/memory-recall.js +135 -22
  65. package/dist/memory-recall.js.map +1 -1
  66. package/dist/memory-store.d.ts +1 -0
  67. package/dist/memory-store.d.ts.map +1 -1
  68. package/dist/memory-store.js +55 -7
  69. package/dist/memory-store.js.map +1 -1
  70. package/dist/message-pipeline.d.ts +24 -0
  71. package/dist/message-pipeline.d.ts.map +1 -1
  72. package/dist/message-pipeline.js +131 -27
  73. package/dist/message-pipeline.js.map +1 -1
  74. package/dist/metrics.d.ts +1 -0
  75. package/dist/metrics.d.ts.map +1 -1
  76. package/dist/metrics.js +9 -0
  77. package/dist/metrics.js.map +1 -1
  78. package/dist/providers/discord/discord-provider.d.ts.map +1 -1
  79. package/dist/providers/discord/discord-provider.js +72 -4
  80. package/dist/providers/discord/discord-provider.js.map +1 -1
  81. package/dist/providers/telegram/telegram-provider.d.ts.map +1 -1
  82. package/dist/providers/telegram/telegram-provider.js +65 -3
  83. package/dist/providers/telegram/telegram-provider.js.map +1 -1
  84. package/dist/recall-policy.d.ts +12 -0
  85. package/dist/recall-policy.d.ts.map +1 -0
  86. package/dist/recall-policy.js +89 -0
  87. package/dist/recall-policy.js.map +1 -0
  88. package/dist/runtime-config.d.ts +33 -0
  89. package/dist/runtime-config.d.ts.map +1 -1
  90. package/dist/runtime-config.js +109 -9
  91. package/dist/runtime-config.js.map +1 -1
  92. package/dist/streaming.d.ts.map +1 -1
  93. package/dist/streaming.js +125 -33
  94. package/dist/streaming.js.map +1 -1
  95. package/dist/task-scheduler.d.ts.map +1 -1
  96. package/dist/task-scheduler.js +4 -2
  97. package/dist/task-scheduler.js.map +1 -1
  98. package/dist/tool-policy.d.ts.map +1 -1
  99. package/dist/tool-policy.js +26 -4
  100. package/dist/tool-policy.js.map +1 -1
  101. package/dist/trace-writer.d.ts +12 -0
  102. package/dist/trace-writer.d.ts.map +1 -1
  103. package/dist/trace-writer.js.map +1 -1
  104. package/dist/turn-hygiene.d.ts +14 -0
  105. package/dist/turn-hygiene.d.ts.map +1 -0
  106. package/dist/turn-hygiene.js +214 -0
  107. package/dist/turn-hygiene.js.map +1 -0
  108. package/dist/webhook.d.ts.map +1 -1
  109. package/dist/webhook.js +1 -0
  110. package/dist/webhook.js.map +1 -1
  111. package/package.json +15 -1
  112. package/scripts/benchmark-baseline.js +365 -0
  113. package/scripts/benchmark-harness.js +1413 -0
  114. package/scripts/benchmark-scenarios.js +301 -0
  115. package/scripts/canary-suite.js +123 -0
  116. package/scripts/generate-controlled-traces.js +230 -0
  117. package/scripts/release-slo-check.js +214 -0
  118. package/scripts/run-live-canary.js +339 -0
@@ -0,0 +1,365 @@
1
+ #!/usr/bin/env node
2
+
3
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { pathToFileURL } from 'node:url';
6
+
7
/**
 * Parse the command-line flags understood by this script.
 *
 * Recognized flags (each consumes the following argument as its value):
 * `--days`, `--dir`, `--since`, `--until`, `--source`, `--exclude-source`,
 * `--chat-id`. A flag that appears as the very last argument (no value after
 * it) and any unrecognized argument are ignored.
 *
 * @param {string[]} argv - Arguments after the script path.
 * @returns {{days: number, dir: string, since: string, until: string,
 *   source: string, excludeSource: string, chatId: string}} Parsed options;
 *   `days` defaults to 7 and every string option defaults to ''.
 */
function parseArgs(argv) {
  // Flags whose value is stored verbatim under the mapped option name.
  const stringFlags = new Map([
    ['--dir', 'dir'],
    ['--since', 'since'],
    ['--until', 'until'],
    ['--source', 'source'],
    ['--exclude-source', 'excludeSource'],
    ['--chat-id', 'chatId'],
  ]);
  const parsed = {
    days: 7,
    dir: '',
    since: '',
    until: '',
    source: '',
    excludeSource: '',
    chatId: '',
  };

  let index = 0;
  while (index < argv.length) {
    const flag = argv[index];
    const hasValue = index + 1 < argv.length;
    if (hasValue && flag === '--days') {
      // The value is consumed even when it is rejected, so a bad number is
      // never re-read as a flag on the next iteration.
      const days = Number(argv[index + 1]);
      if (Number.isFinite(days) && days > 0) parsed.days = Math.floor(days);
      index += 2;
    } else if (hasValue && stringFlags.has(flag)) {
      parsed[stringFlags.get(flag)] = argv[index + 1];
      index += 2;
    } else {
      index += 1;
    }
  }
  return parsed;
}
57
+
58
/**
 * Nearest-rank percentile of a list of numbers.
 *
 * The input is copied before sorting, so the caller's array is not mutated.
 * The computed rank is clamped to the valid index range, so `p <= 0` yields
 * the smallest value and `p >= 100` the largest.
 *
 * @param {number[]} values - Samples to rank.
 * @param {number} p - Percentile in the 0-100 range.
 * @returns {number|null} The selected sample, or null when `values` is empty.
 */
export function percentile(values, p) {
  const count = values.length;
  if (!count) return null;

  const ascending = [...values];
  ascending.sort((left, right) => left - right);

  const rank = Math.ceil((p / 100) * count) - 1;
  const clamped = Math.min(count - 1, Math.max(0, rank));
  return ascending[clamped];
}
64
+
65
/**
 * Parse a JSON string without throwing.
 *
 * @param {string} line - Text to parse.
 * @returns {*} The parsed value, or null when `line` is not valid JSON.
 */
function safeJsonParse(line) {
  let decoded = null;
  try {
    decoded = JSON.parse(line);
  } catch {
    // Malformed input is reported as null instead of an exception.
  }
  return decoded;
}
72
+
73
/**
 * Canonicalize a source label: stringify, trim, and lower-case it.
 *
 * @param {*} value - Raw source value; any falsy value is treated as missing.
 * @returns {string} The normalized label, or 'unknown' when nothing remains.
 */
function normalizeSource(value) {
  const cleaned = String(value || '').trim().toLowerCase();
  if (cleaned === '') return 'unknown';
  return cleaned;
}
77
+
78
/**
 * Determine the effective source label for a trace row.
 *
 * Rows labelled "dotclaw" (or with no usable label) whose `input_text` begins
 * with a `[CANARY:` or `[CANARY]` marker are reported as 'live-canary'.
 * Historical canary rows could be mislabeled as "dotclaw"; reclassifying them
 * keeps benchmark traffic from skewing production-weighted SLOs.
 *
 * @param {object|null|undefined} row - Parsed trace record.
 * @returns {string} The normalized (and possibly reclassified) source label.
 */
function deriveSource(row) {
  const labelled = normalizeSource(row?.source);
  const prompt = String(row?.input_text || '');

  const mayBeMislabeled = labelled === 'dotclaw' || labelled === 'unknown';
  if (!mayBeMislabeled) return labelled;

  // Case-insensitive marker, optionally preceded by whitespace.
  return /^\s*\[CANARY(?::|])/i.test(prompt) ? 'live-canary' : labelled;
}
88
+
89
/**
 * Turn a comma-separated string into a Set of trimmed, non-empty entries.
 *
 * @param {*} value - Comma-separated text; any falsy value is treated as empty.
 * @param {{lower?: boolean}} [options] - Entries are lower-cased unless
 *   `options.lower` is exactly `false`.
 * @returns {Set<string>|null} The entries in first-seen order, or null when
 *   no non-empty entry was found.
 */
function parseCsvSet(value, options = {}) {
  const foldCase = options.lower !== false;
  const members = new Set();
  for (const piece of String(value || '').split(',')) {
    const token = foldCase ? piece.trim().toLowerCase() : piece.trim();
    if (token) members.add(token);
  }
  return members.size > 0 ? members : null;
}
100
+
101
/**
 * Interpret a user-supplied time bound.
 *
 * A positive finite number is returned as-is (the callers in this file use it
 * as epoch milliseconds); anything else is handed to `Date.parse`.
 *
 * @param {*} value - Raw option text; any falsy or blank value means "not given".
 * @param {number} fallback - Value returned when `value` is blank or unparseable.
 * @returns {number} The resolved timestamp, or `fallback`.
 */
function resolveTimestamp(value, fallback) {
  const text = String(value || '').trim();
  if (text === '') return fallback;

  const asNumber = Number(text);
  if (Number.isFinite(asNumber) && asNumber > 0) return asNumber;

  const asDate = Date.parse(text);
  if (Number.isFinite(asDate)) return asDate;
  return fallback;
}
109
+
110
/**
 * Load trace rows from `trace-YYYY-MM-DD.jsonl` files in a directory.
 *
 * Files are read in sorted name order, one JSON object per line. Blank lines,
 * lines that are not valid JSON objects, and rows without a parseable
 * `timestamp` are skipped. Each returned row is a copy of the parsed object
 * with `source` replaced by the derived source label.
 *
 * Loading is best-effort throughout: a missing, unreadable, or non-directory
 * `traceDir` yields an empty array, and an unreadable trace file is skipped.
 *
 * @param {string} traceDir - Directory containing the trace files.
 * @param {number|object} [filtersOrSinceMs] - Either a finite number (legacy
 *   form: minimum timestamp in ms) or a filter object with optional fields:
 *   `sinceMs` / `untilMs` (inclusive timestamp bounds in ms),
 *   `includeSources` / `excludeSources` (Sets of derived source labels), and
 *   `includeChats` (Set of `chat_id` strings). Set-valued filters are ignored
 *   unless they are actual `Set` instances.
 * @returns {object[]} Matching rows, in file order then line order.
 */
export function loadTraces(traceDir, filtersOrSinceMs) {
  const legacySince = Number.isFinite(filtersOrSinceMs)
    ? Number(filtersOrSinceMs)
    : null;
  const filters = (!legacySince && filtersOrSinceMs && typeof filtersOrSinceMs === 'object')
    ? filtersOrSinceMs
    : {};
  const sinceMs = legacySince ?? Number(filters.sinceMs || 0);
  const untilMs = Number.isFinite(filters.untilMs) ? Number(filters.untilMs) : Infinity;
  const includeSources = filters.includeSources instanceof Set ? filters.includeSources : null;
  const excludeSources = filters.excludeSources instanceof Set ? filters.excludeSources : null;
  const includeChats = filters.includeChats instanceof Set ? filters.includeChats : null;

  // List the directory directly instead of existsSync() followed by
  // readdirSync(): the separate check is racy and still let ENOTDIR/EACCES
  // escape, whereas unreadable trace files below are already skipped quietly.
  let files;
  try {
    files = fs.readdirSync(traceDir)
      .filter(name => /^trace-\d{4}-\d{2}-\d{2}\.jsonl$/.test(name))
      .sort();
  } catch {
    return [];
  }

  const rows = [];
  for (const fileName of files) {
    const filePath = path.join(traceDir, fileName);
    let content = '';
    try {
      content = fs.readFileSync(filePath, 'utf-8');
    } catch {
      // Best-effort: an unreadable file must not abort the whole report.
      continue;
    }
    for (const line of content.split('\n')) {
      if (!line.trim()) continue;
      const parsed = safeJsonParse(line);
      if (!parsed || typeof parsed !== 'object') continue;
      const ts = Date.parse(String(parsed.timestamp || ''));
      if (!Number.isFinite(ts) || ts < sinceMs || ts > untilMs) continue;
      const source = deriveSource(parsed);
      if (includeSources && !includeSources.has(source)) continue;
      if (excludeSources && excludeSources.has(source)) continue;
      if (includeChats) {
        const chatId = String(parsed.chat_id || '').trim();
        if (!includeChats.has(chatId)) continue;
      }
      rows.push({
        ...parsed,
        source
      });
    }
  }
  return rows;
}
158
+
159
/**
 * List the highest-count entries of a counter map.
 *
 * @param {Map<string, number>} map - Counter keyed by label.
 * @param {number} [limit=10] - Maximum number of entries to return.
 * @returns {{key: string, count: number}[]} Entries in descending count order.
 */
function topEntries(map, limit = 10) {
  const ranked = Array.from(map.entries());
  ranked.sort(([, countA], [, countB]) => countB - countA);

  const top = [];
  for (const [key, count] of ranked.slice(0, limit)) {
    top.push({ key, count });
  }
  return top;
}
165
+
166
/**
 * Aggregate trace records into a baseline report.
 *
 * A record counts as an error when `error_code` is a string containing
 * non-whitespace characters; every other record counts as a success. An
 * "empty success" is a success whose `output_text` is missing, not a string,
 * or blank. Numeric metrics (`latency_ms`, `tokens_prompt`,
 * `tokens_completion`) are only used when they convert to a finite,
 * non-negative number. A tool call counts as failed when its `ok` is falsy.
 * `host_failover.attempted_runs` counts records whose
 * `host_failover_attempts` is greater than 1, and `recovered_runs` counts
 * records whose `host_failover_recovered` is exactly `true`.
 *
 * @param {object[]} records - Trace rows to summarize.
 * @param {string} traceDir - Reported verbatim as `trace_dir`.
 * @param {number} days - Reported verbatim as `window_days`.
 * @returns {object} Report with overall totals, latency percentiles, token
 *   usage, tool-call and host-failover statistics, a per-source breakdown
 *   (`records_by_source`, largest source first), and the most frequent models
 *   and error codes. Rates are rounded to 4 decimals and are null when their
 *   denominator is zero.
 */
export function buildReport(records, traceDir, days) {
  // --- Shared predicates and small formatters ------------------------------
  const hasErrorCode = (row) => typeof row.error_code === 'string' && Boolean(row.error_code.trim());
  const hasBlankOutput = (row) => {
    const text = typeof row.output_text === 'string' ? row.output_text : '';
    return !text.trim();
  };
  const toolCallsOf = (row) => (Array.isArray(row.tool_calls) ? row.tool_calls : []);
  const isFailedCall = (call) => !call?.ok;
  const isUsableMetric = (value) => Number.isFinite(value) && value >= 0;
  const roundedRatio = (numerator, denominator, digits) => (
    denominator > 0 ? Number((numerator / denominator).toFixed(digits)) : null
  );
  const bump = (counter, key) => {
    counter.set(key, (counter.get(key) || 0) + 1);
  };
  const sumOf = (values) => values.reduce((acc, value) => acc + value, 0);
  const numericColumn = (field) => records
    .map((row) => Number(row[field]))
    .filter((value) => isUsableMetric(value));

  // --- Overall totals -------------------------------------------------------
  const total = records.length;
  const errorRecords = records.filter((row) => hasErrorCode(row));
  const successRecords = total - errorRecords.length;
  const emptySuccess = records
    .filter((row) => !hasErrorCode(row) && hasBlankOutput(row))
    .length;

  const latencyMs = numericColumn('latency_ms');
  const promptTokens = numericColumn('tokens_prompt');
  const completionTokens = numericColumn('tokens_completion');

  const toolCalls = records.flatMap((row) => toolCallsOf(row));
  const toolFailures = toolCalls.filter((call) => isFailedCall(call));

  let failoverAttempts = 0;
  let failoverRecovered = 0;
  for (const row of records) {
    const attempts = Number(row.host_failover_attempts);
    // Only values above 1 are counted as a failover attempt.
    if (Number.isFinite(attempts) && attempts > 1) failoverAttempts += 1;
    if (row.host_failover_recovered === true) failoverRecovered += 1;
  }

  // --- Frequency tables -----------------------------------------------------
  const errorCounts = new Map();
  for (const row of errorRecords) {
    bump(errorCounts, String(row.error_code || 'unknown'));
  }

  const modelCounts = new Map();
  for (const row of records) {
    bump(modelCounts, String(row.model_id || 'unknown'));
  }

  // --- Per-source accumulation ---------------------------------------------
  const sourceStats = new Map();
  for (const row of records) {
    const source = normalizeSource(row.source);
    let bucket = sourceStats.get(source);
    if (!bucket) {
      bucket = {
        records: 0,
        errors: 0,
        emptySuccess: 0,
        toolTotal: 0,
        toolFailed: 0,
        latencies: [],
        promptTokens: 0,
        completionTokens: 0,
      };
      sourceStats.set(source, bucket);
    }

    bucket.records += 1;
    if (hasErrorCode(row)) {
      bucket.errors += 1;
    } else if (hasBlankOutput(row)) {
      bucket.emptySuccess += 1;
    }

    const calls = toolCallsOf(row);
    bucket.toolTotal += calls.length;
    bucket.toolFailed += calls.filter((call) => isFailedCall(call)).length;

    const latency = Number(row.latency_ms);
    if (isUsableMetric(latency)) bucket.latencies.push(latency);

    const prompt = Number(row.tokens_prompt);
    if (isUsableMetric(prompt)) bucket.promptTokens += prompt;

    const completion = Number(row.tokens_completion);
    if (isUsableMetric(completion)) bucket.completionTokens += completion;
  }

  // --- Per-source summaries, largest source first ---------------------------
  const recordsBySource = [];
  for (const [source, bucket] of sourceStats.entries()) {
    const success = Math.max(0, bucket.records - bucket.errors);
    const totalTokens = bucket.promptTokens + bucket.completionTokens;
    recordsBySource.push({
      source,
      records: bucket.records,
      success,
      errors: bucket.errors,
      success_rate: roundedRatio(success, bucket.records, 4),
      empty_success: bucket.emptySuccess,
      empty_success_rate: roundedRatio(bucket.emptySuccess, success, 4),
      tool_calls_total: bucket.toolTotal,
      tool_calls_failed: bucket.toolFailed,
      tool_success_rate: roundedRatio(bucket.toolTotal - bucket.toolFailed, bucket.toolTotal, 4),
      token_usage: {
        prompt_total: bucket.promptTokens,
        completion_total: bucket.completionTokens,
        total: totalTokens,
        prompt_per_success: roundedRatio(bucket.promptTokens, success, 2),
        completion_per_success: roundedRatio(bucket.completionTokens, success, 2),
        total_per_success: roundedRatio(totalTokens, success, 2),
      },
      latency_ms: {
        p50: percentile(bucket.latencies, 50),
        p95: percentile(bucket.latencies, 95),
        p99: percentile(bucket.latencies, 99),
      },
    });
  }
  recordsBySource.sort((a, b) => b.records - a.records);

  return {
    window_days: days,
    trace_dir: traceDir,
    records_total: total,
    records_success: successRecords,
    records_error: errorRecords.length,
    success_rate: roundedRatio(successRecords, total, 4),
    empty_success_responses: emptySuccess,
    latency_ms: {
      p50: percentile(latencyMs, 50),
      p90: percentile(latencyMs, 90),
      p95: percentile(latencyMs, 95),
      p99: percentile(latencyMs, 99),
    },
    token_usage: {
      prompt_total: sumOf(promptTokens),
      completion_total: sumOf(completionTokens),
      prompt_p50: percentile(promptTokens, 50),
      completion_p50: percentile(completionTokens, 50),
    },
    tool_calls: {
      total: toolCalls.length,
      failed: toolFailures.length,
      success_rate: roundedRatio(toolCalls.length - toolFailures.length, toolCalls.length, 4),
    },
    host_failover: {
      attempted_runs: failoverAttempts,
      recovered_runs: failoverRecovered,
      recovery_rate: roundedRatio(failoverRecovered, failoverAttempts, 4),
    },
    records_by_source: recordsBySource,
    top_models: topEntries(modelCounts, 8),
    top_errors: topEntries(errorCounts, 12),
  };
}
321
+
322
/**
 * CLI entry point: load traces according to the command-line flags, build the
 * report, and print it to stdout as pretty-printed JSON.
 *
 * The trace directory is `--dir` when given, otherwise `traces` under
 * `DOTCLAW_HOME` (or `~/.dotclaw` when that variable is unset). The window
 * starts at `--since`, defaulting to `--days` days before now, and ends at
 * `--until`, defaulting to no upper bound. The report gains a `window` field
 * and, when any source/chat filter was supplied, a `filters` field.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  const homeDir = process.env.DOTCLAW_HOME || path.join(os.homedir(), '.dotclaw');
  const traceDir = args.dir || path.join(homeDir, 'traces');

  const filters = {
    sinceMs: resolveTimestamp(args.since, Date.now() - (args.days * 24 * 60 * 60 * 1000)),
    untilMs: resolveTimestamp(args.until, Infinity),
    includeSources: parseCsvSet(args.source),
    excludeSources: parseCsvSet(args.excludeSource),
    // Chat ids keep their original case.
    includeChats: parseCsvSet(args.chatId, { lower: false }),
  };

  const report = buildReport(loadTraces(traceDir, filters), traceDir, args.days);

  // An unbounded end of the window is reported as null.
  const toIso = (ms) => (Number.isFinite(ms) ? new Date(ms).toISOString() : null);
  report.window = {
    since: toIso(filters.sinceMs),
    until: toIso(filters.untilMs),
  };

  // Only the filters that were actually supplied are echoed back.
  const applied = {};
  if (filters.includeSources) applied.source = Array.from(filters.includeSources.values());
  if (filters.excludeSources) applied.exclude_source = Array.from(filters.excludeSources.values());
  if (filters.includeChats) applied.chat_id = Array.from(filters.includeChats.values());
  if (Object.keys(applied).length > 0) report.filters = applied;

  console.log(JSON.stringify(report, null, 2));
}
362
+
363
/**
 * Report whether this module is the script Node was asked to run, as opposed
 * to being imported by another module.
 *
 * The entry path is resolved through symlinks and converted with
 * `pathToFileURL` before comparison, because `import.meta.url` is a
 * percent-encoded `file:` URL: a hand-built `file://${path}` string does not
 * match on Windows, for paths containing spaces or other escaped characters,
 * or when the script is launched through a symlink.
 *
 * @returns {boolean} True when this file is the process entry point; false
 *   when there is no entry path or it cannot be resolved.
 */
function isMainModule() {
  const entry = process.argv[1];
  if (!entry) return false;
  try {
    return import.meta.url === pathToFileURL(fs.realpathSync(entry)).href;
  } catch {
    // An unresolvable entry path means this file was not run directly.
    return false;
  }
}

if (isMainModule()) {
  main();
}