bosun 0.36.2 → 0.36.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,926 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Agent Work Analytics CLI
4
+ *
5
+ * Offline analysis of agent work logs for:
6
+ * - Backlog task analysis
7
+ * - Task planning insights
8
+ * - Executor performance comparison
9
+ * - Error clustering
10
+ * - Success metrics
11
+ *
12
+ * Usage:
13
+ * node analyze-agent-work.mjs --backlog-tasks 10
14
+ * node analyze-agent-work.mjs --error-clustering --days 7
15
+ * node analyze-agent-work.mjs --error-correlation --days 30 --top 5
16
+ * node analyze-agent-work.mjs --executor-comparison CODEX COPILOT
17
+ * node analyze-agent-work.mjs --task-planning --failed-only
18
+ * node analyze-agent-work.mjs --weekly-report
19
+ */
20
+
21
+ import { readFile, readdir } from "fs/promises";
22
+ import { createReadStream, existsSync } from "fs";
23
+ import { createInterface } from "readline";
24
+ import { resolve, dirname } from "path";
25
+ import { fileURLToPath } from "url";
26
+ import {
27
+ buildErrorClusters,
28
+ buildErrorCorrelationJsonPayload,
29
+ buildErrorCorrelationSummary,
30
+ normalizeErrorFingerprint,
31
+ normalizeTimestamp,
32
+ } from "./analyze-agent-work-helpers.mjs";
33
+
34
+ export {
35
+ buildErrorClusters,
36
+ buildErrorCorrelationJsonPayload,
37
+ buildErrorCorrelationSummary,
38
+ filterRecordsByWindow,
39
+ normalizeErrorFingerprint,
40
+ normalizeTimestamp,
41
+ } from "./analyze-agent-work-helpers.mjs";
42
+
43
// ── Module Location ─────────────────────────────────────────────────────────
// ES modules do not provide __filename/__dirname, so both are derived from
// import.meta.url. The repository root is resolved two directories above
// this file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const repoRoot = resolve(__dirname, "..", "..");

// ── Log Paths ───────────────────────────────────────────────────────────────
// Every log read by this tool lives under <repoRoot>/.cache/agent-work-logs.
const LOG_DIR = resolve(repoRoot, ".cache", "agent-work-logs");
const [STREAM_LOG, ERRORS_LOG, METRICS_LOG, SESSIONS_DIR] = [
  "agent-work-stream.jsonl",
  "agent-errors.jsonl",
  "agent-metrics.jsonl",
  "agent-sessions",
].map((entry) => resolve(LOG_DIR, entry));
53
+
54
+ // ── Data Loaders ────────────────────────────────────────────────────────────
55
+
56
/**
 * Load events from the stream log, which holds one JSON document per line.
 *
 * Lines that are not valid JSON, and lines whose JSON value is not an object
 * (for example `null` or a bare number), are skipped: downstream code reads
 * properties such as `event.attempt_id` and would throw on them.
 *
 * @param {object} [options]
 * @param {number} [options.days] - When truthy, events whose `timestamp` is
 *   older than this many days before now are dropped. Events whose timestamp
 *   cannot be parsed are kept.
 * @returns {Promise<object[]>} Parsed events in file order; an empty array
 *   when the stream log does not exist.
 */
async function loadEvents(options = {}) {
  const events = [];

  if (!existsSync(STREAM_LOG)) {
    return events;
  }

  // The window does not move while the file is being read, so the cutoff is
  // computed once instead of once per line.
  const cutoff = options.days
    ? Date.now() - options.days * 24 * 60 * 60 * 1000
    : null;

  const stream = createReadStream(STREAM_LOG, { encoding: "utf8" });
  const rl = createInterface({ input: stream });

  for await (const line of rl) {
    let event;
    try {
      event = JSON.parse(line);
    } catch {
      // Skip malformed lines (best-effort read of an append-only log).
      continue;
    }

    // Valid JSON that is not an object cannot be an event record.
    if (event === null || typeof event !== "object") continue;

    // Filter by date if specified. An unparsable timestamp yields NaN, and
    // `NaN < cutoff` is false, so such events are retained.
    if (cutoff !== null && new Date(event.timestamp).getTime() < cutoff) {
      continue;
    }

    events.push(event);
  }

  return events;
}
87
+
88
/**
 * Load session metrics from the metrics log, which holds one JSON document
 * per line.
 *
 * Lines that are not valid JSON, and lines whose JSON value is not an object
 * (for example `null`), are skipped: consumers read properties such as
 * `metric.executor` directly and would throw on them.
 *
 * @param {object} [options]
 * @param {number} [options.days] - When truthy, metrics whose `timestamp` is
 *   older than this many days before now are dropped. Records whose timestamp
 *   cannot be parsed are kept.
 * @returns {Promise<object[]>} Parsed metric records in file order; an empty
 *   array when the metrics log does not exist.
 */
async function loadMetrics(options = {}) {
  const metrics = [];

  if (!existsSync(METRICS_LOG)) {
    return metrics;
  }

  // The window does not move while the file is being read, so the cutoff is
  // computed once instead of once per line.
  const cutoff = options.days
    ? Date.now() - options.days * 24 * 60 * 60 * 1000
    : null;

  const stream = createReadStream(METRICS_LOG, { encoding: "utf8" });
  const rl = createInterface({ input: stream });

  for await (const line of rl) {
    let metric;
    try {
      metric = JSON.parse(line);
    } catch {
      // Skip malformed lines (best-effort read of an append-only log).
      continue;
    }

    // Valid JSON that is not an object cannot be a metric record.
    if (metric === null || typeof metric !== "object") continue;

    // Filter by date. An unparsable timestamp yields NaN, and `NaN < cutoff`
    // is false, so such records are retained.
    if (cutoff !== null && new Date(metric.timestamp).getTime() < cutoff) {
      continue;
    }

    metrics.push(metric);
  }

  return metrics;
}
119
+
120
/**
 * Rebuild per-attempt session metrics from raw stream events.
 *
 * Events are folded into one record per `attempt_id` (events without one are
 * ignored). Descriptive fields (task, executor, model) take the first
 * non-empty value seen; counters accumulate over `tool_call`, `tool_result`
 * and `error` events; `session_start` / `session_end` supply the timing and
 * completion status.
 *
 * NOTE(review): `normalizeTimestamp` is defined in the helpers module and is
 * not visible here; it is presumably an epoch-millisecond converter, since its
 * result is compared with a `Date.now()`-based cutoff — confirm.
 *
 * @param {object[]} events - Raw events, in the order they should be folded.
 * @param {object} [options]
 * @param {number} [options.days] - When not null/undefined, events with a
 *   truthy normalized timestamp older than this many days are skipped.
 * @returns {object[]} One derived metric record per attempt, in order of first
 *   appearance. `timestamp` is the generation time, not the session time.
 */
function buildDerivedMetrics(events, options = {}) {
  let cutoff = null;
  if (options.days != null) {
    cutoff = Date.now() - options.days * 24 * 60 * 60 * 1000;
  }

  // Blank accumulator; descriptive fields are filled in by the first event
  // that carries them (including the event that creates the session).
  const createSession = (attemptId) => ({
    attempt_id: attemptId,
    task_id: "",
    task_title: "",
    task_description: "",
    executor: "unknown",
    model: "unknown",
    started_at: null,
    ended_at: null,
    duration_ms: null,
    tool_calls: 0,
    tool_results: 0,
    errors: 0,
    error_fingerprints: new Set(),
    error_categories: new Set(),
    status: null,
  });

  const byAttempt = new Map();

  for (const event of events) {
    const eventTime = normalizeTimestamp(event.timestamp);
    const isOutsideWindow = cutoff && eventTime && eventTime < cutoff;
    if (isOutsideWindow) continue;

    const { attempt_id: attemptId } = event;
    if (!attemptId) continue;

    let session = byAttempt.get(attemptId);
    if (!session) {
      session = createSession(attemptId);
      byAttempt.set(attemptId, session);
    }

    // First non-empty value wins for each descriptive field.
    const eventModel = event.model || event.data?.model;
    if (!session.task_id && event.task_id) {
      session.task_id = event.task_id;
    }
    if (!session.task_title && event.task_title) {
      session.task_title = event.task_title;
    }
    if (!session.task_description && event.task_description) {
      session.task_description = event.task_description;
    }
    if (session.executor === "unknown" && event.executor) {
      session.executor = event.executor;
    }
    if (session.model === "unknown" && eventModel) {
      session.model = eventModel;
    }

    switch (event.event_type) {
      case "session_start":
        // Keep the earliest recorded start.
        if (!session.started_at) {
          session.started_at = eventTime;
        }
        break;

      case "session_end":
        // The latest usable end time and status replace earlier ones.
        if (eventTime) {
          session.ended_at = eventTime;
        }
        if (event.data?.completion_status) {
          session.status = event.data.completion_status;
        }
        if (event.data?.duration_ms) {
          session.duration_ms = event.data.duration_ms;
        }
        break;

      case "tool_call":
        session.tool_calls += 1;
        break;

      case "tool_result":
        session.tool_results += 1;
        break;

      case "error": {
        session.errors += 1;
        // Prefer the recorded fingerprint; otherwise derive one from the message.
        const fingerprint =
          event.data?.error_fingerprint ||
          normalizeErrorFingerprint(event.data?.error_message);
        session.error_fingerprints.add(fingerprint);
        if (event.data?.error_category) {
          session.error_categories.add(event.data.error_category);
        }
        break;
      }

      default:
        break;
    }
  }

  return Array.from(byAttempt.values(), (session) => {
    // Without a reported duration, fall back to end - start (never negative).
    let durationMs = session.duration_ms;
    if (!durationMs && session.started_at && session.ended_at) {
      durationMs = Math.max(0, session.ended_at - session.started_at);
    }

    // Without an explicit completion status, any error marks the attempt failed.
    const inferredStatus = session.errors > 0 ? "failed" : "completed";

    return {
      timestamp: new Date().toISOString(),
      attempt_id: session.attempt_id,
      task_id: session.task_id,
      task_title: session.task_title,
      task_description: session.task_description,
      executor: session.executor,
      model: session.model,
      metrics: {
        duration_ms: durationMs || 0,
        tool_calls: session.tool_calls,
        errors: session.errors,
      },
      outcome: {
        status: session.status || inferredStatus,
      },
      error_summary: {
        total_errors: session.errors,
        error_categories: [...session.error_categories],
        error_fingerprints: [...session.error_fingerprints],
      },
    };
  });
}
220
+
221
/**
 * Load session metrics, deriving them from the event stream when the metrics
 * log yields nothing.
 *
 * @param {object} [options] - Passed unchanged to `loadMetrics`, `loadEvents`
 *   and `buildDerivedMetrics`.
 * @returns {Promise<object[]>} Recorded metrics when any exist; otherwise
 *   metrics derived from events; otherwise an empty array.
 */
async function loadMetricsWithFallback(options = {}) {
  const recorded = await loadMetrics(options);
  if (recorded.length > 0) {
    return recorded;
  }

  // No recorded metrics: reconstruct them from the raw event stream.
  const streamEvents = await loadEvents(options);
  return streamEvents.length === 0
    ? []
    : buildDerivedMetrics(streamEvents, options);
}
228
+
229
/**
 * Load error records from the error log, which holds one JSON document per
 * line.
 *
 * Lines that are not valid JSON, and lines whose JSON value is not an object
 * (for example `null`), are skipped: consumers read properties such as
 * `error.data` directly and would throw on them.
 *
 * @param {object} [options]
 * @param {number} [options.days] - When truthy, errors whose `timestamp` is
 *   older than this many days before now are dropped. Records whose timestamp
 *   cannot be parsed are kept.
 * @returns {Promise<object[]>} Parsed error records in file order; an empty
 *   array when the error log does not exist.
 */
async function loadErrors(options = {}) {
  const errors = [];

  if (!existsSync(ERRORS_LOG)) {
    return errors;
  }

  // The window does not move while the file is being read, so the cutoff is
  // computed once instead of once per line.
  const cutoff = options.days
    ? Date.now() - options.days * 24 * 60 * 60 * 1000
    : null;

  const stream = createReadStream(ERRORS_LOG, { encoding: "utf8" });
  const rl = createInterface({ input: stream });

  for await (const line of rl) {
    let error;
    try {
      error = JSON.parse(line);
    } catch {
      // Skip malformed lines (best-effort read of an append-only log).
      continue;
    }

    // Valid JSON that is not an object cannot be an error record.
    if (error === null || typeof error !== "object") continue;

    // Filter by date. An unparsable timestamp yields NaN, and `NaN < cutoff`
    // is false, so such records are retained.
    if (cutoff !== null && new Date(error.timestamp).getTime() < cutoff) {
      continue;
    }

    errors.push(error);
  }

  return errors;
}
260
+
261
/**
 * Read every event recorded for one attempt from its per-session log,
 * `<SESSIONS_DIR>/<attemptId>.jsonl`.
 *
 * @param {string} attemptId - Attempt identifier; used verbatim as the log
 *   file's base name.
 * @returns {Promise<Array>} Parsed lines in file order (unparsable lines are
 *   skipped); an empty array when the session log does not exist.
 */
async function loadSessionEvents(attemptId) {
  const sessionLog = resolve(SESSIONS_DIR, `${attemptId}.jsonl`);
  if (!existsSync(sessionLog)) {
    return [];
  }

  const parsed = [];
  const lineReader = createInterface({
    input: createReadStream(sessionLog, { encoding: "utf8" }),
  });

  for await (const rawLine of lineReader) {
    let record;
    try {
      record = JSON.parse(rawLine);
    } catch (err) {
      // Skip malformed lines
      continue;
    }
    parsed.push(record);
  }

  return parsed;
}
285
+
286
+ // ── Utility Functions ───────────────────────────────────────────────────────
287
+
288
/**
 * Group items by a key.
 *
 * Groups are accumulated in a Map so that keys which collide with
 * `Object.prototype` members ("constructor", "toString", "__proto__", ...)
 * are treated as ordinary group names instead of hitting the inherited value
 * and throwing on `.push`.
 *
 * @param {Array} array - Items to group.
 * @param {string|Function} keyFn - Property name to read from each item, or a
 *   function `(item) => key`. Keys are converted to strings, as plain object
 *   property keys would be.
 * @returns {Object<string, Array>} Plain object mapping each key to its items
 *   in input order.
 */
function groupBy(array, keyFn) {
  const readKey =
    typeof keyFn === "function" ? keyFn : (item) => item[keyFn];
  const groups = new Map();

  for (const item of array) {
    const key = String(readKey(item));
    const bucket = groups.get(key);
    if (bucket) {
      bucket.push(item);
    } else {
      groups.set(key, [item]);
    }
  }

  // Object.fromEntries defines own data properties, so even "__proto__"
  // becomes a regular key on the result.
  return Object.fromEntries(groups);
}
297
+
298
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} numbers
 * @returns {number} The mean, or 0 for an empty list.
 */
function average(numbers) {
  const count = numbers.length;
  if (count === 0) {
    return 0;
  }

  let total = 0;
  numbers.forEach((value) => {
    total = total + value;
  });
  return total / count;
}
302
+
303
/**
 * Add up a list of numbers.
 *
 * @param {number[]} numbers
 * @returns {number} The total, or 0 for an empty list.
 */
function sum(numbers) {
  let total = 0;
  numbers.forEach((value) => {
    total = total + value;
  });
  return total;
}
306
+
307
/**
 * Percentage (0-100) of items that satisfy a predicate.
 *
 * @param {Array} array - Items to test.
 * @param {Function} predicate - Called as `(item, index, array)`; a truthy
 *   result counts the item.
 * @returns {number} Share of matching items in percent, or 0 for an empty array.
 */
function percentage(array, predicate) {
  const total = array.length;
  if (total === 0) {
    return 0;
  }

  let matching = 0;
  array.forEach((item, index, all) => {
    if (predicate(item, index, all)) {
      matching += 1;
    }
  });
  return (matching * 100.0) / total;
}
312
+
313
/**
 * Count how often each value occurs.
 *
 * Counts are accumulated in a Map so that values which collide with
 * `Object.prototype` members ("constructor", "toString", ...) start from 0
 * instead of from the inherited function, which would turn the count into a
 * garbage string.
 *
 * @param {Array} array - Values to count; each is converted to a string key,
 *   as a plain object property key would be.
 * @returns {Object<string, number>} Plain object mapping each distinct value
 *   to its number of occurrences.
 */
function countFrequency(array) {
  const counts = new Map();

  for (const item of array) {
    const key = String(item);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }

  // Object.fromEntries defines own data properties, so even "__proto__"
  // becomes a regular key on the result.
  return Object.fromEntries(counts);
}
320
+
321
/**
 * Turn a label -> count mapping into rows carrying each label's share of a total.
 *
 * @param {Object<string, number>} counts - Occurrences per label.
 * @param {number} total - Denominator for the percentage; when it is not
 *   greater than 0 every percentage is 0.
 * @returns {Array<{label: string, count: number, percent: number}>} Rows
 *   sorted by count (descending), ties broken by label via `localeCompare`.
 */
function buildDistribution(counts, total) {
  const rows = [];
  for (const [label, count] of Object.entries(counts)) {
    const percent = total > 0 ? (count * 100.0) / total : 0;
    rows.push({ label, count, percent });
  }

  rows.sort((left, right) => {
    const byCount = right.count - left.count;
    // A zero (or NaN) difference falls through to the label comparison.
    return byCount || left.label.localeCompare(right.label);
  });

  return rows;
}
330
+
331
/**
 * Render the largest entries of a label -> count mapping on one line, e.g.
 * "CODEX 3 (60.0%), COPILOT 2 (40.0%)".
 *
 * @param {Object<string, number>} counts - Occurrences per label.
 * @param {number} total - Denominator for the percentages.
 * @param {number} [limit=5] - Maximum number of entries to include.
 * @returns {string} Comma-separated entries, or "none" when there are none.
 */
function formatDistribution(counts, total, limit = 5) {
  const leading = buildDistribution(counts, total).slice(0, limit);
  if (leading.length === 0) {
    return "none";
  }

  const parts = [];
  for (const { label, count, percent } of leading) {
    parts.push(`${label} ${count} (${percent.toFixed(1)}%)`);
  }
  return parts.join(", ");
}
341
+
342
/**
 * Pick the entries with the largest numeric values from an object.
 *
 * @param {Object<string, number>} obj - Mapping of key to numeric value.
 * @param {number} n - Maximum number of entries to return.
 * @returns {Array<[string, number]>} Up to `n` `[key, value]` pairs, sorted by
 *   value in descending order.
 */
function topN(obj, n) {
  const pairs = Object.entries(obj);
  pairs.sort(([, leftValue], [, rightValue]) => rightValue - leftValue);
  return pairs.slice(0, n);
}
347
+
348
+ // ── Analysis Commands ───────────────────────────────────────────────────────
349
+
350
/**
 * Analyze backlog tasks: print a per-task summary of the most-attempted tasks,
 * followed by overall success statistics.
 *
 * Sessions are grouped by `task_id`. A task counts as successful when any of
 * its sessions completed, and as a first-shot success when its only session
 * completed.
 *
 * @param {object} [options]
 * @param {number} [options.days=30] - Look-back window in days.
 * @param {number} [options.limit=10] - Number of tasks to print.
 * @returns {Promise<void>} Output is written to stdout.
 */
async function analyzeBacklog(options = {}) {
  console.log("\n=== Backlog Task Analysis ===\n");

  const metrics = await loadMetricsWithFallback({ days: options.days || 30 });

  if (metrics.length === 0) {
    console.log("No metrics data found");
    return;
  }

  // Group by task
  const byTask = groupBy(metrics, "task_id");

  // Sort by most attempts first
  const taskSummaries = Object.entries(byTask)
    .map(([taskId, sessions]) => {
      const firstSession = sessions[0];
      const completed = sessions.some((s) => s.outcome?.status === "completed");
      const firstShotSuccess =
        sessions.length === 1 && sessions[0].outcome?.status === "completed";

      return {
        task_id: taskId,
        task_title: firstSession.task_title || firstSession.task_id || "",
        attempts: sessions.length,
        success: completed,
        first_shot_success: firstShotSuccess,
        total_duration_ms: sum(sessions.map((s) => s.metrics?.duration_ms || 0)),
        total_cost: sum(sessions.map((s) => s.metrics?.cost_usd || 0)),
        total_errors: sum(sessions.map((s) => s.error_summary?.total_errors || 0)),
        executors: [...new Set(sessions.map((s) => s.executor))],
        error_fingerprints: [
          ...new Set(
            sessions.flatMap((s) => s.error_summary?.error_fingerprints || []),
          ),
        ],
      };
    })
    .sort((a, b) => b.attempts - a.attempts);

  // Show top N tasks
  const limit = options.limit || 10;
  const topTasks = taskSummaries.slice(0, limit);

  for (const task of topTasks) {
    console.log(`\nTask: ${task.task_id}`);
    // The title was collected but never shown; print it when it adds
    // information beyond the task id.
    if (task.task_title && task.task_title !== task.task_id) {
      console.log(` Title: ${task.task_title}`);
    }
    console.log(` Attempts: ${task.attempts}`);
    console.log(` Success: ${task.success ? "✓" : "✗"}`);
    console.log(
      ` First-shot: ${task.first_shot_success ? "✓" : "✗"}`,
    );
    console.log(
      ` Duration: ${Math.round(task.total_duration_ms / 1000)}s`,
    );
    console.log(` Cost: $${task.total_cost.toFixed(3)}`);
    console.log(` Errors: ${task.total_errors}`);
    console.log(` Executors: ${task.executors.join(", ")}`);

    if (task.error_fingerprints.length > 0) {
      console.log(` Common errors: ${task.error_fingerprints.slice(0, 3).join(", ")}`);
    }
  }

  // Summary stats
  console.log("\n=== Summary ===");
  console.log(`Total unique tasks: ${taskSummaries.length}`);
  console.log(
    `Success rate: ${percentage(taskSummaries, (t) => t.success).toFixed(1)}%`,
  );
  console.log(
    `First-shot rate: ${percentage(taskSummaries, (t) => t.first_shot_success).toFixed(1)}%`,
  );
  console.log(
    `Avg attempts per task: ${average(taskSummaries.map((t) => t.attempts)).toFixed(1)}`,
  );
}
429
+
430
/**
 * Cluster errors by fingerprint and print the largest clusters.
 *
 * Clustering itself is delegated to `buildErrorClusters`; this function only
 * loads the data and formats the report.
 *
 * @param {object} [options]
 * @param {number} [options.days=7] - Look-back window in days.
 * @param {number} [options.top=10] - Number of clusters to print.
 * @returns {Promise<void>} Output is written to stdout.
 */
async function clusterErrors(options = {}) {
  console.log("\n=== Error Clustering Analysis ===\n");

  const errors = await loadErrors({ days: options.days || 7 });

  if (errors.length === 0) {
    console.log("No error data found");
    return;
  }

  const clusters = buildErrorClusters(errors);

  // `toISOString()` throws a RangeError on an invalid date, which would abort
  // the whole report because of one bad record.
  const formatSeen = (value) => {
    const seen = new Date(value);
    return Number.isNaN(seen.getTime()) ? "unknown" : seen.toISOString();
  };

  // Show top N clusters (named `limit` so the module-level topN() helper is
  // not shadowed).
  const limit = options.top || 10;
  const topClusters = clusters.slice(0, limit);

  for (const cluster of topClusters) {
    console.log(`\n${cluster.fingerprint}`);
    console.log(` Occurrences: ${cluster.count}`);
    console.log(` Affected tasks: ${cluster.affected_tasks}`);
    console.log(` Affected attempts: ${cluster.affected_attempts}`);
    console.log(` Categories: ${cluster.categories.join(", ") || "unknown"}`);

    // A cluster may lack a sample message; skip the line instead of throwing,
    // matching the guard used by correlateErrors().
    if (cluster.sample_message) {
      const sample = String(cluster.sample_message);
      console.log(
        ` Sample: ${sample.slice(0, 100)}${sample.length > 100 ? "..." : ""}`,
      );
    }

    console.log(` First seen: ${formatSeen(cluster.first_seen)}`);
    console.log(` Last seen: ${formatSeen(cluster.last_seen)}`);
  }

  console.log(`\n\nTotal unique error types: ${clusters.length}`);
  console.log(`Total error events: ${errors.length}`);
}
469
+
470
/**
 * Correlate error fingerprints with task characteristics (executor, size,
 * complexity) and print the result as text or JSON.
 *
 * The correlation itself is computed by `buildErrorCorrelationSummary`; the
 * JSON shape comes from `buildErrorCorrelationJsonPayload`.
 *
 * @param {object} options
 * @param {number} [options.days] - Look-back window in days (7 when falsy).
 * @param {number} [options.top] - Number of fingerprints to report (10 when falsy).
 * @param {boolean} [options.json] - Emit JSON only when exactly `true`.
 * @returns {Promise<void>} Output is written to stdout.
 */
async function correlateErrors(options) {
  const { days, top, json } = options;
  const windowDays = days || 7;
  const topLimit = top || 10;
  const useJson = json === true;

  const printJson = (value) => console.log(JSON.stringify(value, null, 2));
  // Samples are cut to 100 characters, with an ellipsis when shortened.
  const truncate = (text) =>
    `${text.slice(0, 100)}${text.length > 100 ? "..." : ""}`;

  const errors = await loadErrors({ days: windowDays });

  if (errors.length === 0) {
    const message = "No error data found";
    if (!useJson) {
      console.log(message);
      return;
    }
    // Keep the JSON output well-formed even when there is nothing to report.
    printJson({
      message,
      window_days: windowDays,
      total_errors: 0,
      total_fingerprints: 0,
      correlations: [],
    });
    return;
  }

  const metrics = await loadMetricsWithFallback({ days: windowDays });
  const summary = buildErrorCorrelationSummary({
    errors,
    metrics,
    windowDays,
    top: topLimit,
  });

  if (useJson) {
    printJson(buildErrorCorrelationJsonPayload(summary));
    return;
  }

  console.log("\n=== Error Correlation Report ===\n");

  for (const entry of summary.correlations) {
    const executors = formatDistribution(entry.by_executor, entry.count);
    const sizes = formatDistribution(entry.by_size, entry.count);
    const complexity = formatDistribution(entry.by_complexity, entry.count);

    console.log(`\n${entry.fingerprint}`);
    console.log(` Occurrences: ${entry.count}`);
    console.log(` Affected tasks: ${entry.task_ids.size}`);
    console.log(` Executors: ${executors}`);
    console.log(` Sizes: ${sizes}`);
    console.log(` Complexity: ${complexity}`);
    if (entry.sample_message) {
      console.log(` Sample: ${truncate(entry.sample_message)}`);
    }
  }

  console.log(`\nTotal unique error types: ${summary.total_fingerprints}`);
  console.log(`Total error events: ${summary.total_errors}`);
}
539
+
540
/**
 * Compare executor performance over the last 30 days and print a table plus
 * per-executor details.
 *
 * @param {string[]} executors - Executor names to compare; each is matched
 *   exactly against the `executor` field of the session metrics. Executors
 *   without any sessions are reported and left out of the table.
 * @returns {Promise<void>} Output is written to stdout.
 */
async function compareExecutors(executors) {
  console.log("\n=== Executor Performance Comparison ===\n");

  const metrics = await loadMetricsWithFallback({ days: 30 });

  if (metrics.length === 0) {
    console.log("No metrics data found");
    return;
  }

  // Aggregate one executor's sessions into the figures shown in the report.
  const summarize = (sessions) => {
    const costs = sessions.map((s) => s.metrics?.cost_usd || 0);
    const durationsInSeconds = sessions.map(
      (s) => (s.metrics?.duration_ms || 0) / 1000,
    );
    const tokens = sessions.map((s) => s.metrics?.total_tokens || 0);
    const errorCounts = sessions.map((s) => s.error_summary?.total_errors || 0);

    return {
      total_sessions: sessions.length,
      success_rate: percentage(
        sessions,
        (s) => s.outcome?.status === "completed",
      ),
      first_shot_rate: percentage(
        sessions,
        (s) => s.metrics?.first_shot_success === true,
      ),
      avg_duration_s: average(durationsInSeconds),
      avg_cost_usd: average(costs),
      avg_tokens: average(tokens),
      avg_errors: average(errorCounts),
      total_cost_usd: sum(costs),
    };
  };

  const comparison = {};

  for (const executor of executors) {
    const executorSessions = metrics.filter((m) => m.executor === executor);

    if (executorSessions.length > 0) {
      comparison[executor] = summarize(executorSessions);
    } else {
      console.log(`No data for executor: ${executor}`);
    }
  }

  const rows = Object.entries(comparison);

  // Display as table
  console.log(
    "┌────────────┬──────────┬───────────┬──────────────┬──────────┬──────────┬────────────┐",
  );
  console.log(
    "│ Executor │ Sessions │ Success % │ First-shot % │ Avg Time │ Avg Cost │ Total Cost │",
  );
  console.log(
    "├────────────┼──────────┼───────────┼──────────────┼──────────┼──────────┼────────────┤",
  );

  for (const [executor, stats] of rows) {
    const cells = [
      executor.padEnd(10),
      String(stats.total_sessions).padStart(8),
      `${stats.success_rate.toFixed(1).padStart(8)}%`,
      `${stats.first_shot_rate.toFixed(1).padStart(11)}%`,
      `${stats.avg_duration_s.toFixed(1).padStart(7)}s`,
      stats.avg_cost_usd.toFixed(3).padStart(8),
      stats.total_cost_usd.toFixed(2).padStart(10),
    ];
    console.log(`│ ${cells.join(" │ ")} │`);
  }

  console.log(
    "└────────────┴──────────┴───────────┴──────────────┴──────────┴──────────┴────────────┘",
  );

  // Additional stats
  console.log("\nDetailed Stats:");
  for (const [executor, stats] of rows) {
    console.log(`\n${executor}:`);
    console.log(` Avg tokens: ${Math.round(stats.avg_tokens)}`);
    console.log(` Avg errors: ${stats.avg_errors.toFixed(1)}`);
  }
}
618
+
619
/**
 * Derive the planning issues suggested by one task's sessions.
 *
 * Both the per-task output and the overall summary of analyzePlanning() use
 * this single rule set, so the two cannot drift apart.
 *
 * @param {{sessions: object[], completed: boolean}} task - Sessions recorded
 *   for the task and whether any of them completed.
 * @param {Object<string, number>} errorFreq - Occurrences per error category
 *   across the task's sessions.
 * @returns {Array<{tag: string, message: string}>} Issues in reporting order;
 *   `tag` is the short label counted in the summary, `message` the advice
 *   printed for the task.
 */
function collectPlanningIssues(task, errorFreq) {
  const issues = [];

  if (errorFreq["dependency"]) {
    issues.push({
      tag: "dependency",
      message: "Missing dependency setup in task description",
    });
  }
  if (errorFreq["api_key"] || errorFreq["auth"]) {
    issues.push({
      tag: "auth",
      message: "Missing environment/auth setup instructions",
    });
  }
  if (errorFreq["context_window"]) {
    issues.push({
      tag: "scope",
      message: "Task scope too large, should be broken into subtasks",
    });
  }
  if (errorFreq["test"]) {
    issues.push({
      tag: "test",
      message: "Missing test setup or test data requirements",
    });
  }
  if (errorFreq["build"]) {
    issues.push({
      tag: "build",
      message: "Missing build configuration or tooling setup",
    });
  }
  if (task.sessions.length >= 3 && !task.completed) {
    issues.push({
      tag: "complexity",
      message: "Task may be too complex or poorly specified for automation",
    });
  }

  // Executor switching analysis
  const uniqueExecutors = [...new Set(task.sessions.map((s) => s.executor))];
  if (uniqueExecutors.length > 1) {
    issues.push({
      tag: "executor_switching",
      message: `Executor switching detected (${uniqueExecutors.join(" → ")}) - may indicate persistent issues`,
    });
  }

  return issues;
}

/**
 * Analyze task planning effectiveness over the last 30 days.
 *
 * Sessions are grouped by `task_id`. Depending on `options.failedOnly`, either
 * tasks with no completed session or tasks with more than one attempt are
 * reported, each with its error categories and suggested planning
 * improvements, followed by a summary of the most common issues.
 *
 * @param {object} [options]
 * @param {boolean} [options.failedOnly=false] - Report only tasks that never
 *   completed, instead of tasks that needed several attempts.
 * @returns {Promise<void>} Output is written to stdout.
 */
async function analyzePlanning(options = {}) {
  console.log("\n=== Task Planning Analysis ===\n");

  const metrics = await loadMetricsWithFallback({ days: 30 });

  if (metrics.length === 0) {
    console.log("No metrics data found");
    return;
  }

  // Group by task
  const byTask = groupBy(metrics, "task_id");

  // Filter to failed/problematic tasks
  const problematicTasks = Object.entries(byTask)
    .map(([taskId, sessions]) => ({
      task_id: taskId,
      sessions,
      completed: sessions.some((s) => s.outcome?.status === "completed"),
      multipleAttempts: sessions.length > 1,
    }))
    .filter((t) => (options.failedOnly ? !t.completed : t.multipleAttempts));

  if (problematicTasks.length === 0) {
    console.log("No problematic tasks found");
    return;
  }

  // Issue tags of every reported task, collected for the summary below.
  const allPlanningIssues = [];

  for (const task of problematicTasks) {
    console.log(`\n=== Task: ${task.task_id} ===`);
    console.log(`Status: ${task.completed ? "completed" : "failed"}`);
    console.log(`Attempts: ${task.sessions.length}`);

    // Analyze error patterns
    const allErrors = task.sessions.flatMap(
      (s) => s.error_summary?.error_categories || [],
    );
    const errorFreq = countFrequency(allErrors);

    if (Object.keys(errorFreq).length > 0) {
      console.log("\nRoot Cause Categories:");
      for (const [category, count] of Object.entries(errorFreq)) {
        console.log(` ${category}: ${count}`);
      }
    }

    // Identify planning issues
    const planningIssues = collectPlanningIssues(task, errorFreq);
    allPlanningIssues.push(...planningIssues.map((issue) => issue.tag));

    if (planningIssues.length > 0) {
      console.log("\n:lightbulb: Planning Improvements:");
      for (const issue of planningIssues) {
        console.log(` - ${issue.message}`);
      }
    }
  }

  // Summary
  console.log("\n=== Summary ===");
  console.log(`Analyzed ${problematicTasks.length} tasks`);

  const issueFreq = countFrequency(allPlanningIssues);
  console.log("\nMost common planning issues:");
  for (const [issue, count] of topN(issueFreq, 5)) {
    console.log(` ${issue}: ${count} tasks`);
  }
}
740
+
741
/**
 * Generate the weekly report: overall metrics, per-executor success rates,
 * the five most frequent error fingerprints and threshold-based
 * recommendations for the last 7 days.
 *
 * @returns {Promise<void>} Output is written to stdout.
 */
async function generateWeeklyReport() {
  console.log("\n=== Weekly Agent Work Report ===\n");

  const metrics = await loadMetricsWithFallback({ days: 7 });

  if (metrics.length === 0) {
    console.log("No data for the past 7 days");
    return;
  }

  // Overall stats (totalSessions is non-zero here, so the divisions are safe)
  const totalSessions = metrics.length;
  const completedSessions = metrics.filter(
    (m) => m.outcome?.status === "completed",
  ).length;
  const successRate = (completedSessions * 100.0) / totalSessions;

  const totalDuration = sum(
    metrics.map((m) => (m.metrics?.duration_ms || 0) / 1000),
  );
  const totalCost = sum(metrics.map((m) => m.metrics?.cost_usd || 0));
  const totalErrors = sum(metrics.map((m) => m.error_summary?.total_errors || 0));

  console.log("Period: Last 7 days");
  console.log(`Generated: ${new Date().toISOString()}\n`);

  console.log(":chart: Overall Metrics:");
  console.log(` Total Sessions: ${totalSessions}`);
  console.log(` Completed: ${completedSessions} (${successRate.toFixed(1)}%)`);
  console.log(` Total Duration: ${Math.round(totalDuration / 60)} minutes`);
  console.log(` Total Cost: $${totalCost.toFixed(2)}`);
  console.log(` Total Errors: ${totalErrors}`);

  // Executor comparison
  const byExecutor = groupBy(metrics, "executor");
  console.log("\n:bot: By Executor:");
  for (const [executor, sessions] of Object.entries(byExecutor)) {
    const execSuccessRate = percentage(
      sessions,
      (s) => s.outcome?.status === "completed",
    );
    console.log(
      ` ${executor}: ${sessions.length} sessions, ${execSuccessRate.toFixed(1)}% success`,
    );
  }

  // Top errors. Errors without a stored fingerprint are fingerprinted from
  // their message, as buildDerivedMetrics() does, instead of all being lumped
  // together as "unknown".
  const errors = await loadErrors({ days: 7 });
  const fingerprintCounts = countFrequency(
    errors.map(
      (e) =>
        e.data?.error_fingerprint ||
        normalizeErrorFingerprint(e.data?.error_message) ||
        "unknown",
    ),
  );
  const topErrors = topN(fingerprintCounts, 5);

  console.log("\n:close: Top Errors:");
  if (topErrors.length === 0) {
    console.log(" none");
  }
  for (const [fingerprint, count] of topErrors) {
    console.log(` ${fingerprint}: ${count} occurrences`);
  }

  // Recommendations
  console.log("\n:lightbulb: Recommendations:");

  const recommendations = [];
  if (successRate < 70) {
    recommendations.push(" - Success rate is below 70% - review task planning");
  }
  if (totalCost / totalSessions > 0.1) {
    recommendations.push(
      " - Average cost per session is high - consider prompt optimization",
    );
  }
  if (totalErrors / totalSessions > 2) {
    recommendations.push(
      " - High error rate - investigate common failure patterns",
    );
  }

  if (recommendations.length === 0) {
    console.log(" none");
  }
  for (const recommendation of recommendations) {
    console.log(recommendation);
  }
}
823
+
824
+ // ── CLI Entry Point ─────────────────────────────────────────────────────────
825
+
826
// The CLI runs only when this file is the script Node was started with, so the
// module can also be imported for its exports without side effects.
const isDirectRun = process.argv[1] && resolve(process.argv[1]) === __filename;

if (isDirectRun) {
  const args = process.argv.slice(2);
  const command = args[0];

  if (!command) {
    console.log(`
Agent Work Analytics CLI

Usage:
node analyze-agent-work.mjs --backlog-tasks [N] [--days N]
node analyze-agent-work.mjs --error-clustering [--days N] [--top N]
node analyze-agent-work.mjs --error-correlation [--days N] [--top N] [--json]
node analyze-agent-work.mjs --executor-comparison <executor1> <executor2> ...
node analyze-agent-work.mjs --task-planning [--failed-only]
node analyze-agent-work.mjs --weekly-report

Examples:
node analyze-agent-work.mjs --backlog-tasks 10 --days 30
node analyze-agent-work.mjs --error-clustering --days 7 --top 15
node analyze-agent-work.mjs --error-correlation --days 30 --top 5
node analyze-agent-work.mjs --executor-comparison CODEX COPILOT
node analyze-agent-work.mjs --task-planning --failed-only
`);
    process.exit(0);
  }

  // Parse options: "--key value" stores the value (as a number when it looks
  // like one); "--key" followed by another flag or by nothing stores `true`.
  const options = {};
  for (let i = 0; i < args.length; i++) {
    if (!args[i].startsWith("--")) continue;

    const key = args[i].slice(2);
    const value = args[i + 1];
    if (value && !value.startsWith("--")) {
      const numeric = Number(value);
      options[key] = Number.isNaN(numeric) ? value : numeric;
      i++;
    } else {
      options[key] = true;
    }
  }

  // Read a numeric flag. Without validation, "--days" with no value is parsed
  // as `true` and silently becomes a 1-day window, and "--days abc" silently
  // disables date filtering altogether.
  const readPositiveNumber = (name, fallback) => {
    const raw = options[name];
    if (raw === undefined) return fallback;
    if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
      console.error(`Error: --${name} expects a positive number`);
      process.exit(1);
    }
    return raw;
  };

  // Execute command
  try {
    switch (command) {
      case "--backlog-tasks": {
        // The task limit is positional: it directly follows the command.
        const hasLimit = args[1] && !args[1].startsWith("--");
        await analyzeBacklog({
          limit: hasLimit ? Number(args[1]) : 10,
          days: readPositiveNumber("days", 30),
        });
        break;
      }

      case "--error-clustering":
        await clusterErrors({
          days: readPositiveNumber("days", 7),
          top: readPositiveNumber("top", 10),
        });
        break;

      case "--error-correlation":
        await correlateErrors({
          days: readPositiveNumber("days", 7),
          top: readPositiveNumber("top", 10),
          json: options.json || false,
        });
        break;

      case "--executor-comparison": {
        // Braces keep this lexical declaration scoped to its own case.
        const executors = args.slice(1).filter((a) => !a.startsWith("--"));
        if (executors.length === 0) {
          console.error("Error: Specify at least one executor");
          process.exit(1);
        }
        await compareExecutors(executors);
        break;
      }

      case "--task-planning":
        await analyzePlanning({
          failedOnly: options["failed-only"] || false,
        });
        break;

      case "--weekly-report":
        await generateWeeklyReport();
        break;

      default:
        console.error(`Unknown command: ${command}`);
        process.exit(1);
    }
  } catch (err) {
    console.error(`Error: ${err.message}`);
    console.error(err.stack);
    process.exit(1);
  }
}