@os-eco/overstory-cli 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +381 -0
- package/agents/builder.md +137 -0
- package/agents/coordinator.md +263 -0
- package/agents/lead.md +301 -0
- package/agents/merger.md +160 -0
- package/agents/monitor.md +214 -0
- package/agents/reviewer.md +140 -0
- package/agents/scout.md +119 -0
- package/agents/supervisor.md +423 -0
- package/package.json +47 -0
- package/src/agents/checkpoint.test.ts +88 -0
- package/src/agents/checkpoint.ts +101 -0
- package/src/agents/hooks-deployer.test.ts +2040 -0
- package/src/agents/hooks-deployer.ts +607 -0
- package/src/agents/identity.test.ts +603 -0
- package/src/agents/identity.ts +384 -0
- package/src/agents/lifecycle.test.ts +196 -0
- package/src/agents/lifecycle.ts +183 -0
- package/src/agents/manifest.test.ts +746 -0
- package/src/agents/manifest.ts +354 -0
- package/src/agents/overlay.test.ts +676 -0
- package/src/agents/overlay.ts +308 -0
- package/src/beads/client.test.ts +217 -0
- package/src/beads/client.ts +202 -0
- package/src/beads/molecules.test.ts +338 -0
- package/src/beads/molecules.ts +198 -0
- package/src/commands/agents.test.ts +322 -0
- package/src/commands/agents.ts +287 -0
- package/src/commands/clean.test.ts +670 -0
- package/src/commands/clean.ts +618 -0
- package/src/commands/completions.test.ts +342 -0
- package/src/commands/completions.ts +887 -0
- package/src/commands/coordinator.test.ts +1530 -0
- package/src/commands/coordinator.ts +733 -0
- package/src/commands/costs.test.ts +1119 -0
- package/src/commands/costs.ts +564 -0
- package/src/commands/dashboard.test.ts +308 -0
- package/src/commands/dashboard.ts +838 -0
- package/src/commands/doctor.test.ts +294 -0
- package/src/commands/doctor.ts +213 -0
- package/src/commands/errors.test.ts +647 -0
- package/src/commands/errors.ts +248 -0
- package/src/commands/feed.test.ts +578 -0
- package/src/commands/feed.ts +361 -0
- package/src/commands/group.test.ts +262 -0
- package/src/commands/group.ts +511 -0
- package/src/commands/hooks.test.ts +458 -0
- package/src/commands/hooks.ts +253 -0
- package/src/commands/init.test.ts +347 -0
- package/src/commands/init.ts +650 -0
- package/src/commands/inspect.test.ts +670 -0
- package/src/commands/inspect.ts +431 -0
- package/src/commands/log.test.ts +1454 -0
- package/src/commands/log.ts +724 -0
- package/src/commands/logs.test.ts +379 -0
- package/src/commands/logs.ts +546 -0
- package/src/commands/mail.test.ts +1270 -0
- package/src/commands/mail.ts +771 -0
- package/src/commands/merge.test.ts +670 -0
- package/src/commands/merge.ts +355 -0
- package/src/commands/metrics.test.ts +444 -0
- package/src/commands/metrics.ts +143 -0
- package/src/commands/monitor.test.ts +191 -0
- package/src/commands/monitor.ts +390 -0
- package/src/commands/nudge.test.ts +230 -0
- package/src/commands/nudge.ts +372 -0
- package/src/commands/prime.test.ts +470 -0
- package/src/commands/prime.ts +381 -0
- package/src/commands/replay.test.ts +741 -0
- package/src/commands/replay.ts +360 -0
- package/src/commands/run.test.ts +431 -0
- package/src/commands/run.ts +351 -0
- package/src/commands/sling.test.ts +657 -0
- package/src/commands/sling.ts +661 -0
- package/src/commands/spec.test.ts +203 -0
- package/src/commands/spec.ts +168 -0
- package/src/commands/status.test.ts +430 -0
- package/src/commands/status.ts +398 -0
- package/src/commands/stop.test.ts +420 -0
- package/src/commands/stop.ts +151 -0
- package/src/commands/supervisor.test.ts +187 -0
- package/src/commands/supervisor.ts +535 -0
- package/src/commands/trace.test.ts +745 -0
- package/src/commands/trace.ts +325 -0
- package/src/commands/watch.test.ts +145 -0
- package/src/commands/watch.ts +247 -0
- package/src/commands/worktree.test.ts +786 -0
- package/src/commands/worktree.ts +311 -0
- package/src/config.test.ts +822 -0
- package/src/config.ts +829 -0
- package/src/doctor/agents.test.ts +454 -0
- package/src/doctor/agents.ts +396 -0
- package/src/doctor/config-check.test.ts +190 -0
- package/src/doctor/config-check.ts +183 -0
- package/src/doctor/consistency.test.ts +651 -0
- package/src/doctor/consistency.ts +294 -0
- package/src/doctor/databases.test.ts +290 -0
- package/src/doctor/databases.ts +218 -0
- package/src/doctor/dependencies.test.ts +184 -0
- package/src/doctor/dependencies.ts +175 -0
- package/src/doctor/logs.test.ts +251 -0
- package/src/doctor/logs.ts +295 -0
- package/src/doctor/merge-queue.test.ts +216 -0
- package/src/doctor/merge-queue.ts +144 -0
- package/src/doctor/structure.test.ts +291 -0
- package/src/doctor/structure.ts +198 -0
- package/src/doctor/types.ts +37 -0
- package/src/doctor/version.test.ts +136 -0
- package/src/doctor/version.ts +129 -0
- package/src/e2e/init-sling-lifecycle.test.ts +277 -0
- package/src/errors.ts +217 -0
- package/src/events/store.test.ts +660 -0
- package/src/events/store.ts +369 -0
- package/src/events/tool-filter.test.ts +330 -0
- package/src/events/tool-filter.ts +126 -0
- package/src/index.ts +316 -0
- package/src/insights/analyzer.test.ts +466 -0
- package/src/insights/analyzer.ts +203 -0
- package/src/logging/color.test.ts +142 -0
- package/src/logging/color.ts +71 -0
- package/src/logging/logger.test.ts +813 -0
- package/src/logging/logger.ts +266 -0
- package/src/logging/reporter.test.ts +259 -0
- package/src/logging/reporter.ts +109 -0
- package/src/logging/sanitizer.test.ts +190 -0
- package/src/logging/sanitizer.ts +57 -0
- package/src/mail/broadcast.test.ts +203 -0
- package/src/mail/broadcast.ts +92 -0
- package/src/mail/client.test.ts +773 -0
- package/src/mail/client.ts +223 -0
- package/src/mail/store.test.ts +705 -0
- package/src/mail/store.ts +387 -0
- package/src/merge/queue.test.ts +359 -0
- package/src/merge/queue.ts +231 -0
- package/src/merge/resolver.test.ts +1345 -0
- package/src/merge/resolver.ts +645 -0
- package/src/metrics/store.test.ts +667 -0
- package/src/metrics/store.ts +445 -0
- package/src/metrics/summary.test.ts +398 -0
- package/src/metrics/summary.ts +178 -0
- package/src/metrics/transcript.test.ts +356 -0
- package/src/metrics/transcript.ts +175 -0
- package/src/mulch/client.test.ts +671 -0
- package/src/mulch/client.ts +332 -0
- package/src/sessions/compat.test.ts +280 -0
- package/src/sessions/compat.ts +104 -0
- package/src/sessions/store.test.ts +873 -0
- package/src/sessions/store.ts +494 -0
- package/src/test-helpers.test.ts +124 -0
- package/src/test-helpers.ts +126 -0
- package/src/tracker/beads.ts +56 -0
- package/src/tracker/factory.test.ts +80 -0
- package/src/tracker/factory.ts +64 -0
- package/src/tracker/seeds.ts +182 -0
- package/src/tracker/types.ts +52 -0
- package/src/types.ts +724 -0
- package/src/watchdog/daemon.test.ts +1975 -0
- package/src/watchdog/daemon.ts +671 -0
- package/src/watchdog/health.test.ts +431 -0
- package/src/watchdog/health.ts +264 -0
- package/src/watchdog/triage.test.ts +164 -0
- package/src/watchdog/triage.ts +179 -0
- package/src/worktree/manager.test.ts +439 -0
- package/src/worktree/manager.ts +198 -0
- package/src/worktree/tmux.test.ts +1009 -0
- package/src/worktree/tmux.ts +509 -0
- package/templates/CLAUDE.md.tmpl +89 -0
- package/templates/hooks.json.tmpl +105 -0
- package/templates/overlay.md.tmpl +81 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for metrics summary generation and formatting.
|
|
3
|
+
*
|
|
4
|
+
* Uses real MetricsStore with temp DB. No mocks.
|
|
5
|
+
* Philosophy: "never mock what you can use for real" (mx-252b16).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
9
|
+
import { mkdtemp } from "node:fs/promises";
|
|
10
|
+
import { tmpdir } from "node:os";
|
|
11
|
+
import { join } from "node:path";
|
|
12
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
13
|
+
import type { SessionMetrics } from "../types.ts";
|
|
14
|
+
import { createMetricsStore, type MetricsStore } from "./store.ts";
|
|
15
|
+
import { formatSummary, generateSummary } from "./summary.ts";
|
|
16
|
+
|
|
17
|
+
// Per-test fixtures: every test gets a fresh temp directory and a fresh
// MetricsStore backed by a database file inside it.
let tempDir: string;
let dbPath: string;
let store: MetricsStore;

beforeEach(async () => {
	tempDir = await mkdtemp(join(tmpdir(), "overstory-metrics-test-"));
	dbPath = join(tempDir, "metrics.db");
	store = createMetricsStore(dbPath);
});

afterEach(async () => {
	try {
		store.close();
	} finally {
		// Always remove the temp directory, even if closing the store throws,
		// so a failing close cannot leak directories under the OS temp dir.
		await cleanupTempDir(tempDir);
	}
});
|
|
31
|
+
|
|
32
|
+
/**
 * Builds a fully populated SessionMetrics fixture.
 *
 * Any field present in `overrides` replaces the corresponding default.
 */
function makeSession(overrides: Partial<SessionMetrics> = {}): SessionMetrics {
	const defaults: SessionMetrics = {
		agentName: "test-agent",
		beadId: "test-task-123",
		capability: "builder",
		startedAt: new Date("2026-01-01T00:00:00Z").toISOString(),
		completedAt: new Date("2026-01-01T00:05:00Z").toISOString(),
		durationMs: 300_000,
		exitCode: 0,
		mergeResult: "auto-resolve",
		parentAgent: "coordinator",
		inputTokens: 0,
		outputTokens: 0,
		cacheReadTokens: 0,
		cacheCreationTokens: 0,
		estimatedCostUsd: null,
		modelUsed: null,
		runId: null,
	};

	// Later spread wins, so caller-supplied fields take precedence.
	return { ...defaults, ...overrides };
}
|
|
54
|
+
|
|
55
|
+
// === generateSummary ===
|
|
56
|
+
|
|
57
|
+
describe("generateSummary", () => {
	/** Records one session per overrides object, in argument order. */
	const recordSessions = (...overridesList: Partial<SessionMetrics>[]): void => {
		for (const overrides of overridesList) {
			store.recordSession(makeSession(overrides));
		}
	};

	test("empty store returns zeros and empty arrays", () => {
		const { totalSessions, completedSessions, averageDurationMs, byCapability, recentSessions } =
			generateSummary(store);

		expect(totalSessions).toBe(0);
		expect(completedSessions).toBe(0);
		expect(averageDurationMs).toBe(0);
		expect(byCapability).toEqual({});
		expect(recentSessions).toEqual([]);
	});

	test("counts total and completed sessions correctly", () => {
		recordSessions(
			{ beadId: "task-1", completedAt: "2026-01-01T00:05:00Z" },
			{ beadId: "task-2", completedAt: null },
			{ beadId: "task-3", completedAt: "2026-01-01T00:10:00Z" },
		);

		const { totalSessions, completedSessions } = generateSummary(store);

		expect(totalSessions).toBe(3);
		expect(completedSessions).toBe(2);
	});

	test("groups by capability with correct counts and avg durations", () => {
		recordSessions(
			{ beadId: "task-1", capability: "builder", durationMs: 100_000 },
			{ beadId: "task-2", capability: "builder", durationMs: 200_000 },
			{ beadId: "task-3", capability: "scout", durationMs: 50_000 },
		);

		const { byCapability } = generateSummary(store);

		expect(byCapability.builder).toEqual({
			count: 2,
			avgDurationMs: 150_000,
		});
		expect(byCapability.scout).toEqual({
			count: 1,
			avgDurationMs: 50_000,
		});
	});

	test("respects the limit parameter for recentSessions", () => {
		recordSessions(
			{ beadId: "task-1" },
			{ beadId: "task-2" },
			{ beadId: "task-3" },
			{ beadId: "task-4" },
		);

		const { totalSessions, recentSessions } = generateSummary(store, 2);

		// Totals cover everything recorded; only the recent list is limited.
		expect(totalSessions).toBe(4);
		expect(recentSessions).toHaveLength(2);
	});

	test("sessions without completedAt counted in total but not completed", () => {
		recordSessions(
			{ beadId: "task-1", completedAt: null },
			{ beadId: "task-2", completedAt: null },
			{ beadId: "task-3", completedAt: "2026-01-01T00:05:00Z" },
		);

		const { totalSessions, completedSessions } = generateSummary(store);

		expect(totalSessions).toBe(3);
		expect(completedSessions).toBe(1);
	});

	test("aggregates token totals across all sessions", () => {
		recordSessions(
			{
				beadId: "task-1",
				inputTokens: 10_000,
				outputTokens: 2_000,
				cacheReadTokens: 50_000,
				cacheCreationTokens: 5_000,
				estimatedCostUsd: 1.5,
			},
			{
				beadId: "task-2",
				inputTokens: 20_000,
				outputTokens: 3_000,
				cacheReadTokens: 80_000,
				cacheCreationTokens: 10_000,
				estimatedCostUsd: 2.5,
			},
		);

		const { tokenTotals } = generateSummary(store);

		expect(tokenTotals.inputTokens).toBe(30_000);
		expect(tokenTotals.outputTokens).toBe(5_000);
		expect(tokenTotals.cacheReadTokens).toBe(130_000);
		expect(tokenTotals.cacheCreationTokens).toBe(15_000);
		expect(tokenTotals.estimatedCostUsd).toBeCloseTo(4.0, 2);
	});

	test("token totals are zero when no sessions have token data", () => {
		recordSessions({ beadId: "task-1" });

		const { tokenTotals } = generateSummary(store);

		expect(tokenTotals.inputTokens).toBe(0);
		expect(tokenTotals.outputTokens).toBe(0);
		expect(tokenTotals.cacheReadTokens).toBe(0);
		expect(tokenTotals.cacheCreationTokens).toBe(0);
		expect(tokenTotals.estimatedCostUsd).toBe(0);
	});

	test("token totals skip null cost entries gracefully", () => {
		recordSessions(
			{ beadId: "task-1", inputTokens: 100, estimatedCostUsd: 0.5 },
			// Second session carries no cost data.
			{ beadId: "task-2", inputTokens: 200, estimatedCostUsd: null },
		);

		const { tokenTotals } = generateSummary(store);

		expect(tokenTotals.inputTokens).toBe(300);
		expect(tokenTotals.estimatedCostUsd).toBeCloseTo(0.5, 2);
	});

	test("capability breakdown excludes incomplete sessions from avgDurationMs", () => {
		recordSessions(
			{ beadId: "task-1", capability: "builder", durationMs: 100_000, completedAt: null },
			{ beadId: "task-2", capability: "builder", durationMs: 200_000 },
			{ beadId: "task-3", capability: "builder", durationMs: 300_000 },
		);

		const builderStats = generateSummary(store).byCapability.builder;

		// Three sessions recorded, but only two completed: (200_000 + 300_000) / 2.
		expect(builderStats?.count).toBe(3);
		expect(builderStats?.avgDurationMs).toBe(250_000);
	});
});
|
|
233
|
+
|
|
234
|
+
// === formatSummary ===
|
|
235
|
+
|
|
236
|
+
describe("formatSummary", () => {
	/** Records a single session built from the given overrides. */
	const record = (overrides: Partial<SessionMetrics>): void => {
		store.recordSession(makeSession(overrides));
	};

	/** Summarises the current store contents and formats the result. */
	const render = (): string => formatSummary(generateSummary(store));

	test("contains header '=== Session Metrics ==='", () => {
		expect(render()).toContain("=== Session Metrics ===");
	});

	test("shows total/completed/average duration", () => {
		record({ beadId: "task-1", durationMs: 100_000 });
		record({ beadId: "task-2", durationMs: 200_000 });

		const output = render();

		for (const fragment of ["Total sessions: 2", "Completed: 2", "Average duration:"]) {
			expect(output).toContain(fragment);
		}
	});

	test("shows capability breakdown", () => {
		record({ beadId: "task-1", capability: "builder" });
		record({ beadId: "task-2", capability: "scout" });

		const output = render();

		for (const fragment of ["By capability:", "builder:", "scout:"]) {
			expect(output).toContain(fragment);
		}
	});

	test("shows recent sessions with status (done vs running)", () => {
		record({
			beadId: "task-1",
			agentName: "agent-done",
			completedAt: "2026-01-01T00:05:00Z",
		});
		record({
			beadId: "task-2",
			agentName: "agent-running",
			completedAt: null,
		});

		const output = render();

		const expectedFragments = [
			"Recent sessions:",
			"agent-done",
			"done",
			"agent-running",
			"running",
			"in progress",
		];
		for (const fragment of expectedFragments) {
			expect(output).toContain(fragment);
		}
	});

	test("formatDuration: <1000ms shows ms", () => {
		record({ beadId: "task-1", durationMs: 500 });

		expect(render()).toContain("500ms");
	});

	test("formatDuration: <60000ms shows seconds", () => {
		record({ beadId: "task-1", durationMs: 5_500 });

		expect(render()).toContain("5.5s");
	});

	test("formatDuration: >=60000ms shows minutes+seconds", () => {
		record({ beadId: "task-1", durationMs: 125_000 });

		expect(render()).toContain("2m 5s");
	});

	test("shows token usage section when sessions have token data", () => {
		record({
			beadId: "task-1",
			inputTokens: 15_000,
			outputTokens: 3_000,
			cacheReadTokens: 100_000,
			cacheCreationTokens: 10_000,
			estimatedCostUsd: 2.47,
		});

		const output = render();

		const expectedFragments = [
			"Token usage:",
			"Input:",
			"Output:",
			"Cache read:",
			"Cache creation:",
			"Estimated cost:",
			"$2.47",
		];
		for (const fragment of expectedFragments) {
			expect(output).toContain(fragment);
		}
	});

	test("hides token usage section when no token data exists", () => {
		record({ beadId: "task-1" });

		expect(render()).not.toContain("Token usage:");
	});

	test("shows per-session cost in recent sessions", () => {
		record({
			beadId: "task-1",
			agentName: "agent-costly",
			inputTokens: 10_000,
			outputTokens: 2_000,
			estimatedCostUsd: 1.23,
		});

		const output = render();

		expect(output).toContain("agent-costly");
		expect(output).toContain("$1.23");
	});

	test("formats large token counts with M suffix", () => {
		record({
			beadId: "task-1",
			inputTokens: 2_500_000,
			outputTokens: 500_000,
			cacheReadTokens: 0,
			cacheCreationTokens: 0,
			estimatedCostUsd: 10.0,
		});

		expect(render()).toContain("2.5M");
	});

	test("empty summary does not include 'By capability' or 'Recent sessions' sections", () => {
		const output = render();

		expect(output).not.toContain("By capability:");
		expect(output).not.toContain("Recent sessions:");
	});
});
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics reporting utilities.
|
|
3
|
+
*
|
|
4
|
+
* Generates summary statistics from a MetricsStore and formats them
|
|
5
|
+
* for human-readable console output.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { SessionMetrics } from "../types.ts";
|
|
9
|
+
import type { MetricsStore } from "./store.ts";
|
|
10
|
+
|
|
11
|
+
/** Token counts and estimated cost accumulated across a set of sessions. */
export interface TokenTotals {
	/** Sum of input tokens. */
	inputTokens: number;
	/** Sum of output tokens. */
	outputTokens: number;
	/** Sum of cache-read tokens. */
	cacheReadTokens: number;
	/** Sum of cache-creation tokens. */
	cacheCreationTokens: number;
	/** Sum of estimated cost in USD. */
	estimatedCostUsd: number;
}
|
|
18
|
+
|
|
19
|
+
/** Aggregate view of recorded sessions, as consumed by `formatSummary`. */
export interface MetricsSummary {
	/** Number of sessions included in the aggregate. */
	totalSessions: number;
	/** Number of those sessions that have a completion timestamp. */
	completedSessions: number;
	/** Overall average session duration in milliseconds. */
	averageDurationMs: number;
	/** Per-capability session count and average duration in milliseconds, keyed by capability name. */
	byCapability: Record<string, { count: number; avgDurationMs: number }>;
	/** The sessions listed individually in the report. */
	recentSessions: SessionMetrics[];
	/** Token and cost totals. */
	tokenTotals: TokenTotals;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Generate an aggregate summary from the metrics store.
 *
 * Session counts, the per-capability breakdown and the token totals are
 * computed from the sessions returned by `store.getRecentSessions(10_000)`,
 * so they cover at most 10,000 sessions. The overall average duration is
 * taken from `store.getAverageDuration()` and rounded to whole milliseconds.
 *
 * @param store - The MetricsStore to query
 * @param limit - Maximum number of recent sessions to include (default 10)
 * @returns The aggregated summary. Per-capability `avgDurationMs` averages only
 *   sessions with a non-null `completedAt` (0 when a capability has none), and
 *   sessions with a null `estimatedCostUsd` add nothing to the cost total.
 */
export function generateSummary(store: MetricsStore, limit = 10): MetricsSummary {
	const recentSessions = store.getRecentSessions(limit);

	// Fetch all sessions for aggregate stats (use a generous limit).
	const allSessions = store.getRecentSessions(10_000);
	const averageDurationMs = store.getAverageDuration();

	let completedSessions = 0;
	const capabilityStats = new Map<
		string,
		{ count: number; completed: number; totalMs: number }
	>();
	const tokenTotals: TokenTotals = {
		inputTokens: 0,
		outputTokens: 0,
		cacheReadTokens: 0,
		cacheCreationTokens: 0,
		estimatedCostUsd: 0,
	};

	// Single pass: completed count, per-capability stats and token totals together.
	for (const session of allSessions) {
		let stats = capabilityStats.get(session.capability);
		if (stats === undefined) {
			stats = { count: 0, completed: 0, totalMs: 0 };
			capabilityStats.set(session.capability, stats);
		}
		stats.count++;

		// Only completed sessions contribute to duration averages.
		if (session.completedAt !== null) {
			completedSessions++;
			stats.completed++;
			stats.totalMs += session.durationMs;
		}

		tokenTotals.inputTokens += session.inputTokens;
		tokenTotals.outputTokens += session.outputTokens;
		tokenTotals.cacheReadTokens += session.cacheReadTokens;
		tokenTotals.cacheCreationTokens += session.cacheCreationTokens;
		if (session.estimatedCostUsd !== null) {
			tokenTotals.estimatedCostUsd += session.estimatedCostUsd;
		}
	}

	const byCapability: Record<string, { count: number; avgDurationMs: number }> = {};
	for (const [capability, stats] of capabilityStats) {
		byCapability[capability] = {
			count: stats.count,
			// Guard against division by zero when nothing in this capability completed.
			avgDurationMs: stats.completed > 0 ? Math.round(stats.totalMs / stats.completed) : 0,
		};
	}

	return {
		totalSessions: allSessions.length,
		completedSessions,
		averageDurationMs: Math.round(averageDurationMs),
		byCapability,
		recentSessions,
		tokenTotals,
	};
}
|
|
99
|
+
|
|
100
|
+
/**
 * Render a MetricsSummary as multi-line text for console output.
 *
 * The header and overall counts are always present. The capability, token
 * usage and recent-session sections are emitted only when they have content.
 *
 * @param summary - The summary to render
 * @returns The report lines joined with "\n"
 */
export function formatSummary(summary: MetricsSummary): string {
	const { byCapability, tokenTotals, recentSessions } = summary;

	const out: string[] = [
		"=== Session Metrics ===",
		"",
		`Total sessions: ${summary.totalSessions}`,
		`Completed: ${summary.completedSessions}`,
		`Average duration: ${formatDuration(summary.averageDurationMs)}`,
	];

	const capabilityEntries = Object.entries(byCapability);
	if (capabilityEntries.length > 0) {
		out.push("", "By capability:");
		for (const [name, stats] of capabilityEntries) {
			out.push(` ${name}: ${stats.count} sessions, avg ${formatDuration(stats.avgDurationMs)}`);
		}
	}

	// The token section appears only when at least one token counter is positive.
	const tokenCounters = [
		tokenTotals.inputTokens,
		tokenTotals.outputTokens,
		tokenTotals.cacheReadTokens,
		tokenTotals.cacheCreationTokens,
	];
	if (tokenCounters.some((value) => value > 0)) {
		out.push(
			"",
			"Token usage:",
			` Input: ${formatTokenCount(tokenTotals.inputTokens)}`,
			` Output: ${formatTokenCount(tokenTotals.outputTokens)}`,
			` Cache read: ${formatTokenCount(tokenTotals.cacheReadTokens)}`,
			` Cache creation: ${formatTokenCount(tokenTotals.cacheCreationTokens)}`,
		);
		if (tokenTotals.estimatedCostUsd > 0) {
			out.push(` Estimated cost: $${tokenTotals.estimatedCostUsd.toFixed(2)}`);
		}
	}

	if (recentSessions.length > 0) {
		out.push("", "Recent sessions:");
		for (const session of recentSessions) {
			// A session without a completion timestamp is reported as still running.
			const finished = session.completedAt !== null;
			const status = finished ? "done" : "running";
			const duration = finished ? formatDuration(session.durationMs) : "in progress";
			const costSuffix =
				session.estimatedCostUsd !== null ? ` $${session.estimatedCostUsd.toFixed(2)}` : "";
			out.push(
				` ${session.agentName} [${session.capability}] ${status} (${duration})${costSuffix}`,
			);
		}
	}

	return out.join("\n");
}
|
|
158
|
+
|
|
159
|
+
/**
 * Format milliseconds into a human-friendly duration string.
 *
 * - below 1,000 ms: raw milliseconds, e.g. "500ms"
 * - below one minute: seconds with one decimal, e.g. "5.5s"
 * - otherwise: whole minutes and seconds, e.g. "2m 5s"
 *
 * Rounding carries into the next unit, so the result never shows 60 seconds
 * (neither "60.0s" nor "1m 60s").
 *
 * @param ms - Duration in milliseconds
 * @returns The formatted duration
 */
function formatDuration(ms: number): string {
	if (ms < 1_000) {
		return `${ms}ms`;
	}
	if (ms < 60_000) {
		const secondsText = (ms / 1_000).toFixed(1);
		// Values just under a minute round up to "60.0"; show those as minutes instead.
		if (secondsText !== "60.0") {
			return `${secondsText}s`;
		}
	}
	// Round to whole seconds first, then split, so e.g. 119_500 ms becomes "2m 0s".
	const totalSeconds = Math.round(ms / 1_000);
	const minutes = Math.floor(totalSeconds / 60);
	const seconds = totalSeconds % 60;
	return `${minutes}m ${seconds}s`;
}
|
|
171
|
+
|
|
172
|
+
/**
 * Format a token count for display.
 *
 * Counts of one million or more are shown in millions with one decimal
 * (e.g. "2.5M"); smaller counts use en-US digit grouping (e.g. "15,000").
 */
function formatTokenCount(count: number): string {
	const oneMillion = 1_000_000;
	return count >= oneMillion
		? `${(count / oneMillion).toFixed(1)}M`
		: count.toLocaleString("en-US");
}
|