bb-cc-lite 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -61
- package/dist/baseline-builder.js +64 -64
- package/dist/baseline-builder.js.map +1 -1
- package/dist/baseline-refresh.d.ts +66 -0
- package/dist/baseline-refresh.js +214 -0
- package/dist/baseline-refresh.js.map +1 -0
- package/dist/baseline.d.ts +11 -1
- package/dist/baseline.js +128 -4
- package/dist/baseline.js.map +1 -1
- package/dist/cli.js +36 -10
- package/dist/cli.js.map +1 -1
- package/dist/doctor.d.ts +1 -1
- package/dist/doctor.js +30 -12
- package/dist/doctor.js.map +1 -1
- package/dist/event-store-persistence.js +21 -3
- package/dist/event-store-persistence.js.map +1 -1
- package/dist/event-store-queries.d.ts +4 -1
- package/dist/event-store-queries.js +38 -11
- package/dist/event-store-queries.js.map +1 -1
- package/dist/failure-episodes.d.ts +21 -0
- package/dist/failure-episodes.js +266 -0
- package/dist/failure-episodes.js.map +1 -0
- package/dist/historical-replay.d.ts +21 -0
- package/dist/historical-replay.js +263 -0
- package/dist/historical-replay.js.map +1 -0
- package/dist/hook-payload.js +11 -7
- package/dist/hook-payload.js.map +1 -1
- package/dist/hook-summary.d.ts +4 -5
- package/dist/hook-summary.js +37 -11
- package/dist/hook-summary.js.map +1 -1
- package/dist/recovery-stats.d.ts +57 -0
- package/dist/recovery-stats.js +234 -0
- package/dist/recovery-stats.js.map +1 -0
- package/dist/settings.d.ts +1 -1
- package/dist/settings.js +7 -0
- package/dist/settings.js.map +1 -1
- package/dist/signals.js +137 -58
- package/dist/signals.js.map +1 -1
- package/dist/statusline.d.ts +5 -1
- package/dist/statusline.js +8 -3
- package/dist/statusline.js.map +1 -1
- package/dist/tool-metadata.d.ts +10 -0
- package/dist/tool-metadata.js +50 -2
- package/dist/tool-metadata.js.map +1 -1
- package/dist/transcript.js +106 -25
- package/dist/transcript.js.map +1 -1
- package/dist/types.d.ts +53 -0
- package/package.json +1 -1
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import { asRecord, stringField } from "./status-input.js";
|
|
2
|
+
import { classifyResultPurpose, classifyToolIdentity, isEditTool, isReadSearchTool } from "./tool-metadata.js";
|
|
3
|
+
import { categoryFailureSingular } from "./recovery-stats.js";
|
|
4
|
+
/**
 * Builds failure-episode summaries directly from raw transcript lines.
 *
 * @param lines Transcript lines, forwarded unchanged to
 *   `extractSafeToolResultEventsFromTranscriptLines`.
 * @returns Whatever `summarizeFailureEpisodes` produces for the extracted events.
 */
export function extractFailureEpisodesFromTranscriptLines(lines) {
    const toolResultEvents = extractSafeToolResultEventsFromTranscriptLines(lines);
    return summarizeFailureEpisodes(toolResultEvents);
}
|
|
7
|
+
/**
 * Walks transcript lines and emits one tool-result event per tool result found.
 *
 * Each line is parsed as JSON; lines that fail to parse, or that `asRecord`
 * rejects, are skipped. Tool uses seen along the way are remembered by id so a
 * later result can be matched to the tool that produced it.
 *
 * @param lines Transcript lines (one JSON document per line).
 * @returns Events in the order their tool results were encountered.
 */
export function extractSafeToolResultEventsFromTranscriptLines(lines) {
    const metaByToolUseId = new Map();
    const collected = [];
    for (const rawLine of lines) {
        let decoded;
        try {
            decoded = JSON.parse(rawLine);
        }
        catch {
            // Not valid JSON: ignore this line and keep going.
            continue;
        }
        const record = asRecord(decoded);
        if (!record) {
            continue;
        }
        // Register tool uses first so results in the same entry can resolve them.
        for (const use of extractToolUses(record)) {
            if (!use.id) {
                continue;
            }
            metaByToolUseId.set(use.id, metaFromToolName(use.name, use.input));
        }
        for (const result of extractToolResults(record)) {
            const resolved = resolveMeta(result, metaByToolUseId);
            const outcome = result.isError ? "failure" : "success";
            collected.push(safeToolResultEvent(resolved, outcome));
        }
    }
    return collected;
}
|
|
34
|
+
/**
 * Converts a stored hook event into a tool-result event.
 *
 * @param event Hook event; only kinds "tool_success" and "tool_failure" are converted.
 * @returns The tool-result event, or `undefined` for any other event kind.
 */
export function safeToolResultEventFromHookEvent(event) {
    const failed = event.kind === "tool_failure";
    if (!failed && event.kind !== "tool_success") {
        return undefined;
    }
    const storedMeta = metaFromStoredHookEvent(event);
    return safeToolResultEvent(storedMeta, failed ? "failure" : "success");
}
|
|
41
|
+
/**
 * Groups tool-result events into failure episodes keyed by `event.identity`.
 *
 * A failure opens (or extends) the episode for its identity. A success with the
 * same identity closes that episode as recovered. A successful edit or
 * validation marks every still-open episode as having seen a meaningful
 * intervention, which restarts its blind-run counter on the next failure.
 * Episodes still open when the events run out are reported as not recovered
 * with `activeEnded` set.
 *
 * @param events Tool-result events in chronological order.
 * @returns Recovered episodes in closing order, followed by the still-open ones.
 */
export function summarizeFailureEpisodes(events) {
    const openEpisodes = new Map();
    const summaries = [];
    for (const event of events) {
        if (event.outcome !== "success") {
            const episode = openEpisodes.get(event.identity) || newEpisode(event);
            // A blind run restarts on the first failure and after any intervention.
            const startsNewRun = episode.attemptCount === 0 || episode.meaningfulInterventionSinceFailure;
            episode.blindRunCount = startsNewRun ? 1 : episode.blindRunCount + 1;
            episode.attemptCount += 1;
            episode.maxBlindRunCount = Math.max(episode.maxBlindRunCount, episode.blindRunCount);
            episode.meaningfulInterventionSinceFailure = false;
            openEpisodes.set(event.identity, episode);
            continue;
        }
        const resolved = openEpisodes.get(event.identity);
        if (resolved) {
            resolved.interventionEvidence.add("same_failure_success");
            summaries.push(toEpisodeSummary(resolved, true, false));
            openEpisodes.delete(event.identity);
        }
        if (event.isEdit || event.isValidation) {
            markMeaningfulIntervention(openEpisodes, event.isEdit ? "edit" : "validation_success");
        }
    }
    for (const unresolved of openEpisodes.values()) {
        summaries.push(toEpisodeSummary(unresolved, false, true));
    }
    return summaries;
}
|
|
77
|
+
/**
 * Picks the most severe blind-retry episode, if any.
 *
 * Only episodes with `activeEnded` set and a `blindRetryFailureCount` of at
 * least 2 qualify. The winner has the highest `blindRetryFailureCount`; ties
 * go to the higher `attemptCount`, then to the earliest episode in the input.
 *
 * @param episodes Episode summaries; the array is not modified.
 * @returns The selected episode, or `undefined` when none qualifies.
 */
export function summarizeBlindRetry(episodes) {
    let worst;
    for (const candidate of episodes) {
        if (!(candidate.activeEnded && candidate.blindRetryFailureCount >= 2)) {
            continue;
        }
        if (worst === undefined) {
            worst = candidate;
            continue;
        }
        const ordering = candidate.blindRetryFailureCount - worst.blindRetryFailureCount ||
            candidate.attemptCount - worst.attemptCount;
        // Strictly greater only, so the earliest of equal candidates is kept.
        if (ordering > 0) {
            worst = candidate;
        }
    }
    return worst;
}
|
|
82
|
+
/**
 * Flags every open episode as having seen a meaningful intervention.
 *
 * @param active Map of open episodes; each value is mutated in place.
 * @param kind Evidence label added to each episode's `interventionEvidence` set.
 */
function markMeaningfulIntervention(active, kind) {
    active.forEach((openEpisode) => {
        openEpisode.meaningfulInterventionSinceFailure = true;
        openEpisode.interventionEvidence.add(kind);
    });
}
|
|
88
|
+
/**
 * Creates a blank episode record for the identity carried by `event`.
 *
 * @param event Event supplying `identity`, `category`, `label` and `identityHash`.
 * @returns An episode with zeroed counters and an empty evidence set.
 */
function newEpisode(event) {
    const { identity, category, label, identityHash } = event;
    return {
        identity,
        category,
        label,
        identityHash,
        attemptCount: 0,
        blindRunCount: 0,
        maxBlindRunCount: 0,
        meaningfulInterventionSinceFailure: false,
        interventionEvidence: new Set()
    };
}
|
|
101
|
+
/**
 * Freezes a working episode into its reported summary shape.
 *
 * @param episode Working episode record.
 * @param recovered Whether the episode ended with a same-identity success.
 * @param activeEnded Whether the episode was still open when events ran out.
 * @returns Summary object; `meaningfulIntervention` is the sorted evidence
 *   list, or `undefined` when no evidence was recorded.
 */
function toEpisodeSummary(episode, recovered, activeEnded) {
    const evidence = episode.interventionEvidence;
    let meaningfulIntervention;
    if (evidence.size > 0) {
        meaningfulIntervention = Array.from(evidence).sort();
    }
    return {
        identity: episode.identity,
        category: episode.category,
        label: episode.label,
        identityHash: episode.identityHash,
        attemptCount: episode.attemptCount,
        recovered,
        activeEnded,
        meaningfulIntervention,
        blindRetryFailureCount: episode.maxBlindRunCount
    };
}
|
|
114
|
+
/**
 * Builds the tool-result event for one tool outcome.
 *
 * For MCP tools the tool name is replaced by the generic "MCP tool" and the
 * identity hash is kept; for every other tool the real name is used and the
 * identity hash is dropped.
 *
 * @param meta Tool metadata (name, purpose, category, identityHash, isEdit, isReadSearch).
 * @param outcome "success" or "failure".
 * @returns The event object consumed by the episode summarizers.
 */
function safeToolResultEvent(meta, outcome) {
    const category = failureRecoveryCategory(meta);
    const isMcp = meta.category === "MCP";
    return {
        outcome,
        identity: failureIdentity(meta, category),
        category,
        label: failureLabel(category),
        toolName: isMcp ? "MCP tool" : meta.name,
        purpose: meta.purpose,
        identityHash: isMcp ? meta.identityHash : undefined,
        isEdit: meta.isEdit,
        isValidation: isValidationCategory(category),
        isReadSearch: meta.isReadSearch
    };
}
|
|
129
|
+
/**
 * Maps tool metadata to a failure-recovery category.
 *
 * MCP tools map to "mcp". Bash maps to its validation category when the
 * purpose is one, otherwise "tool". Read, Grep, Glob and LS map to their
 * lower-case names. Any other edit tool maps to "edit"; everything else is "tool".
 *
 * @param meta Tool metadata.
 * @returns The category string.
 */
function failureRecoveryCategory(meta) {
    if (meta.category === "MCP") {
        return "mcp";
    }
    switch (meta.name) {
        case "Bash":
            return validationCategoryForPurpose(meta.purpose) || "tool";
        case "Read":
            return "read";
        case "Grep":
            return "grep";
        case "Glob":
            return "glob";
        case "LS":
            return "ls";
        default:
            return meta.isEdit ? "edit" : "tool";
    }
}
|
|
154
|
+
/**
 * Derives the key under which failures are grouped into one episode.
 *
 * @param meta Tool metadata; only `identityHash` is read, and only for "mcp".
 * @param category Failure-recovery category.
 * @returns `mcp:<hash>` (or `mcp:aggregate` without a hash) for MCP,
 *   `validation:<category>` for tests/lint/typecheck/build, otherwise
 *   `category:<category>`.
 */
function failureIdentity(meta, category) {
    switch (category) {
        case "mcp":
            return `mcp:${meta.identityHash || "aggregate"}`;
        case "tests":
        case "lint":
        case "typecheck":
        case "build":
            return `validation:${category}`;
        default:
            return `category:${category}`;
    }
}
|
|
163
|
+
/**
 * Returns the label for a failure category.
 *
 * @param category Failure-recovery category.
 * @returns The result of `categoryFailureSingular(category)`.
 */
function failureLabel(category) {
    const label = categoryFailureSingular(category);
    return label;
}
|
|
166
|
+
/**
 * Tells whether a category is one of the validation categories.
 *
 * @param category Failure-recovery category.
 * @returns `true` for "tests", "lint", "typecheck" or "build"; `false` otherwise.
 */
function isValidationCategory(category) {
    switch (category) {
        case "tests":
        case "lint":
        case "typecheck":
        case "build":
            return true;
        default:
            return false;
    }
}
|
|
169
|
+
/**
 * Passes a purpose through when it names a validation category.
 *
 * @param purpose Purpose string, possibly undefined.
 * @returns `purpose` when it is "tests", "lint", "typecheck" or "build";
 *   otherwise `undefined`.
 */
function validationCategoryForPurpose(purpose) {
    const validationPurposes = ["tests", "lint", "typecheck", "build"];
    if (validationPurposes.includes(purpose)) {
        return purpose;
    }
    return undefined;
}
|
|
172
|
+
/**
 * Classifies a tool by name and input, returning the metadata this module uses.
 *
 * @param toolName Tool name as it appears in the transcript.
 * @param input Tool input, or `undefined` when unavailable.
 * @returns Metadata taken from `classifyToolIdentity` (called with
 *   `basenameOnly: true`), with `displayName` exposed as `name`.
 */
function metaFromToolName(toolName, input) {
    const classified = classifyToolIdentity(toolName, input, { basenameOnly: true });
    const { displayName, purpose, category, identityHash, isEdit, isReadSearch } = classified;
    return {
        name: displayName,
        purpose,
        category,
        identityHash,
        isEdit,
        isReadSearch
    };
}
|
|
183
|
+
/**
 * Rebuilds tool metadata from a stored hook event.
 *
 * The name is "MCP tool" for MCP events; otherwise it is the event's tool
 * name, or "tool" when that is missing. The edit and read/search flags are
 * derived from that resolved name.
 *
 * @param event Stored hook event.
 * @returns Tool metadata in the same shape as `metaFromToolName`.
 */
function metaFromStoredHookEvent(event) {
    let name = "MCP tool";
    if (event.category !== "MCP") {
        name = event.toolName || "tool";
    }
    return {
        name,
        purpose: event.purpose,
        category: event.category,
        identityHash: event.identityHash,
        isEdit: isEditTool(name),
        isReadSearch: isReadSearchTool(name)
    };
}
|
|
194
|
+
/**
 * Finds the tool metadata for a tool result.
 *
 * The metadata recorded for the matching tool-use id is preferred. Without a
 * match, metadata is classified from the tool name on the result alone. For
 * Bash the purpose detected on the result takes precedence; in the matched
 * case it falls back to the recorded purpose when the result has none.
 *
 * @param toolResult Extracted tool result (toolUseId, toolName, purpose).
 * @param toolById Map from tool-use id to recorded metadata.
 * @returns A fresh metadata object; the recorded one is not mutated.
 */
function resolveMeta(toolResult, toolById) {
    const recorded = toolResult.toolUseId ? toolById.get(toolResult.toolUseId) : undefined;
    if (!recorded) {
        const classified = metaFromToolName(toolResult.toolName, undefined);
        const purpose = classified.name === "Bash" ? toolResult.purpose : classified.purpose;
        return { ...classified, purpose };
    }
    if (recorded.name !== "Bash") {
        return { ...recorded, purpose: recorded.purpose };
    }
    return { ...recorded, purpose: toolResult.purpose || recorded.purpose };
}
|
|
202
|
+
/**
 * Collects the tool uses described by one transcript entry.
 *
 * Two shapes are recognised: content parts of type "tool_use" that carry a
 * name, and a direct form where the entry is itself of type "tool_use" or has
 * a `tool_use` / `toolUse` record, with the name taken from that record or
 * from `tool_name` / `toolName`.
 *
 * @param entry Parsed transcript entry.
 * @returns `{ id, name, input }` objects, content parts first.
 */
function extractToolUses(entry) {
    const uses = contentParts(entry).flatMap((part) => {
        if (part.type !== "tool_use") {
            return [];
        }
        const name = stringField(part.name);
        return name ? [{ id: stringField(part.id), name, input: part.input }] : [];
    });
    const nested = asRecord(entry.tool_use) || asRecord(entry.toolUse);
    const directName = stringField(nested?.name) || stringField(entry.tool_name) || stringField(entry.toolName);
    const isDirectToolUse = entry.type === "tool_use" || Boolean(nested);
    if (directName && isDirectToolUse) {
        uses.push({
            id: stringField(nested?.id) || stringField(entry.tool_use_id),
            name: directName,
            input: nested?.input
        });
    }
    return uses;
}
|
|
219
|
+
/**
 * Collects the tool results described by one transcript entry.
 *
 * Content parts of type "tool_result" are read first. If the entry itself is
 * of type "tool_result" or "tool_result_delta", it contributes one more result.
 *
 * @param entry Parsed transcript entry.
 * @returns `{ toolUseId, toolName, isError, purpose }` objects.
 */
function extractToolResults(entry) {
    const results = contentParts(entry)
        .filter((part) => part.type === "tool_result")
        .map((part) => ({
            toolUseId: stringField(part.tool_use_id) || stringField(part.toolUseId),
            toolName: stringField(part.name) || stringField(part.tool_name),
            isError: truthyError(part),
            purpose: classifyResultPurpose(part)
        }));
    const isTopLevelResult = entry.type === "tool_result" || entry.type === "tool_result_delta";
    if (isTopLevelResult) {
        results.push({
            toolUseId: stringField(entry.tool_use_id) || stringField(entry.toolUseId),
            toolName: stringField(entry.name) || stringField(entry.tool_name) || stringField(entry.toolName),
            isError: truthyError(entry),
            purpose: classifyResultPurpose(entry)
        });
    }
    return results;
}
|
|
241
|
+
/**
 * Gathers the record-shaped content parts of a transcript entry.
 *
 * Both `entry.content` and `entry.message.content` are inspected, in that
 * order. An array contributes each element that `asRecord` accepts; a single
 * accepted value contributes itself.
 *
 * @param entry Parsed transcript entry.
 * @returns The accepted parts as returned by `asRecord`.
 */
function contentParts(entry) {
    const message = asRecord(entry.message);
    const collected = [];
    for (const candidate of [entry.content, message?.content]) {
        if (Array.isArray(candidate)) {
            for (const item of candidate) {
                const record = asRecord(item);
                if (record) {
                    collected.push(record);
                }
            }
            continue;
        }
        const single = asRecord(candidate);
        if (single) {
            collected.push(single);
        }
    }
    return collected;
}
|
|
255
|
+
/**
 * Decides whether a tool-result record represents a failure.
 *
 * Checked in order: an explicit `true` on `is_error`, `isError` or `error`;
 * a `status` (or, failing that, `result`) string containing "error", "failed"
 * or "failure" in any letter case; a numeric `exit_code` / `exitCode` other
 * than zero.
 *
 * @param value Tool-result record.
 * @returns `true` when any of the checks indicates an error.
 */
function truthyError(value) {
    const explicitFlags = [value.is_error, value.isError, value.error];
    if (explicitFlags.some((flag) => flag === true)) {
        return true;
    }
    const status = stringField(value.status) || stringField(value.result);
    if (status && /error|failed|failure/i.test(status)) {
        return true;
    }
    const exitCode = value.exit_code ?? value.exitCode;
    if (typeof exitCode !== "number") {
        return false;
    }
    return exitCode !== 0;
}
|
|
266
|
+
//# sourceMappingURL=failure-episodes.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"failure-episodes.js","sourceRoot":"","sources":["../src/failure-episodes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAC1D,OAAO,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,UAAU,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAC/G,OAAO,EAAE,uBAAuB,EAAgC,MAAM,qBAAqB,CAAC;AAwC5F,MAAM,UAAU,yCAAyC,CAAC,KAAe;IACvE,OAAO,wBAAwB,CAAC,8CAA8C,CAAC,KAAK,CAAC,CAAC,CAAC;AACzF,CAAC;AAED,MAAM,UAAU,8CAA8C,CAAC,KAAe;IAC5E,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC7C,MAAM,MAAM,GAA0B,EAAE,CAAC;IAEzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;QAC/B,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,KAAK,MAAM,OAAO,IAAI,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;YAC7C,IAAI,OAAO,CAAC,EAAE,EAAE,CAAC;gBACf,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,gBAAgB,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;YAC1E,CAAC;QACH,CAAC;QAED,KAAK,MAAM,UAAU,IAAI,kBAAkB,CAAC,KAAK,CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,GAAG,WAAW,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;YAC/C,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;QACrF,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,gCAAgC,CAAC,KAAsB;IACrE,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;QACnE,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,MAAM,IAAI,GAAG,uBAAuB,CAAC,KAAK,CAAC,CAAC;IAC5C,OAAO,mBAAmB,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,KAAK,cAAc,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;AAC1F,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,MAA6B;IACpE,MAAM,MAAM,GAAG,IAAI,GAAG,EAAyB,CAAC;IAChD,MAAM,SAAS,GAA4B,EAAE,CAAC;IAE9C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;YAChC,MAAM,mBAAmB,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACvD,IAAI,mBAAmB,EAAE,CAAC;gBACxB,mBAAmB,CAAC,oBAAoB,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;gBACrE,SAAS,CAAC,IAAI,CAAC,gBAAgB,CAAC,mBAAmB,EAAE,IAAI,EAAE,KAAK,C
AAC,CAAC,CAAC;gBACnE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAChC,CAAC;YACD,IAAI,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;gBACvC,0BAA0B,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC;YACnF,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC;QAChE,IAAI,OAAO,CAAC,YAAY,KAAK,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,aAAa,GAAG,CAAC,CAAC;QAC5B,CAAC;aAAM,IAAI,OAAO,CAAC,kCAAkC,EAAE,CAAC;YACtD,OAAO,CAAC,aAAa,GAAG,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,aAAa,IAAI,CAAC,CAAC;QAC7B,CAAC;QACD,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC;QAC1B,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,gBAAgB,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;QACrF,OAAO,CAAC,kCAAkC,GAAG,KAAK,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACtC,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QACtC,SAAS,CAAC,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,QAAiC;IACnE,OAAO,QAAQ;SACZ,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,sBAAsB,IAAI,CAAC,CAAC;SAC/E,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,sBAAsB,GAAG,IAAI,CAAC,sBAAsB,IAAI,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;AACpI,CAAC;AAED,SAAS,0BAA0B,CAAC,MAAkC,EAAE,IAAsB;IAC5F,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QACtC,OAAO,CAAC,kCAAkC,GAAG,IAAI,CAAC;QAClD,OAAO,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACzC,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CAAC,KAA0B;IAC5C,OAAO;QACL,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,YAAY,EAAE,CAAC;QACf,aAAa,EAAE,CAAC;QAChB,gBAAgB,EAAE,CAAC;QACnB,kCAAkC,EAAE,KAAK;QACzC,oBAAoB,EAAE,IAAI,GAAG,EAAE;KAChC,CAAC;AACJ,CAAC;AAED,SAAS,gBAAgB,CAAC,OAAsB,EAAE,SAAkB,EAAE,WAAoB;IACxF,OAAO;QACL,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,YAAY,EAAE,OAAO,CAAC,YAAY;QACl
C,YAAY,EAAE,OAAO,CAAC,YAAY;QAClC,SAAS;QACT,WAAW;QACX,sBAAsB,EACpB,OAAO,CAAC,oBAAoB,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS;QAC9F,sBAAsB,EAAE,OAAO,CAAC,gBAAgB;KACjD,CAAC;AACJ,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAc,EAAE,OAA0B;IACrE,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAC;IAC/C,OAAO;QACL,OAAO;QACP,QAAQ,EAAE,eAAe,CAAC,IAAI,EAAE,QAAQ,CAAC;QACzC,QAAQ;QACR,KAAK,EAAE,YAAY,CAAC,QAAQ,CAAC;QAC7B,QAAQ,EAAE,IAAI,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI;QAC1D,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,YAAY,EAAE,IAAI,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS;QACrE,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,YAAY,EAAE,oBAAoB,CAAC,QAAQ,CAAC;QAC5C,YAAY,EAAE,IAAI,CAAC,YAAY;KAChC,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAc;IAC7C,IAAI,IAAI,CAAC,QAAQ,KAAK,KAAK,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5D,OAAO,QAAQ,IAAI,MAAM,CAAC;IAC5B,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACzB,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACzB,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QACzB,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,eAAe,CAAC,IAAc,EAAE,QAAiC;IACxE,IAAI,QAAQ,KAAK,KAAK,EAAE,CAAC;QACvB,OAAO,OAAO,IAAI,CAAC,YAAY,IAAI,WAAW,EAAE,CAAC;IACnD,CAAC;IACD,IAAI,QAAQ,KAAK,OAAO,IAAI,QAAQ,KAAK,MAAM,IAAI,QAAQ,KAAK,WAAW,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;QACpG,OAAO,cAAc,QAAQ,EAAE,CAAC;IAClC,CAAC;IACD,OAAO,YAAY,QAAQ,EAAE,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAiC;IACrD,OAAO,uBAAuB,CAAC,QAAQ,CAAC,CAAC;AAC3C,CAAC;AAED,SAAS,oBAAoB,CAAC,QAAiC;IAC7D,OAAO,QAAQ,KAAK,OAAO,IAAI,QAAQ,KAAK,MAAM,IAAI,QAAQ,KAAK,WAAW,IAAI,QAAQ,KAAK,OAAO,CAAC;AACzG,CAAC;AAED,SAAS,4BAA4B,CAAC,OAA2B;IAC/D,OAAO,OAAO,KAAK,OAAO,IA
AI,OAAO,KAAK,MAAM,IAAI,OAAO,KAAK,WAAW,IAAI,OAAO,KAAK,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;AAC3H,CAAC;AAED,SAAS,gBAAgB,CAAC,QAA4B,EAAE,KAAc;IACpE,MAAM,QAAQ,GAAG,oBAAoB,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/E,OAAO;QACL,IAAI,EAAE,QAAQ,CAAC,WAAW;QAC1B,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,YAAY,EAAE,QAAQ,CAAC,YAAY;QACnC,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,YAAY,EAAE,QAAQ,CAAC,YAAY;KACpC,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAAC,KAAsB;IACrD,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,KAAK,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,IAAI,MAAM,CAAC;IAC9E,OAAO;QACL,IAAI;QACJ,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,YAAY,EAAE,KAAK,CAAC,YAAY;QAChC,MAAM,EAAE,UAAU,CAAC,IAAI,CAAC;QACxB,YAAY,EAAE,gBAAgB,CAAC,IAAI,CAAC;KACrC,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAClB,UAAuE,EACvE,QAA+B;IAE/B,MAAM,IAAI,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IACnF,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;IACxG,CAAC;IACD,MAAM,IAAI,GAAG,gBAAgB,CAAC,UAAU,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC9D,OAAO,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;AACxF,CAAC;AAED,SAAS,eAAe,CAAC,KAA8B;IACrD,MAAM,MAAM,GAA0D,EAAE,CAAC;IACzE,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpC,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;YACrE,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACpE,MAAM,UAAU,GAAG,WAAW,CAAC,OAAO,EAAE,IAAI,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC7G,IAAI,UAAU,IAAI
,CAAC,KAAK,CAAC,IAAI,KAAK,UAAU,IAAI,OAAO,CAAC,EAAE,CAAC;QACzD,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,WAAW,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3H,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAA8B;IAMxD,MAAM,MAAM,GAAyF,EAAE,CAAC;IACxG,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;QACvC,IAAI,IAAI,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAChC,MAAM,CAAC,IAAI,CAAC;gBACV,SAAS,EAAE,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC;gBACvE,QAAQ,EAAE,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC;gBAC/D,OAAO,EAAE,WAAW,CAAC,IAAI,CAAC;gBAC1B,OAAO,EAAE,qBAAqB,CAAC,IAAI,CAAC;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,IAAI,KAAK,CAAC,IAAI,KAAK,mBAAmB,EAAE,CAAC;QACvE,MAAM,CAAC,IAAI,CAAC;YACV,SAAS,EAAE,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC;YACzE,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,QAAQ,CAAC;YAChG,OAAO,EAAE,WAAW,CAAC,KAAK,CAAC;YAC3B,OAAO,EAAE,qBAAqB,CAAC,KAAK,CAAC;SACtC,CAAC,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,KAA8B;IAClD,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IACrD,MAAM,KAAK,GAA8B,EAAE,CAAC;IAC5C,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;YAC7B,KAAK,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACxF,CAAC;aAAM,IAAI,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;YAC/B,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAE,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,WAAW,CAAC,KAA8B;IACjD,IAAI,KAAK,CAAC,QAAQ,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,KAAK,IAAI,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;QAC9E,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI
,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IACtE,IAAI,MAAM,IAAI,uBAAuB,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;QACnD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,QAAQ,GAAG,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,QAAQ,CAAC;IACnD,OAAO,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,KAAK,CAAC,CAAC;AACxD,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { type FailureRecoveryCategory } from "./recovery-stats.js";
|
|
2
|
+
/** Optional settings for `evaluateHistoricalReplay`; every field has a fallback. */
export interface HistoricalReplayOptions {
|
|
3
|
+
/** Home directory used to locate transcripts; falls back to `os.homedir()`. */
homeDir?: string;
|
|
4
|
+
/** Directory scanned for transcript files; falls back to `<homeDir>/.claude/projects`. */
claudeProjectsDir?: string;
|
|
5
|
+
/** Limit handed to the transcript file listing; falls back to 1500. */
maxFiles?: number;
|
|
6
|
+
/** Byte limit handed to the transcript readers; falls back to 1024 * 1024. */
maxBytesPerTranscript?: number;
|
|
7
|
+
/** Ratio used to size the holdout set; falls back to 0.2. */
holdoutRatio?: number;
|
|
8
|
+
}
|
|
9
|
+
/** Metrics produced by replaying holdout sessions against a training baseline. */
export interface HistoricalReplayMetrics {
|
|
10
|
+
/** Number of holdout sessions that were replayed. */
holdoutSessions: number;
|
|
11
|
+
/** Total failure episodes observed across the holdout sessions. */
evaluatedFailureEpisodes: number;
|
|
12
|
+
/** Share of Stop-flagged episodes that did not recover; undefined when no Stop was issued. */
stopPrecisionOnUnrecoveredEpisodes: number | undefined;
|
|
13
|
+
/** Stop-flagged episodes that went on to recover. */
falseStopCountOnRecoveredEpisodes: number;
|
|
14
|
+
/** Unrecovered episodes with at least 3 attempts for which no Stop was issued. */
missedUnrecoveredLoopCount: number;
|
|
15
|
+
/** Share of blind-retry-warned episodes that did not recover; undefined when none were warned. */
blindRetryPrecision: number | undefined;
|
|
16
|
+
/** Mean attempt number of the first non-Healthy state, to 2 decimals; 0 when no warning occurred. */
averageAttemptsBeforeWarning: number;
|
|
17
|
+
/** Non-Healthy failure events for which the baseline yielded no recovery insight. */
lowSampleSuppressions: number;
|
|
18
|
+
/** Episode count per failure category; categories never seen are absent. */
categoryCoverage: Partial<Record<FailureRecoveryCategory, number>>;
|
|
19
|
+
}
|
|
20
|
+
/**
 * Replays stored transcripts: the leading share of the listed files becomes the
 * holdout set, the remainder builds the baseline, and the holdout is scored
 * against that baseline.
 *
 * @param options Optional overrides for directories, limits and holdout ratio.
 * @returns The metrics gathered over the holdout sessions.
 */
export declare function evaluateHistoricalReplay(options?: HistoricalReplayOptions): Promise<HistoricalReplayMetrics>;
|
|
21
|
+
/**
 * Renders replay metrics as one line of labelled values separated by "; ".
 *
 * @param metrics Metrics returned by `evaluateHistoricalReplay`.
 * @returns The formatted line.
 */
export declare function formatHistoricalReplayMetrics(metrics: HistoricalReplayMetrics): string;
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import { readdir, stat } from "node:fs/promises";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { extractFailureEpisodesFromTranscriptLines, extractSafeToolResultEventsFromTranscriptLines } from "./failure-episodes.js";
|
|
5
|
+
import { addFailureEpisodeToRecoveryCounters, blindRetryAggregatesFromCounters, emptyRecoveryBuildCounters, FAILURE_RECOVERY_CATEGORIES, recoveryAggregatesFromCounters, recoveryInsight } from "./recovery-stats.js";
|
|
6
|
+
import { readTranscriptTail } from "./transcript-reader.js";
|
|
7
|
+
// Fallback for `options.maxFiles`; passed to `listTranscriptFiles`.
const DEFAULT_MAX_FILES = 1500;
|
|
8
|
+
// Fallback for `options.maxBytesPerTranscript` (1 MiB); passed to the transcript readers.
const DEFAULT_MAX_BYTES_PER_TRANSCRIPT = 1024 * 1024;
|
|
9
|
+
// Fallback for `options.holdoutRatio`; passed to `holdoutSessionCount`.
const DEFAULT_HOLDOUT_RATIO = 0.2;
|
|
10
|
+
/**
 * Evaluates the warning heuristics against stored transcripts.
 *
 * Transcript files are listed under the projects directory. The first
 * `holdoutSessionCount(...)` files form the holdout set and the rest form the
 * training set. Training episodes build the baseline; holdout events are then
 * replayed against it.
 *
 * @param options Optional overrides; nullish fields fall back to the module defaults.
 * @returns The metrics computed by `evaluateHoldout`.
 */
export async function evaluateHistoricalReplay(options = {}) {
    const home = options.homeDir ?? homedir();
    const projectsDir = options.claudeProjectsDir ?? join(home, ".claude", "projects");
    const fileLimit = options.maxFiles ?? DEFAULT_MAX_FILES;
    const byteLimit = options.maxBytesPerTranscript ?? DEFAULT_MAX_BYTES_PER_TRANSCRIPT;
    const transcripts = await listTranscriptFiles(projectsDir, fileLimit);
    const splitAt = holdoutSessionCount(transcripts.length, options.holdoutRatio ?? DEFAULT_HOLDOUT_RATIO);
    const holdoutFiles = transcripts.slice(0, splitAt);
    const trainingFiles = transcripts.slice(splitAt);
    // Training is read before holdout, matching the original read order.
    const trainingEpisodes = await readEpisodes(trainingFiles, byteLimit);
    const holdoutEventsBySession = await readEventsBySession(holdoutFiles, byteLimit);
    return evaluateHoldout(holdoutEventsBySession, baselineFromEpisodes(trainingEpisodes));
}
|
|
24
|
+
/**
 * Renders replay metrics as a single "; "-separated line of labelled values.
 *
 * @param metrics Metrics returned by `evaluateHistoricalReplay`.
 * @returns The formatted line.
 */
export function formatHistoricalReplayMetrics(metrics) {
    const segments = [];
    segments.push(`holdout sessions ${metrics.holdoutSessions}`);
    segments.push(`evaluated failure episodes ${metrics.evaluatedFailureEpisodes}`);
    segments.push(`Stop precision on unrecovered episodes ${formatMetricRate(metrics.stopPrecisionOnUnrecoveredEpisodes)}`);
    segments.push(`false Stop count on recovered episodes ${metrics.falseStopCountOnRecoveredEpisodes}`);
    segments.push(`missed unrecovered loop count ${metrics.missedUnrecoveredLoopCount}`);
    segments.push(`blind retry precision ${formatMetricRate(metrics.blindRetryPrecision)}`);
    segments.push(`average attempts before warning ${metrics.averageAttemptsBeforeWarning.toFixed(2)}`);
    segments.push(`low-sample suppressions ${metrics.lowSampleSuppressions}`);
    segments.push(`category coverage ${formatCategoryCoverage(metrics.categoryCoverage)}`);
    return segments.join("; ");
}
|
|
37
|
+
/**
 * Replays every holdout session and aggregates the outcomes into metrics.
 *
 * @param holdoutEventsBySession One array of tool-result events per holdout session.
 * @param baseline Baseline built from the training episodes; forwarded to `replaySession`.
 * @returns Aggregated metrics. The two precision values are `undefined` when
 *   their denominator is zero; the average is 0 when no warning was recorded.
 */
function evaluateHoldout(holdoutEventsBySession, baseline) {
    const tally = {
        episodes: 0,
        stops: 0,
        correctStops: 0,
        falseStops: 0,
        missedLoops: 0,
        blindWarnings: 0,
        correctBlindWarnings: 0,
        suppressions: 0
    };
    const warningAttempts = [];
    const categoryCoverage = {};
    for (const sessionEvents of holdoutEventsBySession) {
        const { outcomes, lowSampleSuppressions } = replaySession(sessionEvents, baseline);
        tally.suppressions += lowSampleSuppressions;
        for (const outcome of outcomes) {
            tally.episodes += 1;
            categoryCoverage[outcome.category] = (categoryCoverage[outcome.category] || 0) + 1;
            if (outcome.warningAttempt !== undefined) {
                warningAttempts.push(outcome.warningAttempt);
            }
            if (outcome.stopIssued) {
                tally.stops += 1;
                if (outcome.recovered) {
                    tally.falseStops += 1;
                }
                else {
                    tally.correctStops += 1;
                }
            }
            else if (!outcome.recovered && outcome.attemptCount >= 3) {
                // Unrecovered loop of three or more attempts that never triggered Stop.
                tally.missedLoops += 1;
            }
            if (outcome.blindRetryWarningIssued) {
                tally.blindWarnings += 1;
                if (!outcome.recovered) {
                    tally.correctBlindWarnings += 1;
                }
            }
        }
    }
    let averageAttemptsBeforeWarning = 0;
    if (warningAttempts.length > 0) {
        let attemptSum = 0;
        for (const attempt of warningAttempts) {
            attemptSum += attempt;
        }
        averageAttemptsBeforeWarning = Number((attemptSum / warningAttempts.length).toFixed(2));
    }
    return {
        holdoutSessions: holdoutEventsBySession.length,
        evaluatedFailureEpisodes: tally.episodes,
        stopPrecisionOnUnrecoveredEpisodes: tally.stops > 0 ? roundRate(tally.correctStops / tally.stops) : undefined,
        falseStopCountOnRecoveredEpisodes: tally.falseStops,
        missedUnrecoveredLoopCount: tally.missedLoops,
        blindRetryPrecision: tally.blindWarnings > 0 ? roundRate(tally.correctBlindWarnings / tally.blindWarnings) : undefined,
        averageAttemptsBeforeWarning,
        lowSampleSuppressions: tally.suppressions,
        categoryCoverage
    };
}
|
|
92
|
+
/**
 * Replays one session's events and records, per failure episode, which
 * warnings would have been issued.
 *
 * Episodes are keyed by `event.identity`. A same-identity success closes the
 * episode as recovered. A successful edit or validation marks all open
 * episodes as intervened, which restarts their blind-run counter on the next
 * failure. Episodes still open at the end are reported as not recovered.
 *
 * @param events Tool-result events of one session, in order.
 * @param baseline Baseline handed to `recoveryInsight`.
 * @returns `outcomes` (one per episode) and `lowSampleSuppressions`, the number
 *   of non-Healthy failure events for which `recoveryInsight` returned nothing.
 */
function replaySession(events, baseline) {
    const openEpisodes = new Map();
    const outcomes = [];
    let lowSampleSuppressions = 0;
    for (const event of events) {
        if (event.outcome === "success") {
            const resolved = openEpisodes.get(event.identity);
            if (resolved) {
                outcomes.push(toReplayOutcome(resolved, true));
                openEpisodes.delete(event.identity);
            }
            if (event.isEdit || event.isValidation) {
                openEpisodes.forEach((openEpisode) => {
                    openEpisode.meaningfulInterventionSinceFailure = true;
                });
            }
            continue;
        }
        let episode = openEpisodes.get(event.identity);
        if (!episode) {
            episode = {
                identity: event.identity,
                category: event.category,
                attemptCount: 0,
                blindRunCount: 0,
                maxBlindRunCount: 0,
                meaningfulInterventionSinceFailure: false,
                stopIssued: false,
                blindRetryWarningIssued: false
            };
        }
        const startsNewRun = episode.attemptCount === 0 || episode.meaningfulInterventionSinceFailure;
        episode.blindRunCount = startsNewRun ? 1 : episode.blindRunCount + 1;
        episode.attemptCount += 1;
        episode.maxBlindRunCount = Math.max(episode.maxBlindRunCount, episode.blindRunCount);
        episode.meaningfulInterventionSinceFailure = false;
        const state = replayDecisionState(episode);
        if (state !== "Healthy") {
            // Remember only the first attempt at which a warning state appeared.
            if (episode.warningAttempt === undefined) {
                episode.warningAttempt = episode.attemptCount;
            }
            if (!recoveryInsight(baseline, episode.category, episode.attemptCount)) {
                lowSampleSuppressions += 1;
            }
        }
        if (episode.maxBlindRunCount >= 2) {
            episode.blindRetryWarningIssued = true;
        }
        if (state === "Stop") {
            episode.stopIssued = true;
        }
        openEpisodes.set(event.identity, episode);
    }
    for (const unresolved of openEpisodes.values()) {
        outcomes.push(toReplayOutcome(unresolved, false));
    }
    return { outcomes, lowSampleSuppressions };
}
|
|
149
|
+
/**
 * Maps an episode's counters to a replay decision state.
 *
 * @param episode Object whose `maxBlindRunCount` and `attemptCount` are read.
 * @returns "Stop" when either counter is at least 3, "Careful" when either is
 *   at least 2, otherwise "Healthy".
 */
function replayDecisionState(episode) {
    const reaches = (limit) => episode.maxBlindRunCount >= limit || episode.attemptCount >= limit;
    if (reaches(3)) {
        return "Stop";
    }
    return reaches(2) ? "Careful" : "Healthy";
}
|
|
158
|
+
/**
 * Builds the outcome record for a finished episode.
 *
 * @param episode Episode whose `category`, `attemptCount`, `warningAttempt`,
 *   `stopIssued` and `blindRetryWarningIssued` fields are copied.
 * @param recovered Value stored as the outcome's `recovered` field.
 * @returns A new object holding the copied fields plus `recovered`.
 */
function toReplayOutcome(episode, recovered) {
    const { category, attemptCount, warningAttempt, stopIssued, blindRetryWarningIssued } = episode;
    return { category, attemptCount, recovered, warningAttempt, stopIssued, blindRetryWarningIssued };
}
|
|
168
|
+
/**
 * Builds a baseline object from a list of failure episodes.
 *
 * Every episode is fed into a fresh set of recovery counters, and the two
 * aggregate views are then derived from those counters.
 *
 * @param episodes Iterable of failure episodes.
 * @returns An object with `failureRecovery` and `blindRetry` aggregates.
 */
function baselineFromEpisodes(episodes) {
    const tally = emptyRecoveryBuildCounters();
    for (const item of episodes) {
        addFailureEpisodeToRecoveryCounters(tally, item);
    }
    const failureRecovery = recoveryAggregatesFromCounters(tally);
    const blindRetry = blindRetryAggregatesFromCounters(tally);
    return { failureRecovery, blindRetry };
}
|
|
178
|
+
/**
 * Reads failure episodes from the given transcript files as one flat list.
 *
 * @param files Transcript paths, forwarded to `readEpisodesBySession`.
 * @param maxBytesPerTranscript Byte limit, forwarded to `readEpisodesBySession`.
 * @returns The per-session episode arrays flattened by one level.
 */
async function readEpisodes(files, maxBytesPerTranscript) {
    const perSession = await readEpisodesBySession(files, maxBytesPerTranscript);
    return perSession.flat();
}
|
|
182
|
+
/**
 * Extracts failure episodes from each transcript, one result entry per file.
 *
 * Files are read one after another. A file whose tail reports
 * `pathReadable` as falsy contributes no entry.
 *
 * @param files Transcript paths to read.
 * @param maxBytesPerTranscript Passed to `readTranscriptTail` as `maxBytes`.
 * @returns One extraction result per readable file, in input order.
 */
async function readEpisodesBySession(files, maxBytesPerTranscript) {
    const perSession = [];
    for (const transcriptPath of files) {
        const tail = await readTranscriptTail(transcriptPath, { maxBytes: maxBytesPerTranscript });
        if (!tail.pathReadable) {
            continue;
        }
        perSession.push(extractFailureEpisodesFromTranscriptLines(tail.lines));
    }
    return perSession;
}
|
|
192
|
+
/**
 * Extracts safe tool-result events from each transcript, one result entry per file.
 *
 * Files are read one after another. A file whose tail reports
 * `pathReadable` as falsy contributes no entry.
 *
 * @param files Transcript paths to read.
 * @param maxBytesPerTranscript Passed to `readTranscriptTail` as `maxBytes`.
 * @returns One extraction result per readable file, in input order.
 */
async function readEventsBySession(files, maxBytesPerTranscript) {
    const perSession = [];
    for (const transcriptPath of files) {
        const tail = await readTranscriptTail(transcriptPath, { maxBytes: maxBytesPerTranscript });
        if (!tail.pathReadable) {
            continue;
        }
        perSession.push(extractSafeToolResultEventsFromTranscriptLines(tail.lines));
    }
    return perSession;
}
|
|
202
|
+
/**
 * Collects `.jsonl` files under `root` and returns the most recently modified ones.
 *
 * The tree is walked with an explicit stack. Directories that cannot be listed
 * are skipped, and files for which `readableFileMtimeMs` yields `undefined`
 * are left out.
 *
 * @param root Directory where the walk starts.
 * @param maxFiles Upper bound on the number of returned paths; a value of 0 or
 *   less yields an empty array without touching the file system.
 * @returns Paths ordered by modification time, newest first, with ties broken
 *   by `localeCompare` on the path, cut to at most `maxFiles` entries.
 */
async function listTranscriptFiles(root, maxFiles) {
    if (maxFiles <= 0) {
        return [];
    }
    const found = [];
    const stack = [root];
    while (stack.length > 0) {
        const directory = stack.pop();
        let listing;
        try {
            listing = await readdir(directory, { withFileTypes: true });
        }
        catch {
            // Unlistable directory: move on to the next pending one.
            continue;
        }
        for (const dirent of listing) {
            const fullPath = join(directory, dirent.name);
            if (dirent.isDirectory()) {
                stack.push(fullPath);
                continue;
            }
            if (!(dirent.isFile() && dirent.name.endsWith(".jsonl"))) {
                continue;
            }
            const mtimeMs = await readableFileMtimeMs(fullPath);
            if (mtimeMs !== undefined) {
                found.push({ path: fullPath, mtimeMs });
            }
        }
    }
    found.sort((left, right) => {
        const newerFirst = right.mtimeMs - left.mtimeMs;
        if (newerFirst) {
            return newerFirst;
        }
        return left.path.localeCompare(right.path);
    });
    const newest = found.slice(0, maxFiles);
    return newest.map((candidate) => candidate.path);
}
|
|
235
|
+
/**
 * Looks up the modification time of a regular file.
 *
 * @param path Path handed to `stat`.
 * @returns `mtimeMs` when `path` resolves to a regular file; `undefined` when
 *   it is something else or when `stat` fails.
 */
async function readableFileMtimeMs(path) {
    try {
        const info = await stat(path);
        if (info.isFile()) {
            return info.mtimeMs;
        }
        return undefined;
    }
    catch {
        return undefined;
    }
}
|
|
244
|
+
/**
 * Decides how many of `total` sessions go into the holdout set.
 *
 * With at most one session the total itself is returned. Otherwise the ratio
 * is clamped to the range 0.05..0.8 and the result is rounded up, then kept
 * between 1 and `total - 1` so both sides of the split are non-empty.
 *
 * A ratio that is not a number (NaN, for example from a failed numeric parse)
 * used to propagate through `Math.min`/`Math.max` and make the function return
 * NaN. It is now treated as the lower clamp bound, so the result is always an
 * integer. Results for every other ratio are unchanged.
 *
 * @param total Number of available sessions.
 * @param ratio Requested holdout share.
 * @returns The number of sessions to hold out.
 */
function holdoutSessionCount(total, ratio) {
    if (total <= 1) {
        return total;
    }
    const minRatio = 0.05;
    const maxRatio = 0.8;
    // Number() applies the same coercion Math.min would, so only NaN is rerouted.
    const requested = Number(ratio);
    const usable = Number.isNaN(requested) ? minRatio : requested;
    const clampedRatio = Math.max(minRatio, Math.min(maxRatio, usable));
    return Math.max(1, Math.min(total - 1, Math.ceil(total * clampedRatio)));
}
|
|
250
|
+
/**
 * Renders an optional rate for display.
 *
 * @param value Rate to format, or `undefined` when no rate is available.
 * @returns "n/a" for `undefined`, otherwise the value with two decimals.
 */
function formatMetricRate(value) {
    if (value === undefined) {
        return "n/a";
    }
    return value.toFixed(2);
}
|
|
253
|
+
/**
 * Renders per-category counts as a comma-separated "category:count" list.
 *
 * Categories are visited in the order of `FAILURE_RECOVERY_CATEGORIES`; those
 * whose count is missing or not above zero are left out.
 *
 * @param coverage Object mapping a category name to its count.
 * @returns The joined list, or "none" when no category has a positive count.
 */
function formatCategoryCoverage(coverage) {
    const labels = [];
    for (const category of FAILURE_RECOVERY_CATEGORIES) {
        const count = coverage[category] || 0;
        if (count > 0) {
            labels.push(`${category}:${count}`);
        }
    }
    if (labels.length === 0) {
        return "none";
    }
    return labels.join(", ");
}
|
|
260
|
+
/**
 * Rounds a rate to four decimal places.
 *
 * @param value Number to round.
 * @returns The number parsed back from its four-decimal `toFixed` rendering.
 */
function roundRate(value) {
    const fourDecimals = value.toFixed(4);
    return Number(fourDecimals);
}
|
|
263
|
+
//# sourceMappingURL=historical-replay.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"historical-replay.js","sourceRoot":"","sources":["../src/historical-replay.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EACL,yCAAyC,EACzC,8CAA8C,EAE/C,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACL,mCAAmC,EACnC,gCAAgC,EAChC,0BAA0B,EAC1B,2BAA2B,EAC3B,8BAA8B,EAC9B,eAAe,EAEhB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAG5D,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAC/B,MAAM,gCAAgC,GAAG,IAAI,GAAG,IAAI,CAAC;AACrD,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAgDlC,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,UAAmC,EAAE;IAClF,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,OAAO,EAAE,CAAC;IAC7C,MAAM,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,IAAI,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;IAC5F,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,iBAAiB,CAAC;IACvD,MAAM,qBAAqB,GAAG,OAAO,CAAC,qBAAqB,IAAI,gCAAgC,CAAC;IAChG,MAAM,KAAK,GAAG,MAAM,mBAAmB,CAAC,iBAAiB,EAAE,QAAQ,CAAC,CAAC;IACrE,MAAM,YAAY,GAAG,mBAAmB,CAAC,KAAK,CAAC,MAAM,EAAE,OAAO,CAAC,YAAY,IAAI,qBAAqB,CAAC,CAAC;IACtG,MAAM,YAAY,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,YAAY,CAAC,CAAC;IAClD,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAChD,MAAM,gBAAgB,GAAG,MAAM,YAAY,CAAC,aAAa,EAAE,qBAAqB,CAAC,CAAC;IAClF,MAAM,sBAAsB,GAAG,MAAM,mBAAmB,CAAC,YAAY,EAAE,qBAAqB,CAAC,CAAC;IAC9F,MAAM,QAAQ,GAAG,oBAAoB,CAAC,gBAAgB,CAAC,CAAC;IACxD,OAAO,eAAe,CAAC,sBAAsB,EAAE,QAAQ,CAAC,CAAC;AAC3D,CAAC;AAED,MAAM,UAAU,6BAA6B,CAAC,OAAgC;IAC5E,OAAO;QACL,oBAAoB,OAAO,CAAC,eAAe,EAAE;QAC7C,8BAA8B,OAAO,CAAC,wBAAwB,EAAE;QAChE,0CAA0C,gBAAgB,CAAC,OAAO,CAAC,kCAAkC,CAAC,EAAE;QACxG,0CAA0C,OAAO,CAAC,iCAAiC,EAAE;QACrF,iCAAiC,OAAO,CAAC,0BAA0B,EAAE;QACrE,yBAAyB,gBAAgB,CAAC,OAAO,CAAC,mBAAmB,CAAC,EAAE;QACxE,mCAAmC,OAAO,CAAC,4BAA4B,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;QACpF,2BAA2B,OAAO,CAAC,qBAAqB,EAAE;QAC1D,qBAAqB,sBAAsB,CAAC,OAAO,CAAC,gBAAgB,CAAC,EAAE;KACxE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,eAAe,CACtB,sBAA+C,EAC/C,QAAkC;IAElC,IAAI,wBAAwB,GAAG,CAAC,CAAC;IACjC,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,S
AAS,GAAG,CAAC,CAAC;IAClB,IAAI,iCAAiC,GAAG,CAAC,CAAC;IAC1C,IAAI,0BAA0B,GAAG,CAAC,CAAC;IACnC,IAAI,sBAAsB,GAAG,CAAC,CAAC;IAC/B,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,qBAAqB,GAAG,CAAC,CAAC;IAC9B,MAAM,qBAAqB,GAAa,EAAE,CAAC;IAC3C,MAAM,gBAAgB,GAAqD,EAAE,CAAC;IAE9E,KAAK,MAAM,MAAM,IAAI,sBAAsB,EAAE,CAAC;QAC5C,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC/C,qBAAqB,IAAI,MAAM,CAAC,qBAAqB,CAAC;QACtD,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACtC,wBAAwB,IAAI,CAAC,CAAC;YAC9B,gBAAgB,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACnF,IAAI,OAAO,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;gBACzC,qBAAqB,CAAC,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;YACrD,CAAC;YAED,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,SAAS,IAAI,CAAC,CAAC;gBACf,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;oBACtB,iCAAiC,IAAI,CAAC,CAAC;gBACzC,CAAC;qBAAM,CAAC;oBACN,gBAAgB,IAAI,CAAC,CAAC;gBACxB,CAAC;YACH,CAAC;YAED,MAAM,eAAe,GAAG,CAAC,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC;YACxE,IAAI,eAAe,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;gBAC3C,0BAA0B,IAAI,CAAC,CAAC;YAClC,CAAC;YAED,IAAI,OAAO,CAAC,uBAAuB,EAAE,CAAC;gBACpC,eAAe,IAAI,CAAC,CAAC;gBACrB,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;oBACvB,sBAAsB,IAAI,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO;QACL,eAAe,EAAE,sBAAsB,CAAC,MAAM;QAC9C,wBAAwB;QACxB,kCAAkC,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,gBAAgB,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS;QACvG,iCAAiC;QACjC,0BAA0B;QAC1B,mBAAmB,EAAE,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC,SAAS;QAC1G,4BAA4B,EAC1B,qBAAqB,CAAC,MAAM,GAAG,CAAC;YAC9B,CAAC,CAAC,MAAM,CAAC,CAAC,qBAAqB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,KAAK,EAAE,CAAC,CAAC,GAAG,qBAAqB,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACtH,CAAC,CAAC,CAAC;QACP,qBAAqB;QACrB,gBAAgB;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CACpB,MAA6B,EAC7B,QAAkC;IAElC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAyB,CAAC;IAChD,MAAM,QAAQ,GAAoB,EAAE,CAAC;IACrC,IAAI,qBAAqB,GAAG,CAAC,CAAC;IAE9B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KA
AK,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;YAChC,MAAM,mBAAmB,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YACvD,IAAI,mBAAmB,EAAE,CAAC;gBACxB,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,mBAAmB,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC1D,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAChC,CAAC;YACD,IAAI,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;gBACvC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;oBACtC,OAAO,CAAC,kCAAkC,GAAG,IAAI,CAAC;gBACpD,CAAC;YACH,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI;YAC5C,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,YAAY,EAAE,CAAC;YACf,aAAa,EAAE,CAAC;YAChB,gBAAgB,EAAE,CAAC;YACnB,kCAAkC,EAAE,KAAK;YACzC,UAAU,EAAE,KAAK;YACjB,uBAAuB,EAAE,KAAK;SAC/B,CAAC;QACF,IAAI,OAAO,CAAC,YAAY,KAAK,CAAC,IAAI,OAAO,CAAC,kCAAkC,EAAE,CAAC;YAC7E,OAAO,CAAC,aAAa,GAAG,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,aAAa,IAAI,CAAC,CAAC;QAC7B,CAAC;QACD,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC;QAC1B,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,gBAAgB,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;QACrF,OAAO,CAAC,kCAAkC,GAAG,KAAK,CAAC;QAEnD,MAAM,YAAY,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAClD,IAAI,YAAY,KAAK,SAAS,IAAI,OAAO,CAAC,cAAc,KAAK,SAAS,EAAE,CAAC;YACvE,OAAO,CAAC,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;QAChD,CAAC;QACD,IAAI,YAAY,KAAK,SAAS,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;YACrG,qBAAqB,IAAI,CAAC,CAAC;QAC7B,CAAC;QACD,IAAI,OAAO,CAAC,gBAAgB,IAAI,CAAC,EAAE,CAAC;YAClC,OAAO,CAAC,uBAAuB,GAAG,IAAI,CAAC;QACzC,CAAC;QACD,IAAI,YAAY,KAAK,MAAM,EAAE,CAAC;YAC5B,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;QAC5B,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACtC,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;QACtC,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,qBAAqB,EAAE,CAAC;AAC7C,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAsB;IACjD,IAAI,OAAO,CAAC,gBAAgB,IAAI,CAAC,IAAI,OAAO,CAAC,YAAY,IAAI,CAAC,EAAE,CAAC;QAC/D,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,OAAO,CAAC,gBAAgB,IAAI,CAAC,IAAI,OAAO,CAAC,
YAAY,IAAI,CAAC,EAAE,CAAC;QAC/D,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,eAAe,CAAC,OAAsB,EAAE,SAAkB;IACjE,OAAO;QACL,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,YAAY,EAAE,OAAO,CAAC,YAAY;QAClC,SAAS;QACT,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,UAAU,EAAE,OAAO,CAAC,UAAU;QAC9B,uBAAuB,EAAE,OAAO,CAAC,uBAAuB;KACzD,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAAC,QAAiC;IAC7D,MAAM,QAAQ,GAAG,0BAA0B,EAAE,CAAC;IAC9C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,mCAAmC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IACD,OAAO;QACL,eAAe,EAAE,8BAA8B,CAAC,QAAQ,CAAC;QACzD,UAAU,EAAE,gCAAgC,CAAC,QAAQ,CAAC;KACvD,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,KAAe,EAAE,qBAA6B;IACxE,MAAM,QAAQ,GAAG,MAAM,qBAAqB,CAAC,KAAK,EAAE,qBAAqB,CAAC,CAAC;IAC3E,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,KAAe,EAAE,qBAA6B;IACjF,MAAM,QAAQ,GAA8B,EAAE,CAAC;IAC/C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,qBAAqB,EAAE,CAAC,CAAC;QACjF,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,QAAQ,CAAC,IAAI,CAAC,yCAAyC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,KAAe,EAAE,qBAA6B;IAC/E,MAAM,QAAQ,GAA4B,EAAE,CAAC;IAC7C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,qBAAqB,EAAE,CAAC,CAAC;QACjF,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,QAAQ,CAAC,IAAI,CAAC,8CAA8C,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC5E,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,IAAY,EAAE,QAAgB;IAC/D,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;QAClB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAA0B,EAAE,CAAC;IAC7C,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC;IACvB,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,EAAG,CAAC;QAC/B,IAAI,OAAO,CAAC;QACZ,IAAI,CAAC;YACH,OAAO,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5D,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;Y
ACxC,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtB,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC3D,MAAM,OAAO,GAAG,MAAM,mBAAmB,CAAC,KAAK,CAAC,CAAC;gBACjD,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;oBAC1B,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;gBAC5C,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,UAAU;SACd,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;SAC1F,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;SAClB,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;AACxC,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,IAAY;IAC7C,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC;IAC1D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,KAAa,EAAE,KAAa;IACvD,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;QACf,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACnG,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAyB;IACjD,OAAO,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,sBAAsB,CAAC,QAA0D;IACxF,MAAM,KAAK,GAAG,2BAA2B,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;QAC7D,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACtC,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IACH,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;AACtD,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,OAAO,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;AAClC,CAAC"}
|
package/dist/hook-payload.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { hashValue } from "./paths.js";
|
|
2
2
|
import { asRecord, numberField, stringField } from "./status-input.js";
|
|
3
|
-
import {
|
|
3
|
+
import { classifyToolIdentity } from "./tool-metadata.js";
|
|
4
4
|
export const SAFE_HOOK_EVENTS = [
|
|
5
5
|
"PostToolUse",
|
|
6
6
|
"PostToolUseFailure",
|
|
@@ -30,21 +30,25 @@ export function parseHookPayload(raw, fallbackEventName) {
|
|
|
30
30
|
sessionKey: hashValue(sessionId)
|
|
31
31
|
};
|
|
32
32
|
if (hookEventName === "PostToolUseFailure") {
|
|
33
|
-
const
|
|
33
|
+
const identity = classifyToolIdentity(stringField(root.tool_name) || stringField(root.toolName), root.tool_input ?? root.toolInput);
|
|
34
34
|
return {
|
|
35
35
|
...base,
|
|
36
36
|
kind: "tool_failure",
|
|
37
|
-
toolName,
|
|
38
|
-
purpose:
|
|
37
|
+
toolName: identity.displayName,
|
|
38
|
+
purpose: identity.purpose,
|
|
39
|
+
category: identity.category,
|
|
40
|
+
identityHash: identity.identityHash
|
|
39
41
|
};
|
|
40
42
|
}
|
|
41
43
|
if (hookEventName === "PostToolUse") {
|
|
42
|
-
const
|
|
44
|
+
const identity = classifyToolIdentity(stringField(root.tool_name) || stringField(root.toolName), root.tool_input ?? root.toolInput);
|
|
43
45
|
return {
|
|
44
46
|
...base,
|
|
45
47
|
kind: "tool_success",
|
|
46
|
-
toolName,
|
|
47
|
-
purpose:
|
|
48
|
+
toolName: identity.displayName,
|
|
49
|
+
purpose: identity.purpose,
|
|
50
|
+
category: identity.category,
|
|
51
|
+
identityHash: identity.identityHash
|
|
48
52
|
};
|
|
49
53
|
}
|
|
50
54
|
if (hookEventName === "PostToolBatch") {
|
package/dist/hook-payload.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hook-payload.js","sourceRoot":"","sources":["../src/hook-payload.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"hook-payload.js","sourceRoot":"","sources":["../src/hook-payload.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACvE,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAG1D,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,aAAa;IACb,oBAAoB;IACpB,eAAe;IACf,YAAY;IACZ,aAAa;IACb,MAAM;IACN,YAAY;CACJ,CAAC;AAEX,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,iBAA0B;IACtE,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC9B,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,MAAM,aAAa,GAAG,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,iBAAiB,IAAI,SAAS,CAAC;IACrH,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC9E,MAAM,IAAI,GAAG;QACX,SAAS,EAAE,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QAClE,aAAa;QACb,UAAU,EAAE,SAAS,CAAC,SAAS,CAAC;KACjC,CAAC;IAEF,IAAI,aAAa,KAAK,oBAAoB,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC;QACpI,OAAO;YACL,GAAG,IAAI;YACP,IAAI,EAAE,cAAc;YACpB,QAAQ,EAAE,QAAQ,CAAC,WAAW;YAC9B,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,YAAY,EAAE,QAAQ,CAAC,YAAY;SACpC,CAAC;IACJ,CAAC;IAED,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC;QACpI,OAAO;YACL,GAAG,IAAI;YACP,IAAI,EAAE,cAAc;YACpB,QAAQ,EAAE,QAAQ,CAAC,WAAW;YAC9B,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,YAAY,EAAE,QAAQ,CAAC,YAAY;SACpC,CAAC;IACJ,CAAC;IAED,IAAI,aAAa,KAAK,eAAe,EAAE,CAAC;QACtC,OAAO;YACL,GAAG,IAAI;YACP,IAAI,EAAE,YAAY;YAClB,SAAS,EAAE,eAAe,CAAC,IAAI,CAAC;SACjC,CAAC;IACJ,CAAC;IAED,IAAI,aAAa,KAAK,YAAY,IAAI,aAAa,KAAK,aAAa,EAAE,CA
AC;QACtE,OAAO;YACL,GAAG,IAAI;YACP,IAAI,EAAE,YAAY;SACnB,CAAC;IACJ,CAAC;IAED,IAAI,aAAa,KAAK,MAAM,IAAI,aAAa,KAAK,aAAa,EAAE,CAAC;QAChE,OAAO;YACL,GAAG,IAAI;YACP,IAAI,EAAE,MAAM;SACb,CAAC;IACJ,CAAC;IAED,IAAI,aAAa,KAAK,YAAY,EAAE,CAAC;QACnC,OAAO;YACL,GAAG,IAAI;YACP,IAAI,EAAE,aAAa;SACpB,CAAC;IACJ,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,eAAe,CAAC,IAA6B;IACpD,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC9G,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO,KAAK,CAAC,MAAM,CAAC;QACtB,CAAC;QACD,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC"}
|
package/dist/hook-summary.d.ts
CHANGED
|
@@ -3,10 +3,9 @@ export declare function mergeHookSummary(transcript: TranscriptSummary, hookData
|
|
|
3
3
|
failedToolResults: number;
|
|
4
4
|
toolCalls: number;
|
|
5
5
|
compactionEvents: number;
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
purpose?: string;
|
|
10
|
-
}>;
|
|
6
|
+
postCompactionActivity: number;
|
|
7
|
+
repeatedFailures: TranscriptSummary["repeatedFailures"];
|
|
8
|
+
blindRetry?: TranscriptSummary["blindRetry"];
|
|
11
9
|
latestTimestamp?: string;
|
|
10
|
+
latestCompactionTimestamp?: string;
|
|
12
11
|
}): TranscriptSummary;
|