claude-code-replay 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +279 -0
- package/dist/main.mjs +3150 -0
- package/package.json +57 -0
package/dist/main.mjs
ADDED
|
@@ -0,0 +1,3150 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/main.ts
|
|
4
|
+
import { realpathSync as realpathSync3 } from "node:fs";
|
|
5
|
+
import { pathToFileURL } from "node:url";
|
|
6
|
+
|
|
7
|
+
// src/cli.ts
|
|
8
|
+
import * as fs7 from "node:fs";
|
|
9
|
+
import * as os3 from "node:os";
|
|
10
|
+
import * as path10 from "node:path";
|
|
11
|
+
import { Command, CommanderError, InvalidArgumentError } from "commander";
|
|
12
|
+
|
|
13
|
+
// src/actions.ts
|
|
14
|
+
import { spawnSync } from "node:child_process";
|
|
15
|
+
import * as fs3 from "node:fs";
|
|
16
|
+
import * as path4 from "node:path";
|
|
17
|
+
|
|
18
|
+
// src/collect.ts
|
|
19
|
+
import * as fs2 from "node:fs";
|
|
20
|
+
import * as path2 from "node:path";
|
|
21
|
+
|
|
22
|
+
// src/fs-walk.ts
|
|
23
|
+
import * as fs from "node:fs";
|
|
24
|
+
import * as path from "node:path";
|
|
25
|
+
/**
 * Recursively collect the paths of files under `root`.
 *
 * @param {string} root - Directory to start walking from.
 * @param {object} opts
 * @param {(entry: import("node:fs").Dirent) => boolean} opts.filter - Predicate applied to
 *   every entry that is not descended into; the entry's full path is collected when it
 *   returns true.
 * @param {(dir: string, err: Error) => void} [opts.onError] - Called with the directory that
 *   could not be resolved or listed; that directory is then skipped.
 * @param {boolean} [opts.followSymlinks=false] - When true, directories are resolved with
 *   realpath (so collected paths are real paths), symlinks that point at directories are
 *   descended into, and every directory is visited at most once (keyed by device:inode).
 * @returns {string[]} Collected paths in default `Array.prototype.sort` order.
 */
function walkFiles(root, opts) {
  const { filter, onError, followSymlinks = false } = opts;
  const out = [];
  const visited = followSymlinks ? new Set() : null;
  const asError = (e) => (e instanceof Error ? e : new Error(String(e)));

  // A Dirent describes the link itself, so isDirectory() is false for a symlink that
  // points at a directory. Stat the link target to decide whether to descend.
  function shouldDescend(entry, full) {
    if (entry.isDirectory()) return true;
    if (visited === null || !entry.isSymbolicLink()) return false;
    try {
      return fs.statSync(full).isDirectory();
    } catch {
      // Dangling or unreadable link: treat it as a non-directory and let `filter` decide.
      return false;
    }
  }

  function walk(dir) {
    let target = dir;
    if (visited !== null) {
      let real;
      let st;
      try {
        real = fs.realpathSync(dir);
        st = fs.statSync(real);
      } catch (e) {
        onError?.(dir, asError(e));
        return;
      }
      const key = `${st.dev}:${st.ino}`;
      // Already walked through another path, or a link cycle led back here.
      if (visited.has(key)) return;
      visited.add(key);
      target = real;
    }
    let entries;
    try {
      entries = fs.readdirSync(target, { withFileTypes: true });
    } catch (e) {
      onError?.(dir, asError(e));
      return;
    }
    for (const entry of entries) {
      const full = path.join(target, entry.name);
      if (shouldDescend(entry, full)) {
        walk(full);
      } else if (filter(entry)) {
        out.push(full);
      }
    }
  }

  walk(root);
  out.sort();
  return out;
}
|
|
66
|
+
|
|
67
|
+
// src/collect.ts
|
|
68
|
+
/**
 * Read `key` from `input`, yielding the empty string unless the stored value is a string.
 *
 * @param {object} input - Raw tool input.
 * @param {string} key - Property to read.
 * @returns {string} The string value, or "" when it is absent or not a string.
 */
function strField(input, key) {
  const value = input[key];
  if (typeof value !== "string") {
    return "";
  }
  return value;
}
|
|
72
|
+
/**
 * Normalise an event's raw `input` into a typed record for the four replayable tools.
 *
 * Non-string text fields collapse to "", non-number `offset`/`limit` collapse to null and
 * `replace_all` is coerced to a boolean.
 *
 * @param {{ toolName: string, input: object }} ev - Event whose input is normalised.
 * @returns {object | null} A record tagged with `tool` ("Bash" | "Write" | "Edit" | "Read"),
 *   or null for any other tool name.
 */
function asToolInput(ev) {
  const { input, toolName } = ev;
  const text = (key) => {
    const value = input[key];
    return typeof value === "string" ? value : "";
  };
  const numberOrNull = (key) => {
    const value = input[key];
    return typeof value === "number" ? value : null;
  };

  if (toolName === "Bash") {
    return { tool: "Bash", command: text("command") };
  }
  if (toolName === "Write") {
    return { tool: "Write", file_path: text("file_path"), content: text("content") };
  }
  if (toolName === "Edit") {
    return {
      tool: "Edit",
      file_path: text("file_path"),
      // A missing or non-string old_string becomes "" here; doEdit refuses to run when
      // old_string is "", so both cases halt instead of silently prepending new_string.
      old_string: text("old_string"),
      new_string: text("new_string"),
      replace_all: Boolean(input.replace_all),
    };
  }
  if (toolName === "Read") {
    return {
      tool: "Read",
      file_path: text("file_path"),
      offset: numberOrNull("offset"),
      limit: numberOrNull("limit"),
    };
  }
  return null;
}
|
|
106
|
+
/** Error thrown while collecting events from session logs (see parseOrThrow). */
class CollectFailure extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    this.name = "CollectFailure";
  }
}
|
|
112
|
+
// Matches raw text that mentions a tool_use/tool_result item; parseOrThrow throws on an unparseable line that matches, instead of skipping it.
var TOOL_PARSE_HALT_RE = /"type"\s*:\s*"(?:tool_use|tool_result)"/;
|
|
113
|
+
/**
 * Stream a JSONL file, yielding `[lineNumber, parsedValue]` for every non-blank line.
 *
 * The file is read synchronously in 64 KiB chunks. Bytes are decoded with a streaming
 * UTF-8 decoder so a multi-byte character split across two chunks is decoded intact
 * (decoding each chunk on its own would turn both halves into U+FFFD).
 *
 * @param {string} p - Path of the JSONL file.
 * @yields {[number, unknown]} Zero-based line number (blank lines are counted but not
 *   yielded) and the value returned by `parseOrThrow`; lines for which it returns
 *   undefined are skipped.
 * @throws Whatever `parseOrThrow` or the underlying fs calls throw. The descriptor is
 *   closed in every case, including when the consumer stops iterating early.
 */
function* iterJsonl(p) {
  const CHUNK_SIZE = 64 * 1024;
  const fd = fs2.openSync(p, "r");
  try {
    const buf = Buffer.alloc(CHUNK_SIZE);
    // ignoreBOM keeps a leading BOM in the text, matching Buffer#toString; trim() drops it.
    const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
    let partial = "";
    let lineNo = 0;
    let bytesRead;
    while ((bytesRead = fs2.readSync(fd, buf, 0, CHUNK_SIZE, null)) > 0) {
      partial += decoder.decode(buf.subarray(0, bytesRead), { stream: true });
      // Scan with an offset and cut the consumed prefix once per chunk.
      let start = 0;
      let nl;
      while ((nl = partial.indexOf("\n", start)) !== -1) {
        const raw = partial.slice(start, nl).trim();
        start = nl + 1;
        const i = lineNo++;
        if (!raw) continue;
        const parsed = parseOrThrow(raw, p, i);
        if (parsed !== void 0) yield [i, parsed];
      }
      partial = partial.slice(start);
    }
    // Flush bytes of an incomplete trailing sequence (decoded as U+FFFD).
    partial += decoder.decode();
    const tail = partial.trim();
    if (tail) {
      const parsed = parseOrThrow(tail, p, lineNo);
      if (parsed !== void 0) yield [lineNo, parsed];
    }
  } finally {
    fs2.closeSync(fd);
  }
}
|
|
142
|
+
/**
 * Parse one JSONL line, deciding what to do when it is not valid JSON.
 *
 * @param {string} raw - Trimmed line text.
 * @param {string} p - Path of the file the line came from (only its basename is reported).
 * @param {number} lineNo - Line number used in diagnostics.
 * @returns {unknown} The parsed value, or undefined when the line was unparseable and
 *   skipped (a warning is written to stderr).
 * @throws {CollectFailure} When the unparseable line matches TOOL_PARSE_HALT_RE.
 */
function parseOrThrow(raw, p, lineNo) {
  try {
    return JSON.parse(raw);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    const where = `${path2.basename(p)}:${lineNo}`;
    if (!TOOL_PARSE_HALT_RE.test(raw)) {
      process.stderr.write(`warn: skip ${where}: ${reason}\n`);
      return undefined;
    }
    throw new CollectFailure(
      `unparseable tool_use/tool_result line at ${where}: ${reason} (silently skipping would drop a Write/Edit/Read event from replay)`
    );
  }
}
|
|
157
|
+
/**
 * Flatten a tool_result `content` value into plain text.
 *
 * @param {unknown} content - A string, or an array of parts.
 * @returns {string} The string itself; for an array, the `text` of every object part that
 *   has a `text` property (non-string `text` counts as "") joined with newlines; ""
 *   for anything else.
 */
function flattenText(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  const pieces = [];
  for (const part of content) {
    const hasText = part != null && typeof part === "object" && "text" in part;
    if (!hasText) continue;
    pieces.push(typeof part.text === "string" ? part.text : "");
  }
  return pieces.join("\n");
}
|
|
164
|
+
/**
 * Collect every tool_use event from the `.jsonl` session logs under one or more
 * directories, pair each with its tool_result from the same file, and return the events
 * in replay order.
 *
 * @param {string | string[]} logsDirs - One directory or a list of directories to scan.
 * @param {string | null} [cutoffTs] - When given, events whose timestamp is >= this value
 *   (plain string comparison) are dropped; events without a timestamp are dropped with a
 *   warning on stderr.
 * @returns {object[]} Events sorted by timestamp, then session file name, then line
 *   number, each with a sequential `index`.
 * @throws Whatever `iterJsonl` throws for an unreadable or unparseable log.
 */
function collect(logsDirs, cutoffTs) {
  const dirs = typeof logsDirs === "string" ? [logsDirs] : logsDirs;
  const rawEvents = [];
  const jsonls = [];
  for (const dir of dirs) {
    for (const j of walkFiles(dir, {
      filter: (e) => e.isFile() && e.name.endsWith(".jsonl")
    })) {
      jsonls.push(j);
    }
  }
  for (const jsonl of jsonls) {
    const uses = [];
    const results = new Map();
    for (const [lineNo, obj] of iterJsonl(jsonl)) {
      // A line may hold any JSON value; `null` would crash on the property read below.
      if (obj === null || typeof obj !== "object") continue;
      const otype = obj.type;
      if (otype !== "assistant" && otype !== "user") continue;
      const message = obj.message ?? {};
      const content = message.content;
      if (!Array.isArray(content)) continue;
      for (const item of content) {
        if (!item || typeof item !== "object") continue;
        const itype = item.type;
        if (itype === "tool_use") {
          uses.push({
            timestamp: obj.timestamp ?? "",
            sessionId: obj.sessionId ?? null,
            sessionFile: path2.basename(jsonl),
            lineInSession: lineNo,
            isSidechain: Boolean(obj.isSidechain),
            cwd: obj.cwd ?? null,
            toolUseId: item.id ?? "",
            toolName: item.name ?? "",
            input: item.input ?? {}
          });
        } else if (itype === "tool_result") {
          const tuId = item.tool_use_id;
          if (typeof tuId !== "string" || !tuId) continue;
          results.set(tuId, {
            isError: Boolean(item.is_error),
            text: flattenText(item.content)
          });
        }
      }
    }
    // Results are matched only after the whole file is read, so order within it is free.
    for (const u of uses) {
      const r = results.get(u.toolUseId);
      const re = {
        ...u,
        isError: Boolean(r?.isError),
        resultText: r ? r.text : null
      };
      if (cutoffTs != null) {
        if (!re.timestamp) {
          process.stderr.write(
            `warn: cutoff active; dropping ${u.sessionFile}:${u.lineInSession} (${u.toolName}) \u2014 event has no timestamp\n`
          );
          continue;
        }
        if (re.timestamp >= cutoffTs) continue;
      }
      rawEvents.push(re);
    }
  }
  rawEvents.sort((a, b) => {
    if (a.timestamp < b.timestamp) return -1;
    if (a.timestamp > b.timestamp) return 1;
    if (a.sessionFile < b.sessionFile) return -1;
    if (a.sessionFile > b.sessionFile) return 1;
    return a.lineInSession - b.lineInSession;
  });
  return rawEvents.map((e, i) => ({ index: i, ...e }));
}
|
|
240
|
+
|
|
241
|
+
// src/paths.ts
|
|
242
|
+
import * as path3 from "node:path";
|
|
243
|
+
/** Error thrown by rewritePath when an absolute path lies under none of the source roots. */
class OutsideSourceRoot extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    this.name = "OutsideSourceRoot";
  }
}
|
|
249
|
+
/**
 * Express `p` relative to `root` when `p` is `root` itself or lies beneath it.
 *
 * @param {string} p - Path to test.
 * @param {string} root - Candidate containing directory.
 * @returns {string | null} "" when the paths are equal, the relative path when `p` is
 *   inside `root`, or null when it is outside.
 */
function relativeIfInside(p, root) {
  const rel = path3.relative(root, p);
  if (rel === "") return "";
  // Only a leading ".." *segment* escapes the root; a name such as "..cfg" is an ordinary
  // entry inside it, so a bare startsWith("..") check would wrongly reject it.
  const escapesRoot =
    rel === ".." || rel.startsWith(`..${path3.sep}`) || path3.isAbsolute(rel);
  return escapesRoot ? null : rel;
}
|
|
255
|
+
/**
 * Return a copy of `roots` ordered from the longest string to the shortest.
 * The input is left untouched; equal-length entries keep their relative order.
 *
 * @param {Iterable<string>} roots
 * @returns {string[]}
 */
function sortByLengthDesc(roots) {
  const ordered = [...roots];
  ordered.sort((left, right) => right.length - left.length);
  return ordered;
}
|
|
258
|
+
/**
 * Map an absolute path under one of the source roots onto the same location under `target`.
 *
 * @param {string} p - Path to rewrite; non-absolute paths are returned unchanged.
 * @param {string[]} sourceRoots - Source roots; the longest one containing `p` wins.
 * @param {string} target - Replay directory.
 * @returns {string} `target` itself when `p` equals a root, otherwise `target` joined with
 *   the path relative to the matching root.
 * @throws {OutsideSourceRoot} When `p` is absolute but under none of the roots.
 */
function rewritePath(p, sourceRoots, target) {
  if (path3.isAbsolute(p)) {
    for (const candidate of sortByLengthDesc(sourceRoots)) {
      const rel = relativeIfInside(p, candidate);
      if (rel !== null) {
        if (rel === "") return target;
        return path3.join(target, rel);
      }
    }
    throw new OutsideSourceRoot(
      `${p} is not under any of: ${sourceRoots.join(", ")}`
    );
  }
  return p;
}
|
|
271
|
+
/**
 * Replace every occurrence of a source root inside a shell command with `target`.
 *
 * A root only matches when it is followed by "/", the end of the string, or a character
 * that cannot continue a path segment, so "/src/project" does not match inside
 * "/src/project2". Longer roots take precedence over shorter ones.
 *
 * All roots are matched in a single pass. Replacing root by root would rewrite text that
 * an earlier pass had just inserted whenever `target` itself lies under a shorter root.
 * The replacement is supplied through a function so `$` sequences in `target` (such as
 * "$&") are inserted literally rather than interpreted by `String.prototype.replace`.
 *
 * @param {string} cmd - Command text.
 * @param {string[]} sourceRoots - Roots to look for; empty strings are ignored.
 * @param {string} target - Replacement path.
 * @returns {string} The rewritten command.
 */
function rewriteCommand(cmd, sourceRoots, target) {
  const roots = sortByLengthDesc(sourceRoots).filter((root) => root !== "");
  if (roots.length === 0) return cmd;
  const alternatives = roots.map((root) => escapeRegExp(root)).join("|");
  const re = new RegExp(`(?:${alternatives})(?=/|$|[^A-Za-z0-9._/-])`, "g");
  return cmd.replace(re, () => target);
}
|
|
282
|
+
/**
 * Backslash-escape regular-expression metacharacters so `s` matches literally when
 * embedded in a RegExp source.
 *
 * @param {string} s
 * @returns {string}
 */
function escapeRegExp(s) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metacharacters, (ch) => `\\${ch}`);
}
|
|
285
|
+
/**
 * Tell whether `p` equals, or lies beneath, at least one of the given roots.
 *
 * @param {string} p - Path to test.
 * @param {Iterable<string>} sourceRoots - Candidate roots.
 * @returns {boolean}
 */
function pathInsideAny(p, sourceRoots) {
  for (const candidate of sourceRoots) {
    const rel = relativeIfInside(p, candidate);
    if (rel === null) continue;
    return true;
  }
  return false;
}
|
|
291
|
+
/**
 * Tell whether `inner` is the same path as `outer` or lies beneath it.
 *
 * @param {string} inner - Path to test.
 * @param {string} outer - Candidate containing directory.
 * @returns {boolean}
 */
function pathIsInsideOrEqual(inner, outer) {
  const rel = path3.relative(outer, inner);
  if (rel === "") return true;
  // Only a leading ".." *segment* leaves `outer`; an entry named e.g. "..cfg" is inside it.
  const escapesOuter =
    rel === ".." || rel.startsWith(`..${path3.sep}`) || path3.isAbsolute(rel);
  return !escapesOuter;
}
|
|
296
|
+
|
|
297
|
+
// src/actions.ts
|
|
298
|
+
// Shell passed as the `shell` option when executeBash spawns a command: $SHELL, or /bin/bash when that is unset or empty.
var REPLAY_SHELL = process.env.SHELL || "/bin/bash";
|
|
299
|
+
// Environment variables that executeBash copies from process.env into the spawned command's environment.
var SAFE_ENV_KEYS = ["PATH", "LANG", "LC_ALL", "TERM", "TMPDIR", "TZ"];
|
|
300
|
+
// Command separators (&&, ||, ;, newline, |) with surrounding whitespace. The capture group makes split() return segments at even indices and separators at odd indices.
var CMD_SPLITTER = /(\s*(?:&&|\|\||;|\n|\|)\s*)/;
|
|
301
|
+
// A whitespace-delimited run of 7-40 lowercase hex characters. The regex is global, so test()/exec() advance lastIndex and callers using them must reset it.
var HEX_TOKEN_RE = /(?<=\s|^)[0-9a-f]{7,40}(?=\s|$)/g;
|
|
302
|
+
// Matches a command segment that starts with `git`, optional dash-prefixed options, and one of the listed subcommands.
var GIT_SHA_CONSUMING_RE = /^\s*git(?:\s+-[A-Za-z]\S*)*\s+(?:cherry-pick|revert|reset|rev-parse|show|log|branch|tag|merge|rebase|checkout|diff|range-diff|describe)\b/;
|
|
303
|
+
/**
 * Render a timestamp as "YYYY-MM-DD HH:MM:SS +hhmm" in the process's local time zone.
 *
 * @param {string} ts - Any value accepted by the `Date` constructor.
 * @returns {string} The formatted local time, or `ts` unchanged when it cannot be parsed.
 */
function toLocalGitDate(ts) {
  const when = new Date(ts);
  if (Number.isNaN(when.getTime())) {
    return ts;
  }
  const two = (n) => String(n).padStart(2, "0");

  // getTimezoneOffset() is "UTC minus local" in minutes, hence the negation.
  const offsetMinutes = -when.getTimezoneOffset();
  const magnitude = Math.abs(offsetMinutes);
  const zone = [
    offsetMinutes >= 0 ? "+" : "-",
    two(Math.floor(magnitude / 60)),
    two(magnitude % 60),
  ].join("");

  const datePart = [when.getFullYear(), two(when.getMonth() + 1), two(when.getDate())].join("-");
  const timePart = [when.getHours(), when.getMinutes(), when.getSeconds()].map(two).join(":");
  return `${datePart} ${timePart} ${zone}`;
}
|
|
313
|
+
// Memoised result of readHostGitIdentity(): undefined until the first call, then either
// { name, email } or null.
var HOST_GIT_IDENTITY;

/**
 * Look up `user.name` and `user.email` with `git config --get`, once per process.
 *
 * @returns {{ name: string, email: string } | null} The identity, or null when either
 *   value is empty or its `git config` call did not exit with status 0.
 */
function readHostGitIdentity() {
  if (HOST_GIT_IDENTITY === undefined) {
    const lookup = (key) => {
      const { status, stdout } = spawnSync("git", ["config", "--get", key], { encoding: "utf8" });
      return status === 0 ? stdout.trim() : "";
    };
    const name = lookup("user.name");
    const email = lookup("user.email");
    HOST_GIT_IDENTITY = name !== "" && email !== "" ? { name, email } : null;
  }
  return HOST_GIT_IDENTITY;
}
|
|
325
|
+
/** Error thrown when a Write/Edit action cannot be applied to the replay target. */
class ActionFailure extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    this.name = "ActionFailure";
  }
}
|
|
331
|
+
/** Error thrown by executeBash when a replayed command does not exit with status 0. */
class ExecuteFailure extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    this.name = "ExecuteFailure";
  }
}
|
|
337
|
+
/**
 * Ask git for the commit HEAD points at inside `target` (5 second timeout).
 *
 * @param {string} target - Directory passed to `git -C`.
 * @returns {{ kind: "sha", sha: string } | { kind: "empty" } | { kind: "error", err: Error }}
 *   "sha" with the trimmed stdout on success; "empty" when stdout is blank or stderr says
 *   HEAD cannot be resolved yet; "error" for spawn errors, signals and other failures.
 */
function currentTargetHead(target) {
  const failed = (err) => ({ kind: "error", err });
  try {
    const { error, signal, status, stderr, stdout } = spawnSync(
      "git",
      ["-C", target, "rev-parse", "HEAD"],
      { encoding: "utf8", timeout: 5e3 }
    );
    if (error) return failed(error);
    if (signal) return failed(new Error(`git rev-parse killed by signal ${signal}`));

    if (status !== 0) {
      const complaint = (stderr || "").trim();
      // These messages mean "no commit yet" rather than a real failure.
      const headUnborn =
        /unknown revision|ambiguous argument 'HEAD'|does not have any commits/i.test(complaint);
      if (headUnborn) return { kind: "empty" };
      const detail = complaint ? `: ${complaint}` : "";
      return failed(new Error(`git rev-parse HEAD exited ${status}${detail}`));
    }

    const sha = stdout.trim();
    return sha ? { kind: "sha", sha } : { kind: "empty" };
  } catch (e) {
    return failed(e instanceof Error ? e : new Error(String(e)));
  }
}
|
|
374
|
+
/**
 * Tell whether any segment of `cmd` is a SHA-consuming git command that contains a
 * hex token (see GIT_SHA_CONSUMING_RE and HEX_TOKEN_RE).
 *
 * HEX_TOKEN_RE is global, so `test()` starts at its `lastIndex` and advances it on a
 * match. The index is reset before and after every probe; otherwise a positive answer
 * would leave it non-zero and the next call could skip a token near the start of its
 * input.
 *
 * @param {string} cmd - Full command text, possibly several commands chained together.
 * @returns {boolean}
 */
function cmdHasShaToken(cmd) {
  const parts = cmd.split(CMD_SPLITTER);
  // Even indices are command segments; odd indices are the captured separators.
  for (let i = 0; i < parts.length; i += 2) {
    const part = parts[i];
    if (!GIT_SHA_CONSUMING_RE.test(part)) continue;
    HEX_TOKEN_RE.lastIndex = 0;
    const hasToken = HEX_TOKEN_RE.test(part);
    HEX_TOKEN_RE.lastIndex = 0;
    if (hasToken) return true;
  }
  return false;
}
|
|
384
|
+
/**
 * Replace hex tokens in SHA-consuming git segments of `cmd` with the target's current HEAD.
 *
 * @param {string} cmd - Command text, possibly several commands chained together.
 * @param {string} target - Repository directory whose HEAD is looked up.
 * @param {(message: string) => void} [emit] - Receives a warning when HEAD cannot be read
 *   and the command contains a SHA token.
 * @param {{ index?: number }} [context] - Event index quoted in that warning.
 * @returns {string} The rewritten command; `cmd` unchanged when the target has no commit
 *   yet or HEAD could not be read.
 */
function rewriteGitHashes(cmd, target, emit, context) {
  const head = currentTargetHead(target);

  if (head.kind === "empty") {
    return cmd;
  }

  if (head.kind === "error") {
    if (emit && cmdHasShaToken(cmd)) {
      const idx = context?.index;
      const where = typeof idx === "number" ? ` at event index ${idx}` : "";
      emit(
        `warn: rewriteGitHashes skipped${where} (${head.err.message}); original SHAs preserved verbatim in command: ${cmd}`
      );
    }
    return cmd;
  }

  if (head.kind === "sha") {
    const { sha } = head;
    // split() with a capturing group alternates segments (even) and separators (odd);
    // separators pass through untouched.
    return cmd
      .split(CMD_SPLITTER)
      .map((piece, i) => {
        const isSegment = i % 2 === 0;
        if (isSegment && GIT_SHA_CONSUMING_RE.test(piece)) {
          return piece.replace(HEX_TOKEN_RE, sha);
        }
        return piece;
      })
      .join("");
  }
}
|
|
417
|
+
/**
 * Replay a Write event: create the rewritten destination (and its parent directories)
 * with the recorded content.
 *
 * @param {object} event - Event whose `toolName` is "Write".
 * @param {string} target - Replay directory.
 * @param {string[]} sourceRoots - Source roots used to rewrite the recorded path.
 * @returns {string} Summary line: `wrote <dst> (<n> bytes)`.
 * @throws {ActionFailure} When the event is not a Write, when its file_path is missing or
 *   empty, or when the path lies outside every source root.
 */
function doWrite(event, target, sourceRoots) {
  const ti = asToolInput(event);
  if (ti?.tool !== "Write") {
    throw new ActionFailure(`doWrite called for non-Write tool '${event.toolName}'`);
  }
  const filePath = ti.file_path;
  // An absent file_path is normalised to "" and would otherwise surface later as a raw
  // ENOENT from writeFileSync("") instead of an action failure.
  if (filePath === "") {
    throw new ActionFailure("Write has missing/empty file_path");
  }
  const content = ti.content;
  let dst;
  try {
    dst = rewritePath(filePath, sourceRoots, target);
  } catch (e) {
    if (e instanceof OutsideSourceRoot) {
      throw new ActionFailure(`Write outside source root: ${e.message}`);
    }
    throw e;
  }
  fs3.mkdirSync(path4.dirname(dst), { recursive: true });
  fs3.writeFileSync(dst, content);
  return `wrote ${dst} (${Buffer.byteLength(content, "utf8")} bytes)`;
}
|
|
437
|
+
/**
 * Replay an Edit event: substitute `old_string` with `new_string` in the rewritten file.
 *
 * @param {object} event - Event whose `toolName` is "Edit".
 * @param {string} target - Replay directory.
 * @param {string[]} sourceRoots - Source roots used to rewrite the recorded path.
 * @returns {string} Summary line: `edited <dst>`.
 * @throws {ActionFailure} When the event is not an Edit, old_string is missing or empty,
 *   the path is outside every source root, the file does not exist, or it does not
 *   contain old_string.
 */
function doEdit(event, target, sourceRoots) {
  const ti = asToolInput(event);
  if (ti?.tool !== "Edit") {
    throw new ActionFailure(`doEdit called for non-Edit tool '${event.toolName}'`);
  }
  const {
    file_path: filePath,
    old_string: oldStr,
    new_string: newStr,
    replace_all: replaceAll,
  } = ti;
  if (oldStr === "") {
    throw new ActionFailure(
      `Edit has missing/empty old_string (would silently prepend new_string): ${filePath}`
    );
  }

  let dst;
  try {
    dst = rewritePath(filePath, sourceRoots, target);
  } catch (e) {
    if (!(e instanceof OutsideSourceRoot)) throw e;
    throw new ActionFailure(`Edit outside source root: ${e.message}`);
  }
  if (!fs3.existsSync(dst)) {
    throw new ActionFailure(`Edit target missing: ${dst}`);
  }

  const current = fs3.readFileSync(dst, "utf8");
  const firstHit = current.indexOf(oldStr);
  if (firstHit === -1) {
    const preview = JSON.stringify(oldStr.slice(0, 80));
    throw new ActionFailure(`Edit old_string not in ${dst}: ${preview}`);
  }

  // split/join and slicing are literal substitutions: "$" in new_string is never special.
  const updated = replaceAll
    ? current.split(oldStr).join(newStr)
    : current.slice(0, firstHit) + newStr + current.slice(firstHit + oldStr.length);
  fs3.writeFileSync(dst, updated);
  return `edited ${dst}`;
}
|
|
478
|
+
/**
 * Run a recorded Bash command inside the replay target.
 *
 * The command (or `commandOverride` when it is a non-empty string) has source roots and
 * git hashes rewritten, then runs through REPLAY_SHELL with `cwd` set to `target` and a
 * reduced environment: HOME=target, the SAFE_ENV_KEYS copied from this process, git
 * author/committer dates pinned to the event timestamp when there is one, and the host's
 * git identity when one is configured.
 *
 * @param {object} event - Event whose `toolName` is "Bash".
 * @param {string} target - Replay directory.
 * @param {string[]} sourceRoots - Source roots rewritten inside the command.
 * @param {string} [commandOverride] - Replacement command text.
 * @param {(message: string) => void} [emit] - Warning sink handed to rewriteGitHashes.
 * @returns {string} `exit 0 (<first 80 characters of the command>)`.
 * @throws {ExecuteFailure} When the command fails to spawn, is killed by a signal, or
 *   exits with a non-zero status.
 */
function executeBash(event, target, sourceRoots, commandOverride, emit) {
  const ti = asToolInput(event);
  const recorded = ti?.tool === "Bash" ? ti.command : "";
  const useOverride = typeof commandOverride === "string" && commandOverride !== "";
  const command = rewriteGitHashes(
    rewriteCommand(useOverride ? commandOverride : recorded, sourceRoots, target),
    target,
    emit,
    { index: event.index }
  );

  const env = { HOME: target };
  for (const key of SAFE_ENV_KEYS) {
    const inherited = process.env[key];
    if (inherited !== undefined) env[key] = inherited;
  }
  const ts = event.timestamp || "";
  if (ts) {
    const pinned = toLocalGitDate(ts);
    env.GIT_AUTHOR_DATE = pinned;
    env.GIT_COMMITTER_DATE = pinned;
  }
  const identity = readHostGitIdentity();
  if (identity) {
    const { name, email } = identity;
    env.GIT_AUTHOR_NAME = name;
    env.GIT_AUTHOR_EMAIL = email;
    env.GIT_COMMITTER_NAME = name;
    env.GIT_COMMITTER_EMAIL = email;
  }

  const result = spawnSync(command, {
    shell: REPLAY_SHELL,
    cwd: target,
    encoding: "utf8",
    env
  });
  if (result.status === 0) {
    return `exit 0 (${command.slice(0, 80)})`;
  }

  let cause;
  if (result.error) {
    cause = `spawn error: ${result.error.message}`;
  } else if (result.signal) {
    cause = `signal ${result.signal}`;
  } else {
    cause = `exit ${result.status}`;
  }
  // Quote only the last 600 characters of output, preferring stderr.
  const tail = (result.stderr || result.stdout || "").slice(-600);
  throw new ExecuteFailure(`classifier-approved Bash failed (${cause}): ${command}\n${tail}`);
}
|
|
517
|
+
/**
 * Dispatch a file-mutating event to its handler.
 *
 * @param {object} event - Event to apply; only "Write" and "Edit" are handled.
 * @param {string} target - Replay directory.
 * @param {string[]} sourceRoots - Source roots used to rewrite recorded paths.
 * @returns {string} The handler's summary line.
 * @throws {ActionFailure} For any other tool name, or whatever the handler throws.
 */
function applyAction(event, target, sourceRoots) {
  const { toolName } = event;
  switch (toolName) {
    case "Write":
      return doWrite(event, target, sourceRoots);
    case "Edit":
      return doEdit(event, target, sourceRoots);
    default:
      throw new ActionFailure(`no action handler for tool '${toolName}'`);
  }
}
|
|
523
|
+
|
|
524
|
+
// src/checkpoints.ts
|
|
525
|
+
import * as fs4 from "node:fs";
|
|
526
|
+
import * as path5 from "node:path";
|
|
527
|
+
/** Error thrown when a Read checkpoint cannot be verified or healed. */
class CheckpointFailure extends Error {
  /** @param {string} message - Human-readable description of the failure. */
  constructor(message) {
    super(message);
    this.name = "CheckpointFailure";
  }
}
|
|
533
|
+
// A numbered line: optional whitespace, digits, a tab, then the captured remainder. stripCatN keeps only the capture.
var CAT_N_LINE = /^\s*\d+\t(.*)$/;
|
|
534
|
+
// A trailing "[Lines N-M omitted]" line at the very end of the text; doRead strips it and then compares only a prefix.
var TRUNCATION_MARKER = /\n\s*\[Lines \d+-\d+ omitted\]\s*$/;
|
|
535
|
+
// Text that consists solely of a <system-reminder>…</system-reminder> envelope; doRead treats such a result as empty.
var SENTINEL_ENVELOPE = /^<system-reminder>[\s\S]*<\/system-reminder>\s*$/;
|
|
536
|
+
/**
 * Drop at most one trailing "\n" from `s`.
 *
 * @param {string} s
 * @returns {string}
 */
function trimTrailingNewline(s) {
  if (!s.endsWith("\n")) {
    return s;
  }
  return s.slice(0, s.length - 1);
}
|
|
539
|
+
/**
 * Remove the "<number><tab>" prefix from every line that has one (see CAT_N_LINE);
 * lines without the prefix are kept as they are.
 *
 * @param {string} text - Newline-separated text.
 * @returns {string} The text with the prefixes removed.
 */
function stripCatN(text) {
  return text
    .split("\n")
    .map((line) => {
      const numbered = CAT_N_LINE.exec(line);
      return numbered ? numbered[1] : line;
    })
    .join("\n");
}
|
|
551
|
+
/**
 * Produce a simple positional diff of two line arrays: for every index where the lines
 * differ, emit "-<a line>" and/or "+<b line>" (whichever side has a line at that index).
 *
 * @param {string[]} a - Lines reported with "-".
 * @param {string[]} b - Lines reported with "+".
 * @returns {string} The diff lines joined with newlines; "" when the arrays are identical.
 */
function lineDiff(a, b) {
  const rows = [];
  const total = Math.max(a.length, b.length);
  for (let i = 0; i < total; i += 1) {
    const left = a[i];
    const right = b[i];
    if (left !== right) {
      if (left !== undefined) rows.push(`-${left}`);
      if (right !== undefined) rows.push(`+${right}`);
    }
  }
  return rows.join("\n");
}
|
|
563
|
+
/**
 * Materialise the content observed by a full Read: create the parent directories and
 * write `expected` to `dst`.
 *
 * @param {string} dst - Destination file path.
 * @param {string} expected - Content to write.
 * @returns {string} Summary line: `APPLIED Read <dst> (<n> bytes)`.
 */
function applyFullRead(dst, expected) {
  const parent = path5.dirname(dst);
  fs4.mkdirSync(parent, { recursive: true });
  fs4.writeFileSync(dst, expected);
  const size = Buffer.byteLength(expected, "utf8");
  return `APPLIED Read ${dst} (${size} bytes)`;
}
|
|
568
|
+
/**
 * Decide whether a snapshot body agrees with what a Read observed.
 *
 * @param {Buffer} body - Snapshot content.
 * @param {string} expected - Text the Read returned.
 * @param {boolean} isChunked - Whether the Read used offset/limit.
 * @param {number | null} offset - 1-based first line of a chunked Read (null means 1).
 * @param {boolean} truncated - Whether the Read result was cut short.
 * @returns {boolean} For a body that does not round-trip through UTF-8: true only for a
 *   full, untruncated Read whose text encodes to exactly the same bytes. Otherwise:
 *   chunked reads compare the matching line window, truncated reads compare a prefix,
 *   and full reads compare the whole text ignoring one trailing newline on either side.
 */
function snapshotMatchesExpected(body, expected, isChunked, offset, truncated) {
  const decoded = body.toString("utf8");
  const roundTrips = Buffer.from(decoded, "utf8").equals(body);
  if (!roundTrips) {
    // Not valid UTF-8: line and prefix comparisons are meaningless on such content.
    if (isChunked || truncated) return false;
    return Buffer.from(expected, "utf8").equals(body);
  }

  if (isChunked) {
    const first = Math.max(0, (offset ?? 1) - 1);
    const lineCount = expected.split("\n").length;
    const window = decoded.split("\n").slice(first, first + lineCount);
    return window.join("\n") === expected;
  }

  if (truncated) {
    return decoded.startsWith(expected);
  }

  return trimTrailingNewline(decoded) === trimTrailingNewline(expected);
}
|
|
589
|
+
/**
 * Try to heal a failed Read checkpoint from a snapshot: when the snapshot index holds a
 * body for the file that matches what the Read observed, write that body to disk.
 *
 * @param {object} event - The Read event.
 * @param {string} target - Replay directory.
 * @param {string[]} sourceRoots - Source roots used to rewrite the recorded path.
 * @param {{ findAtOrBefore(relPath: string, timestamp: string): Buffer | null }} snapshotIndex
 *   - Snapshot lookup; called with the path relative to `target` and the event timestamp.
 * @param {string} expected - Text the Read returned.
 * @param {boolean} isChunked - Whether the Read used offset/limit.
 * @param {number | null} offset - 1-based first line of a chunked Read.
 * @param {boolean} truncated - Whether the Read result was cut short.
 * @returns {string | null} `SNAPSHOT Read <dst> (<n> bytes)` when healed; null when the
 *   path is outside the source roots or the target, no snapshot exists, or the snapshot
 *   does not match.
 */
function trySnapshot(event, target, sourceRoots, snapshotIndex, expected, isChunked, offset, truncated) {
  const ti = asToolInput(event);
  const filePath = ti?.tool === "Read" ? ti.file_path : "";
  let dst;
  try {
    dst = rewritePath(filePath, sourceRoots, target);
  } catch (e) {
    if (e instanceof OutsideSourceRoot) return null;
    throw e;
  }
  const rel = path5.relative(target, dst);
  // Only a leading ".." *segment* leaves the target; an entry named e.g. "..cfg" is still
  // inside it, so a bare startsWith("..") check would refuse to heal such files.
  const escapesTarget =
    rel === ".." || rel.startsWith(`..${path5.sep}`) || path5.isAbsolute(rel);
  if (escapesTarget) return null;
  const lookupPath = rel === "" ? "." : rel;
  const body = snapshotIndex.findAtOrBefore(lookupPath, event.timestamp || "");
  if (body === null) return null;
  if (!snapshotMatchesExpected(body, expected, isChunked, offset, truncated)) {
    return null;
  }
  fs4.mkdirSync(path5.dirname(dst), { recursive: true });
  fs4.writeFileSync(dst, body);
  return `SNAPSHOT Read ${dst} (${body.length} bytes)`;
}
|
|
611
|
+
/**
 * Verify a Read checkpoint: the file in the replay target must show what the recorded
 * Read result showed.
 *
 * The recorded text is cleaned first: a result that is only a <system-reminder> envelope
 * counts as empty, a trailing "[Lines N-M omitted]" marker is removed (the read is then
 * "truncated"), and line-number prefixes are stripped. Chunked reads (offset and/or
 * limit given) compare the matching line window, truncated reads compare a prefix, and
 * full reads compare the whole file ignoring one trailing newline.
 *
 * On a missing file or a mismatch the function first tries to heal from `snapshotIndex`,
 * then, if `applyReads` is set and the read was full and untruncated, writes the
 * expected content; otherwise it halts.
 *
 * @param {object} event - Event whose `toolName` is "Read"; `resultText` is the recorded result.
 * @param {string} target - Replay directory.
 * @param {string[]} sourceRoots - Source roots used to rewrite the recorded path.
 * @param {boolean} applyReads - Allow writing the expected content of full reads.
 * @param {object | null} snapshotIndex - Snapshot lookup handed to trySnapshot, or null.
 * @returns {string} An "OK Read …" line, or the summary line of the healing step.
 * @throws {CheckpointFailure} When the event is not a Read, the path is outside every
 *   source root, or the check fails and cannot be healed.
 */
function doRead(event, target, sourceRoots, applyReads, snapshotIndex) {
  const ti = asToolInput(event);
  if (ti?.tool !== "Read") {
    throw new CheckpointFailure(`doRead called for non-Read tool '${event.toolName}'`);
  }
  const { file_path: filePath, offset, limit } = ti;

  let dst;
  try {
    dst = rewritePath(filePath, sourceRoots, target);
  } catch (e) {
    if (!(e instanceof OutsideSourceRoot)) throw e;
    throw new CheckpointFailure(`Read outside source root: ${e.message}`);
  }

  let raw = event.resultText || "";
  if (SENTINEL_ENVELOPE.test(raw.trim())) {
    raw = "";
  }
  const truncated = TRUNCATION_MARKER.test(raw);
  if (truncated) {
    raw = raw.replace(TRUNCATION_MARKER, "");
  }
  const expected = stripCatN(raw);
  const isChunked = offset !== null || limit !== null;

  // Healing order: snapshot first, then (full reads only) the recorded content, else halt.
  const tryHealOrHalt = (failureMsg) => {
    const healed =
      snapshotIndex === null
        ? null
        : trySnapshot(event, target, sourceRoots, snapshotIndex, expected, isChunked, offset, truncated);
    if (healed !== null) return healed;
    if (applyReads && !isChunked && !truncated) {
      return applyFullRead(dst, expected);
    }
    throw new CheckpointFailure(failureMsg);
  };

  if (!fs4.existsSync(dst)) {
    return tryHealOrHalt(`Read target missing: ${dst}`);
  }
  const onDisk = fs4.readFileSync(dst, "utf8");

  if (isChunked) {
    const wantLines = expected.split("\n");
    const from = Math.max(0, (offset ?? 1) - 1);
    const haveLines = onDisk.split("\n").slice(from, from + wantLines.length);
    if (haveLines.join("\n") === expected) {
      return `OK Read ${dst} chunk`;
    }
    const diff = lineDiff(wantLines, haveLines);
    return tryHealOrHalt(`Read chunk mismatch ${dst} [off=${offset} limit=${limit}]:\n${diff}`);
  }

  if (truncated) {
    if (onDisk.startsWith(expected)) {
      return `OK Read ${dst} (truncated prefix)`;
    }
    return tryHealOrHalt(`Read truncated-prefix mismatch ${dst}`);
  }

  if (trimTrailingNewline(onDisk) === trimTrailingNewline(expected)) {
    return `OK Read ${dst}`;
  }
  const diff = lineDiff(expected.split("\n"), onDisk.split("\n"));
  return tryHealOrHalt(`Read full mismatch ${dst}:\n${diff.slice(0, 1e3)}`);
}
|
|
690
|
+
/**
 * Run the checkpoint for an event; only Read events have one.
 *
 * @param {object} event - Event to check.
 * @param {string} target - Replay directory.
 * @param {string[]} sourceRoots - Source roots used to rewrite recorded paths.
 * @param {{ applyReads?: boolean, snapshotIndex?: object | null }} [opts] - `applyReads`
 *   defaults to false and `snapshotIndex` to null.
 * @returns {string} The summary line returned by doRead.
 * @throws {CheckpointFailure} For non-Read events, or whatever doRead throws.
 */
function runCheckpoint(event, target, sourceRoots, opts = {}) {
  if (event.toolName !== "Read") {
    throw new CheckpointFailure(
      `no checkpoint handler for tool '${event.toolName}'`
    );
  }
  return doRead(
    event,
    target,
    sourceRoots,
    opts.applyReads ?? false,
    opts.snapshotIndex ?? null
  );
}
|
|
700
|
+
|
|
701
|
+
// src/llm-classifier/index.ts
|
|
702
|
+
import * as path7 from "node:path";
|
|
703
|
+
|
|
704
|
+
// src/llm-classifier/cache.ts
|
|
705
|
+
import * as crypto from "node:crypto";
|
|
706
|
+
import * as fs5 from "node:fs";
|
|
707
|
+
import * as os from "node:os";
|
|
708
|
+
import * as path6 from "node:path";
|
|
709
|
+
// Name of the metadata file that readMeta looks for inside a cache directory.
var META_FILENAME = "meta.json";
|
|
710
|
+
// Tool names whose events buildPayload and payloadIndices include; every other tool is left out.
var INCLUDED_TOOLS = /* @__PURE__ */ new Set(["Bash", "Write", "Edit", "Read"]);
|
|
711
|
+
/**
 * Reduce events to compact `[index, toolName, inputValue]` rows.
 *
 * Events flagged `isError` and tools outside INCLUDED_TOOLS are left out. `inputValue`
 * is the command for Bash and the file path for Write/Edit/Read.
 *
 * @param {Iterable<object>} events - Collected events.
 * @returns {Array<[number, string, string]>} One row per included event, in input order.
 */
function buildPayload(events) {
  const rows = [];
  for (const ev of events) {
    if (ev.isError) continue;
    const name = ev.toolName;
    if (!INCLUDED_TOOLS.has(name)) continue;

    const ti = asToolInput(ev);
    let value;
    switch (ti?.tool) {
      case "Bash":
        value = ti.command;
        break;
      case "Write":
      case "Edit":
      case "Read":
        value = ti.file_path;
        break;
      default:
        value = "";
    }
    rows.push([ev.index, name, value]);
  }
  return rows;
}
|
|
727
|
+
/**
 * Collect the indices of the events buildPayload would include: not flagged `isError`
 * and with a tool name in INCLUDED_TOOLS.
 *
 * @param {Iterable<object>} events - Collected events.
 * @returns {Set<number>}
 */
function payloadIndices(events) {
  const indices = new Set();
  for (const { isError, toolName, index } of events) {
    if (isError) continue;
    if (!INCLUDED_TOOLS.has(toolName)) continue;
    indices.add(index);
  }
  return indices;
}
|
|
734
|
+
/**
 * Collect the indices of Bash events that are not flagged `isError`.
 *
 * @param {Iterable<object>} events - Collected events.
 * @returns {Set<number>}
 */
function bashIndices(events) {
  const indices = new Set();
  for (const { toolName, isError, index } of events) {
    const wanted = toolName === "Bash" && !isError;
    if (wanted) indices.add(index);
  }
  return indices;
}
|
|
741
|
+
// Default `threshold` for buildBatches: once a batch holds this many entries, a Bash entry containing "git commit" closes it.
var DEFAULT_BATCH_THRESHOLD = 50;
|
|
742
|
+
// Default `maxSize` for buildBatches: a batch is closed as soon as it holds this many entries.
var DEFAULT_BATCH_MAX_SIZE = 100;
|
|
743
|
+
/**
 * Split payload rows into consecutive batches.
 *
 * A batch is closed when it reaches `maxSize` rows, or when it holds at least `threshold`
 * rows and the row just added is a Bash command containing "git commit". Remaining rows
 * form a final, possibly smaller batch.
 *
 * @param {Iterable<[number, string, string]>} payload - Rows as produced by buildPayload.
 * @param {number} [threshold=DEFAULT_BATCH_THRESHOLD] - Minimum size for a commit cut.
 * @param {number} [maxSize=DEFAULT_BATCH_MAX_SIZE] - Hard upper bound on batch size.
 * @returns {Array<Array<[number, string, string]>>} The batches, in order.
 */
function buildBatches(payload, threshold = DEFAULT_BATCH_THRESHOLD, maxSize = DEFAULT_BATCH_MAX_SIZE) {
  const batches = [];
  let open = [];
  const closeBatch = () => {
    batches.push(open);
    open = [];
  };

  for (const row of payload) {
    open.push(row);
    const tool = row[1];
    const input = row[2];
    const isGitCommit = tool === "Bash" && input.includes("git commit");
    const isFull = open.length >= maxSize;
    const atCommitBoundary = open.length >= threshold && isGitCommit;
    if (isFull || atCommitBoundary) {
      closeBatch();
    }
  }

  if (open.length > 0) {
    batches.push(open);
  }
  return batches;
}
|
|
761
|
+
/**
 * Hash a JSON-serialisable value: SHA-256 over its `JSON.stringify` text.
 *
 * @param {unknown} v - Value to hash.
 * @returns {string} "sha256:" followed by the lowercase hex digest.
 */
function hashOne(v) {
  const serialised = JSON.stringify(v);
  const digest = crypto.createHash("sha256").update(serialised, "utf8").digest("hex");
  return `sha256:${digest}`;
}
|
|
766
|
+
/**
 * Derive the cache key for a classification run together with the per-input hashes it
 * is built from.
 *
 * @param {string} systemPrompt
 * @param {Iterable<unknown>} intents
 * @param {Iterable<unknown>} sourceRoots
 * @param {Iterable<unknown>} initialState
 * @param {unknown} events
 * @returns {{ key: string, components: { systemPrompt: string, intents: string,
 *   sourceRoots: string, initialState: string, events: string } }} `key` is the SHA-256
 *   (prefixed "sha256:") of the JSON array of the five component hashes in that order.
 */
function cacheKeyComponents(systemPrompt, intents, sourceRoots, initialState, events) {
  const components = {
    systemPrompt: hashOne(systemPrompt),
    intents: hashOne([...intents]),
    sourceRoots: hashOne([...sourceRoots]),
    initialState: hashOne([...initialState]),
    events: hashOne(events)
  };
  const ordered = [
    components.systemPrompt,
    components.intents,
    components.sourceRoots,
    components.initialState,
    components.events
  ];
  const digest = crypto
    .createHash("sha256")
    .update(JSON.stringify(ordered), "utf8")
    .digest("hex");
  return { key: `sha256:${digest}`, components };
}
|
|
788
|
+
/**
 * Build the cache file name for a batch number.
 *
 * @param {number} batchNo Batch number.
 * @returns {string} `batch-NNNN.json`, where the number is left-padded with
 *   zeros to at least four characters.
 */
function batchFilename(batchNo) {
  const padded = batchNo.toString().padStart(4, "0");
  return "batch-" + padded + ".json";
}
|
|
791
|
+
/**
 * Flatten a target path into a single path segment by turning every "/"
 * into "-".
 *
 * @param {string} target Target path.
 * @returns {string} The path with all forward slashes replaced by hyphens.
 */
function encodeTargetForCacheKey(target) {
  const slash = /\//g;
  const encoded = target.replace(slash, "-");
  return encoded;
}
|
|
794
|
+
/**
 * Resolve the cache directory used for a given replay target.
 *
 * The base is `$XDG_CACHE_HOME` when that variable is set and non-empty,
 * otherwise `<home>/.cache`. The result is
 * `<base>/claude-code-replay/<encoded target>`.
 *
 * @param {string} target Target path.
 * @returns {string} Cache directory path for the target.
 */
function cacheDirForTarget(target) {
  const xdgCacheHome = process.env.XDG_CACHE_HOME;
  const cacheRoot = xdgCacheHome ? xdgCacheHome : path6.join(os.homedir(), ".cache");
  const leaf = encodeTargetForCacheKey(target);
  return path6.join(cacheRoot, "claude-code-replay", leaf);
}
|
|
798
|
+
/**
 * Load and validate the cache metadata file from a cache directory.
 *
 * @param {string} cacheDir Cache directory.
 * @returns {{key: string, num_batches: number, components: (Object|undefined),
 *   last_event_ts: (string|undefined)}|null} The parsed metadata, or `null`
 *   when the file is absent, unreadable, not valid JSON, or lacks a string
 *   `key` / numeric `num_batches`.
 */
function readMeta(cacheDir) {
  const metaPath = path6.join(cacheDir, META_FILENAME);
  if (!fs5.existsSync(metaPath)) {
    return null;
  }
  try {
    // Destructuring stays inside the try: a JSON `null` payload throws here
    // and is reported as "no metadata", like any other read/parse failure.
    const { key, num_batches, components, last_event_ts } = JSON.parse(fs5.readFileSync(metaPath, "utf8"));
    if (typeof key !== "string" || typeof num_batches !== "number") {
      return null;
    }
    // Optional fields are passed through only when they have the right type.
    return {
      key,
      num_batches,
      components: components && typeof components === "object" ? components : void 0,
      last_event_ts: typeof last_event_ts === "string" ? last_event_ts : void 0
    };
  } catch {
    return null;
  }
}
|
|
812
|
+
/**
 * Read the `last_event_ts` recorded in a cache directory's metadata.
 *
 * @param {string} cacheDir Cache directory.
 * @returns {string|null} The stored value, or `null` when there is no
 *   readable metadata or no `last_event_ts` in it.
 */
function readCachedLastEventTs(cacheDir) {
  const meta = readMeta(cacheDir);
  if (meta === null || meta === void 0) {
    return null;
  }
  return meta.last_event_ts ?? null;
}
|
|
815
|
+
/**
 * Load one cached batch entry from a cache directory.
 *
 * @param {string} cacheDir Cache directory.
 * @param {number} batchNo Batch number used to derive the file name.
 * @returns {{primary: string, retry: (string|null)}|null} The entry, or
 *   `null` when the file is absent, unreadable, not valid JSON, or has no
 *   string `primary`. `retry` is `null` unless the file stores a string.
 */
function readBatchEntry(cacheDir, batchNo) {
  const entryPath = path6.join(cacheDir, batchFilename(batchNo));
  if (!fs5.existsSync(entryPath)) {
    return null;
  }
  try {
    const { primary, retry } = JSON.parse(fs5.readFileSync(entryPath, "utf8"));
    if (typeof primary !== "string") {
      return null;
    }
    return { primary, retry: typeof retry === "string" ? retry : null };
  } catch {
    return null;
  }
}
|
|
828
|
+
// Names of the cache-key components, in the order diffComponents reports them.
var ALL_COMPONENTS = ["systemPrompt", "intents", "sourceRoots", "initialState", "events"];
|
|
835
|
+
/**
 * Try to load every cached batch entry for the current run.
 *
 * The cache is a hit only when the metadata exists, its key and batch count
 * match the expected values, and every batch file 1..expectedNumBatches is
 * readable. On any mismatch after the metadata was found, the cache
 * directory is wiped before the miss is returned.
 *
 * @param {string} cacheDir Cache directory.
 * @param {string} expectedKey Cache key computed for the current inputs.
 * @param {number} expectedNumBatches Number of batches in the current run.
 * @param {Object} [currentComponents] Current per-component hashes, used to
 *   report which components changed on a key mismatch.
 * @returns {Object} `{kind: "hit", entries}` or `{kind: "miss", reason, ...}`
 *   where `reason` is "no-meta", "key-mismatch", "count-mismatch" or
 *   "missing-batch".
 */
function loadAllBatches(cacheDir, expectedKey, expectedNumBatches, currentComponents) {
  const meta = readMeta(cacheDir);
  if (meta === null) {
    return { kind: "miss", reason: "no-meta" };
  }

  if (meta.key !== expectedKey) {
    // Work out what changed before the stored metadata is deleted.
    const changedComponents = diffComponents(meta.components, currentComponents);
    wipeCacheDir(cacheDir);
    return { kind: "miss", reason: "key-mismatch", changedComponents };
  }

  const stored = meta.num_batches;
  if (stored !== expectedNumBatches) {
    wipeCacheDir(cacheDir);
    return { kind: "miss", reason: "count-mismatch", stored, expected: expectedNumBatches };
  }

  const entries = [];
  // Batch files are numbered from 1.
  for (let batchNo = 1; batchNo <= expectedNumBatches; batchNo += 1) {
    const entry = readBatchEntry(cacheDir, batchNo);
    if (entry === null) {
      wipeCacheDir(cacheDir);
      return { kind: "miss", reason: "missing-batch", batchNo };
    }
    entries.push(entry);
  }
  return { kind: "hit", entries };
}
|
|
865
|
+
/**
 * List the cache-key components whose hash differs between two snapshots.
 *
 * @param {Object} [stored] Component hashes read from the cache metadata.
 * @param {Object} [current] Component hashes for the current run.
 * @returns {string[]} Names from ALL_COMPONENTS whose values differ, in
 *   ALL_COMPONENTS order; empty when either snapshot is missing.
 */
function diffComponents(stored, current) {
  if (!(stored && current)) {
    return [];
  }
  return ALL_COMPONENTS.filter((name) => stored[name] !== current[name]);
}
|
|
873
|
+
/**
 * Delete the cache files this module owns from a cache directory.
 *
 * Only the metadata file and files named `batch-<digits>.json` are removed;
 * any other entry in the directory is left alone. Does nothing when the
 * directory does not exist.
 *
 * @param {string} cacheDir Cache directory.
 */
function wipeCacheDir(cacheDir) {
  if (!fs5.existsSync(cacheDir)) {
    return;
  }
  const batchFilePattern = /^batch-\d+\.json$/;
  const owned = fs5.readdirSync(cacheDir).filter(
    (name) => name === META_FILENAME || batchFilePattern.test(name)
  );
  for (const name of owned) {
    fs5.rmSync(path6.join(cacheDir, name), { force: true });
  }
}
|
|
881
|
+
/**
 * Persist one batch entry as pretty-printed JSON in the cache directory,
 * creating the directory if needed.
 *
 * @param {string} cacheDir Cache directory.
 * @param {number} batchNo Batch number used to derive the file name.
 * @param {*} primary Value stored under `primary`.
 * @param {*} [retry=null] Value stored under `retry`; the key is left out
 *   when this is `null`.
 */
function writeBatchEntry(cacheDir, batchNo, primary, retry = null) {
  fs5.mkdirSync(cacheDir, { recursive: true });
  const entry = retry === null ? { primary } : { primary, retry };
  const entryPath = path6.join(cacheDir, batchFilename(batchNo));
  fs5.writeFileSync(entryPath, JSON.stringify(entry, null, 2));
}
|
|
890
|
+
/**
 * Write the cache metadata file as pretty-printed JSON, creating the cache
 * directory if needed.
 *
 * @param {string} cacheDir Cache directory.
 * @param {string} key Cache key stored under `key`.
 * @param {number} numBatches Batch count stored under `num_batches`.
 * @param {Object} [components] Stored under `components` when truthy.
 * @param {string} [lastEventTs] Stored under `last_event_ts` when truthy.
 */
function commitMeta(cacheDir, key, numBatches, components, lastEventTs) {
  fs5.mkdirSync(cacheDir, { recursive: true });
  // Optional fields are only serialised when a truthy value was supplied.
  const meta = {
    key,
    num_batches: numBatches,
    ...(components ? { components } : {}),
    ...(lastEventTs ? { last_event_ts: lastEventTs } : {})
  };
  const metaPath = path6.join(cacheDir, META_FILENAME);
  fs5.writeFileSync(metaPath, JSON.stringify(meta, null, 2));
}
|
|
900
|
+
|
|
901
|
+
// src/llm-classifier/prompts.ts
|
|
902
|
+
var CLASSIFY_SYSTEM_PROMPT = `I am reconstructing lost project state from Claude Code logs.
|
|
903
|
+
You help me pick the right commands to restore it. It's critically important that you're very thorough
|
|
904
|
+
and precise in your response.
|
|
905
|
+
|
|
906
|
+
## Input format
|
|
907
|
+
JSON list of events. Each event is a compact 3-tuple: [index, tool, input] where:
|
|
908
|
+
- index: integer event identifier
|
|
909
|
+
- tool: "Bash", "Write", "Edit", or "Read"
|
|
910
|
+
- input: the Bash command (for "Bash") or the file path (for "Write" / "Edit" / "Read").
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
## Decision
|
|
914
|
+
All Write, Edit, Read events are ALWAYS replayed natively \u2014 you must not return entries for them.
|
|
915
|
+
Your only job is to decide, per Bash event, what (if anything) should run.
|
|
916
|
+
|
|
917
|
+
Runtime tolerance: Read events whose file is missing at checkpoint time are
|
|
918
|
+
**silently skipped**, not halted. This means you can confidently cascade-OMIT
|
|
919
|
+
a Bash chain when its sources are missing, without worrying that a downstream
|
|
920
|
+
Read of the would-have-been-produced file will stop the replay. Cascade-omits
|
|
921
|
+
are the correct, safe choice \u2014 the runtime has your back on the downstream
|
|
922
|
+
Read side. (Bash subprocess failures STILL halt the replay, so the rule
|
|
923
|
+
about omitting Bash events that read from missing files is unchanged.)
|
|
924
|
+
|
|
925
|
+
For each Bash event, choose one of:
|
|
926
|
+
(a) Keep verbatim \u2192 emit \`[index, "#exact#"]\`.
|
|
927
|
+
(b) Rewrite \u2192 emit \`[index, "<subset of the chain>"]\` when only part of a chain should run.
|
|
928
|
+
(c) Skip \u2192 OMIT the index from your response entirely.
|
|
929
|
+
|
|
930
|
+
The replay executes the returned \`command\` exactly as given, in event-index order.
|
|
931
|
+
You're free to keep \`&&\`, \`||\`, \`;\`, \`|\`, redirects, heredocs \u2014 whatever is
|
|
932
|
+
needed for the kept portion to be runnable on its own.
|
|
933
|
+
|
|
934
|
+
## CWD Safety
|
|
935
|
+
|
|
936
|
+
Why this matters: the replay runs every Bash event inside a single target
|
|
937
|
+
directory derived from <source-roots>. If an event's command changes the
|
|
938
|
+
working directory to a path NOT listed under <source-roots>, the rest of
|
|
939
|
+
the chain escapes the target directory and operates against the real,
|
|
940
|
+
present-day filesystem \u2014 usually with the wrong content. This is unsafe
|
|
941
|
+
and silently corrupts the replay.
|
|
942
|
+
|
|
943
|
+
THE RULE: For every Bash event you consider keeping, walk through its top-level
|
|
944
|
+
subcommands. If ANY subcommand is \`cd <abs-path>\` or \`pushd <abs-path>\`
|
|
945
|
+
AND <abs-path> is NOT equal to (or a subdirectory of) one of the paths
|
|
946
|
+
listed inside <source-roots>, OMIT the whole event.
|
|
947
|
+
Do NOT strip the \`cd\` and keep the rest of the chain \u2014 the remainder
|
|
948
|
+
was written assuming that working directory; running it in the sandbox
|
|
949
|
+
cwd produces silently wrong results.
|
|
950
|
+
|
|
951
|
+
How to check, step by step:
|
|
952
|
+
1. Split the command at top-level chain operators: \`&&\`, \`||\`, \`;\`, \`|\`, and newlines.
|
|
953
|
+
2. For each subcommand that starts with \`cd \` or \`pushd \`, extract the target path.
|
|
954
|
+
3. If the target is RELATIVE (\`cd src/\`, \`cd ..\`, \`cd .\`) \u2192 fine, the sandbox cwd is preserved.
|
|
955
|
+
4. If the target is ABSOLUTE (starts with \`/\` or \`~/\`):
|
|
956
|
+
- Check if it equals OR is a subdirectory of any entry in
|
|
957
|
+
<source-roots>. If yes \u2192 in-scope, fine.
|
|
958
|
+
- If no \u2192 OMIT THE ENTIRE EVENT. Do not return any entry for it.
|
|
959
|
+
5. Note: even one out-of-scope cd anywhere in the chain disqualifies the whole event.
|
|
960
|
+
|
|
961
|
+
### CWD Safety Examples
|
|
962
|
+
|
|
963
|
+
Source-roots in this example: ["/users/x/proj/orig"]; \`i\` = the event's index
|
|
964
|
+
|
|
965
|
+
original \u2192 emit
|
|
966
|
+
\`cd /users/x/proj/orig && git add foo && git commit -m x\` \u2192 \`[i, "#exact#"]\` (cd target IS a source root)
|
|
967
|
+
\`cd /users/x/proj/orig/internal && go test ./...\` \u2192 \`[i, "#exact#"]\` (subdir of a source root)
|
|
968
|
+
\`cd /users/x/proj/old-name && git commit -m x\` \u2192 OMIT (old-name not in source-roots)
|
|
969
|
+
\`cd /users/x/proj/old-name && git add . && git commit -q -m foo\` \u2192 OMIT (do NOT strip the cd to keep \`git add && git commit\`)
|
|
970
|
+
\`cd /opt/scratch && ./build.sh\` \u2192 OMIT (/opt/scratch out of scope)
|
|
971
|
+
\`git status && cd /tmp/other && rm -rf .\` \u2192 OMIT (out-of-scope cd appears mid-chain \u2014 still OMIT)
|
|
972
|
+
\`pushd /var/log && tail -n 50 foo.log\` \u2192 OMIT (pushd treated like cd)
|
|
973
|
+
\`cd src/ && go build\` \u2192 (cd is relative; cwd-safety doesn't apply \u2014 apply normal REWRITE RULES below)
|
|
974
|
+
\`cd .. && ls\` \u2192 (relative; cwd-safety doesn't apply)
|
|
975
|
+
\`go test ./...\` \u2192 (no cd; cwd-safety doesn't apply)
|
|
976
|
+
|
|
977
|
+
## Rewrite Rules
|
|
978
|
+
- Keep state-mutating subcommands: git history changes, codegen, package installs, heredoc/sed/awk/printf writes, scaffolding mkdirs, dependency syncs.
|
|
979
|
+
- **Environment-setup commands MUST be kept** \u2014 they produce committable artifacts (lock files, version pins, generated sources) that later \`git add\`/\`git commit\` chains depend on. If you skip an environment-setup command, every subsequent commit that references its outputs will fail.
|
|
980
|
+
Common families (NON-exhaustive \u2014 extend the same reasoning to peers):
|
|
981
|
+
- **Python / uv**: \`uv sync\`, \`uv lock\`, \`uv add\`, \`uv remove\`, \`uv python pin\`, \`uv init\` \u2192 produce \`uv.lock\`, \`.python-version\`, populate \`.venv/\`.
|
|
982
|
+
- **Python / pip+venv**: \`python -m venv .venv\`, \`pip install -r requirements.txt\`, \`pip freeze > requirements.txt\` \u2192 produce \`requirements.txt\`, \`.venv/\`.
|
|
983
|
+
- **Python / poetry**: \`poetry install\`, \`poetry lock\`, \`poetry add\` \u2192 produce \`poetry.lock\`.
|
|
984
|
+
- **Node**: \`npm install\`, \`npm ci\`, \`yarn install\`, \`pnpm install\`, \`bun install\` \u2192 produce \`package-lock.json\`, \`yarn.lock\`, \`pnpm-lock.yaml\`, \`bun.lockb\`, \`node_modules/\`.
|
|
985
|
+
- **Go**: \`go mod init\`, \`go mod tidy\`, \`go mod download\`, \`go get \u2026\` \u2192 produce/update \`go.mod\`, \`go.sum\`.
|
|
986
|
+
- **Rust**: \`cargo init\`, \`cargo new\`, \`cargo add\`, \`cargo update\`, \`cargo generate-lockfile\` \u2192 produce \`Cargo.lock\`.
|
|
987
|
+
- **Ruby**: \`bundle install\`, \`bundle update\`, \`bundle add\` \u2192 produce \`Gemfile.lock\`.
|
|
988
|
+
- **Codegen**: \`make proto\`, \`protoc \u2026\`, \`buf generate\`, \`prisma generate\`, \`sqlc generate\`, \`openapi-generator \u2026\`, \`graphql-codegen\` \u2192 produce generated source files (often committed alongside the spec).
|
|
989
|
+
- **Init scripts**: \`scripts/bootstrap.sh\`, \`./setup\`, \`make setup\` \u2014 anything the project relies on to materialise lockfiles/configs before a commit captures them.
|
|
990
|
+
Rule of thumb: if an event creates or modifies a file that a LATER \`git add\` or \`git commit\` references, KEEP that event verbatim (with \`#exact#\`).
|
|
991
|
+
- Drop subcommands that:
|
|
992
|
+
- Compile the project's own binary (\`go build\`, \`cargo build\`, \`tsc\` emit, bundler invocations).
|
|
993
|
+
If the ONLY mutating part of a chain is a build, OMIT the whole chain.
|
|
994
|
+
If other mutating subcommands exist alongside (e.g. \`git commit\`), keep just those and drop the build.
|
|
995
|
+
NOTE: do NOT confuse \`cargo build\` (drop) with \`cargo update\`/\`cargo add\` (KEEP \u2014 env setup, see above).
|
|
996
|
+
Similarly: \`go build\` (drop) vs \`go mod tidy\` (KEEP); \`tsc\` emit (drop) vs \`npm install\` (KEEP).
|
|
997
|
+
- Invoke the project's own built binary (\`./myserver \u2026\`, \`./mybinary \u2026\`, \`bin/cli \u2026\`).
|
|
998
|
+
Builds + runs do not affect git state.
|
|
999
|
+
- Read/inspect external state we cannot reproduce.
|
|
1000
|
+
- Are purely read-only (\`ls\`, \`cat\`, \`find\` without \`-delete\`, \`grep\`, \`git status/log/diff\`, \`which\`, \`*\\\`--version\`).
|
|
1001
|
+
- Never invent commands the original didn't contain. The returned command MUST be an exact substring of the original command.
|
|
1002
|
+
- CWD-safety is a prerequisite \u2014 apply it BEFORE these rules. If cwd-safety says OMIT, the rewrite rules don't even get a vote.
|
|
1003
|
+
|
|
1004
|
+
### Rewrite examples
|
|
1005
|
+
|
|
1006
|
+
(assume intent is "preserve git history; skip CLI build/run"; \`i\` = the event's index; cwd-safety already cleared)
|
|
1007
|
+
|
|
1008
|
+
original \u2192 emit
|
|
1009
|
+
\`ls -la\` \u2192 OMIT
|
|
1010
|
+
\`go mod init github.com/u/p\` \u2192 \`[i, "#exact#"]\`
|
|
1011
|
+
\`git init && git add . && git commit -m "init"\` \u2192 \`[i, "#exact#"]\`
|
|
1012
|
+
\`go build -o myserver ./cmd/myserver && ./myserver run\` \u2192 OMIT (both halves excluded)
|
|
1013
|
+
\`make proto && git add gen/ && git commit -m "proto"\` \u2192 \`[i, "#exact#"]\`
|
|
1014
|
+
\`go test ./... && go build -o bin/cli ./cmd/cli\` \u2192 \`[i, "go test ./..."]\`
|
|
1015
|
+
\`go build && go test ./...\` \u2192 \`[i, "go test ./..."]\`
|
|
1016
|
+
\`git status && git commit -am wip\` \u2192 \`[i, "git commit -am wip"]\`
|
|
1017
|
+
\`find /etc -name '*.conf' && echo done\` \u2192 OMIT (pure inspection)
|
|
1018
|
+
|
|
1019
|
+
Environment-setup examples (KEEP these \u2014 they create committable artifacts):
|
|
1020
|
+
|
|
1021
|
+
original \u2192 emit
|
|
1022
|
+
\`uv sync\` \u2192 \`[i, "#exact#"]\` (creates .venv/, may write .python-version)
|
|
1023
|
+
\`uv sync 2>&1 | tail -20\` \u2192 \`[i, "#exact#"]\` (tail is cosmetic; uv sync mutates)
|
|
1024
|
+
\`uv add somepackage>=0.2.20\` \u2192 \`[i, "#exact#"]\` (updates pyproject.toml + uv.lock)
|
|
1025
|
+
\`uv python pin 3.12\` \u2192 \`[i, "#exact#"]\` (writes .python-version)
|
|
1026
|
+
\`rm uv.lock && uv sync && uv run pytest -v 2>&1 | tail -20\` \u2192 \`[i, "rm uv.lock && uv sync"]\` (drop pytest; keep the lock rebuild)
|
|
1027
|
+
\`npm install\` \u2192 \`[i, "#exact#"]\` (writes package-lock.json + node_modules/)
|
|
1028
|
+
\`npm install lodash --save\` \u2192 \`[i, "#exact#"]\` (mutates package.json + lock)
|
|
1029
|
+
\`pnpm install --frozen-lockfile\` \u2192 \`[i, "#exact#"]\`
|
|
1030
|
+
\`go mod tidy\` \u2192 \`[i, "#exact#"]\` (writes go.sum)
|
|
1031
|
+
\`go mod tidy && go test ./...\` \u2192 \`[i, "go mod tidy"]\` (drop tests; tidy mutates)
|
|
1032
|
+
\`go get github.com/foo/bar@latest\` \u2192 \`[i, "#exact#"]\`
|
|
1033
|
+
\`cargo update\` \u2192 \`[i, "#exact#"]\` (rewrites Cargo.lock)
|
|
1034
|
+
\`cargo add serde\` \u2192 \`[i, "#exact#"]\`
|
|
1035
|
+
\`bundle install\` \u2192 \`[i, "#exact#"]\` (writes Gemfile.lock)
|
|
1036
|
+
\`make proto && git add gen/ && git commit -m proto\` \u2192 \`[i, "#exact#"]\` (codegen + commit)
|
|
1037
|
+
\`buf generate && git add gen/\` \u2192 \`[i, "#exact#"]\`
|
|
1038
|
+
\`./scripts/bootstrap.sh\` \u2192 \`[i, "#exact#"]\` (project-defined env setup)
|
|
1039
|
+
|
|
1040
|
+
## File-dependency awareness
|
|
1041
|
+
|
|
1042
|
+
A Bash event that reads from a file path will FAIL during replay if that
|
|
1043
|
+
file isn't on disk. Each user message starts with a
|
|
1044
|
+
<files-present></files-present> block listing every file guaranteed to be
|
|
1045
|
+
on disk at the start of that batch. Files in the block come from:
|
|
1046
|
+
|
|
1047
|
+
(a) the replay target's initial state (user-provided, if any); plus
|
|
1048
|
+
(b) every Write or Edit event in PRIOR batches (Writes/Edits are
|
|
1049
|
+
ALWAYS replayed natively, so their files materialise on disk).
|
|
1050
|
+
|
|
1051
|
+
The block does NOT enumerate Bash outputs. If a Bash event YOU KEPT
|
|
1052
|
+
earlier obviously produces a file (e.g. \`mkdir <path>\`, \`touch <path>\`,
|
|
1053
|
+
\`cp <src> <path>\` from an available source, heredoc/redirect to
|
|
1054
|
+
<path>, codegen whose output lands at <path>), you may reason that the
|
|
1055
|
+
file is also available \u2014 but only when the production is unambiguous.
|
|
1056
|
+
When in doubt, treat the file as absent.
|
|
1057
|
+
|
|
1058
|
+
<files-present> is GUARANTEED \u2014 these files are on disk at the start of this
|
|
1059
|
+
batch. <files-speculated> is CLAIMED \u2014 a prior batch's kept event was
|
|
1060
|
+
expected to produce them, but the replay has not verified the claim. Treat
|
|
1061
|
+
speculated entries as PROBABLY present when the producing chain is
|
|
1062
|
+
unambiguous (mkdir, simple heredoc, cp with both ends accounted for); treat
|
|
1063
|
+
them as UNCERTAIN when the producing event has failure modes the speculation
|
|
1064
|
+
cannot predict (network fetch, archive extraction with unknown contents,
|
|
1065
|
+
codegen with variable outputs). When uncertain \u2192 OMIT the reader.
|
|
1066
|
+
|
|
1067
|
+
CRITICAL CLARIFICATION about source roots: "the path is inside
|
|
1068
|
+
<source-roots>" does NOT imply the file exists. <source-roots> governs
|
|
1069
|
+
CWD safety only; <files-present> is the authoritative list of
|
|
1070
|
+
guaranteed-present files. User-supplied binaries \u2014 \`.apk\`, \`.apkm\`,
|
|
1071
|
+
\`.zip\`, \`.tar.gz\`, downloaded fixtures, screenshots, data dumps \u2014 that
|
|
1072
|
+
the original session reads but no payload Write/Edit/kept-Bash produces
|
|
1073
|
+
are NEVER available in the replay, regardless of their path. Don't be
|
|
1074
|
+
fooled by their location.
|
|
1075
|
+
|
|
1076
|
+
For each Bash event you are considering keeping, examine EVERY source
|
|
1077
|
+
path it reads from. If ANY source path is not in <files-present> AND
|
|
1078
|
+
not obviously produced by an earlier KEPT Bash event, OMIT the event.
|
|
1079
|
+
The replay would halt on it otherwise.
|
|
1080
|
+
|
|
1081
|
+
Cascading rule: if you OMIT an event because its sources are missing,
|
|
1082
|
+
DO NOT count that event's outputs as available for later events. A
|
|
1083
|
+
chain whose first link is OMITted has ALL downstream links OMITted too
|
|
1084
|
+
(until a later event re-introduces the missing files via Write/Edit
|
|
1085
|
+
or a freshly-kept Bash event whose own sources are accounted for).
|
|
1086
|
+
|
|
1087
|
+
This is a HEURISTIC, not a guarantee:
|
|
1088
|
+
- Be conservative on read sources. If you're unsure whether a file
|
|
1089
|
+
exists by this point, OMIT.
|
|
1090
|
+
- Be permissive on write destinations. A Bash event that creates a NEW
|
|
1091
|
+
file (e.g. \`mkdir\`, \`touch\`, \`cp\` whose source is in initial-state)
|
|
1092
|
+
doesn't violate the rule even though the destination didn't exist
|
|
1093
|
+
before \u2014 the event itself produces it.
|
|
1094
|
+
- INTENTS WIN. If the user's intent says to keep a category that this
|
|
1095
|
+
rule would skip, keep it.
|
|
1096
|
+
|
|
1097
|
+
Examples (assume <files-present> is empty unless noted; \`i\` = event index):
|
|
1098
|
+
|
|
1099
|
+
prior events | this event | decision
|
|
1100
|
+
--------------------------------------------------------------------------+-------------------------------------------------------------+---------
|
|
1101
|
+
<files-present> contains \`src/foo.go\` | \`cp /src/foo.go /src/foo.bak\` \u2192 \`[i, "#exact#"]\` (foo.go is listed as present)
|
|
1102
|
+
<files-present> does NOT include \`src/codegen-out/Bar.java\` | \`cp /src/codegen-out/Bar.java /src/Bar.java\` \u2192 OMIT (source not in <files-present> and not produced by a kept Bash)
|
|
1103
|
+
Bash KEPT: \`codegen decompile foo.apk -o codegen-out/\` AND foo.apk in <files-present> | \`cp codegen-out/Bar.java Bar.java\` \u2192 \`[i, "#exact#"]\` (the kept codegen run produces it)
|
|
1104
|
+
Bash that would produce codegen-out/ was OMITted (cascading) | \`cp codegen-out/Bar.java Bar.java\` \u2192 OMIT (cascade \u2014 producer omitted, so output absent)
|
|
1105
|
+
Bash SKIPPED: \`codegen decompile foo.apk -o codegen-out/\` (per rewrite rules) | \`cp codegen-out/Bar.java Bar.java\` \u2192 OMIT (the producer was skipped)
|
|
1106
|
+
<files-present> contains \`src/legacy.py\` | \`sed -i 's/old/new/' src/legacy.py\` \u2192 \`[i, "#exact#"]\` (file is listed)
|
|
1107
|
+
<files-present> does NOT include \`src/lock.json\` | \`cat /src/lock.json\` \u2192 OMIT (pure inspection AND source-missing \u2014 drop either way)
|
|
1108
|
+
<files-present> is EMPTY | \`unzip foo.apkm -d apkm-contents\` \u2192 OMIT (foo.apkm not in <files-present>; do not be misled by the path looking project-local \u2014 empty means empty)
|
|
1109
|
+
|
|
1110
|
+
Speculation entries only come from KEPT events. An omitted event contributes nothing \u2014 its outputs do not appear in <files-speculated> for later batches. Cascading OMITs are the correct behavior.
|
|
1111
|
+
|
|
1112
|
+
### Worked example: user-supplied APK cascade
|
|
1113
|
+
|
|
1114
|
+
The original session manually placed an \`.apkm\` file inside the project
|
|
1115
|
+
dir before running. The replay has NO knowledge of that binary \u2014 it's too
|
|
1116
|
+
large for a Write event and no Bash event in the payload produces it. So
|
|
1117
|
+
<files-present> will NEVER list it. The classifier must cascade omits:
|
|
1118
|
+
|
|
1119
|
+
event | command (abridged) | source reads | in <files-present>? | decision
|
|
1120
|
+
-------+-------------------------------------------------------------------+-------------------------------+---------------------+---------
|
|
1121
|
+
151 | \`brew install codegen apktool\` | (system pkg manager) | n/a | depends on rewrite rules (no source-roots reads)
|
|
1122
|
+
154 | \`mkdir -p apkm-contents && unzip /src/foo.apkm -d apkm-contents\` | \`/src/foo.apkm\` | NO | OMIT
|
|
1123
|
+
156 | \`codegen -d codegen-out apkm-contents/base.apk\` | \`apkm-contents/base.apk\` | NO (154 cascaded) | OMIT
|
|
1124
|
+
160 | \`cp codegen-out/sources/.../BTBle.java .../BTBle.java && cp ...\` | \`codegen-out/sources/...\` | NO (156 cascaded) | OMIT
|
|
1125
|
+
|
|
1126
|
+
Even though the .apkm path is inside <source-roots>, it isn't in
|
|
1127
|
+
<files-present>. That alone is enough to OMIT event 154, and the cascade
|
|
1128
|
+
omits 156 and 160. Without this cascade, the replay halts at 160 on a
|
|
1129
|
+
missing source file.`;
|
|
1130
|
+
var OUTPUT_FORMAT = `## Output Format
|
|
1131
|
+
|
|
1132
|
+
**CRITICAL**: Your response is a JSON 2-array \`[ [decisions...], [speculations...] ]\` \u2014 NOT a bare list of decisions. A response like \`[[10, "#exact#"]]\` is WRONG even when there are no speculations to declare; the correct form with no speculations is \`[[[10, "#exact#"]], []]\` (outer 2-array, decisions inside, empty speculations array).
|
|
1133
|
+
|
|
1134
|
+
Your entire response is a JSON 2-array:
|
|
1135
|
+
|
|
1136
|
+
\`\`\`
|
|
1137
|
+
[
|
|
1138
|
+
[ ...decisions... ],
|
|
1139
|
+
[ ...speculations... ]
|
|
1140
|
+
]
|
|
1141
|
+
\`\`\`
|
|
1142
|
+
|
|
1143
|
+
Each decision is a 2-array \`[index, command]\` where \`command\` is EITHER the literal sentinel string \`"#exact#"\` OR a modified shell command string (substring of the original).
|
|
1144
|
+
|
|
1145
|
+
- \`[index, "#exact#"]\` \u2014 execute the event's ORIGINAL Bash command verbatim.
|
|
1146
|
+
- \`[index, "<command>"]\` \u2014 execute the rewritten command (a substring of the original chain). Use this ONLY when part of the chain must be dropped.
|
|
1147
|
+
- omit the index entirely \u2192 skip.
|
|
1148
|
+
|
|
1149
|
+
You must consciously CHOOSE one of these for every event you keep \u2014 \`"#exact#"\` is the verbatim-keep signal; everything else is a rewrite that runs literally. Every kept decision entry MUST have exactly 2 elements (\`[index, command]\`); 1-element arrays like \`[42]\` are INVALID. Decide first, then write \u2014 do not emit a draft and restart.
|
|
1150
|
+
|
|
1151
|
+
Each speculation is a 2-array \`[path, producing_idx]\` where:
|
|
1152
|
+
- \`path\`: a project-relative file path (no leading "/", no leading "./", no ".." segments)
|
|
1153
|
+
- \`producing_idx\`: the index of an event in THIS BATCH'S payload that produces \`path\` on disk (Bash, Write, or Edit).
|
|
1154
|
+
|
|
1155
|
+
Speculation rules:
|
|
1156
|
+
- Speculate only files that one of YOUR KEPT events in this batch will produce on disk.
|
|
1157
|
+
Skipped events contribute nothing. (Write/Edit are always replayed natively, so
|
|
1158
|
+
they always count as "kept" for speculation purposes.)
|
|
1159
|
+
- Prefer not to repeat paths already in <files-present> or <files-speculated> \u2014 they
|
|
1160
|
+
are tracked already and will be normalized away. Repeats are not an error.
|
|
1161
|
+
- A single producing event can claim multiple paths \u2014 emit one [path, idx] entry per claim.
|
|
1162
|
+
- Do NOT speculate deletions. Speculation is additive only.
|
|
1163
|
+
- When production is uncertain (archive extraction with unknown contents, network fetch,
|
|
1164
|
+
codegen with variable outputs), be conservative \u2014 claim only what is unambiguously created.
|
|
1165
|
+
|
|
1166
|
+
Empty cases:
|
|
1167
|
+
- Nothing kept, nothing speculated: \`[[], []]\`
|
|
1168
|
+
- Decisions but no speculations: \`[[[42, "#exact#"]], []]\`
|
|
1169
|
+
|
|
1170
|
+
WRONG (bare list \u2014 old format, do not emit):
|
|
1171
|
+
[[10, "#exact#"], [11, "git commit"]]
|
|
1172
|
+
[]
|
|
1173
|
+
|
|
1174
|
+
RIGHT (always 2-array, even when speculations are empty):
|
|
1175
|
+
[[[10, "#exact#"], [11, "git commit"]], []]
|
|
1176
|
+
[[], []]
|
|
1177
|
+
|
|
1178
|
+
The opening of your response is literally the characters \`[[\` (two left brackets \u2014 the outer 2-array and the start of the decisions list). The closing is \`]]\`.
|
|
1179
|
+
|
|
1180
|
+
No preamble. No commentary. No markdown fences (no \`\`\`json). Compact JSON.
|
|
1181
|
+
`;
|
|
1182
|
+
var CLASSIFY_TAIL = `
|
|
1183
|
+
|
|
1184
|
+
## Source roots
|
|
1185
|
+
Apply the CWD SAFETY rule from earlier in this prompt with
|
|
1186
|
+
these paths. The paths inside <source-roots></source-roots> are the
|
|
1187
|
+
ONLY absolute paths the replay treats as project-local. Every \`cd <abs>\`
|
|
1188
|
+
or \`pushd <abs>\` you keep MUST target one of these paths (or a subdir of
|
|
1189
|
+
one). Anything else \u2192 OMIT the event in full.
|
|
1190
|
+
|
|
1191
|
+
<source-roots>
|
|
1192
|
+
</source-roots>
|
|
1193
|
+
|
|
1194
|
+
# User intent
|
|
1195
|
+
Read carefully \u2014 the user's specific intents for THIS replay are inside <intents></intents>.
|
|
1196
|
+
Respect these rules even when the rules or supplements would otherwise suggest including X.
|
|
1197
|
+
Intent-stated exclusions are a HARD constraint, not a hint.
|
|
1198
|
+
|
|
1199
|
+
|
|
1200
|
+
<intents>
|
|
1201
|
+
</intents>
|
|
1202
|
+
|
|
1203
|
+
## Multi-turn protocol
|
|
1204
|
+
|
|
1205
|
+
Commands arrive one BATCH per user turn. Each user message contains:
|
|
1206
|
+
- a \`Batch N/M\` header,
|
|
1207
|
+
- a \`<files-present>...</files-present>\` block listing files that are
|
|
1208
|
+
guaranteed to be on disk in the replay target at the start of this
|
|
1209
|
+
batch (see "File-dependency awareness" above),
|
|
1210
|
+
- a \`<files-speculated>...</files-speculated>\` block listing files
|
|
1211
|
+
that a PRIOR batch's kept event claimed to produce, with provenance
|
|
1212
|
+
\`(from idx N)\`. Apply the asymmetric-trust rule from the
|
|
1213
|
+
"File-dependency awareness" section above when reading from these.
|
|
1214
|
+
- a \`<commands>...</commands>\` block: a JSON list of [index, tool,
|
|
1215
|
+
input] tuples for THIS batch.
|
|
1216
|
+
|
|
1217
|
+
Respond with a JSON list per the output format above \u2014 decisions for THAT
|
|
1218
|
+
BATCH ONLY. Do not reference indices from prior or future batches.
|
|
1219
|
+
|
|
1220
|
+
Some user messages will be retry follow-ups: they contain a
|
|
1221
|
+
\`<failures>...</failures>\` block listing entries from your previous response
|
|
1222
|
+
that failed substring validation. For retry turns, return a JSON list of
|
|
1223
|
+
corrected entries (same output format).
|
|
1224
|
+
`;
|
|
1225
|
+
var RESTORE_GIT_COMMITS_SUPPLEMENT = `## Additional Focus: Restore Git Commit History
|
|
1226
|
+
|
|
1227
|
+
Beyond the general rules above, this run cares specifically about
|
|
1228
|
+
reconstructing the original session's git history on the replay target.
|
|
1229
|
+
Every commit, branch, tag, ref move, merge/rebase/revert result, and
|
|
1230
|
+
history rewrite must be reconstructible from the executed Bash events.
|
|
1231
|
+
|
|
1232
|
+
Pay SPECIAL attention to these commands when they appear (non-exhaustive,
|
|
1233
|
+
extend the same reasoning to any equivalent command not listed below):
|
|
1234
|
+
- \`git init\` \u2014 repo creation
|
|
1235
|
+
- \`git add\` (any form: file list, \`-A\`, \`.\`, glob)
|
|
1236
|
+
- \`git commit\` (including \`--amend\`, \`-m\`, heredoc messages, \`--no-verify\`)
|
|
1237
|
+
- \`git branch\`, \`git checkout\`, \`git switch\` when they create or move a ref
|
|
1238
|
+
- \`git merge\`, \`git rebase\`, \`git cherry-pick\`, \`git revert\`
|
|
1239
|
+
- \`git reset\` when it moves HEAD or modifies the index
|
|
1240
|
+
- \`git tag\` (annotated or lightweight)
|
|
1241
|
+
- \`git stash push\` / \`pop\` / \`apply\` when working tree state matters
|
|
1242
|
+
- \`git filter-repo\`, \`git filter-branch\` \u2014 history rewrites
|
|
1243
|
+
- \`git apply\`, \`git am\`, \`patch\` when applying a stored diff
|
|
1244
|
+
- \`git remote add\`, \`git fetch\`, \`git pull\` when needed to make a ref local
|
|
1245
|
+
- \`git update-ref\`, \`git symbolic-ref\` \u2014 direct ref manipulation
|
|
1246
|
+
- \`git config\` writes used by subsequent commits (e.g. user.email)
|
|
1247
|
+
- heredoc / \`cat <<EOF\`, \`sed -i\`, \`awk\` redirects, \`printf > file\` that
|
|
1248
|
+
produce content a later \`git commit\` captures
|
|
1249
|
+
|
|
1250
|
+
When in doubt about whether a subcommand contributes to git state \u2014 KEEP IT.
|
|
1251
|
+
A spurious execute of git-adjacent work is recoverable; a missed mutation
|
|
1252
|
+
breaks the chain. Still drop CLI-build / CLI-run subcommands per the rewrite
|
|
1253
|
+
rules above.
|
|
1254
|
+
|
|
1255
|
+
CRUCIAL EXCEPTION \u2014 git commands in OTHER directories must be IGNORED:
|
|
1256
|
+
The "preserve all git history" goal above applies ONLY to commands that
|
|
1257
|
+
operate on a repo INSIDE <source-roots>. A git command that runs in a
|
|
1258
|
+
different directory is editing a DIFFERENT REPO \u2014 replaying it against the
|
|
1259
|
+
replay target either silently corrupts our target (if the cd is stripped
|
|
1260
|
+
or rewritten) or executes against a real-world directory on disk
|
|
1261
|
+
(sandbox escape).
|
|
1262
|
+
|
|
1263
|
+
Apply this AS PART of cwd-safety, not in spite of it:
|
|
1264
|
+
- If a Bash event begins with \`cd <path>\` (or contains \`&& cd <path>\`
|
|
1265
|
+
mid-chain) AND <path> is NOT listed in <source-roots>, OMIT the event
|
|
1266
|
+
in full \u2014 including any \`git init\`, \`git add\`, \`git commit\`,
|
|
1267
|
+
\`git checkout\`, \`git branch\`, \`git merge\`, \`git rebase\`, \`git tag\`,
|
|
1268
|
+
\`git filter-repo\` that follows on that chain. The "keep all git
|
|
1269
|
+
mutations" rule does NOT override cwd-safety.
|
|
1270
|
+
- Do NOT strip the \`cd\` to recover the trailing git command. The
|
|
1271
|
+
trailing git command was operating in the OTHER repo and would
|
|
1272
|
+
fabricate a commit in our repo (wrong tree, wrong author state).
|
|
1273
|
+
|
|
1274
|
+
Concrete examples (assume <source-roots> = [\`/u/x/proj/orig\`]):
|
|
1275
|
+
\`cd /u/x/proj/orig && git init && git add . && git commit -m init\` \u2192 \`[i, "#exact#"]\` (cd is in-scope)
|
|
1276
|
+
\`cd /u/x/proj/old-name && git init && git add . && git commit -m init\` \u2192 OMIT (different repo)
|
|
1277
|
+
\`cd /u/x/proj/old-name && git commit -q -m "Initial old-name\u2026"\` \u2192 OMIT (different repo's commit chain)
|
|
1278
|
+
\`cd /u/x/proj/old-name && git filter-repo --invert-paths --path .env\` \u2192 OMIT (history rewrite, but in a different repo)
|
|
1279
|
+
\`git -C /u/x/proj/old-name commit -m foo\` \u2192 OMIT (\`-C <out-of-scope>\` is equivalent to cd-out-of-scope)
|
|
1280
|
+
\`git add foo && git commit -m bar\` \u2192 \`[i, "#exact#"]\` (no cd; runs in sandbox cwd \u2014 fine)
|
|
1281
|
+
|
|
1282
|
+
For "git -C <dir>" usage, apply the same rule as cd: the <dir> argument
|
|
1283
|
+
to \`-C\` must be inside <source-roots> or the event is OMITTED.`;
|
|
1284
|
+
// Literal command value a classifier decision uses to mean "keep the original command verbatim";
// parseEntry swaps it for the original command text.
var EXACT_SENTINEL = "#exact#";
|
|
1285
|
+
/**
 * Render the user-turn prompt for one classification batch.
 *
 * @param {number} batchNo - Batch number shown in the header.
 * @param {number} totalBatches - Total batch count shown in the header.
 * @param {string[]} filesPresent - Paths listed (sorted) inside <files-present>.
 * @param {{path: string, producingIdx: number}[]} filesSpeculated - Entries listed
 *   (sorted by path) inside <files-speculated>.
 * @param {string} commandsJson - Pre-serialized commands placed inside <commands>.
 * @returns {string} The complete prompt text.
 */
function buildBatchUserMessage(batchNo, totalBatches, filesPresent, filesSpeculated, commandsJson) {
  let presentBody;
  if (filesPresent.length === 0) {
    presentBody = "(empty \u2014 target has no guaranteed files at the start of this batch)";
  } else {
    // Sort a copy so the caller's array is left untouched.
    const orderedPresent = [...filesPresent];
    orderedPresent.sort();
    presentBody = orderedPresent.map((file) => `- ${file}`).join("\n");
  }

  const orderedSpeculated = [...filesSpeculated];
  orderedSpeculated.sort((left, right) => left.path.localeCompare(right.path));
  let speculatedBody;
  if (orderedSpeculated.length === 0) {
    speculatedBody = "(empty \u2014 no prior-batch event has claimed file outputs)";
  } else {
    speculatedBody = orderedSpeculated
      .map((spec) => `- ${spec.path} (from idx ${spec.producingIdx})`)
      .join("\n");
  }

  const header = `Batch ${batchNo}/${totalBatches}. Classify the commands below per the rules in the system prompt. Return the JSON response only \u2014 opening characters MUST be \`[[\` (the outer 2-array start), closing MUST be \`]]\`.`;
  return [
    header,
    "",
    "<files-present>",
    `${presentBody}`,
    "</files-present>",
    "",
    "<files-speculated>",
    `${speculatedBody}`,
    "</files-speculated>",
    "",
    "<commands>",
    `${commandsJson}`,
    "</commands>"
  ].join("\n");
}
|
|
1305
|
+
/**
 * Render the follow-up prompt asking the classifier to correct rewrites that failed validation.
 *
 * @param {{idx: number, original: string, returned: string}[]} failures - Rejected rewrites;
 *   each is serialized as an `[idx, original, returned]` tuple inside <failures>.
 * @returns {string} The retry prompt text.
 */
function buildRetryUserMessage(failures) {
  const tuples = failures.map(({ idx, original, returned }) => [idx, original, returned]);
  const failuresJson = JSON.stringify(tuples);
  // Paragraphs are separated by exactly one blank line in the rendered prompt.
  const paragraphs = [
    "Retry. The entries below from your previous response failed validation: the `command` was NOT derivable from the original event's command.",
    "A valid rewrite is EITHER the literal sentinel `\"#exact#\"` OR an exact substring of the original's subcommands (joined with the same operators as the original \u2014 `&&`, `||`, `;`, `|`). You CANNOT introduce new tokens, new flags, new arguments, or new commands.",
    "If you cannot produce a valid rewrite for an entry, OMIT that index entirely.",
    "Each failure is a 3-tuple `[index, original, your-prior-rewrite]`:",
    `<failures>\n${failuresJson}\n</failures>`,
    "Return the corrected response in the 2-array shape from the system prompt, with the speculations array empty: `[[corrected_decisions...], []]`. An empty corrections array `[[], []]` is valid if every failure should be omitted."
  ];
  return paragraphs.join("\n\n");
}
|
|
1323
|
+
/**
 * Build the classifier mode descriptor: a label, the cleaned custom intents, and the
 * system-prompt template assembled from the module-level prompt sections.
 *
 * @param {{customIntents?: string[]}} [opts] - Optional custom intents; each is trimmed
 *   and blank entries are discarded.
 * @returns {{label: string, intents: string[], systemPrompt: string}}
 */
function composeMode(opts = {}) {
  const systemPrompt = `${OUTPUT_FORMAT}\n${CLASSIFY_SYSTEM_PROMPT}\n\n---\n\n${RESTORE_GIT_COMMITS_SUPPLEMENT}${CLASSIFY_TAIL}`;
  const intents = [];
  for (const rawIntent of opts.customIntents ?? []) {
    const intent = rawIntent.trim();
    if (intent.length > 0) intents.push(intent);
  }
  // The label is "base", with "+custom-intent" appended when any intent survives trimming.
  const label = intents.length > 0 ? ["base", "custom-intent"].join("+") : "base";
  return { label, intents, systemPrompt };
}
|
|
1335
|
+
/**
 * Fill the <source-roots> and <intents> placeholders of a system-prompt template.
 *
 * Only the first occurrence of each tag pair is replaced. Replacer functions are used
 * instead of replacement strings so that `$`-sequences in user-supplied intents or in
 * paths (`$&`, `$$`, `` $` ``, `$'`, `$1`) are inserted literally rather than being
 * expanded as replacement patterns.
 *
 * @param {string} template - Prompt text containing the two tag pairs.
 * @param {string[]} intents - Custom intents, rendered one per "- " bullet.
 * @param {string[]} sourceRoots - Source root paths, rendered one per "- " bullet.
 * @returns {string} The template with both sections substituted.
 */
function composeSystemPrompt(template, intents, sourceRoots) {
  const toBullets = (items, emptyText) =>
    items.length === 0 ? emptyText : items.map((item) => `- ${item}`).join("\n");
  const intentsBody = toBullets(intents, "(none)");
  const sourceRootsBody = toBullets(
    sourceRoots,
    "(none \u2014 treat every absolute path with skepticism)"
  );
  return template
    .replace(
      /<source-roots>[\s\S]*?<\/source-roots>/,
      () => `<source-roots>\n${sourceRootsBody}\n</source-roots>`
    )
    .replace(
      /<intents>[\s\S]*?<\/intents>/,
      () => `<intents>\n${intentsBody}\n</intents>`
    );
}
|
|
1347
|
+
|
|
1348
|
+
// src/llm-classifier/parse.ts
|
|
1349
|
+
/** Error type raised by the classifier pipeline; instances report `name === "ClassifierError"`. */
var ClassifierError = class extends Error {
  // Own-property override of the inherited Error name.
  name = "ClassifierError";

  /** @param {string} message - Human-readable failure description. */
  constructor(message) {
    super(message);
  }
};
|
|
1355
|
+
function validateSpeculationEntry(entry, payloadIdxs, bashKeptIdxs, nonBashPayloadIdxs) {
|
|
1356
|
+
if (typeof entry.path !== "string" || entry.path.length === 0) {
|
|
1357
|
+
return { ok: false, error: "path is empty" };
|
|
1358
|
+
}
|
|
1359
|
+
if (entry.path.startsWith("/")) {
|
|
1360
|
+
return { ok: false, error: "path has leading '/' (must be project-relative)" };
|
|
1361
|
+
}
|
|
1362
|
+
if (entry.path.split("/").some((seg) => seg === "..")) {
|
|
1363
|
+
return { ok: false, error: "path contains '..' segment" };
|
|
1364
|
+
}
|
|
1365
|
+
if (!payloadIdxs.has(entry.producingIdx)) {
|
|
1366
|
+
return {
|
|
1367
|
+
ok: false,
|
|
1368
|
+
error: `producing_idx ${entry.producingIdx} is not in this batch's payload`
|
|
1369
|
+
};
|
|
1370
|
+
}
|
|
1371
|
+
const isNonBash = nonBashPayloadIdxs?.has(entry.producingIdx) ?? false;
|
|
1372
|
+
if (!isNonBash && !bashKeptIdxs.has(entry.producingIdx)) {
|
|
1373
|
+
return {
|
|
1374
|
+
ok: false,
|
|
1375
|
+
error: `producing_idx ${entry.producingIdx} references a Bash event the classifier omitted (kept events only)`
|
|
1376
|
+
};
|
|
1377
|
+
}
|
|
1378
|
+
return { ok: true };
|
|
1379
|
+
}
|
|
1380
|
+
// Matches text made only of whitespace and shell chain operators (no actual command).
var NON_VERB_CHARS_RE = /^[\s&|;]*$/;

/**
 * Decide whether a classifier rewrite is derivable from the original command.
 *
 * The rewrite must be non-empty, contain more than operators/whitespace, and occur in
 * `original` at a position aligned on token boundaries. A plain substring test is not
 * enough: it would accept mid-token cuts such as `rm -rf /` taken out of
 * `rm -rf /tmp/build`, which is a different (and far more destructive) command.
 *
 * @param {string} rewritten - Command text proposed by the classifier.
 * @param {string} original - Command text of the recorded event.
 * @returns {boolean} True when some occurrence of `rewritten` in `original` starts and
 *   ends on a boundary (string edge, whitespace, chain/redirect operator, paren, quote).
 */
function isValidRewrite(rewritten, original) {
  if (rewritten.length === 0) return false;
  if (NON_VERB_CHARS_RE.test(rewritten)) return false;

  // `undefined` is what indexing one past either end of the string yields.
  const isBoundary = (ch) => ch === undefined || /[\s&|;()<>'"`]/.test(ch);
  const startsOnBoundary = isBoundary(rewritten[0]);
  const endsOnBoundary = isBoundary(rewritten[rewritten.length - 1]);

  // Check every occurrence: an early mid-token hit must not mask a later aligned one.
  let at = original.indexOf(rewritten);
  while (at !== -1) {
    const startOk = startsOnBoundary || isBoundary(original[at - 1]);
    const endOk = endsOnBoundary || isBoundary(original[at + rewritten.length]);
    if (startOk && endOk) return true;
    at = original.indexOf(rewritten, at + 1);
  }
  return false;
}
|
|
1386
|
+
/**
 * Strip surrounding whitespace and, when the whole response is a single Markdown code
 * fence, return only the fenced payload.
 *
 * The optional `json` info-string is matched case-insensitively so that a fence opened
 * with ```` ```JSON ```` does not leak the tag into the text handed to `JSON.parse`.
 *
 * @param {string} text - Raw model response.
 * @returns {string} Trimmed payload with any enclosing fence removed.
 */
function extractJson(text) {
  const trimmed = text.trim();
  const fence = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(trimmed);
  return fence ? fence[1].trim() : trimmed;
}
|
|
1391
|
+
/**
 * Validate one `[index, command]` decision tuple from the classifier.
 *
 * @param {unknown} entry - Raw element of the decisions arm.
 * @param {Set<number>} bashIdxs - Indices of Bash events in the batch.
 * @param {Map<number, string>} originalByIdx - Original command text per Bash index.
 * @returns {{ok: true, idx: number, decision: {intent: string, command: string, reason: string}}
 *   | {ok: false, idx: number | null, bad: {raw: unknown, error: string, retry?: object}}}
 *   On failure `bad.retry` is present only when the rewrite itself failed validation,
 *   i.e. the only failure kind that carries enough context for a retry prompt.
 */
function parseEntry(entry, bashIdxs, originalByIdx) {
  const fail = (idx, bad) => ({ ok: false, idx, bad });
  const keep = (idx, command, reason) => ({
    ok: true,
    idx,
    decision: { intent: "execute", command, reason }
  });

  if (!(Array.isArray(entry) && entry.length === 2)) {
    return fail(null, {
      raw: entry,
      error: 'not a 2-element array [index, command] (use "#exact#" for verbatim)'
    });
  }
  const [idx, command] = entry;
  // Number.isInteger is false for every non-number, so no separate typeof check is needed.
  if (!Number.isInteger(idx)) {
    return fail(null, { raw: entry, error: "index is not int" });
  }
  if (!bashIdxs.has(idx)) {
    return fail(idx, {
      raw: entry,
      error: `idx ${idx} is not a Bash event in the input (fabricated)`
    });
  }
  if (typeof command !== "string" || command.trim() === "") {
    return fail(idx, { raw: entry, error: "command is missing or empty" });
  }

  const original = originalByIdx.get(idx) ?? "";
  if (command === EXACT_SENTINEL) {
    return keep(idx, original, "execute (verbatim)");
  }
  if (!isValidRewrite(command, original)) {
    return fail(idx, {
      raw: entry,
      error: `rewrite at idx ${idx} is not a subset of the original command (fabricated)`,
      retry: { idx, original, returned: command }
    });
  }
  // A "rewrite" that reproduces the original text exactly is still reported as verbatim.
  return keep(idx, command, command === original ? "execute (verbatim)" : "execute (rewritten)");
}
|
|
1459
|
+
/**
 * Classify the top-level JSON value returned by the classifier.
 *
 * @param {unknown} top - Parsed JSON.
 * @returns {"unknown" | "wrapped" | "two-array" | "bare-list"}
 *   - "unknown": not an array.
 *   - "wrapped": a single-element array holding a `[decisions, speculations]` pair.
 *   - "two-array": a `[decisions, speculations]` pair whose second arm is empty or
 *     starts with an array/object element.
 *   - "bare-list": anything else, including an empty array.
 */
function detectResponseShape(top) {
  if (!Array.isArray(top)) return "unknown";

  const isArrayPair = (value) =>
    Array.isArray(value) &&
    value.length === 2 &&
    Array.isArray(value[0]) &&
    Array.isArray(value[1]);

  if (top.length === 1 && isArrayPair(top[0])) return "wrapped";

  if (isArrayPair(top)) {
    const speculationArm = top[1];
    if (speculationArm.length === 0) return "two-array";
    // Arrays are also typeof "object", so one check covers tuple and object entries.
    // A primitive head means the second element is itself an [idx, command] decision.
    const head = speculationArm[0];
    if (head !== null && typeof head === "object") return "two-array";
  }
  return "bare-list";
}
|
|
1475
|
+
/**
 * Parse and validate one raw classifier response for a batch.
 *
 * @param {string} raw - Raw response text (optionally fenced JSON).
 * @param {Set<number>} bashPayloadIdxs - Bash event indices in this batch.
 * @param {Map<number, string>} originalByIdx - Original command per Bash index.
 * @param {Set<number>} payloadIdxs - Every event index in this batch.
 * @param {Set<number>} nonBashPayloadIdxs - Non-Bash event indices in this batch.
 * @param {(line: string) => void} [emit] - Optional log sink.
 * @returns {{decisions: Map<number, object>, speculations: object[], bad: object[]}}
 *   Accepted decisions keyed by index, accepted speculations (first claim per path
 *   wins), and every rejected entry with its reason.
 * @throws {ClassifierError} When the text is not JSON or has an unrecognized shape.
 */
function parseBatchResponse(raw, bashPayloadIdxs, originalByIdx, payloadIdxs, nonBashPayloadIdxs, emit) {
  const cleaned = extractJson(raw);
  let top;
  try {
    top = JSON.parse(cleaned);
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    throw new ClassifierError(
      `classifier returned non-JSON response: ${msg}\nraw response (first 500 chars):\n${raw.slice(0, 500)}`
    );
  }

  let decRaw;
  let specRaw;
  switch (detectResponseShape(top)) {
    case "two-array":
      [decRaw, specRaw] = top;
      break;
    case "wrapped":
      [decRaw, specRaw] = top[0];
      break;
    case "bare-list":
      if (emit) {
        emit(
          "INFO classifier lenient parse: model returned bare decisions list (no speculations arm); treating as decisions-only"
        );
      }
      decRaw = top;
      specRaw = [];
      break;
    default:
      throw new ClassifierError(
        `classifier response is not a recognized shape (expected 2-array [decisions, speculations] or bare-list of decisions): ` + JSON.stringify(top).slice(0, 200)
      );
  }
  if (!Array.isArray(decRaw)) {
    throw new ClassifierError(
      `decisions arm is not a list: ${JSON.stringify(decRaw).slice(0, 200)}`
    );
  }
  if (!Array.isArray(specRaw)) {
    throw new ClassifierError(
      `speculations arm is not a list: ${JSON.stringify(specRaw).slice(0, 200)}`
    );
  }

  const bad = [];

  // Decisions arm: the first valid entry per index wins; later ones are duplicates.
  const decisions = /* @__PURE__ */ new Map();
  for (const entry of decRaw) {
    const res = parseEntry(entry, bashPayloadIdxs, originalByIdx);
    if (!res.ok) {
      bad.push(res.bad);
    } else if (decisions.has(res.idx)) {
      bad.push({ raw: entry, error: `duplicate entry for idx ${res.idx}` });
    } else {
      decisions.set(res.idx, res.decision);
    }
  }

  const bashKeptIdxs = /* @__PURE__ */ new Set();
  decisions.forEach((decision, idx) => {
    if (decision.intent === "execute") bashKeptIdxs.add(idx);
  });

  // Speculations arm: a repeated path is skipped silently, not reported as bad.
  const speculations = [];
  const claimedPaths = /* @__PURE__ */ new Set();
  for (const entry of specRaw) {
    const parsed = parseSpeculationShape(entry);
    if (parsed === null) {
      bad.push({ raw: entry, error: "speculation entry has unexpected shape" });
      continue;
    }
    const verdict = validateSpeculationEntry(
      parsed,
      payloadIdxs,
      bashKeptIdxs,
      nonBashPayloadIdxs
    );
    if (!verdict.ok) {
      bad.push({ raw: entry, error: `speculation rejected: ${verdict.error}` });
      continue;
    }
    if (!claimedPaths.has(parsed.path)) {
      claimedPaths.add(parsed.path);
      speculations.push(parsed);
    }
  }
  return { decisions, speculations, bad };
}
|
|
1562
|
+
/**
 * Normalize one raw speculation entry into `{ path, producingIdx }`.
 *
 * Accepted inputs:
 *   - a `[path, producingIdx]` tuple;
 *   - an object with `path` and either `producingIdx` or `producing_idx`. The
 *     snake_case key is the spelling used in the schema comment and in validation
 *     error messages, so a model that answers in object form is likely to use it.
 *
 * @param {unknown} entry - Raw element of the speculations arm.
 * @returns {{path: string, producingIdx: number} | null} Null when the shape or the
 *   field types do not match (path must be a string, index an integer).
 */
function parseSpeculationShape(entry) {
  let candidatePath;
  let candidateIdx;
  if (Array.isArray(entry)) {
    if (entry.length !== 2) return null;
    [candidatePath, candidateIdx] = entry;
  } else if (entry && typeof entry === "object") {
    candidatePath = entry.path;
    candidateIdx = entry.producingIdx ?? entry.producing_idx;
  } else {
    return null;
  }
  // Number.isInteger is false for every non-number value.
  if (typeof candidatePath === "string" && Number.isInteger(candidateIdx)) {
    return { path: candidatePath, producingIdx: candidateIdx };
  }
  return null;
}
|
|
1578
|
+
|
|
1579
|
+
// src/llm-classifier/sdk.ts
|
|
1580
|
+
// Model id used for classification when the caller passes no (or an empty) opts.model.
var CLASSIFIER_MODEL = "claude-sonnet-4-6";
|
|
1581
|
+
// JSON Schema passed as the classifier's structured-output format: a 2-tuple of
// [decisions, speculations], each arm a list of fixed-length 2-tuples.
var CLASSIFIER_RESPONSE_SCHEMA = (() => {
  const integer = () => ({ type: "integer" });
  const nonEmptyString = () => ({ type: "string", minLength: 1 });
  // A list whose items are exactly-two-element tuples of the given element schemas.
  const listOfPairs = (first, second) => ({
    type: "array",
    items: {
      type: "array",
      prefixItems: [first, second],
      minItems: 2,
      maxItems: 2
    }
  });
  return {
    type: "array",
    minItems: 2,
    maxItems: 2,
    prefixItems: [
      // Decisions arm: list of [idx, command] tuples.
      listOfPairs(integer(), nonEmptyString()),
      // Speculations arm: list of [path, producing_idx] tuples.
      listOfPairs(nonEmptyString(), integer())
    ]
  };
})();
|
|
1611
|
+
// Mutable indirection for the multi-turn classifier call: getDecisions invokes
// sdkHooks.callClassifierMultiTurn rather than the default implementation directly.
// NOTE(review): presumably this exists so tests can substitute a stub; confirm.
var sdkHooks = { callClassifierMultiTurn: defaultCallClassifierMultiTurn };
|
|
1614
|
+
/**
 * Unbounded push-driven queue exposed as an async iterator.
 *
 * `push` hands a message to a waiting consumer or buffers it; `end` closes the queue.
 * Messages buffered before `end` are still delivered; pushes after `end` are dropped.
 */
var UserMessageQueue = class {
  // Resolve callbacks of consumers currently awaiting a message.
  resolvers = [];
  // Messages pushed while no consumer was waiting.
  pending = [];
  closed = false;

  /** Deliver `msg` to the oldest waiting consumer, or buffer it. No-op once closed. */
  push(msg) {
    if (this.closed) return;
    const waiter = this.resolvers.shift();
    if (waiter) {
      waiter(msg);
    } else {
      this.pending.push(msg);
    }
  }

  /** Close the queue and wake every waiting consumer with the `null` end marker. */
  end() {
    if (this.closed) return;
    this.closed = true;
    for (const waiter of this.resolvers.splice(0)) waiter(null);
  }

  /** Yield buffered and future messages until the queue is closed and drained. */
  async *iterator() {
    for (;;) {
      while (this.pending.length > 0) {
        yield this.pending.shift();
      }
      if (this.closed) return;
      const msg = await new Promise((resolve) => {
        this.resolvers.push(resolve);
      });
      // `null` is the end-of-stream marker sent by end().
      if (msg === null) return;
      yield msg;
    }
  }
};
|
|
1644
|
+
/**
 * Wrap plain text in the user-message envelope that is pushed onto the prompt queue.
 *
 * @param {string} text - Message body.
 * @returns {object} Envelope with a single text content block, a null
 *   `parent_tool_use_id`, and an empty `session_id`.
 */
function makeUserMessage(text) {
  const message = {
    role: "user",
    content: [{ type: "text", text }]
  };
  return { type: "user", message, parent_tool_use_id: null, session_id: "" };
}
|
|
1655
|
+
/**
 * Append the text carried by a stream message to `out`.
 *
 * Only messages of type "assistant" or "user" are inspected. String content is appended
 * whole; for array content, each object block with a string `text` field contributes
 * that text. Anything else is ignored.
 *
 * @param {object} message - Stream message.
 * @param {string[]} out - Accumulator, mutated in place.
 * @returns {void}
 */
function collectAssistantText(message, out) {
  const textBearingTypes = ["assistant", "user"];
  if (!textBearingTypes.includes(message.type)) return;

  const content = message.message?.content;
  if (typeof content === "string") {
    out.push(content);
    return;
  }
  if (!Array.isArray(content)) return;

  for (const block of content) {
    const isObjectBlock = Boolean(block) && typeof block === "object";
    if (!isObjectBlock || !("text" in block)) continue;
    const blockText = block.text;
    if (typeof blockText === "string") out.push(blockText);
  }
}
|
|
1672
|
+
/**
 * Emit a debug summary of one stream message: a header line with elapsed time, sequence
 * number and type, then one line per selected attribute that is present (non-nullish).
 * Attribute values are JSON-encoded and truncated to 240 characters.
 *
 * @param {object} message - Stream message.
 * @param {number} msgCount - Sequence number shown in the header.
 * @param {number} elapsed - Elapsed seconds shown in the header.
 * @param {(line: string) => void} emit - Log sink.
 * @returns {void}
 */
function dumpMessageForDebug(message, msgCount, elapsed, emit) {
  const reportedAttrs = [
    "subtype",
    "session_id",
    "stop_reason",
    "stop_sequence",
    "duration_ms",
    "duration_api_ms",
    "num_turns",
    "is_error",
    "total_cost_usd",
    "usage"
  ];
  const msgType = typeof message.type === "string" ? message.type : "unknown";
  emit(`DEBUG[${elapsed.toFixed(1).padStart(6)}s] msg #${msgCount}: ${msgType}`);
  reportedAttrs.forEach((attr) => {
    const value = message[attr];
    if (value === void 0 || value === null) return;
    const encoded = JSON.stringify(value);
    const shown = encoded.length > 240 ? encoded.slice(0, 240) + "...(truncated)" : encoded;
    emit(`DEBUG .${attr} = ${shown}`);
  });
}
|
|
1696
|
+
/**
 * Run every batch through one streaming classifier session: one user turn per batch,
 * plus at most one retry turn for a batch when `decideRetry` reports failed rewrites.
 *
 * The next batch is enqueued only after the previous batch's result message arrives,
 * so `getFilesSpeculatedFor` is evaluated lazily with whatever state `decideRetry` has
 * recorded by then.
 *
 * @param {{systemPrompt: string, intents: string[], label: string}} mode - Mode descriptor.
 * @param {string[]} sourceRoots - Source roots substituted into the system prompt.
 * @param {Array<Array>} batches - Batches to classify; each is JSON-serialized into its turn.
 * @param {string[][]} perBatchFilesPresent - Guaranteed files per batch (same length as `batches`).
 * @param {(batchIdx: number) => {path: string, producingIdx: number}[]} getFilesSpeculatedFor
 *   - Speculated files for the 0-based batch index, read when that batch is enqueued.
 * @param {(batchNo: number, primaryText: string) => object[] | null} decideRetry
 *   - Called with the 1-based batch number and primary response text; a non-empty
 *     return value triggers one retry turn.
 * @param {{model?: string, debug?: boolean, emit?: (line: string) => void}} opts
 * @returns {Promise<{primary: string, retry: string | null}[]>} One entry per batch.
 * @throws {ClassifierError} On a length mismatch, when the SDK cannot be imported, or
 *   when the stream ends before every batch produced a result. Errors thrown by
 *   `decideRetry` or by the SDK stream propagate unchanged.
 */
async function defaultCallClassifierMultiTurn(mode, sourceRoots, batches, perBatchFilesPresent, getFilesSpeculatedFor, decideRetry, opts) {
  if (batches.length === 0) return [];
  if (perBatchFilesPresent.length !== batches.length) {
    throw new ClassifierError(
      `internal: perBatchFilesPresent length (${perBatchFilesPresent.length}) must match batches length (${batches.length})`
    );
  }
  const model = opts.model || CLASSIFIER_MODEL;
  const debug = opts.debug ?? false;
  const emit = opts.emit;
  const systemPrompt = composeSystemPrompt(
    mode.systemPrompt,
    mode.intents,
    sourceRoots
  );
  // The SDK is imported lazily so a missing package surfaces as a ClassifierError.
  let query;
  try {
    ({ query } = await import("@anthropic-ai/claude-agent-sdk"));
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    throw new ClassifierError(
      `@anthropic-ai/claude-agent-sdk not installed: ${msg}. Try: npm install @anthropic-ai/claude-agent-sdk`
    );
  }
  if (debug && emit) {
    emit("DEBUG --- multi-turn classifier: pre-flight ---");
    emit(`DEBUG model: ${model}`);
    emit(`DEBUG mode: ${mode.label}`);
    emit(
      `DEBUG system prompt: ${systemPrompt.length.toLocaleString()} chars (~${Math.floor(systemPrompt.length / 4).toLocaleString()} tokens)`
    );
    emit(`DEBUG batches: ${batches.length}`);
  }
  const queue = new UserMessageQueue();
  const enqueueBatch = (batchIdx2) => {
    queue.push(
      makeUserMessage(
        buildBatchUserMessage(
          batchIdx2 + 1,
          batches.length,
          perBatchFilesPresent[batchIdx2],
          getFilesSpeculatedFor(batchIdx2),
          JSON.stringify(batches[batchIdx2])
        )
      )
    );
  };
  enqueueBatch(0);
  const stderrCb = debug && emit ? (line) => emit(`DEBUG[sdk-stderr] ${line.replace(/\s+$/, "")}`) : void 0;
  const q = query({
    prompt: queue.iterator(),
    options: {
      model,
      systemPrompt,
      tools: [],
      allowedTools: [],
      settingSources: [],
      thinking: { type: "enabled", budgetTokens: 8e3 },
      outputFormat: {
        type: "json_schema",
        schema: CLASSIFIER_RESPONSE_SCHEMA
      },
      // Sessions are persisted to ~/.claude/projects/ by the SDK. We don't
      // need cross-process resume (cached per-batch responses serve that
      // role); within a single run, streaming input keeps the same
      // subprocess + connection alive, so we don't depend on disk-resume.
      env: {
        ...process.env,
        DISABLE_COMPACT: "1",
        DISABLE_AUTO_COMPACT: "1",
        CLAUDE_CODE_MAX_OUTPUT_TOKENS: "64000"
      },
      stderr: stderrCb
    }
  });
  const results = batches.map(() => ({
    primary: "",
    retry: null
  }));
  let batchIdx = 0;
  let inRetry = false;
  let currentParts = [];
  const overallStart = Date.now();
  let turnStart = Date.now();
  let msgCount = 0;
  try {
    for await (const message of q) {
      msgCount++;
      if (debug && emit) {
        const elapsed = (Date.now() - overallStart) / 1e3;
        dumpMessageForDebug(message, msgCount, elapsed, emit);
      }
      collectAssistantText(message, currentParts);
      const m = message;
      // A "result" message closes the current turn; everything before it is accumulated text.
      if (m.type !== "result") continue;
      const text = currentParts.join("");
      currentParts = [];
      const turnElapsed = (Date.now() - turnStart) / 1e3;
      turnStart = Date.now();
      if (!inRetry) {
        results[batchIdx].primary = text;
        if (emit) {
          emit(
            `INFO classifier batch ${batchIdx + 1}/${batches.length} primary done (${turnElapsed.toFixed(1)}s, ${text.length.toLocaleString()} chars)`
          );
        }
        const failures = decideRetry(batchIdx + 1, text);
        if (failures && failures.length > 0) {
          if (emit) {
            emit(
              `INFO classifier batch ${batchIdx + 1}/${batches.length}: ${failures.length} rewrite(s) failed validation \u2014 retrying`
            );
          }
          // Stay on the same batch: the next result message is the retry response.
          inRetry = true;
          queue.push(makeUserMessage(buildRetryUserMessage(failures)));
          continue;
        }
      } else {
        results[batchIdx].retry = text;
        if (emit) {
          emit(
            `INFO classifier batch ${batchIdx + 1}/${batches.length} retry done (${turnElapsed.toFixed(1)}s, ${text.length.toLocaleString()} chars)`
          );
        }
        inRetry = false;
      }
      batchIdx++;
      if (batchIdx >= batches.length) {
        // Closing the input queue lets the stream finish once the last turn is done.
        queue.end();
        continue;
      }
      enqueueBatch(batchIdx);
    }
  } finally {
    // Always close the input queue (idempotent), so that an abnormal exit from the loop
    // (decideRetry threw, the stream errored) does not leave the prompt iterator
    // waiting forever on a message that will never arrive.
    queue.end();
  }
  if (batchIdx < batches.length) {
    throw new ClassifierError(
      `multi-turn classifier ended after ${batchIdx} of ${batches.length} batches; expected one result message per batch`
    );
  }
  return results;
}
|
|
1835
|
+
|
|
1836
|
+
// src/llm-classifier/index.ts
|
|
1837
|
+
/**
 * Split a batch's event indices by tool kind.
 *
 * @param {Array<[number, string, ...unknown[]]>} batch - Tuples whose first two elements
 *   are the event index and the tool name.
 * @returns {{bash: Set<number>, payload: Set<number>, nonBash: Set<number>}} `payload`
 *   holds every index; `bash` those whose tool is "Bash"; `nonBash` the rest.
 */
function batchIndices(batch) {
  const payload = /* @__PURE__ */ new Set();
  const bash = /* @__PURE__ */ new Set();
  const nonBash = /* @__PURE__ */ new Set();
  for (const [idx, tool] of batch) {
    payload.add(idx);
    const bucket = tool === "Bash" ? bash : nonBash;
    bucket.add(idx);
  }
  return { bash, payload, nonBash };
}
|
|
1848
|
+
/**
 * Parse a batch's primary response and pull out the rewrite failures eligible for retry.
 *
 * @param {string} primary - Raw primary response text.
 * @param {Set<number>} bashIdxs - Bash indices in this batch.
 * @param {Map<number, string>} originalByIdx - Original command per Bash index.
 * @param {Set<number>} payloadIdxs - Every index in this batch.
 * @param {Set<number>} nonBashPayloadIdxs - Non-Bash indices in this batch.
 * @param {(line: string) => void} [emit] - Optional log sink.
 * @returns {{decisions: Map, bad: object[], failures: object[], speculations: object[]}}
 *   `failures` holds the `retry` payload of every bad entry that carries one.
 * @throws {ClassifierError} Propagated from parseBatchResponse.
 */
function parseBatchPrimary(primary, bashIdxs, originalByIdx, payloadIdxs, nonBashPayloadIdxs, emit) {
  const { decisions, speculations, bad } = parseBatchResponse(
    primary,
    bashIdxs,
    originalByIdx,
    payloadIdxs,
    nonBashPayloadIdxs,
    emit
  );
  const failures = bad.filter((item) => item.retry).map((item) => item.retry);
  return { decisions, bad, failures, speculations };
}
|
|
1868
|
+
/**
 * Fold a retry response into the primary parse of the same batch.
 *
 * Every decision in the retry response overwrites (or adds) the entry for its index in
 * `primaryDecisions`, which is mutated in place. Primary bad-entries whose retry index
 * was corrected are dropped; bad entries from the retry response are appended. If the
 * retry response cannot be parsed, the primary results are returned untouched.
 *
 * @param {Map<number, object>} primaryDecisions - Decisions from the primary response.
 * @param {object[]} bad - Bad entries from the primary response.
 * @param {string} retryRaw - Raw retry response text.
 * @param {Set<number>} bashIdxs - Bash indices in this batch.
 * @param {Map<number, string>} originalByIdx - Original command per Bash index.
 * @param {Set<number>} payloadIdxs - Every index in this batch.
 * @param {Set<number>} nonBashPayloadIdxs - Non-Bash indices in this batch.
 * @param {(line: string) => void} [emit] - Optional log sink.
 * @returns {{decisions: Map<number, object>, bad: object[]}}
 */
function mergeRetryDecisions(primaryDecisions, bad, retryRaw, bashIdxs, originalByIdx, payloadIdxs, nonBashPayloadIdxs, emit) {
  let retryDecisions;
  let retryBad;
  try {
    ({ decisions: retryDecisions, bad: retryBad } = parseBatchResponse(
      retryRaw,
      bashIdxs,
      originalByIdx,
      payloadIdxs,
      nonBashPayloadIdxs,
      emit
    ));
  } catch (e) {
    // Best effort: an unusable retry response must not discard the primary results.
    if (emit) {
      const msg = e instanceof Error ? e.message : String(e);
      emit(
        `WARN classifier retry response failed shape check (${msg}); dropping retry corrections for this batch`
      );
    }
    return { decisions: primaryDecisions, bad };
  }

  const corrected = /* @__PURE__ */ new Set(retryDecisions.keys());
  retryDecisions.forEach((decision, idx) => {
    primaryDecisions.set(idx, decision);
  });
  const unresolved = bad.filter((item) => !(item.retry && corrected.has(item.retry.idx)));
  return { decisions: primaryDecisions, bad: [...unresolved, ...retryBad] };
}
|
|
1901
|
+
/**
 * Re-check primary speculations against the final (post-retry) decisions.
 *
 * A speculation survives when its producing index is in the batch payload and is either
 * a non-Bash event or a Bash event whose final decision has intent "execute".
 *
 * @param {{path: string, producingIdx: number}[]} primarySpeculations
 * @param {Map<number, {intent: string}>} finalDecisions - Decisions after retry merge.
 * @param {Set<number>} payloadIdxs - Every index in this batch.
 * @param {Set<number>} nonBashPayloadIdxs - Non-Bash indices in this batch.
 * @returns {{surviving: object[], dropped: object[]}} Both lists keep input order.
 */
function revalidateSpeculationsAfterRetry(primarySpeculations, finalDecisions, payloadIdxs, nonBashPayloadIdxs) {
  const producerStillValid = (idx) => {
    if (!payloadIdxs.has(idx)) return false;
    if (nonBashPayloadIdxs.has(idx)) return true;
    return finalDecisions.get(idx)?.intent === "execute";
  };
  const surviving = [];
  const dropped = [];
  for (const speculation of primarySpeculations) {
    const target = producerStillValid(speculation.producingIdx) ? surviving : dropped;
    target.push(speculation);
  }
  return { surviving, dropped };
}
|
|
1922
|
+
/**
 * Flatten speculations from earlier batches into the list shown to the classifier.
 *
 * The first batch to claim a path determines its producing index; paths that are already
 * guaranteed to exist are left out. Output follows first-claim order.
 *
 * @param {{path: string, producingIdx: number}[][]} priorSpeculationsByBatch
 * @param {Set<string>} guaranteed - Paths known to be present.
 * @returns {{path: string, producingIdx: number}[]}
 */
function computeDisplayedSpeculated(priorSpeculationsByBatch, guaranteed) {
  const firstProducerByPath = /* @__PURE__ */ new Map();
  for (const batchSpeculations of priorSpeculationsByBatch) {
    for (const { path: filePath, producingIdx } of batchSpeculations) {
      if (!firstProducerByPath.has(filePath)) {
        firstProducerByPath.set(filePath, producingIdx);
      }
    }
  }
  return [...firstProducerByPath]
    .filter(([filePath]) => !guaranteed.has(filePath))
    .map(([filePath, producingIdx]) => ({ path: filePath, producingIdx }));
}
|
|
1937
|
+
async function getDecisions(cacheDir, mode, events, sourceRoots, opts = {}) {
|
|
1938
|
+
const useCache = opts.useCache ?? true;
|
|
1939
|
+
const overrideCache = opts.overrideCache ?? false;
|
|
1940
|
+
const debug = opts.debug ?? false;
|
|
1941
|
+
const emit = opts.emit;
|
|
1942
|
+
const initialState = opts.initialState ?? [];
|
|
1943
|
+
const payloadIdxs = payloadIndices(events);
|
|
1944
|
+
const bashIdxs = bashIndices(events);
|
|
1945
|
+
const nonBashIdxs = /* @__PURE__ */ new Set();
|
|
1946
|
+
for (const i of payloadIdxs) if (!bashIdxs.has(i)) nonBashIdxs.add(i);
|
|
1947
|
+
if (bashIdxs.size === 0) return /* @__PURE__ */ new Map();
|
|
1948
|
+
const payload = buildPayload(events);
|
|
1949
|
+
const batches = buildBatches(payload);
|
|
1950
|
+
const originalByIdx = /* @__PURE__ */ new Map();
|
|
1951
|
+
for (const [idx, tool, inp] of payload) {
|
|
1952
|
+
if (tool === "Bash") originalByIdx.set(idx, inp);
|
|
1953
|
+
}
|
|
1954
|
+
const perBatchFilesPresent = [];
|
|
1955
|
+
{
|
|
1956
|
+
const cumulative = new Set(initialState);
|
|
1957
|
+
for (let i = 0; i < batches.length; i++) {
|
|
1958
|
+
perBatchFilesPresent.push(Array.from(cumulative).sort());
|
|
1959
|
+
for (const [, tool, inp] of batches[i]) {
|
|
1960
|
+
if (tool === "Write" || tool === "Edit") cumulative.add(inp);
|
|
1961
|
+
}
|
|
1962
|
+
}
|
|
1963
|
+
}
|
|
1964
|
+
if (emit) {
|
|
1965
|
+
const sizes = batches.map((b) => b.length).join(",");
|
|
1966
|
+
const bashPerBatch = batches.map(
|
|
1967
|
+
(b) => b.filter((e) => e[1] === "Bash").length
|
|
1968
|
+
);
|
|
1969
|
+
emit(
|
|
1970
|
+
`INFO classifier: ${batches.length} batch(es) over ${payload.length} payload events (${bashIdxs.size} Bash, ${nonBashIdxs.size} context); sizes=[${sizes}]; bash-per-batch=[${bashPerBatch.join(",")}]`
|
|
1971
|
+
);
|
|
1972
|
+
emit(
|
|
1973
|
+
`INFO classifier model=${CLASSIFIER_MODEL}, mode=${mode.label}, source-roots=${sourceRoots.length}, initial-state-entries=${initialState.length}, cache-dir=${cacheDir}`
|
|
1974
|
+
);
|
|
1975
|
+
}
|
|
1976
|
+
const { key, components } = cacheKeyComponents(
|
|
1977
|
+
mode.systemPrompt,
|
|
1978
|
+
mode.intents,
|
|
1979
|
+
sourceRoots,
|
|
1980
|
+
initialState,
|
|
1981
|
+
payload
|
|
1982
|
+
);
|
|
1983
|
+
const cachedEntries = batches.map(() => null);
|
|
1984
|
+
if (useCache && !overrideCache) {
|
|
1985
|
+
const result = loadAllBatches(cacheDir, key, batches.length, components);
|
|
1986
|
+
if (result.kind === "hit") {
|
|
1987
|
+
for (let i = 0; i < batches.length; i++) cachedEntries[i] = result.entries[i];
|
|
1988
|
+
if (emit)
|
|
1989
|
+
emit(`INFO classifier cache hit: ${batches.length} batch(es) reused`);
|
|
1990
|
+
} else if (emit) {
|
|
1991
|
+
emit(`INFO classifier cache miss: ${describeMiss(result, batches.length)}`);
|
|
1992
|
+
}
|
|
1993
|
+
} else if (overrideCache && emit) {
|
|
1994
|
+
emit(`INFO classifier cache override: recomputing all ${batches.length} batch(es)`);
|
|
1995
|
+
}
|
|
1996
|
+
if (opts.stats) {
|
|
1997
|
+
opts.stats.totalBatches = batches.length;
|
|
1998
|
+
opts.stats.cacheHits = cachedEntries.filter((e) => e !== null).length;
|
|
1999
|
+
}
|
|
2000
|
+
const firstMissIdx = cachedEntries.findIndex((e) => e === null);
|
|
2001
|
+
const liveStart = firstMissIdx === -1 ? batches.length : firstMissIdx;
|
|
2002
|
+
const liveBatches = batches.slice(liveStart);
|
|
2003
|
+
const perBatchSpeculations = batches.map(() => []);
|
|
2004
|
+
for (let i = 0; i < liveStart; i++) {
|
|
2005
|
+
const entry = cachedEntries[i];
|
|
2006
|
+
const { bash: batchBashIdxs, payload: batchPayloadIdxs, nonBash: nonBashPayloadIdxs } = batchIndices(batches[i]);
|
|
2007
|
+
try {
|
|
2008
|
+
const parsed = parseBatchResponse(
|
|
2009
|
+
entry.primary,
|
|
2010
|
+
batchBashIdxs,
|
|
2011
|
+
originalByIdx,
|
|
2012
|
+
batchPayloadIdxs,
|
|
2013
|
+
nonBashPayloadIdxs,
|
|
2014
|
+
emit
|
|
2015
|
+
);
|
|
2016
|
+
perBatchSpeculations[i] = parsed.speculations;
|
|
2017
|
+
} catch (e) {
|
|
2018
|
+
if (emit) {
|
|
2019
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
2020
|
+
emit(
|
|
2021
|
+
`WARN classifier cached batch ${i + 1} failed reparse: ${msg}; treating speculations as empty`
|
|
2022
|
+
);
|
|
2023
|
+
}
|
|
2024
|
+
}
|
|
2025
|
+
}
|
|
2026
|
+
const stagedPrimaryParses = batches.map(() => null);
|
|
2027
|
+
const decideRetry = (batchNo, primaryText) => {
|
|
2028
|
+
const i = liveStart + batchNo - 1;
|
|
2029
|
+
const { bash: batchBashIdxs, payload: batchPayloadIdxs, nonBash: batchNonBashIdxs } = batchIndices(batches[i]);
|
|
2030
|
+
let parsed;
|
|
2031
|
+
try {
|
|
2032
|
+
parsed = parseBatchPrimary(
|
|
2033
|
+
primaryText,
|
|
2034
|
+
batchBashIdxs,
|
|
2035
|
+
originalByIdx,
|
|
2036
|
+
batchPayloadIdxs,
|
|
2037
|
+
batchNonBashIdxs,
|
|
2038
|
+
emit
|
|
2039
|
+
);
|
|
2040
|
+
} catch (e) {
|
|
2041
|
+
throw e;
|
|
2042
|
+
}
|
|
2043
|
+
stagedPrimaryParses[i] = { decisions: parsed.decisions, bad: parsed.bad };
|
|
2044
|
+
perBatchSpeculations[i] = parsed.speculations;
|
|
2045
|
+
if (parsed.failures.length > 0) {
|
|
2046
|
+
return parsed.failures;
|
|
2047
|
+
}
|
|
2048
|
+
return null;
|
|
2049
|
+
};
|
|
2050
|
+
let liveResults = [];
|
|
2051
|
+
if (liveBatches.length > 0) {
|
|
2052
|
+
if (emit) {
|
|
2053
|
+
emit(
|
|
2054
|
+
`INFO classifier dispatching ${liveBatches.length} live batch(es) (cache hits for ${liveStart} batch(es) preceding)`
|
|
2055
|
+
);
|
|
2056
|
+
}
|
|
2057
|
+
const liveFilesPresent = perBatchFilesPresent.slice(liveStart);
|
|
2058
|
+
const getLiveFilesSpeculated = (liveIdx) => {
|
|
2059
|
+
const absBatchIdx = liveStart + liveIdx;
|
|
2060
|
+
const guaranteedSet = new Set(perBatchFilesPresent[absBatchIdx]);
|
|
2061
|
+
return computeDisplayedSpeculated(
|
|
2062
|
+
perBatchSpeculations.slice(0, absBatchIdx),
|
|
2063
|
+
guaranteedSet
|
|
2064
|
+
);
|
|
2065
|
+
};
|
|
2066
|
+
try {
|
|
2067
|
+
liveResults = await sdkHooks.callClassifierMultiTurn(
|
|
2068
|
+
mode,
|
|
2069
|
+
sourceRoots,
|
|
2070
|
+
liveBatches,
|
|
2071
|
+
liveFilesPresent,
|
|
2072
|
+
getLiveFilesSpeculated,
|
|
2073
|
+
decideRetry,
|
|
2074
|
+
{ debug, emit }
|
|
2075
|
+
);
|
|
2076
|
+
} finally {
|
|
2077
|
+
flushLiveBatchesToCache(cacheDir, liveStart, liveResults);
|
|
2078
|
+
}
|
|
2079
|
+
}
|
|
2080
|
+
const allEntries = [];
|
|
2081
|
+
for (let i = 0; i < batches.length; i++) {
|
|
2082
|
+
if (i < liveStart) {
|
|
2083
|
+
allEntries.push(cachedEntries[i]);
|
|
2084
|
+
} else {
|
|
2085
|
+
const lr = liveResults[i - liveStart];
|
|
2086
|
+
if (!lr) {
|
|
2087
|
+
throw new ClassifierError(
|
|
2088
|
+
`internal: missing live result for batch ${i + 1}/${batches.length}`
|
|
2089
|
+
);
|
|
2090
|
+
}
|
|
2091
|
+
allEntries.push({ primary: lr.primary, retry: lr.retry });
|
|
2092
|
+
}
|
|
2093
|
+
}
|
|
2094
|
+
const good = /* @__PURE__ */ new Map();
|
|
2095
|
+
const allBad = [];
|
|
2096
|
+
for (let i = 0; i < batches.length; i++) {
|
|
2097
|
+
const batch = batches[i];
|
|
2098
|
+
const { bash: batchBashIdxs, payload: batchPayloadIdxs, nonBash: batchNonBashIdxs } = batchIndices(batch);
|
|
2099
|
+
const entry = allEntries[i];
|
|
2100
|
+
let parsed = parseBatchResponse(
|
|
2101
|
+
entry.primary,
|
|
2102
|
+
batchBashIdxs,
|
|
2103
|
+
originalByIdx,
|
|
2104
|
+
batchPayloadIdxs,
|
|
2105
|
+
batchNonBashIdxs,
|
|
2106
|
+
emit
|
|
2107
|
+
);
|
|
2108
|
+
if (entry.retry !== null) {
|
|
2109
|
+
const merged = mergeRetryDecisions(
|
|
2110
|
+
parsed.decisions,
|
|
2111
|
+
parsed.bad,
|
|
2112
|
+
entry.retry,
|
|
2113
|
+
batchBashIdxs,
|
|
2114
|
+
originalByIdx,
|
|
2115
|
+
batchPayloadIdxs,
|
|
2116
|
+
batchNonBashIdxs,
|
|
2117
|
+
emit
|
|
2118
|
+
);
|
|
2119
|
+
const reval = revalidateSpeculationsAfterRetry(
|
|
2120
|
+
parsed.speculations,
|
|
2121
|
+
merged.decisions,
|
|
2122
|
+
batchPayloadIdxs,
|
|
2123
|
+
batchNonBashIdxs
|
|
2124
|
+
);
|
|
2125
|
+
if (emit && reval.dropped.length > 0) {
|
|
2126
|
+
for (const d of reval.dropped) {
|
|
2127
|
+
emit(
|
|
2128
|
+
`WARN classifier dropped speculation after retry: ${d.path} (from idx ${d.producingIdx}) \u2014 producer omitted on retry`
|
|
2129
|
+
);
|
|
2130
|
+
}
|
|
2131
|
+
}
|
|
2132
|
+
parsed = {
|
|
2133
|
+
decisions: merged.decisions,
|
|
2134
|
+
bad: merged.bad,
|
|
2135
|
+
speculations: reval.surviving
|
|
2136
|
+
};
|
|
2137
|
+
perBatchSpeculations[i] = reval.surviving;
|
|
2138
|
+
}
|
|
2139
|
+
if (debug && emit && parsed.speculations.length > 0) {
|
|
2140
|
+
const sample = parsed.speculations.slice(0, 3).map((s) => `${s.path} (from ${s.producingIdx})`).join(", ");
|
|
2141
|
+
const more = parsed.speculations.length > 3 ? `, +${parsed.speculations.length - 3} more` : "";
|
|
2142
|
+
emit(
|
|
2143
|
+
`DEBUG classifier batch ${i + 1}/${batches.length} speculated ${parsed.speculations.length} file(s): ${sample}${more}`
|
|
2144
|
+
);
|
|
2145
|
+
}
|
|
2146
|
+
for (const [idx, d] of parsed.decisions.entries()) good.set(idx, d);
|
|
2147
|
+
for (const b of parsed.bad) allBad.push(b);
|
|
2148
|
+
}
|
|
2149
|
+
for (const b of allBad) {
|
|
2150
|
+
if (emit) {
|
|
2151
|
+
emit(`WARN classifier dropped entry: ${JSON.stringify(b.raw)} \u2014 ${b.error}`);
|
|
2152
|
+
}
|
|
2153
|
+
}
|
|
2154
|
+
for (const idx of bashIdxs) {
|
|
2155
|
+
if (!good.has(idx)) {
|
|
2156
|
+
good.set(idx, {
|
|
2157
|
+
intent: "skip",
|
|
2158
|
+
originalCommand: originalByIdx.get(idx) ?? "",
|
|
2159
|
+
reason: "classifier omitted from replay list"
|
|
2160
|
+
});
|
|
2161
|
+
}
|
|
2162
|
+
}
|
|
2163
|
+
for (const idx of nonBashIdxs) {
|
|
2164
|
+
good.set(idx, { intent: "execute", command: "", reason: "native" });
|
|
2165
|
+
}
|
|
2166
|
+
if (useCache && liveStart < batches.length) {
|
|
2167
|
+
const lastEventTs = events[events.length - 1]?.timestamp;
|
|
2168
|
+
commitMeta(cacheDir, key, batches.length, components, lastEventTs);
|
|
2169
|
+
if (emit) emit(`INFO classifier cache meta committed at ${path7.join(cacheDir, "meta.json")}`);
|
|
2170
|
+
}
|
|
2171
|
+
if (debug && emit) {
|
|
2172
|
+
const histo = { execute: 0, skip: 0, rewritten: 0 };
|
|
2173
|
+
for (const idx of bashIdxs) {
|
|
2174
|
+
const d = good.get(idx);
|
|
2175
|
+
if (!d) continue;
|
|
2176
|
+
histo[d.intent] = (histo[d.intent] || 0) + 1;
|
|
2177
|
+
if (d.intent === "execute" && d.command !== (originalByIdx.get(idx) ?? "")) {
|
|
2178
|
+
histo.rewritten++;
|
|
2179
|
+
}
|
|
2180
|
+
}
|
|
2181
|
+
emit(`DEBUG bash decision histogram: ${JSON.stringify(histo)}`);
|
|
2182
|
+
const sortedBash = Array.from(bashIdxs).sort((a, b) => a - b);
|
|
2183
|
+
const sampled = [...sortedBash.slice(0, 5), ...sortedBash.slice(-5)];
|
|
2184
|
+
for (const idx of sampled) {
|
|
2185
|
+
const d = good.get(idx);
|
|
2186
|
+
if (d) {
|
|
2187
|
+
const cmd = d.intent === "execute" ? d.command : d.originalCommand;
|
|
2188
|
+
emit(
|
|
2189
|
+
`DEBUG idx ${idx}: ${d.intent} \u2014 ${cmd.slice(0, 80)} (${d.reason})`
|
|
2190
|
+
);
|
|
2191
|
+
}
|
|
2192
|
+
}
|
|
2193
|
+
}
|
|
2194
|
+
return good;
|
|
2195
|
+
}
|
|
2196
|
+
// Human-readable names for the cache-key components, used when reporting
// which classifier inputs changed since the cached run.
var COMPONENT_LABELS = {
  systemPrompt: "classifier prompt",
  intents: "custom intents",
  sourceRoots: "source roots",
  initialState: "initial-state file list",
  events: "session logs"
};

/**
 * Build the message explaining why the classifier cache could not be reused.
 *
 * @param {object} result - Cache lookup outcome. `result.reason` selects the
 *   message; depending on the reason the function also reads
 *   `changedComponents` ("key-mismatch"), `stored`/`expected`
 *   ("count-mismatch") or `batchNo` ("missing-batch").
 * @param {number} numBatches - Number of batches that will be recomputed.
 * @returns {string} One-line description of the miss.
 */
function describeMiss(result, numBatches) {
  switch (result.reason) {
    case "no-meta":
      return `no prior cache found; computing ${numBatches} batch(es) from scratch`;
    case "key-mismatch": {
      // A missing breakdown is treated like an empty one (legacy meta).
      const changedComponents = result.changedComponents ?? [];
      const tail = `wiping stale entries and recomputing ${numBatches} batch(es)`;
      if (changedComponents.length === 0) {
        return `cached run used a legacy meta with no component breakdown; ${tail}`;
      }
      // Fall back to the raw component name so an unrecognised component never
      // shows up as "undefined" in the message.
      const labels = changedComponents
        .map((c) => (Object.hasOwn(COMPONENT_LABELS, c) ? COMPONENT_LABELS[c] : String(c)))
        .join(", ");
      return `inputs changed (${labels}); ${tail}`;
    }
    case "count-mismatch":
      return `batch count differs (cached ${result.stored}, now ${result.expected}); wiping and recomputing ${numBatches} batch(es)`;
    case "missing-batch":
      return `meta said hit but batch ${result.batchNo} file is missing; wiping and recomputing ${numBatches} batch(es)`;
    default:
      // Unknown reason: still return a usable string instead of undefined.
      return `cache unusable (${String(result.reason)}); recomputing ${numBatches} batch(es)`;
  }
}
|
|
2222
|
+
/**
 * Write every usable live batch result to the cache.
 *
 * Slots that are missing (falsy) or whose `primary` is the empty string are
 * skipped. Batch numbers passed to `writeBatchEntry` are 1-based and offset by
 * `liveStart`.
 *
 * @param {string} cacheDir - Passed through to `writeBatchEntry`.
 * @param {number} liveStart - Absolute index of the first live batch.
 * @param {Array<{primary: string, retry: *}|null|undefined>} liveResults
 */
function flushLiveBatchesToCache(cacheDir, liveStart, liveResults) {
  for (const [offset, result] of liveResults.entries()) {
    const hasPrimary = Boolean(result) && result.primary !== "";
    if (hasPrimary) {
      writeBatchEntry(cacheDir, liveStart + offset + 1, result.primary, result.retry);
    }
  }
}
|
|
2230
|
+
|
|
2231
|
+
// src/bucketer.ts
|
|
2232
|
+
import * as path8 from "node:path";
|
|
2233
|
+
/**
 * Assign an event to a replay bucket without consulting the classifier.
 *
 * Returns "action" for Write/Edit and "checkpoint" for Read when both the
 * event's cwd and the tool's absolute `file_path` lie inside a source root;
 * everything else (errors, missing cwd, other tools, out-of-scope paths) is
 * "skip".
 *
 * @param {object} event - Reads `isError` and `cwd`; tool input comes from `asToolInput(event)`.
 * @param {string[]} sourceRoots
 * @returns {"action"|"checkpoint"|"skip"}
 */
function classify(event, sourceRoots) {
  const eventUsable = !event.isError && Boolean(event.cwd) && pathInsideAny(event.cwd, sourceRoots);
  if (!eventUsable) {
    return "skip";
  }
  const input = asToolInput(event);
  if (input === null) {
    return "skip";
  }
  let bucket;
  switch (input.tool) {
    case "Write":
    case "Edit":
      bucket = "action";
      break;
    case "Read":
      bucket = "checkpoint";
      break;
    default:
      return "skip";
  }
  // All three file tools share the same target validation.
  const filePath = input.file_path;
  const targetInScope = Boolean(filePath) && path8.isAbsolute(filePath) && pathInsideAny(filePath, sourceRoots);
  return targetInScope ? bucket : "skip";
}
|
|
2253
|
+
|
|
2254
|
+
// src/snapshots.ts
|
|
2255
|
+
import * as fs6 from "node:fs";
|
|
2256
|
+
import * as os2 from "node:os";
|
|
2257
|
+
import * as path9 from "node:path";
|
|
2258
|
+
// Default root of the file-history tree: <home>/.claude/file-history.
var DEFAULT_FILE_HISTORY_ROOT = path9.join(os2.homedir(), ".claude", "file-history");
|
|
2263
|
+
/**
 * Index of the file-history snapshots recorded in session logs.
 *
 * Maps each tracked path to its known backups (ordered by backup time) and
 * resolves backup file names to files under `fileHistoryRoot` on demand.
 */
var SnapshotIndex = class _SnapshotIndex {
  // Map: relative path → sorted list of [backupTime, backupFileName] (no dupes)
  index;
  // Directory tree that resolveBackup searches for backup files.
  fileHistoryRoot;
  // Memo: backupFileName → resolved path, or null when the search found nothing.
  backupCache = /* @__PURE__ */ new Map();
  // Dedupe warnings emitted from findAtOrBefore/resolveBackup so a single
  // unreadable snapshot doesn't flood the log per Read event.
  warnedKeys = /* @__PURE__ */ new Set();
  // Optional warning sink; warnings are dropped while it is unset.
  warnEmit;

  /**
   * Install the callback that receives warning lines.
   * @param {(line: string) => void} emit
   */
  setEmit(emit) {
    this.warnEmit = emit;
  }

  /**
   * Emit `message` (prefixed with "warn: ") at most once per `key`.
   * Nothing is recorded while no sink is installed, so the warning can still
   * fire after a later setEmit.
   */
  warnOnce(key, message) {
    if (!this.warnEmit) return;
    if (this.warnedKeys.has(key)) return;
    this.warnedKeys.add(key);
    this.warnEmit(`warn: ${message}`);
  }

  /**
   * @param {Map<string, Array<[string, string]>>} rawIndex - path → list of
   *   [backupTime, backupFileName] pairs; duplicates and ordering are
   *   normalised here, the input lists are not modified.
   * @param {string} [fileHistoryRoot] - Root directory searched for backups.
   */
  constructor(rawIndex, fileHistoryRoot = DEFAULT_FILE_HISTORY_ROOT) {
    this.index = /* @__PURE__ */ new Map();
    for (const [p, list] of rawIndex.entries()) {
      const seen = /* @__PURE__ */ new Set();
      const deduped = [];
      for (const pair of list) {
        // NUL separator keeps distinct (time, name) pairs from colliding.
        const key = `${pair[0]}\0${pair[1]}`;
        if (seen.has(key)) continue;
        seen.add(key);
        deduped.push([pair[0], pair[1]]);
      }
      // Order by backup time, then by file name for a stable tie-break.
      deduped.sort((a, b) => {
        if (a[0] < b[0]) return -1;
        if (a[0] > b[0]) return 1;
        if (a[1] < b[1]) return -1;
        if (a[1] > b[1]) return 1;
        return 0;
      });
      this.index.set(p, deduped);
    }
    this.fileHistoryRoot = fileHistoryRoot;
  }

  /**
   * Build an index from the `file-history-snapshot` records found in every
   * `*.jsonl` file under the given logs directories.
   *
   * Unreadable files and malformed JSON lines are reported through `emit`
   * (when provided) and skipped.
   *
   * @param {string|string[]} logsDirs - One directory or a list of them.
   * @param {string} [fileHistoryRoot] - Forwarded to the constructor.
   * @param {(line: string) => void} [emit] - Receives warning lines.
   * @returns {SnapshotIndex}
   */
  static fromLogsDir(logsDirs, fileHistoryRoot = DEFAULT_FILE_HISTORY_ROOT, emit) {
    const dirs = typeof logsDirs === "string" ? [logsDirs] : logsDirs;
    const raw = /* @__PURE__ */ new Map();
    const seenPerPath = /* @__PURE__ */ new Map();
    const jsonls = [];
    for (const dir of dirs) {
      for (const j of walkFiles(dir, {
        filter: (e) => e.isFile() && e.name.endsWith(".jsonl"),
        followSymlinks: true,
        onError: (d, err) => emit?.(`warn: snapshot index readdir failed ${d}: ${err.message}`)
      })) {
        jsonls.push(j);
      }
    }
    for (const j of jsonls) {
      let data;
      try {
        data = fs6.readFileSync(j, "utf8");
      } catch (e) {
        if (emit) {
          const msg = e instanceof Error ? e.message : String(e);
          emit(`warn: snapshot index could not read ${j}: ${msg}`);
        }
        continue;
      }
      const lines = data.split("\n");
      for (let lineNo = 0; lineNo < lines.length; lineNo++) {
        const line = lines[lineNo];
        const trimmed = line.trim();
        if (!trimmed) continue;
        let obj;
        try {
          obj = JSON.parse(trimmed);
        } catch (e) {
          if (emit) {
            const msg = e instanceof Error ? e.message : String(e);
            // Report a 1-based line number, matching the usual file:line form.
            emit(
              `warn: snapshot index skipped malformed JSON ${path9.basename(j)}:${lineNo + 1}: ${msg}`
            );
          }
          continue;
        }
        if (!obj || typeof obj !== "object") continue;
        const o = obj;
        if (o.type !== "file-history-snapshot") continue;
        const snapshot = o.snapshot;
        const backups = snapshot?.trackedFileBackups ?? {};
        for (const [p, info] of Object.entries(backups)) {
          if (!info || typeof info !== "object") continue;
          const ii = info;
          const backupTime = ii.backupTime || "";
          const backupFilename = ii.backupFileName || "";
          // Entries missing either field cannot be resolved later; drop them.
          if (!backupTime || !backupFilename) continue;
          let seen = seenPerPath.get(p);
          if (!seen) {
            seen = /* @__PURE__ */ new Set();
            seenPerPath.set(p, seen);
          }
          const key = `${backupTime}\0${backupFilename}`;
          if (seen.has(key)) continue;
          seen.add(key);
          let list = raw.get(p);
          if (!list) {
            list = [];
            raw.set(p, list);
          }
          list.push([backupTime, backupFilename]);
        }
      }
    }
    return new _SnapshotIndex(raw, fileHistoryRoot);
  }

  // Walks the file-history tree per *unique* filename (memoised in
  // backupCache, so each filename is at most one walk per index instance).
  // A first miss on a deep tree with thousands of sessions can be slow —
  // if that ever becomes a problem, switch to an eager pass that builds a
  // single filename → path map on first call.
  /**
   * Locate a backup file by name anywhere under `fileHistoryRoot`.
   *
   * @param {string} backupFilename
   * @returns {string|null} Path of the first match found, or null. Both
   *   outcomes are cached.
   */
  resolveBackup(backupFilename) {
    if (this.backupCache.has(backupFilename)) {
      return this.backupCache.get(backupFilename);
    }
    let chosen = null;
    const stack = [this.fileHistoryRoot];
    while (stack.length > 0) {
      const dir = stack.pop();
      let entries;
      try {
        entries = fs6.readdirSync(dir, { withFileTypes: true });
      } catch (e) {
        const msg = e instanceof Error ? e.message : String(e);
        this.warnOnce(
          `readdir:${dir}`,
          `file-history readdir failed ${dir}: ${msg} \u2014 snapshot resolution may miss entries under this directory`
        );
        continue;
      }
      for (const e of entries) {
        const full = path9.join(dir, e.name);
        if (e.isDirectory()) {
          stack.push(full);
        } else if (e.isFile() && e.name === backupFilename) {
          chosen = full;
          break;
        }
      }
      if (chosen) break;
    }
    if (chosen === null) {
      this.warnOnce(
        `missing:${backupFilename}`,
        `file-history backup '${backupFilename}' not found under ${this.fileHistoryRoot}`
      );
    }
    this.backupCache.set(backupFilename, chosen);
    return chosen;
  }

  /**
   * Return the contents of the newest readable backup of `relativePath` whose
   * backup time is `<= ts`.
   *
   * Backups that cannot be located or read are skipped in favour of the
   * next-oldest one.
   *
   * @param {string} relativePath - Key into the index.
   * @param {string} ts - Upper bound, compared with `<=` against the stored
   *   backup times (NOTE(review): presumably ISO 8601 strings — confirm).
   * @returns {Buffer|null} File contents, or null when nothing usable exists.
   */
  findAtOrBefore(relativePath, ts) {
    const entries = this.index.get(relativePath);
    if (!entries || entries.length === 0) return null;
    const atOrBefore = entries.filter((e) => e[0] <= ts);
    if (atOrBefore.length === 0) return null;
    // Newest candidate first; fall back to older ones on failure.
    for (let i = atOrBefore.length - 1; i >= 0; i--) {
      const [, fname] = atOrBefore[i];
      const backup = this.resolveBackup(fname);
      if (backup === null) {
        continue;
      }
      try {
        return fs6.readFileSync(backup);
      } catch (e) {
        const msg = e instanceof Error ? e.message : String(e);
        this.warnOnce(
          `read:${backup}`,
          `snapshot read failed ${backup}: ${msg} \u2014 falling through to next-oldest snapshot`
        );
        continue;
      }
    }
    return null;
  }
};
|
|
2444
|
+
|
|
2445
|
+
// src/logs.ts
|
|
2446
|
+
/**
 * Format a CLASSIFY trace line.
 *
 * @param {{index: *, ts: *, session: *, tool: *, bucket: *, reason?: *}} opts
 *   When `reason` is truthy it is appended to the bucket in parentheses.
 * @returns {string}
 */
function formatClassify(opts) {
  let bucketCell = opts.bucket;
  if (opts.reason) {
    bucketCell = `${opts.bucket} (${opts.reason})`;
  }
  const head = `CLASSIFY ${opts.index} ${opts.ts}`;
  return `${head} | ${opts.session} | ${opts.tool} | ${bucketCell}`;
}
|
|
2451
|
+
/**
 * Format an APPLY trace line.
 *
 * @param {{index: *, ts: *, tool: *, summary: *}} opts
 * @returns {string}
 */
function formatApply(opts) {
  const cells = [`APPLY ${opts.index} ${opts.ts}`, `${opts.tool}`, `${opts.summary}`];
  return cells.join(" | ");
}
|
|
2455
|
+
/**
 * Format a CHECK trace line.
 *
 * @param {{index: *, ts: *, tool: *, target: *, status: *}} opts
 * @returns {string}
 */
function formatCheck(opts) {
  const cells = [
    `CHECK ${opts.index} ${opts.ts}`,
    `${opts.tool}`,
    `${opts.target}`,
    `${opts.status}`
  ];
  return cells.join(" | ");
}
|
|
2459
|
+
/**
 * Format a two-line HALT message: a headline with the event index and the
 * halt reason, followed by the detail text on its own line.
 *
 * @param {{index: *, reason: *, detail: *}} opts
 * @returns {string}
 */
function formatHalt(opts) {
  const headline = `HALT ${opts.index} ${opts.reason}`;
  return `${headline}\n${opts.detail}`;
}
|
|
2463
|
+
/**
 * Prefix a message with the INFO tag.
 *
 * @param {*} msg
 * @returns {string}
 */
function formatInfo(msg) {
  return "INFO ".concat(msg);
}
|
|
2466
|
+
|
|
2467
|
+
// src/report.ts
|
|
2468
|
+
/**
 * Create a fresh counters object for one replay run, with every counter at
 * zero and no halt recorded.
 *
 * @param {number} total - Stored as-is in the `total` field.
 * @returns {object} New counters object; nested objects are not shared
 *   between calls.
 */
function newRunCounts(total) {
  const counts = { total, skipped: 0 };
  counts.actions = { classified: 0, applied: 0 };
  counts.checkpoints = { classified: 0, passed: 0 };
  counts.reads = { applied: 0, snapshotApplied: 0, lenientlySkipped: 0 };
  counts.bash = { classifiedExecute: 0, classifiedSkip: 0, executed: 0, cwdFiltered: 0 };
  counts.overrides = { skips: 0, executes: 0 };
  counts.classifier = null;
  counts.elapsedSeconds = 0;
  counts.targetFiles = 0;
  counts.targetBytes = 0;
  counts.haltedAt = null;
  counts.haltReason = null;
  return counts;
}
|
|
2490
|
+
/**
 * Render the end-of-run summary as an aligned "label: value" table.
 *
 * Optional rows (snapshot reads, leniently skipped reads, bash, classifier
 * batches, overrides) appear only when their counters are non-zero or, for
 * the classifier, when `c.classifier` is not null.
 *
 * @param {object} c - Run counters in the shape produced for a replay run.
 * @returns {string} Multi-line summary starting with a banner line.
 */
function formatSummary(c) {
  const { actions, checkpoints, reads, bash, overrides } = c;
  const rows = [];
  const addRow = (label, value, indent = "") => {
    rows.push({ key: `${label}:`, value, indent });
  };

  const replayed = actions.applied + checkpoints.passed + reads.applied + reads.snapshotApplied + bash.executed;
  const replayedOf = actions.classified + checkpoints.classified + bash.classifiedExecute;
  addRow("events total", c.total);
  addRow("replayed", `${replayed} (of ${replayedOf})`, " ");
  addRow("skipped", c.skipped, " ");

  if (reads.snapshotApplied > 0) {
    addRow("reads from snapshot", reads.snapshotApplied);
  }
  if (reads.lenientlySkipped > 0) {
    addRow("reads leniently skipped", reads.lenientlySkipped);
  }

  const sawBash = bash.classifiedExecute + bash.classifiedSkip + bash.cwdFiltered > 0;
  if (sawBash) {
    addRow("bash executed", `${bash.executed} of ${bash.classifiedExecute}`);
    if (bash.cwdFiltered > 0) {
      addRow("bash cwd-filtered", bash.cwdFiltered);
    }
  }

  if (c.classifier !== null) {
    const { totalBatches, cacheHits } = c.classifier;
    const live = totalBatches - cacheHits;
    addRow("classifier batches", `${totalBatches} (${cacheHits} cached, ${live} live)`);
  }

  if (overrides.skips + overrides.executes > 0) {
    addRow("overrides", `skip=${overrides.skips} execute=${overrides.executes}`);
  }

  const haltText = c.haltedAt === null ? "no" : `at index ${c.haltedAt} (${c.haltReason})`;
  addRow("halted", haltText);
  addRow("elapsed", `${c.elapsedSeconds.toFixed(2)}s`);
  addRow("target files", `${c.targetFiles} (${c.targetBytes} bytes total)`);

  // Value column starts one space past the widest "indent + label:" prefix.
  let widest = 0;
  for (const row of rows) {
    widest = Math.max(widest, row.indent.length + row.key.length);
  }
  const colWidth = widest + 1;

  const output = ["=== claude-code-replay summary ==="];
  for (const row of rows) {
    const paddedKey = row.key.padEnd(colWidth - row.indent.length, " ");
    output.push(`${row.indent}${paddedKey}${row.value}`);
  }
  return output.join("\n");
}
|
|
2549
|
+
|
|
2550
|
+
// src/cli.ts
|
|
2551
|
+
// Exit status 10; the CLI help text documents it as "halted on command failure".
var HALT_EXIT = 10;

/**
 * Print an error line to stderr and return the argv-error exit code.
 *
 * @param {string} msg - Message to print; a newline is appended.
 * @returns {number} Always 2.
 */
function fail(msg) {
  process.stderr.write(`${msg}\n`);
  return 2;
}
|
|
2556
|
+
/**
 * Expand a leading `~` or `~/` to the current user's home directory.
 * Any other input (including `~user` forms) is returned unchanged.
 *
 * @param {string} p
 * @returns {string}
 */
function expandHome(p) {
  if (p === "~") {
    return os3.homedir();
  }
  const tildePrefix = "~/";
  return p.startsWith(tildePrefix)
    ? path10.join(os3.homedir(), p.slice(tildePrefix.length))
    : p;
}
|
|
2561
|
+
/**
 * Derive the default logs directory for a source root:
 * `<home>/.claude/projects/<root with every "/" replaced by "-">`.
 *
 * @param {string} absSourceRoot
 * @returns {string}
 */
function inferLogsDirFromSourceRoot(absSourceRoot) {
  const encoded = absSourceRoot.split("/").join("-");
  const projectsDir = path10.join(os3.homedir(), ".claude", "projects");
  return path10.join(projectsDir, encoded);
}
|
|
2565
|
+
/**
 * Count the regular files under `target` and sum their sizes.
 *
 * Files that cannot be stat'ed still count towards the file total but add
 * nothing to the byte total (best effort).
 *
 * @param {string} target - Directory passed to `walkFiles`.
 * @returns {[number, number]} `[fileCount, totalBytes]`.
 */
function countTarget(target) {
  const files = walkFiles(target, { filter: (entry) => entry.isFile() });
  const totalBytes = files.reduce((sum, file) => {
    try {
      return sum + fs7.statSync(file).size;
    } catch {
      // Unreadable or vanished file: leave the byte total unchanged.
      return sum;
    }
  }, 0);
  return [files.length, totalBytes];
}
|
|
2576
|
+
/**
 * Report whether `p` is one of `roots` or lies beneath one of them.
 *
 * Both sides are canonicalised with `fs.realpathSync`; when that fails (for
 * example the path does not exist) the path is resolved lexically with
 * `path.resolve` instead.
 *
 * @param {string} p - Path to test.
 * @param {Iterable<string>} roots - Candidate containing directories.
 * @returns {boolean}
 */
function pathIsInsideAnyResolved(p, roots) {
  const canonical = (candidate) => {
    try {
      return fs7.realpathSync(candidate);
    } catch {
      return path10.resolve(candidate);
    }
  };
  const absP = canonical(p);
  for (const root of roots) {
    const absRoot = canonical(root);
    if (absP === absRoot) return true;
    const rel = path10.relative(absRoot, absP);
    // Only a real ".." path segment means "outside"; a child whose name merely
    // starts with two dots (e.g. "..cache") is still inside the root.
    const escapesRoot = rel === ".." || rel.startsWith(`..${path10.sep}`) || path10.isAbsolute(rel);
    if (rel !== "" && !escapesRoot) return true;
  }
  return false;
}
|
|
2596
|
+
/**
 * Replay events in order, updating `counts` in place.
 *
 * For each event the bucket from `classify` may be replaced by a per-event
 * override, or (for skipped Bash events) by a classifier decision. Skipped
 * events are only counted; actions, Bash executions and checkpoints are
 * carried out unless `ctx.dryRun` is set. The loop stops at the first
 * ActionFailure, ExecuteFailure or non-lenient CheckpointFailure, recording
 * the event index and failure name in `counts.haltedAt` / `counts.haltReason`.
 * Any other exception propagates to the caller.
 *
 * @param {Iterable<object>} events
 * @param {object} counts - Run counters, mutated in place.
 * @param {object} ctx - Reads `target`, `sourceRoots`, `classifierDecisions`,
 *   `overrides`, `snapshotIndex`, `applyReads`, `dryRun`, `lenientReads`,
 *   `debug` and `emit`.
 * @returns {void}
 */
function runEventLoop(events, counts, ctx) {
  const {
    target,
    sourceRoots,
    classifierDecisions,
    overrides,
    snapshotIndex,
    applyReads,
    dryRun,
    lenientReads,
    debug,
    emit
  } = ctx;

  // Report a halt and record where and why the replay stopped.
  const haltAt = (ev, reason, err) => {
    emit(formatHalt({ index: ev.index, reason, detail: err.message }));
    counts.haltedAt = ev.index;
    counts.haltReason = reason;
  };
  // Debug-only APPLY trace shared by the action and execute paths.
  const traceApply = (ev, summary) => {
    if (debug) {
      emit(formatApply({ index: ev.index, ts: ev.timestamp, tool: ev.toolName, summary }));
    }
  };

  for (const ev of events) {
    let bucket = classify(ev, sourceRoots);
    const override = overrides.get(ev.index);
    const hasOverride = override !== void 0;
    const skipOverride = hasOverride && override.kind === "skip";
    const executeOverride = hasOverride && override.kind === "execute";
    let classifierSkipped = false;
    let classifyReason;

    if (skipOverride) {
      bucket = "skip";
      classifyReason = "override-skip";
    } else if (hasOverride) {
      // Any override that is not a skip forces execution.
      bucket = "execute";
      classifyReason = "override-execute";
    } else if (bucket === "skip" && ev.toolName === "Bash" && classifierDecisions.has(ev.index)) {
      if (classifierDecisions.get(ev.index).intent === "execute") {
        bucket = "execute";
      } else {
        classifierSkipped = true;
      }
    }

    if (debug) {
      emit(
        formatClassify({
          index: ev.index,
          ts: ev.timestamp,
          session: ev.sessionFile,
          tool: ev.toolName,
          bucket,
          reason: classifyReason
        })
      );
    }

    if (bucket === "skip") {
      counts.skipped += 1;
      if (skipOverride) {
        counts.overrides.skips += 1;
      } else if (classifierSkipped) {
        counts.bash.classifiedSkip += 1;
      }
      continue;
    }

    switch (bucket) {
      case "action": {
        counts.actions.classified += 1;
        if (dryRun) continue;
        let summary;
        try {
          summary = applyAction(ev, target, sourceRoots);
        } catch (err) {
          if (!(err instanceof ActionFailure)) throw err;
          haltAt(ev, "ActionFailure", err);
          return;
        }
        traceApply(ev, summary);
        counts.actions.applied += 1;
        break;
      }

      case "execute": {
        if (executeOverride) {
          counts.overrides.executes += 1;
        } else {
          counts.bash.classifiedExecute += 1;
        }
        if (dryRun) continue;

        // Never run a command whose recorded cwd is missing or outside the source roots.
        const cwdOk = ev.cwd ? pathIsInsideAnyResolved(ev.cwd, sourceRoots) : false;
        if (!cwdOk) {
          counts.bash.cwdFiltered += 1;
          let label = "classifier-approved";
          let decisionReason;
          if (executeOverride) {
            label = "override-approved";
            decisionReason = "user-supplied --override-execute";
          } else {
            decisionReason = classifierDecisions.get(ev.index)?.reason ?? "";
          }
          const cwdDesc = ev.cwd
            ? `has cwd '${ev.cwd}' outside source roots`
            : `has no cwd (can't verify it's inside any source root)`;
          emit(
            formatInfo(
              `warn: ${label} execute at index ${ev.index} ${cwdDesc}; skipping. reason: ${decisionReason}`
            )
          );
          continue;
        }

        let decisionCommand;
        if (executeOverride) {
          decisionCommand = override.command ?? void 0;
        } else {
          const decision = classifierDecisions.get(ev.index);
          decisionCommand = decision?.intent === "execute" ? decision.command : void 0;
        }

        let summary;
        try {
          summary = executeBash(ev, target, sourceRoots, decisionCommand, emit);
        } catch (err) {
          if (!(err instanceof ExecuteFailure)) throw err;
          haltAt(ev, "ExecuteFailure", err);
          return;
        }
        traceApply(ev, summary);
        counts.bash.executed += 1;
        break;
      }

      case "checkpoint": {
        counts.checkpoints.classified += 1;
        if (dryRun) continue;
        try {
          const status = runCheckpoint(ev, target, sourceRoots, {
            applyReads,
            snapshotIndex
          });
          if (debug) {
            const input = asToolInput(ev);
            const readTarget = input?.tool === "Read" ? input.file_path : "";
            emit(
              formatCheck({
                index: ev.index,
                ts: ev.timestamp,
                tool: ev.toolName,
                target: readTarget,
                status
              })
            );
          }
          // The checkpoint summary text tells how the read was satisfied.
          if (status.includes("SNAPSHOT")) {
            counts.reads.snapshotApplied += 1;
          } else if (status.includes("APPLIED")) {
            counts.reads.applied += 1;
          } else {
            counts.checkpoints.passed += 1;
          }
        } catch (err) {
          if (!(err instanceof CheckpointFailure)) throw err;
          if (lenientReads && ev.toolName === "Read") {
            emit(
              formatInfo(
                `lenient-reads: skipping failed Read checkpoint at index ${ev.index}: ${err.message}`
              )
            );
            counts.reads.lenientlySkipped += 1;
            continue;
          }
          haltAt(ev, "CheckpointFailure", err);
          return;
        }
        break;
      }

      default:
        throw new Error(`unreachable bucket: ${bucket}`);
    }
  }
}
|
|
2765
|
+
/**
 * Option accumulator for repeatable flags: returns a new array with `value`
 * appended to the previously collected values.
 *
 * @param {string} value - Newly parsed option value.
 * @param {string[]} prev - Values collected so far (not modified).
 * @returns {string[]}
 */
function collectStrings(value, prev) {
  return prev.concat([value]);
}
|
|
2768
|
+
/**
 * Option parser for non-negative integer arguments.
 *
 * Accepts only decimal digits (surrounding whitespace is trimmed). Values
 * above `Number.MAX_SAFE_INTEGER` are rejected rather than silently rounded
 * to a different number.
 *
 * @param {string} raw - Raw option text.
 * @returns {number} The parsed integer.
 * @throws {InvalidArgumentError} When `raw` is not a non-negative integer
 *   that can be represented exactly.
 */
function parseNonNegInt(raw) {
  const t = raw.trim();
  if (!/^\d+$/.test(t)) {
    throw new InvalidArgumentError("must be a non-negative integer");
  }
  const n = Number.parseInt(t, 10);
  // The regex already rules out signs and fractions; what is left to catch is
  // digit strings too large to represent exactly (including Infinity).
  if (!Number.isSafeInteger(n)) {
    throw new InvalidArgumentError(
      `must be a non-negative integer no greater than ${Number.MAX_SAFE_INTEGER}`
    );
  }
  return n;
}
|
|
2779
|
+
/**
 * Option parser for timestamp arguments: accepts any text that `Date.parse`
 * understands and returns it unchanged.
 *
 * @param {string} raw - Raw option text.
 * @returns {string} `raw`, unmodified.
 * @throws {InvalidArgumentError} When `Date.parse(raw)` yields NaN.
 */
function parseIsoTimestamp(raw) {
  const parseable = !Number.isNaN(Date.parse(raw));
  if (parseable) {
    return raw;
  }
  throw new InvalidArgumentError(`could not parse timestamp '${raw}'`);
}
|
|
2785
|
+
/**
 * Construct the commander program describing the `replay` CLI: its name,
 * description, error behaviour, grouped options and the exit-code footer.
 *
 * `exitOverride()` is enabled on the program.
 *
 * @returns {Command} The configured (not yet parsed) program.
 */
function buildProgram() {
  const DESCRIPTION = "Replay Claude Code session logs (*.jsonl) to reconstruct the filesystem state a session left behind.";
  const LOGS_DIR_HELP = "directory containing session *.jsonl files. Repeatable. By default, one logs dir is inferred from each --source-root (~/.claude/projects/<encoded-source-root>); explicit --logs-dir values are added to the inferred set.";
  const SKIP_TAIL_HELP = "if cached events form a prefix of current logs, drop the appended tail and reuse the cache (no classifier call for the new events)";
  const OVERRIDE_EXECUTE_HELP = "force Bash event INDEX to execute. CMD must be a substring of the event's original command. Repeatable.";
  const EXIT_CODES_HELP = "\nExit codes: 0 success, 2 argv error, 10 halted on command failure.\n";

  const program = new Command();
  const group = (heading) => program.optionsGroup(heading);

  program
    .name("replay")
    .description(DESCRIPTION)
    .showHelpAfterError("(run with --help for usage)")
    .exitOverride();

  group("Required:")
    .requiredOption(
      "--target <dir>",
      "directory the replay writes into; must be distinct from any logs dir"
    )
    .requiredOption(
      "--source-root <abs-path>",
      "original absolute cwd from the session; compared verbatim against event.cwd. Repeatable.",
      collectStrings,
      []
    );

  group("Replay window:")
    .option("--logs-dir <dir>", LOGS_DIR_HELP, collectStrings, [])
    .option("--cutoff <iso-ts>", "drop events at or after this ISO 8601 timestamp")
    .option(
      "--start <iso-ts>",
      "start replay at the first event whose timestamp is at or after this",
      parseIsoTimestamp
    )
    .option(
      "--from-index <N>",
      "start replay at event index N (use to resume after a halt)",
      parseNonNegInt,
      0
    );

  group("Verification:")
    .option("--strict", "disable both snapshot and apply-reads heal layers")
    .option("--strict-reads", "halt on the first failed Read checkpoint (default: log + continue)");

  group("Bash classifier (opt-in):")
    .option("--enable-llm-classifier", "opt in to LLM calls (required for classifier)")
    .option(
      "--custom-intent <text>",
      "append a natural-language intent to the classifier prompt. Repeatable.",
      collectStrings,
      []
    )
    .option(
      "--override-classifier-cache",
      "force a fresh classifier call and overwrite the cache"
    )
    .option("--skip-uncached-tail", SKIP_TAIL_HELP);

  group("Per-event overrides:")
    .option(
      "--override-skip <INDEX>",
      "force event INDEX to skip. Repeatable.",
      collectStrings,
      []
    )
    .option("--override-execute <INDEX[=CMD]>", OVERRIDE_EXECUTE_HELP, collectStrings, []);

  group("Diagnostics:")
    .option("--dry-run", "classify and log only; no filesystem mutation")
    .option("--debug", "per-event CLASSIFY/APPLY/CHECK trace plus classifier diagnostics");

  program.addHelpText("after", EXIT_CODES_HELP);
  return program;
}
|
|
2848
|
+
/**
 * Parse and validate the command-line arguments.
 *
 * @param {string[]} argv - user arguments (no node/script prefix; parsed with `from: "user"`).
 * @returns {0 | 2 | object} `0` or `2` when commander terminated parsing
 *   (exit code 0 maps to 0, anything else to 2), the value of `fail(...)` on a
 *   validation error, otherwise the normalized run configuration.
 */
function parseCliArgs(argv) {
  const program = buildProgram();
  let opts;
  try {
    program.parse(argv, { from: "user" });
    opts = program.opts();
  } catch (err) {
    // Only commander's own termination is translated to an exit code.
    if (!(err instanceof CommanderError)) throw err;
    return err.exitCode === 0 ? 0 : 2;
  }

  // --custom-intent is meaningless unless the LLM classifier is opted in.
  const customIntents = opts.customIntent;
  if (!opts.enableLlmClassifier && customIntents.length > 0) {
    return fail(
      "error: --custom-intent requires --enable-llm-classifier (opt-in to LLM calls)"
    );
  }
  const classifierMode = opts.enableLlmClassifier ? composeMode({ customIntents }) : null;

  const target = path10.resolve(expandHome(opts.target));

  // Source roots: must be absolute; de-duplicated in first-seen order.
  const uniqueSourceRoots = /* @__PURE__ */ new Set();
  for (const raw of opts.sourceRoot) {
    const expanded = expandHome(raw);
    if (!path10.isAbsolute(expanded)) {
      return fail(
        `error: --source-root must be absolute (got '${raw}'); source roots are compared verbatim against event.cwd in the logs.`
      );
    }
    uniqueSourceRoots.add(expanded);
  }
  const sourceRoots = [...uniqueSourceRoots];

  const isDirectory = (p) => fs7.existsSync(p) && fs7.statSync(p).isDirectory();

  // Logs dirs: inferred ones first (silently dropped when absent), then the
  // explicit --logs-dir values (which must exist). A Set keeps first-seen
  // order and discards repeats.
  const uniqueLogsDirs = /* @__PURE__ */ new Set();
  for (const root of sourceRoots) {
    const inferred = inferLogsDirFromSourceRoot(root);
    if (isDirectory(inferred)) uniqueLogsDirs.add(inferred);
  }
  for (const raw of opts.logsDir) {
    const d = path10.resolve(expandHome(raw));
    if (!isDirectory(d)) {
      return fail(`error: logs dir not found: ${d}`);
    }
    uniqueLogsDirs.add(d);
  }
  const logsDirs = [...uniqueLogsDirs];
  if (logsDirs.length === 0) {
    return fail(
      "error: no logs dirs to read from; either pass --logs-dir explicitly or use a --source-root whose inferred logs dir (~/.claude/projects/<encoded-source-root>) exists."
    );
  }

  // The target may neither contain nor be contained in any logs dir.
  const clash = logsDirs.find(
    (d) => pathIsInsideOrEqual(target, d) || pathIsInsideOrEqual(d, target)
  );
  if (clash !== undefined) {
    return fail(
      `error: --target (${target}) must be distinct from logs dir (${clash}); replayed commands could otherwise corrupt the logs.`
    );
  }

  return {
    logsDirs,
    target,
    sourceRoots,
    classifierMode,
    overrideClassifierCache: opts.overrideClassifierCache ?? false,
    skipUncachedTail: opts.skipUncachedTail ?? false,
    strict: opts.strict ?? false,
    strictReads: opts.strictReads ?? false,
    debug: opts.debug ?? false,
    dryRun: opts.dryRun ?? false,
    cutoff: opts.cutoff ?? null,
    fromIndex: opts.fromIndex,
    startTs: opts.start ?? null,
    overrideSkipRaw: opts.overrideSkip,
    overrideExecuteRaw: opts.overrideExecute
  };
}
|
|
2931
|
+
/**
 * Validate the raw --override-skip / --override-execute values and build the
 * per-event override table.
 *
 * @param {{ overrideSkipRaw: string[], overrideExecuteRaw: string[] }} args
 *   raw flag values; execute entries have the form `INDEX` or `INDEX=CMD`.
 * @param {Array<object>} allEvents - events addressed by array position; an
 *   execute override requires `toolName === "Bash"` at that position.
 * @returns {Map<number, { kind: "skip" | "execute", command: string | null }> | 2}
 *   the override map, or `2` after reporting a validation error through `fail`.
 */
function parseOverrides(args, allEvents) {
  const overrides = /* @__PURE__ */ new Map();

  // Returns the validated index, or null after reporting the problem.
  const parseIndex = (raw, flag) => {
    const trimmed = raw.trim();
    if (!/^-?\d+$/.test(trimmed)) {
      fail(`error: ${flag} expects an integer event index (got '${raw}')`);
      return null;
    }
    const n = Number.parseInt(trimmed, 10);
    if (!Number.isFinite(n) || n < 0 || n >= allEvents.length) {
      fail(`error: ${flag} index ${n} out of range [0, ${allEvents.length})`);
      return null;
    }
    return n;
  };

  // Shared by both flags: an index may carry at most one override.
  const failDuplicate = (idx) => fail(
    `error: duplicate override for index ${idx} (--override-skip / --override-execute may each appear once per index)`
  );

  for (const raw of args.overrideSkipRaw) {
    const idx = parseIndex(raw, "--override-skip");
    if (idx === null) return 2;
    if (overrides.has(idx)) return failDuplicate(idx);
    overrides.set(idx, { kind: "skip", command: null });
  }

  for (const raw of args.overrideExecuteRaw) {
    // Split on the FIRST '=' only: CMD itself may contain '='.
    const eq = raw.indexOf("=");
    const idxStr = eq === -1 ? raw : raw.slice(0, eq);
    const cmd = eq === -1 ? null : raw.slice(eq + 1);
    const idx = parseIndex(idxStr, "--override-execute");
    if (idx === null) return 2;
    if (overrides.has(idx)) return failDuplicate(idx);

    const ev = allEvents[idx];
    if (ev.toolName !== "Bash") {
      return fail(
        `error: --override-execute index ${idx} is a ${ev.toolName} event, not a Bash event; execute overrides only apply to Bash`
      );
    }

    if (cmd !== null) {
      // An empty CMD would satisfy the substring check vacuously (every string
      // includes "") and be recorded as an empty command, so reject it.
      if (cmd.trim() === "") {
        return fail(
          `error: --override-execute CMD for index ${idx} is empty; pass a non-empty substring of the event's original command, or just INDEX with no '=CMD'`
        );
      }
      const ti = asToolInput(ev);
      // Fall back to "" when the input cannot be decoded or carries no string
      // command, so the check reports a mismatch instead of throwing.
      const original = ti?.tool === "Bash" && typeof ti.command === "string" ? ti.command : "";
      if (!original.includes(cmd)) {
        return fail(
          `error: --override-execute CMD for index ${idx} is not a substring of the event's original command.\noriginal: ${JSON.stringify(original)}\ncmd: ${JSON.stringify(cmd)}`
        );
      }
    }
    overrides.set(idx, { kind: "execute", command: cmd });
  }

  return overrides;
}
|
|
2988
|
+
/**
 * Drop the leading events that fall before the requested resume point.
 *
 * Two independent lower bounds are computed as array positions — the first
 * event whose timestamp is not before `startTs`, and the first event whose
 * `index` is not below `fromIndex` — and the later of the two wins.
 *
 * @param {Array<{ index: number, timestamp?: string }>} allEvents
 * @param {number} fromIndex - minimum `event.index` to replay; 0 disables the bound.
 * @param {string | null} startTs - minimum timestamp (compared as strings); null disables the bound.
 * @param {(line: string) => void} emit - sink for the informational line.
 * @returns {Array<object>} the events from the effective start position onward.
 */
function applyResumeWindow(allEvents, fromIndex, startTs, emit) {
  // Position of the first event NOT rejected by `isBeforeWindow`
  // (or allEvents.length when every event is rejected).
  const leadingRunLength = (isBeforeWindow) => {
    const pos = allEvents.findIndex((ev) => !isBeforeWindow(ev));
    return pos === -1 ? allEvents.length : pos;
  };

  const tsBound = startTs === null ? 0 : leadingRunLength((ev) => (ev.timestamp || "") < startTs);
  const indexBound = fromIndex > 0 ? leadingRunLength((ev) => ev.index < fromIndex) : 0;

  const skipped = Math.max(tsBound, indexBound);
  const events = allEvents.slice(skipped);
  if (skipped === 0) return events;

  let message = `resume window: skipping ${skipped} pre-start event(s) (fromIndex=${fromIndex}, start=${startTs ?? "-"}); replaying ${events.length} event(s)`;
  if (events.length > 0) {
    message += ` starting at event.index=${events[0].index}`;
  }
  emit(formatInfo(message));
  return events;
}
|
|
3013
|
+
/**
 * CLI entry point: parse arguments, collect events from the logs dirs, apply
 * the optional cache cap / resume window / per-event overrides, optionally
 * obtain classifier decisions, run the replay loop and print a summary.
 *
 * @param {string[]} argv - user arguments (without the node/script prefix).
 * @returns {Promise<number>} process exit code: the code returned by argument
 *   parsing or a validation failure, `HALT_EXIT` when the loop recorded a
 *   halt, otherwise 0.
 */
async function main(argv) {
  const args = parseCliArgs(argv);
  // 0 and 2 are the early-exit codes produced by parseCliArgs.
  if ([0, 2].includes(args)) return args;

  const { logsDirs, target, sourceRoots, classifierMode } = args;
  const { overrideClassifierCache, skipUncachedTail } = args;
  const { strict, strictReads, debug, dryRun } = args;
  const { cutoff, fromIndex, startTs } = args;

  const emit = (line) => {
    process.stdout.write(`${line}\n`);
  };
  const info = (message) => emit(formatInfo(message));

  // NOTE(review): the target directory is created even under --dry-run, whose
  // help text promises "no filesystem mutation" — confirm this is intended.
  fs7.mkdirSync(target, { recursive: true });

  info(`collecting events from ${logsDirs.join(", ")}`);
  if (cutoff) {
    info(`cutoff active: dropping events at/after '${cutoff}'`);
  }
  let allEvents = collect(logsDirs, cutoff);
  info(`collected ${allEvents.length} events`);

  // --skip-uncached-tail only has an effect when the classifier is enabled:
  // cap the event list at the last timestamp recorded in the cache.
  if (skipUncachedTail && classifierMode !== null) {
    const cachedLastTs = readCachedLastEventTs(cacheDirForTarget(target));
    if (cachedLastTs !== null) {
      const totalBefore = allEvents.length;
      allEvents = allEvents.filter((e) => (e.timestamp ?? "") <= cachedLastTs);
      const dropped = totalBefore - allEvents.length;
      info(
        `--skip-uncached-tail: capping at cached last_event_ts='${cachedLastTs}' (dropped ${dropped} appended event(s) past the cache)`
      );
    } else {
      info(
        "--skip-uncached-tail: no prior cache found (or legacy meta without last_event_ts); proceeding without truncation"
      );
    }
  }

  const events = applyResumeWindow(allEvents, fromIndex, startTs, emit);

  // Overrides are validated against the full (capped) list, not the window.
  const overrides = parseOverrides(args, allEvents);
  if (overrides === 2) return 2;
  if (overrides.size > 0) {
    info(
      `applying ${overrides.size} per-event override(s) (skip=${args.overrideSkipRaw.length}, execute=${args.overrideExecuteRaw.length})`
    );
  }

  // --strict turns off both heal layers: the snapshot index and apply-reads.
  let snapshotIndex = null;
  if (!strict) {
    info("building snapshot index from file-history-snapshot entries");
    snapshotIndex = SnapshotIndex.fromLogsDir(logsDirs, undefined, emit);
    snapshotIndex.setEmit(emit);
    info(`snapshot index covers ${snapshotIndex.index.size} paths`);
  }
  const applyReads = !strict;

  const classifierStats = { totalBatches: 0, cacheHits: 0 };
  let classifierDecisions = /* @__PURE__ */ new Map();
  if (classifierMode !== null) {
    const cacheDir = cacheDirForTarget(target);
    const classifierOptions = {
      useCache: true,
      overrideCache: overrideClassifierCache,
      debug,
      emit,
      stats: classifierStats
    };
    try {
      classifierDecisions = await getDecisions(
        cacheDir,
        classifierMode,
        allEvents,
        sourceRoots,
        classifierOptions
      );
    } catch (err) {
      // Only classifier failures become a clean CLI error; the rest propagate.
      if (!(err instanceof ClassifierError)) throw err;
      return fail(`error: classifier failed: ${err.message}`);
    }
    info(`classifier returned ${classifierDecisions.size} decisions`);
  }

  const counts = newRunCounts(events.length);
  if (classifierMode !== null) {
    counts.classifier = classifierStats;
  }

  // Reads are lenient by default: a failed Read checkpoint is recorded as a
  // skip and the replay carries on. The classifier sometimes correctly omits
  // a producing Bash chain, leaving later Reads with nothing to verify, and
  // halting there would cut every long replay short. --strict-reads opts out,
  // as does the broader --strict (which also disables the snapshot heal path).
  const lenientReads = !(strictReads || strict);

  const startedAt = process.hrtime.bigint();
  runEventLoop(events, counts, {
    target,
    sourceRoots,
    classifierDecisions,
    overrides,
    snapshotIndex,
    applyReads,
    dryRun,
    lenientReads,
    debug,
    emit
  });
  const elapsedNs = process.hrtime.bigint() - startedAt;
  counts.elapsedSeconds = Number(elapsedNs) / 1e9;

  const [files, bytes] = countTarget(target);
  counts.targetFiles = files;
  counts.targetBytes = bytes;
  emit(formatSummary(counts));

  return counts.haltedAt !== null ? HALT_EXIT : 0;
}
|
|
3136
|
+
|
|
3137
|
+
// src/main.ts
|
|
3138
|
+
var isDirectRun = !!process.argv[1] && import.meta.url === pathToFileURL(realpathSync3(process.argv[1])).href;
|
|
3139
|
+
if (isDirectRun) {
|
|
3140
|
+
main(process.argv.slice(2)).then(
|
|
3141
|
+
(code) => process.exit(code),
|
|
3142
|
+
(err) => {
|
|
3143
|
+
console.error(err);
|
|
3144
|
+
process.exit(1);
|
|
3145
|
+
}
|
|
3146
|
+
);
|
|
3147
|
+
}
|
|
3148
|
+
export {
|
|
3149
|
+
main
|
|
3150
|
+
};
|