replay-labs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +134 -0
- package/examples/password-reset-transcript.md +27 -0
- package/examples/password-reset.diff +101 -0
- package/package.json +47 -0
- package/scripts/capture-git-working-diff.js +56 -0
- package/scripts/create-added-files-diff.js +33 -0
- package/scripts/extract-claude-transcript.js +86 -0
- package/scripts/extract-codex-transcript.js +119 -0
- package/src/cli.js +316 -0
- package/src/discovery.js +715 -0
- package/src/generate.js +406 -0
- package/src/ingest.js +124 -0
- package/src/interaction.js +1161 -0
- package/src/lab-ui.js +1339 -0
- package/src/modules.js +643 -0
- package/src/overview.js +147 -0
- package/src/patterns.js +322 -0
- package/src/pipeline.js +68 -0
- package/src/report.js +516 -0
- package/src/review.js +238 -0
- package/src/server.js +199 -0
- package/src/storage.js +34 -0
package/src/generate.js
ADDED
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
3
|
+
import { resolve, dirname } from "node:path";
|
|
4
|
+
|
|
5
|
+
// Generate a full lab module for a decision we have NO hand-authored module for,
|
|
6
|
+
// from the decision metadata + the session evidence. This is what lets Replay
|
|
7
|
+
// "do well for things it has never seen" — any detected decision becomes a deep lab.
|
|
8
|
+
//
|
|
9
|
+
// The lab UI and reviewer already render/grade ANY schema-conforming module.
|
|
10
|
+
// Only the CONTENT is decision-specific, so that is all we generate; a deterministic
|
|
11
|
+
// assembler fills the structural defaults so a partial generation still yields a
|
|
12
|
+
// working lab.
|
|
13
|
+
|
|
14
|
+
const GEN_SCHEMA_PROMPT = `You are authoring ONE decision lab for Replay, which turns local AI
|
|
15
|
+
coding sessions into practice. You are given a DECISION the AI made and the session
|
|
16
|
+
DIFF evidence from the session. Produce the decision-specific teaching content.
|
|
17
|
+
|
|
18
|
+
Hard rules:
|
|
19
|
+
- Ground everything in the session language/stack of the evidence (Go, Python, Rust,
|
|
20
|
+
Terraform, SQL — whatever it is). Do NOT default to JavaScript/React.
|
|
21
|
+
- naiveCode: a short, runnable-looking snippet in the evidence's language that
|
|
22
|
+
embodies the decision done the risky/naive way. 3-12 lines.
|
|
23
|
+
- The failure must be a specific failure mode of THIS decision in THIS stack.
|
|
24
|
+
- arbitrate: two engineers disagree about WHY it failed. Exactly one is correct. The
|
|
25
|
+
wrong one is a plausible, common misconception with a specific rebuttal.
|
|
26
|
+
- repair blocks: slice a correct solution into 5-7 ordered blocks, PLUS 2-3 trap
|
|
27
|
+
blocks (plausible-looking wrong choices). The correct blocks in order must form
|
|
28
|
+
valid code. Mark traps with "trap": true.
|
|
29
|
+
- reviewCriteria.repair: 3 required + 2 optional, each OBSERVABLE in a submission.
|
|
30
|
+
- Keep it tight. This is judgment training, not a tutorial.
|
|
31
|
+
|
|
32
|
+
Respond with STRICT JSON only (no markdown fences), this exact shape:
|
|
33
|
+
{
|
|
34
|
+
"name": "<short pattern name, 1-3 words>",
|
|
35
|
+
"minutes": <int 5-12>,
|
|
36
|
+
"takeaway": "<one sentence: the durable rule>",
|
|
37
|
+
"why": "<2 sentences: why this decision appeared in this session>",
|
|
38
|
+
"naive": "<one sentence describing the naive approach>",
|
|
39
|
+
"naiveFile": "<plausible file path in the session stack>",
|
|
40
|
+
"naiveCode": "<the naive snippet, code, \\n for newlines>",
|
|
41
|
+
"breaks": "<one sentence: what breaks and why>",
|
|
42
|
+
"aiVersion": "<one sentence: what the AI actually did in the session>",
|
|
43
|
+
"production": "<one sentence: the more complete version with the missing safeguards>",
|
|
44
|
+
"smell": "<2-4 word smell name>",
|
|
45
|
+
"smellCopy": "<one sentence describing the smell>",
|
|
46
|
+
"failureTerminal": "<realistic terminal output showing the failure, \\n for newlines>",
|
|
47
|
+
"failureNarration": "<2 sentences explaining why it failed — the lesson>",
|
|
48
|
+
"arbitrate": {
|
|
49
|
+
"wrong": {"handle": "iyke.dev", "text": "<plausible wrong diagnosis>", "verdict": "<why rejecting it matters>"},
|
|
50
|
+
"right": {"handle": "ada.builds", "text": "<correct diagnosis>", "verdict": "<why approving it is right>"}
|
|
51
|
+
},
|
|
52
|
+
"traceTarget": "<regex matching the line in naiveCode where it breaks>",
|
|
53
|
+
"traceHit": "<one sentence: what that line reveals>",
|
|
54
|
+
"diffTarget": "<a short literal substring (5-25 chars) copied EXACTLY from a line in the SESSION DIFF EVIDENCE below that carries this decision — must appear verbatim in the evidence>",
|
|
55
|
+
"repairInstructions": "<one sentence telling the learner what to build>",
|
|
56
|
+
"repairFilename": "<file path label for the editor>",
|
|
57
|
+
"repairStarter": "<the naive code as the editor starting point, \\n for newlines>",
|
|
58
|
+
"repairBlocks": [{"code": "<slice, \\n for newlines>", "trap": false}, ...],
|
|
59
|
+
"repairSolution": "<the full correct solution, \\n for newlines>",
|
|
60
|
+
"reviewCriteria": {
|
|
61
|
+
"repair": [{"id": "<slug>", "name": "<observable criterion>", "required": true}, ...]
|
|
62
|
+
},
|
|
63
|
+
"transferScenario": "<2 sentences: a NEW situation needing the same judgment>",
|
|
64
|
+
"transferRule": "<the reusable rule>",
|
|
65
|
+
"transferFields": [{"key": "<slug>", "label": "<short question>", "ph": "<placeholder>"}, ...],
|
|
66
|
+
"artifactFailure": "<the failure signature to remember>",
|
|
67
|
+
"artifactStandard": "<the completion standard to remember>"
|
|
68
|
+
}`;
|
|
69
|
+
|
|
70
|
+
/**
 * Ask the `claude` CLI to author the decision-specific lab content.
 *
 * The prompt is GEN_SCHEMA_PROMPT followed by the decision metadata and the
 * first 6000 characters of the diff evidence, written to the child's stdin.
 *
 * @param {object} decision - Detected decision; reads `title`, `why`,
 *   `beginnerMiss` and `seniorCheck`.
 * @param {string} evidence - Session diff evidence.
 * @param {{ timeoutMs?: number }} [options] - Child process timeout (default 180000 ms).
 * @returns {Promise<object|null>} The parsed JSON object from the CLI output, or
 *   `null` when the evidence is unusable, the process fails, or no JSON parses.
 *   The promise never rejects.
 */
export function generateModule(decision, evidence, { timeoutMs = 180000 } = {}) {
  if (!hasUsableDiffEvidence(evidence)) return Promise.resolve(null);

  const prompt =
    GEN_SCHEMA_PROMPT +
    "\n\n## DECISION\nname: " + decision.title +
    "\nwhy it matters: " + (decision.why || "") +
    "\nbeginner miss: " + (decision.beginnerMiss || "") +
    "\nwhat to check: " + (decision.seniorCheck || "") +
    "\n\n## SESSION DIFF EVIDENCE (the actual stack — match it)\n" +
    String(evidence).slice(0, 6000) +
    "\n\nOutput ONLY the JSON.";

  return new Promise((resolvePromise) => {
    const child = execFile(
      "claude",
      ["-p", "--model", "sonnet"],
      { timeout: timeoutMs, maxBuffer: 4 * 1024 * 1024 },
      (error, stdout) => {
        if (error) return resolvePromise(null);
        // Tolerate chatter around the payload: parse from the first "{" to the last "}".
        const raw = stdout.trim();
        const start = raw.indexOf("{");
        const end = raw.lastIndexOf("}");
        if (start === -1 || end === -1) return resolvePromise(null);
        try {
          resolvePromise(JSON.parse(raw.slice(start, end + 1)));
        } catch {
          resolvePromise(null);
        }
      }
    );

    // When the process could not be spawned, stdin may be missing. The execFile
    // callback still reports the failure and resolves null, so just stop here
    // instead of throwing inside the executor (which would reject the promise).
    if (!child.stdin) return;
    // A child that exits before reading everything makes the pipe emit an error
    // (EPIPE). Without a listener that would crash the process; the execFile
    // callback above already decides the outcome.
    child.stdin.on("error", () => {});
    child.stdin.write(prompt);
    child.stdin.end();
  });
}
|
|
104
|
+
|
|
105
|
+
// Merge generated content with structural defaults into a full lab module.
|
|
106
|
+
// Anything the generator omitted gets a sane default so the lab always renders.
|
|
107
|
+
/**
 * Build a complete lab module from generator output plus structural defaults.
 *
 * Every field the generator omitted falls back to decision metadata or a fixed
 * default. A partially filled `gen.arbitrate` (missing `wrong` or `right`) no
 * longer throws: the missing text/verdict are left undefined.
 *
 * @param {object} decision - Detected decision; reads `id`, `title`, `why`,
 *   `seniorCheck`, `beginnerMiss` and `patterns`.
 * @param {object} gen - Parsed generator output (any subset of the schema fields).
 * @param {string} [evidence=""] - Session diff evidence used to pick spot targets.
 * @returns {object} The assembled lab module.
 */
export function assembleGeneratedModule(decision, gen, evidence = "") {
  const id = "gen-" + slug(decision.id || decision.title);
  const displayName = gen.name || decision.title;

  // Spot targets MUST exist in the real diff evidence or the diagnose beat is
  // unwinnable, so every candidate is checked line by line against the evidence.
  const evLines = String(evidence).split("\n");
  const inEvidence = (re) => {
    try {
      const rx = new RegExp(re, "i");
      return evLines.some((l) => rx.test(l));
    } catch {
      return false; // an invalid pattern can never be a target
    }
  };

  // Prefer the generator's literal diffTarget, then the decision's own
  // detection patterns; keep at most two that actually appear in the evidence.
  const candidates = [];
  if (gen.diffTarget && gen.diffTarget.length >= 4) candidates.push(escapeForRe(gen.diffTarget));
  (decision.patterns || []).forEach((p) => candidates.push((p && p.source) ? p.source : String(p)));
  const spotTargets = candidates
    .filter(inEvidence)
    .slice(0, 2)
    .map((re) => ({ re, note: "a line that carries the " + displayName + " decision." }));

  // null means "editor-only": no assemble blocks were generated.
  const blocks = Array.isArray(gen.repairBlocks) && gen.repairBlocks.length
    ? gen.repairBlocks
    : null;

  const repairCriteria = (gen.reviewCriteria && Array.isArray(gen.reviewCriteria.repair) && gen.reviewCriteria.repair.length)
    ? normalizeCriteria(gen.reviewCriteria.repair)
    : [
        { id: "addresses", name: "Addresses the core decision in code", required: true },
        { id: "failure", name: "Handles the failure mode the naive version hits", required: true },
        { id: "reasoning", name: "Reasoning is about the decision, not surface syntax", required: true },
        { id: "edges", name: "Considers an edge case or failure state", required: false },
        { id: "verify", name: "Names how it would be verified", required: false }
      ];

  const fields = (Array.isArray(gen.transferFields) && gen.transferFields.length)
    ? gen.transferFields
    : [
        { key: "approach", label: "How would you make this decision here?", ph: "the core move is…" },
        { key: "failure", label: "What failure are you designing against?", ph: "the risk is…" },
        { key: "verify", label: "How do you verify before shipping?", ph: "I'd check…" }
      ];

  // Guard each side of the thread: the generator may return only one of them.
  const arb = gen.arbitrate;
  const arbitrate = arb
    ? {
        intro: "Two engineers read the same failure. Click the review you would approve.",
        comments: [
          { handle: arb.wrong?.handle || "iyke.dev", text: arb.wrong?.text, correct: false, verdict: arb.wrong?.verdict },
          { handle: arb.right?.handle || "ada.builds", text: arb.right?.text, correct: true, verdict: arb.right?.verdict }
        ]
      }
    : null;

  return {
    id,
    name: displayName,
    generated: true,
    minutes: clampInt(gen.minutes, 5, 12, 8),
    why: gen.why || decision.why || "This decision shaped how the session's code behaves.",
    takeaway: gen.takeaway || decision.seniorCheck || "Name the decision before you accept the code.",
    naive: gen.naive || decision.beginnerMiss || "Accept the first working version without naming the tradeoff.",
    naiveFile: gen.naiveFile || "session",
    naiveCode: gen.naiveCode || "// naive version",
    breaks: gen.breaks || "The code runs in this context, but the tradeoff is not clear enough to reuse.",
    aiVersion: gen.aiVersion || "The AI produced a working implementation.",
    production: gen.production || gen.safeguarded || "Name the decision, handle its failure mode, and verify it.",
    exercise: "Apply the same judgment to the next place this decision appears.",
    patternHref: null,
    challenge: {
      pattern: displayName,
      patternCopy: "A technical decision the learner should be able to explain and reuse.",
      smell: gen.smell || "Unowned decision",
      smellCopy: gen.smellCopy || "The code works but the tradeoff was never named.",
      proof: "Transfer, not recall",
      proofCopy: "You pass when you can apply the same judgment to a new case."
    },
    criteria: {
      diagnose: "Find the decision inside the session changes before any explanation appears.",
      break: "Trace the naive version and click the line where it breaks.",
      repair: "Rubric check against a checklist generated for this decision.",
      transfer: "Rubric check. Apply the judgment to a new situation."
    },
    reviewCriteria: {
      repair: repairCriteria.map((c) => c.name),
      transfer: fields.map((f) => f.label)
    },
    artifact: {
      failure: gen.artifactFailure || gen.breaks || "The decision's failure mode went unhandled.",
      standard: gen.artifactStandard || gen.production || gen.safeguarded || "Own the decision and verify it."
    },
    nextPatterns: [],
    lenses: {
      diagnose: { title: "Look for the decision", items: ["The line that makes the call", "What depends on it", "The tradeoff being made"] },
      break: { title: "Look for the failure", items: ["Where it breaks", "What changes under pressure", "What was assumed"] },
      repair: { title: "Look for the missing safeguard", items: repairCriteria.filter((c) => c.required).map((c) => c.name) },
      transfer: { title: "Look for reuse", items: ["A new context", "The same judgment", "A new failure to design against"] }
    },
    diagnose: { prompt: "What kind of decision is this?", choices: [] },
    spot: {
      prompt: "Find the " + displayName + " decision in this diff. Click the line that shows it.",
      // With no evidence-backed target, fall back to the start of the naive snippet.
      targetRe: spotTargets.length ? spotTargets[0].re : escapeForRe(firstCodeToken(gen.naiveCode)),
      targets: spotTargets.length ? spotTargets : undefined,
      hit: gen.traceHit || "That line shows the decision this lab is about.",
      misses: [],
      missDefault: "That line is downstream of the decision. Look for where the call is actually made."
    },
    break: { prompt: "Where does the naive version break?", choices: [] },
    investigate: {
      prompt: "Trace it yourself: click the line where it breaks.",
      // 1-based line in naiveCode matching traceTarget; line 1 when nothing matches.
      targetLine: lineMatching(gen.naiveCode, gen.traceTarget) || 1,
      hit: gen.traceHit || "That is where the decision's failure mode bites.",
      misses: {},
      missDefault: "That line survives. Look for the first line that breaks under the real failure mode."
    },
    failureSim: {
      terminal: gen.failureTerminal || "$ run\nError: the naive version fails here.",
      narration: gen.failureNarration || "The code changed nothing; the conditions it ran under did. That is the decision.",
      arbitrate
    },
    repairLab: {
      filename: gen.repairFilename || gen.naiveFile || "your repair",
      instructions: gen.repairInstructions || "Edit it until you would trust it. Comments can explain intent; the core mechanism must be concrete.",
      starter: gen.repairStarter || gen.naiveCode || "",
      blocks,
      solution: gen.repairSolution || null
    },
    repair: { prompt: gen.repairInstructions || "Repair it so you would trust it." },
    transferLab: {
      instructions: "Capture the handoff rule — a sentence each is enough.",
      fields
    },
    transfer: {
      prompt: "Apply the same judgment to a new situation.",
      scenario: gen.transferScenario || "A future session makes the same kind of decision in a different feature.",
      rule: gen.transferRule || gen.takeaway || "A decision is learned only when you can reapply it.",
      choices: []
    },
    // carried so the server can review generated labs with the right rubric
    rubric: {
      repair: {
        title: "Repair: " + displayName,
        context: (gen.naiveCode || "").slice(0, 800),
        criteria: repairCriteria,
        passRule: "all required criteria pass, plus at least one optional",
        intentNote: "Comments can explain intent; the core mechanism still needs to be concrete. Check the decision, not only syntax."
      },
      transfer: {
        title: "Transfer: " + displayName,
        context: gen.transferScenario || "",
        // Every field except the last is required (always at least one).
        criteria: fields.map((f, i) => ({ id: f.key, name: f.label, required: i < Math.max(1, fields.length - 1) })),
        passRule: "all required criteria pass, plus at least one optional"
      }
    }
  };
}
|
|
257
|
+
|
|
258
|
+
// Guarantee a satisfiable rubric: at least one required and at least one optional
|
|
259
|
+
// criterion, or the pass rule ("all required + 1 optional") can never be met.
|
|
260
|
+
/**
 * Normalize generator-supplied rubric criteria into `{ id, name, required }`.
 *
 * Missing ids/names get positional defaults. When there are no required
 * criteria the first becomes required; when there are no optional criteria and
 * more than one entry, the last becomes optional. An empty or non-array input
 * yields an empty list, and null or non-object entries are treated as empty
 * criteria instead of throwing.
 *
 * @param {Array<object>} criteria - Raw criteria from the generator.
 * @returns {Array<{id: string, name: string, required: boolean}>}
 */
function normalizeCriteria(criteria) {
  const source = Array.isArray(criteria) ? criteria : [];
  const cs = source.map((c, i) => {
    const entry = c !== null && typeof c === "object" ? c : {};
    return {
      id: entry.id || ("c" + i),
      name: entry.name || ("criterion " + i),
      required: Boolean(entry.required)
    };
  });
  // Nothing to rebalance; indexing cs[0] below would throw on an empty list.
  if (cs.length === 0) return cs;

  if (!cs.some((c) => c.required)) cs[0].required = true;
  if (!cs.some((c) => !c.required) && cs.length > 1) cs[cs.length - 1].required = false;
  return cs;
}
|
|
270
|
+
|
|
271
|
+
// Deterministic self-check (zero extra tokens). Blockers mean the lab would be
|
|
272
|
+
// broken or unwinnable; warnings mean degraded-but-usable.
|
|
273
|
+
/**
 * Deterministic self-check of an assembled lab module.
 *
 * @param {object} module - Assembled lab module to inspect.
 * @param {string} evidence - Session diff evidence the lab was built from.
 * @returns {{ ok: boolean, blockers: string[], warnings: string[] }}
 *   `ok` is true when no blocker was recorded. Blockers mark a lab that would
 *   be broken or unwinnable; warnings mark a degraded but usable one.
 */
export function validateGeneratedModule(module, evidence) {
  const blockers = [];
  const warnings = [];

  if (!hasUsableDiffEvidence(evidence)) {
    blockers.push("evidence does not include concrete changed lines");
  }

  // The naive snippet has to look like real code rather than a stub.
  const naive = String(module.naiveCode || "");
  const substantialLineCount = naive.split("\n").filter((l) => l.trim().length > 1).length;
  const looksLikeCode = /[{}()=;:]|def |func /.test(naive);
  if (substantialLineCount < 2 || naive.length < 25 || !looksLikeCode) {
    blockers.push("naiveCode is trivial or not reviewable");
  }

  // The arbitrate thread needs exactly two complete comments, one of them correct.
  const comments = module.failureSim?.arbitrate?.comments;
  if (!Array.isArray(comments) || comments.length !== 2) {
    blockers.push("arbitrate thread missing or not exactly two comments");
  } else {
    const correct = comments.filter((c) => c.correct).length;
    if (correct !== 1) {
      blockers.push("arbitrate must have exactly one correct comment, has " + correct);
    }
    if (comments.some((c) => !c.text || !c.verdict)) {
      blockers.push("an arbitrate comment is missing text or verdict");
    }
  }

  // The repair rubric must be satisfiable under "all required + one optional".
  const rc = module.rubric?.repair?.criteria || [];
  if (rc.length < 3) blockers.push("repair rubric has fewer than 3 criteria");
  const hasRequired = rc.some((c) => c.required);
  const hasOptional = hasRequired && rc.some((c) => !c.required);
  if (!hasRequired || !hasOptional) {
    blockers.push("repair rubric is not satisfiable (needs >=1 required and >=1 optional)");
  }

  // Assemble mode: the non-trap blocks must add up to a reviewable solution and
  // no trap may duplicate a correct block.
  const blocks = module.repairLab?.blocks;
  if (Array.isArray(blocks) && blocks.length) {
    const correctBlocks = [];
    const traps = [];
    for (const b of blocks) (b.trap ? traps : correctBlocks).push(b);

    if (correctBlocks.length < 3) blockers.push("fewer than 3 correct assemble blocks");
    if (traps.length < 1) warnings.push("no trap blocks — assembly is too easy");

    const correctCodes = new Set(correctBlocks.map((b) => (b.code || "").trim()));
    const duplicatedTrap = traps.some((t) => correctCodes.has((t.code || "").trim()));
    if (duplicatedTrap) blockers.push("a trap block is identical to a correct block");

    const assembled = correctBlocks.map((b) => b.code).join("\n");
    if (assembled.length < 30) blockers.push("assembled correct blocks are too short to review");
  }

  // A failure terminal of some substance must be present.
  if (String(module.failureSim?.terminal || "").length < 15) {
    blockers.push("failure terminal missing or trivial");
  }

  // Transfer needs at least two fields to review.
  const tf = module.transferLab?.fields || [];
  if (tf.length < 2) blockers.push("fewer than 2 transfer fields");

  // Quality warnings: usable through safety nets, but degraded.
  const ev = String(evidence || "");
  const spot = module.spot;
  let spotDefs = [];
  if (spot) {
    spotDefs = spot.targets || (spot.targetRe ? [{ re: spot.targetRe }] : []);
  }
  const spotInEvidence = spotDefs.some((t) => {
    try {
      return new RegExp(t.re, "i").test(ev);
    } catch {
      return false;
    }
  });
  if (!spotInEvidence) {
    warnings.push("spot target not found in evidence — diagnose uses the click-any-line safety net");
  }
  if (!module.repairLab?.solution) {
    warnings.push("no reference solution — the 'show solution' button is hidden");
  }

  return { ok: blockers.length === 0, blockers, warnings };
}
|
|
340
|
+
|
|
341
|
+
/**
 * Return a validated generated lab for a decision, from cache when possible.
 *
 * A cached module is served only if it still passes validateGeneratedModule.
 * Otherwise generation is attempted up to two times; the first candidate that
 * passes the self-check is written to the cache and returned.
 *
 * @param {string} cacheDir - Directory holding cached `gen-<slug>.json` files.
 * @param {object} decision - Detected decision; reads `id` and `title`.
 * @param {string} evidence - Session diff evidence.
 * @returns {Promise<object|null>} The lab module, or `null` when the evidence is
 *   unusable or no attempt produced a passing lab.
 */
export async function loadOrGenerate(cacheDir, decision, evidence) {
  const cacheName = "gen-" + slug(decision.id || decision.title) + ".json";
  const cachePath = resolve(cacheDir, cacheName);

  if (!hasUsableDiffEvidence(evidence)) {
    console.log(` generation skipped for "${decision.title}" — evidence does not include concrete changed lines.`);
    return null;
  }

  // Serve from cache only when the stored lab still passes the self-check.
  try {
    const cached = JSON.parse(await readFile(cachePath, "utf8"));
    const { ok, blockers } = validateGeneratedModule(cached, evidence);
    if (ok) return cached;
    console.log(` cached generated lab rejected for "${decision.title}": ${blockers.join("; ")}`);
  } catch { /* not cached */ }

  // Generate, self-check, and retry once if the result is broken. Never serve a
  // failing lab — fall back to "lab coming" rather than a frustrating one.
  let accepted = null;
  let attempt = 0;
  while (accepted === null && attempt < 2) {
    attempt += 1;
    const gen = await generateModule(decision, evidence);
    if (!gen) continue;

    const candidate = assembleGeneratedModule(decision, gen, evidence);
    const { ok, blockers, warnings } = validateGeneratedModule(candidate, evidence);
    if (!ok) {
      console.log(` generation attempt ${attempt} rejected: ${blockers.join("; ")}`);
      continue;
    }

    candidate._warnings = warnings;
    if (warnings.length) {
      console.log(` generated "${candidate.name}" with warnings: ${warnings.join("; ")}`);
    }
    accepted = candidate;
  }

  if (accepted === null) {
    console.log(` generation failed self-check twice for "${decision.title}" — leaving as "lab coming".`);
    return null;
  }

  await mkdir(dirname(cachePath), { recursive: true });
  await writeFile(cachePath, JSON.stringify(accepted, null, 1), "utf8");
  return accepted;
}
|
|
380
|
+
|
|
381
|
+
/**
 * Turn any value into a lowercase, hyphen-separated identifier.
 * Runs of characters outside a-z/0-9 become one hyphen, a leading or trailing
 * hyphen is removed, and the result is cut to 40 characters.
 */
function slug(s) {
  const lowered = String(s).toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
  const trimmed = hyphenated.replace(/^-|-$/g, "");
  return trimmed.slice(0, 40);
}
|
|
382
|
+
/**
 * Parse `v` as a base-10 integer and clamp it into [lo, hi].
 * Returns `dflt` when `v` does not parse to a finite number.
 */
function clampInt(v, lo, hi, dflt) {
  const parsed = parseInt(v, 10);
  if (!Number.isFinite(parsed)) return dflt;
  const atLeastLo = Math.max(lo, parsed);
  return Math.min(hi, atLeastLo);
}
|
|
383
|
+
/**
 * Escape regular-expression metacharacters so the text matches literally.
 * A falsy input is replaced by the literal "x".
 */
function escapeForRe(s) {
  const literal = String(s || "x");
  return literal.replace(/[.*+?^${}()|[\]\\]/g, (ch) => "\\" + ch);
}
|
|
384
|
+
/**
 * Decide whether the evidence contains concrete changed lines.
 *
 * The text must carry a diff marker ("diff --git" or "@@"), must not contain
 * the Codex "touched or inspected" placeholder sentence, and must have at
 * least two +/- lines (file headers excluded) with more than three characters
 * of content.
 *
 * @param {*} evidence - Evidence text; falsy values count as empty.
 * @returns {boolean}
 */
export function hasUsableDiffEvidence(evidence) {
  const text = String(evidence || "");

  const hasDiffMarker = text.includes("diff --git") || text.includes("@@");
  if (!hasDiffMarker) return false;
  if (/Codex session touched or inspected this file; use transcript evidence for details/i.test(text)) return false;

  let changedCount = 0;
  for (const line of text.split("\n")) {
    if (!/^[+-]/.test(line)) continue;
    if (/^(---|\+\+\+)/.test(line)) continue; // file header, not a change
    const payload = line.replace(/^[+-]\s*/, "").trim();
    if (payload.length > 3) changedCount += 1;
  }
  return changedCount >= 2;
}
|
|
395
|
+
/**
 * Return the first two whitespace-separated tokens of the first line whose
 * trimmed length exceeds three characters, or "x" when there is no such line.
 */
function firstCodeToken(code) {
  for (const line of String(code || "").split("\n")) {
    const trimmed = line.trim();
    if (trimmed.length > 3) {
      return trimmed.split(/\s+/).slice(0, 2).join(" ");
    }
  }
  return "x";
}
|
|
399
|
+
/**
 * Find the 1-based number of the first line of `code` matching `re`
 * (case-insensitive).
 * Returns null when either argument is falsy, the pattern is invalid, or no
 * line matches.
 */
function lineMatching(code, re) {
  if (!code || !re) return null;

  let matcher;
  try {
    matcher = new RegExp(re, "i");
  } catch {
    return null;
  }

  const index = String(code).split("\n").findIndex((line) => matcher.test(line));
  return index === -1 ? null : index + 1;
}
|
package/src/ingest.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
// Ingest a real Claude Code session transcript (.jsonl) into the inputs the
|
|
2
|
+
// lab pipeline needs: goal, transcript text, and a reconstructed diff.
|
|
3
|
+
//
|
|
4
|
+
// Ground truth from auditing real transcripts (2026-06):
|
|
5
|
+
// - thinking blocks are persisted empty -> mine visible text only
|
|
6
|
+
// - most "user" records are tool_results, not humans -> filter hard
|
|
7
|
+
// - the diff is reconstructable from Edit/Write tool_use inputs
|
|
8
|
+
// - sessions can end on rate-limit corpses and API errors -> tolerate junk lines
|
|
9
|
+
|
|
10
|
+
// Size limits applied while ingesting a session.
// Longest transcript kept whole; beyond this the first and last halves are kept
// around a truncation marker.
const MAX_TRANSCRIPT_CHARS = 60000;
// Longest single turn kept before it is clipped and suffixed with " […]".
const MAX_TURN_CHARS = 700;
// Maximum number of lines taken from a Write tool call when rebuilding the diff.
const MAX_WRITE_LINES = 120;
|
|
13
|
+
|
|
14
|
+
/**
 * Convert a Claude Code session transcript (.jsonl text) into pipeline inputs.
 *
 * Lines that are blank or not valid JSON are ignored. Lines that parse to a
 * non-object value (for example `null`) are still counted in `stats.records`
 * but contribute nothing, instead of throwing.
 *
 * @param {string} jsonlText - Transcript contents, one JSON record per line.
 * @returns {{ goal: string, transcript: string, diff: string,
 *   stats: { records: number, turns: number, edits: number, files: string[] } }}
 *   `goal` is the first human turn longer than 20 characters (first 200
 *   characters, whitespace collapsed) or "Untitled session".
 */
export function ingestClaudeSession(jsonlText) {
  const records = [];
  for (const line of String(jsonlText ?? "").split("\n")) {
    if (!line.trim()) continue;
    try { records.push(JSON.parse(line)); } catch { /* tolerate junk lines */ }
  }

  let goal = null;
  const turns = [];
  const diffParts = [];
  const filesTouched = new Set();

  for (const record of records) {
    // A junk line can still be valid JSON (null, a number, a string); such a
    // value has no type or message to read.
    if (record === null || typeof record !== "object") continue;

    const content = record.message?.content;
    if (record.type === "user") {
      const text = humanText(content);
      if (!text) continue;
      if (!goal && text.length > 20) goal = text.slice(0, 200).replace(/\s+/g, " ").trim();
      turns.push("User: " + clip(text));
    } else if (record.type === "assistant" && Array.isArray(content)) {
      for (const block of content) {
        if (block?.type === "text" && block.text?.trim()) {
          turns.push("Assistant: " + clip(block.text));
        } else if (block?.type === "tool_use") {
          const part = diffFromToolUse(block);
          if (part) {
            diffParts.push(part.text);
            filesTouched.add(part.file);
          }
        }
      }
    }
  }

  let transcript = turns.join("\n");
  if (transcript.length > MAX_TRANSCRIPT_CHARS) {
    // Keep the opening (goal context) and the most recent work.
    transcript = transcript.slice(0, MAX_TRANSCRIPT_CHARS / 2) +
      "\n[...session truncated...]\n" +
      transcript.slice(-MAX_TRANSCRIPT_CHARS / 2);
  }

  return {
    goal: goal || "Untitled session",
    transcript,
    diff: diffParts.join("\n"),
    stats: {
      records: records.length,
      turns: turns.length,
      edits: diffParts.length,
      files: [...filesTouched]
    }
  };
}
|
|
68
|
+
|
|
69
|
+
/**
 * Trim a turn and cap it at MAX_TURN_CHARS, appending " […]" when it was cut.
 */
function clip(text) {
  const cleaned = String(text).trim();
  if (cleaned.length <= MAX_TURN_CHARS) return cleaned;
  return cleaned.slice(0, MAX_TURN_CHARS) + " […]";
}
|
|
73
|
+
|
|
74
|
+
/**
 * Extract the human-written text of a "user" record, or null when there is none.
 *
 * Accepts a plain string or an array of content blocks; only blocks of type
 * "text" with a string `text` are used (joined with newlines). Text that
 * isMachine() recognises as machine-generated is dropped.
 */
function humanText(content) {
  let candidate;
  if (typeof content === "string") {
    candidate = content;
  } else if (Array.isArray(content)) {
    // tool_result blocks also arrive in user records; keep only text blocks.
    const parts = [];
    for (const block of content) {
      if (block?.type === "text" && typeof block.text === "string") parts.push(block.text);
    }
    if (parts.length === 0) return null;
    candidate = parts.join("\n");
  } else {
    return null;
  }
  return isMachine(candidate) ? null : candidate;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Tell whether a "user" text was produced by tooling rather than typed by a person.
 * Checks the trimmed text for known leading markers, plus the skill-payload
 * sentence anywhere in the text.
 */
function isMachine(text) {
  const t = text.trim();
  const leadingMarkers = [
    /^<(command-|local-command|task-notification|system-reminder|ide_)/,
    /^\[Request interrupted/,
    /^Caveat: The messages below/,
    /^<analysis>|^<summary>/
  ];
  if (leadingMarkers.some((marker) => marker.test(t))) return true;
  // skill payloads arrive as user records
  return /Base directory for this skill:/.test(t);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Rebuild a unified-diff-like fragment from an Edit or Write tool call.
 *
 * Edit: removed lines come from `old_string`, added lines from `new_string`.
 * An empty side contributes no lines, so a pure insertion or deletion does not
 * emit a bare "-" or "+" line for text that never existed.
 * Write: the first MAX_WRITE_LINES lines of `content` as added lines of a new file.
 *
 * @param {object} block - A `tool_use` content block; reads `name` and `input`.
 * @returns {{ file: string, text: string } | null} null when the call has no
 *   file path or is not an Edit/Write carrying content.
 */
function diffFromToolUse(block) {
  const input = block.input || {};
  const file = relPath(input.file_path || input.notebook_path || "");
  if (!file) return null;

  // "".split("\n") yields [""], which would become a phantom one-character line.
  const prefixed = (value, sign) => {
    const text = String(value || "");
    return text === "" ? [] : text.split("\n").map((l) => sign + l);
  };

  if (block.name === "Edit" && (input.old_string || input.new_string)) {
    const body = [...prefixed(input.old_string, "-"), ...prefixed(input.new_string, "+")];
    return {
      file,
      text: `diff --git a/${file} b/${file}\n--- a/${file}\n+++ b/${file}\n@@\n${body.join("\n")}`
    };
  }
  if (block.name === "Write" && input.content) {
    const lines = String(input.content).split("\n").slice(0, MAX_WRITE_LINES).map((l) => "+" + l);
    return {
      file,
      text: `diff --git a/${file} b/${file}\nnew file\n--- /dev/null\n+++ b/${file}\n@@\n${lines.join("\n")}`
    };
  }
  return null;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Shorten an absolute path to something project-relative.
 * When the path contains "Projects/", "repos/", "src/" or "code/" followed by
 * one directory, everything after that directory is returned; otherwise the
 * last three path segments are used. A falsy path yields null.
 */
function relPath(absPath) {
  if (!absPath) return null;
  const full = String(absPath);
  // Strip everything up to and including the project directory.
  const hit = /(?:Projects|repos|src|code)\/[^/]+\/(.+)$/.exec(full);
  if (hit) return hit[1];
  return full.split("/").slice(-3).join("/");
}
|