throughline 0.3.23 → 0.3.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111):
  1. package/.claude/commands/tl-trim.md +42 -0
  2. package/.codex-sidecar.yml +62 -0
  3. package/CHANGELOG.md +583 -0
  4. package/README.ja.md +42 -5
  5. package/README.md +400 -23
  6. package/bin/throughline.mjs +168 -4
  7. package/codex/skills/throughline/SKILL.md +157 -0
  8. package/codex/skills/throughline/agents/openai.yaml +7 -0
  9. package/docs/INHERITANCE_ON_CLEAR_ONLY.md +146 -0
  10. package/docs/L1_L2_L3_REDESIGN.md +415 -0
  11. package/docs/PUBLIC_RELEASE_PLAN.md +184 -0
  12. package/docs/THROUGHLINE_CODEX_DUAL_SUPPORT.md +249 -0
  13. package/docs/THROUGHLINE_CODEX_FIRST_ROADMAP.md +555 -0
  14. package/docs/THROUGHLINE_CODEX_MONITOR_IMPLEMENTATION_PLAN.md +220 -0
  15. package/docs/THROUGHLINE_CODEX_TRIM_IMPLEMENTATION_PLAN.md +528 -0
  16. package/docs/THROUGHLINE_CODEX_TRIM_ROLLBACK_FIX_PLAN.md +672 -0
  17. package/docs/archive/CONCEPT.md +476 -0
  18. package/docs/archive/EXPERIMENT.md +371 -0
  19. package/docs/archive/README.md +22 -0
  20. package/docs/archive/SESSION_LINKING_DESIGN.md +231 -0
  21. package/docs/archive/THROUGHLINE_NEXT_STEPS.md +134 -0
  22. package/docs/throughline-codex-trim-rollback-incident-report.md +306 -0
  23. package/docs/throughline-handoff-context.example.json +57 -0
  24. package/docs/throughline-rollback-context-trim-insight.md +455 -0
  25. package/package.json +6 -2
  26. package/src/cli/codex-capture.mjs +95 -0
  27. package/src/cli/codex-handoff-model-smoke.mjs +292 -0
  28. package/src/cli/codex-handoff-model-smoke.test.mjs +262 -0
  29. package/src/cli/codex-handoff-smoke.mjs +163 -0
  30. package/src/cli/codex-handoff-smoke.test.mjs +149 -0
  31. package/src/cli/codex-handoff-start.mjs +291 -0
  32. package/src/cli/codex-handoff-start.test.mjs +194 -0
  33. package/src/cli/codex-hook.mjs +276 -0
  34. package/src/cli/codex-hook.test.mjs +293 -0
  35. package/src/cli/codex-host-primitive-audit.mjs +110 -0
  36. package/src/cli/codex-host-primitive-audit.test.mjs +75 -0
  37. package/src/cli/codex-restore-smoke.mjs +357 -0
  38. package/src/cli/codex-restore-source-audit.mjs +304 -0
  39. package/src/cli/codex-resume.mjs +138 -0
  40. package/src/cli/codex-rollback-model-visible-smoke.mjs +373 -0
  41. package/src/cli/codex-rollback-model-visible-smoke.test.mjs +255 -0
  42. package/src/cli/codex-sidecar-diagnostics.mjs +48 -0
  43. package/src/cli/codex-sidecar-dry-run.mjs +85 -0
  44. package/src/cli/codex-summarize.mjs +224 -0
  45. package/src/cli/codex-threads.mjs +89 -0
  46. package/src/cli/codex-visibility-smoke.mjs +196 -0
  47. package/src/cli/codex-vscode-restore-smoke.mjs +226 -0
  48. package/src/cli/codex-vscode-rollback-smoke.mjs +114 -0
  49. package/src/cli/doctor.mjs +503 -1
  50. package/src/cli/doctor.test.mjs +542 -3
  51. package/src/cli/handoff-preview.mjs +78 -0
  52. package/src/cli/help.test.mjs +64 -0
  53. package/src/cli/install.mjs +227 -4
  54. package/src/cli/install.test.mjs +207 -4
  55. package/src/cli/trim.mjs +564 -0
  56. package/src/codex-app-server.mjs +1816 -0
  57. package/src/codex-app-server.test.mjs +512 -0
  58. package/src/codex-auto-refresh.mjs +194 -0
  59. package/src/codex-auto-refresh.test.mjs +182 -0
  60. package/src/codex-capture.mjs +235 -0
  61. package/src/codex-capture.test.mjs +393 -0
  62. package/src/codex-handoff-model-smoke.mjs +114 -0
  63. package/src/codex-handoff-model-smoke.test.mjs +89 -0
  64. package/src/codex-handoff-smoke.mjs +124 -0
  65. package/src/codex-handoff-smoke.test.mjs +103 -0
  66. package/src/codex-handoff.mjs +331 -0
  67. package/src/codex-handoff.test.mjs +220 -0
  68. package/src/codex-host-primitive-audit.mjs +374 -0
  69. package/src/codex-host-primitive-audit.test.mjs +208 -0
  70. package/src/codex-restore-smoke.test.mjs +639 -0
  71. package/src/codex-restore-source-audit.mjs +1348 -0
  72. package/src/codex-restore-source-audit.test.mjs +623 -0
  73. package/src/codex-resume.test.mjs +242 -0
  74. package/src/codex-rollout-memory.mjs +711 -0
  75. package/src/codex-rollout-memory.test.mjs +610 -0
  76. package/src/codex-sidecar-cli.test.mjs +75 -0
  77. package/src/codex-sidecar.mjs +246 -0
  78. package/src/codex-sidecar.test.mjs +172 -0
  79. package/src/codex-summarize.test.mjs +143 -0
  80. package/src/codex-thread-identity.mjs +23 -0
  81. package/src/codex-thread-index.mjs +173 -0
  82. package/src/codex-thread-index.test.mjs +164 -0
  83. package/src/codex-usage.mjs +110 -0
  84. package/src/codex-usage.test.mjs +140 -0
  85. package/src/codex-visibility-smoke.test.mjs +222 -0
  86. package/src/codex-vscode-restore-smoke.mjs +206 -0
  87. package/src/codex-vscode-restore-smoke.test.mjs +325 -0
  88. package/src/codex-vscode-rollback-smoke.mjs +90 -0
  89. package/src/codex-vscode-rollback-smoke.test.mjs +290 -0
  90. package/src/db-schema.test.mjs +97 -0
  91. package/src/haiku-summarizer.mjs +267 -26
  92. package/src/haiku-summarizer.test.mjs +282 -0
  93. package/src/handoff-preview.test.mjs +108 -0
  94. package/src/handoff-record.mjs +294 -0
  95. package/src/handoff-record.test.mjs +226 -0
  96. package/src/hook-entrypoints.test.mjs +326 -0
  97. package/src/package-files.test.mjs +19 -0
  98. package/src/prompt-submit.mjs +9 -6
  99. package/src/resume-context.mjs +44 -140
  100. package/src/resume-context.test.mjs +172 -0
  101. package/src/session-start.mjs +8 -5
  102. package/src/state-file.mjs +50 -6
  103. package/src/state-file.test.mjs +50 -0
  104. package/src/token-monitor.mjs +14 -10
  105. package/src/token-monitor.test.mjs +27 -0
  106. package/src/trim-cli.test.mjs +1584 -0
  107. package/src/trim-model.mjs +584 -0
  108. package/src/trim-model.test.mjs +568 -0
  109. package/src/turn-processor.mjs +17 -10
  110. package/src/vscode-task.mjs +94 -6
  111. package/src/vscode-task.test.mjs +186 -6
@@ -0,0 +1,1584 @@
1
+ import { test } from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { spawnSync } from 'node:child_process';
4
+ import { chmodSync, existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
5
+ import { tmpdir } from 'node:os';
6
+ import { dirname, join } from 'node:path';
7
+ import { fileURLToPath } from 'node:url';
8
+
// Package root directory: two levels up from this file's path. Used to locate
// the CLI entry point (bin/throughline.mjs) that the tests spawn.
const REPO_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
10
+
/**
 * Create a fresh, uniquely named directory under the OS temp dir to stand in
 * for the user's home directory during a test.
 *
 * @returns {string} Path of the newly created directory. The caller is
 *   responsible for removing it.
 */
function makeTempHome() {
  return mkdtempSync(join(tmpdir(), 'tl-trim-home-'));
}
14
+
/**
 * Create a fresh, uniquely named directory under the OS temp dir to act as the
 * project working directory for a test.
 *
 * @returns {string} Path of the newly created directory. The caller is
 *   responsible for removing it.
 */
function makeTempProject() {
  return mkdtempSync(join(tmpdir(), 'tl-trim-project-'));
}
18
+
/**
 * Write an executable fake Codex app-server script into `dir`.
 *
 * The generated script has two modes:
 *  - `generate-json-schema --out <dir>`: writes `ClientRequest.json` and
 *    `v2/ThreadResumeParams.json` into `<dir>` and exits.
 *  - otherwise: reads one JSON message per stdin line and answers
 *    `initialize`, `thread/read`, `thread/resume`, `thread/rollback` and
 *    `thread/inject_items` on stdout. Every method except `initialized` is
 *    appended to the log file, so the log only exists once a request arrived.
 *
 * @param {string} dir Directory that receives the script and its log.
 * @param {object} [options]
 * @param {boolean} [options.allowMutation=false] When false, rollback/inject
 *   requests log `UNEXPECTED_MUTATION:<method>` and get a -32000 error.
 * @param {string} [options.threadId] Thread id echoed in every thread result.
 * @param {number} [options.turnCount=2] Number of initial turns
 *   (`turn-1` .. `turn-N`).
 * @param {number} [options.delayedInjectVisibilityReads=0] Number of
 *   `thread/read` calls after an inject that still hide the injected turn.
 * @param {string|null} [options.durableRolloutPath=null] When set, rollback and
 *   inject append a JSONL row to this file.
 * @param {number} [options.durableRolloutAppendDelayMs=0] When positive, the
 *   rollout row is appended by a detached child process after this delay.
 * @param {boolean} [options.injectCreatesTurn=true] Whether an inject produces
 *   an `injected-memory` turn.
 * @param {boolean} [options.injectResponseIncludesTurns=true] Whether the
 *   inject response carries `thread.turns` (otherwise an empty result).
 * @param {boolean} [options.injectResponseAdvertisesPendingTurn=false] Whether
 *   the inject response lists a turn that reads do not show yet.
 * @param {boolean} [options.hostRemediationPrimitive=true] Whether the
 *   generated schema advertises `thread/history/clear`.
 * @param {boolean} [options.hostResumeHistoryCandidate] Selects the description
 *   written for `ThreadResumeParams.history`; defaults to
 *   `hostRemediationPrimitive`.
 * @returns {{ script: string, log: string }} Paths of the script and its log.
 */
function makeFakeCodexAppServer(
  dir,
  {
    allowMutation = false,
    threadId = '019dfabf-thread',
    turnCount = 2,
    delayedInjectVisibilityReads = 0,
    durableRolloutPath = null,
    durableRolloutAppendDelayMs = 0,
    injectCreatesTurn = true,
    injectResponseIncludesTurns = true,
    injectResponseAdvertisesPendingTurn = false,
    hostRemediationPrimitive = true,
    hostResumeHistoryCandidate = hostRemediationPrimitive,
  } = {},
) {
  const script = join(dir, 'fake-codex-app-server.mjs');
  const log = join(dir, 'fake-codex-app-server.log');
  // The shebang must be the very first bytes of the file, so the template
  // starts immediately after the opening backtick. Option values are embedded
  // with JSON.stringify so they arrive as valid JS literals.
  writeFileSync(
    script,
    `#!/usr/bin/env node
import { spawn } from 'node:child_process';
import { appendFileSync } from 'node:fs';
import { mkdirSync, writeFileSync } from 'node:fs';
import { createInterface } from 'node:readline';

const log = ${JSON.stringify(log)};
const allowMutation = ${JSON.stringify(allowMutation)};
const threadId = ${JSON.stringify(threadId)};
const durableRolloutPath = ${JSON.stringify(durableRolloutPath)};
const durableRolloutAppendDelayMs = ${JSON.stringify(durableRolloutAppendDelayMs)};
const injectCreatesTurn = ${JSON.stringify(injectCreatesTurn)};
const injectResponseIncludesTurns = ${JSON.stringify(injectResponseIncludesTurns)};
const injectResponseAdvertisesPendingTurn = ${JSON.stringify(injectResponseAdvertisesPendingTurn)};
const hostRemediationPrimitive = ${JSON.stringify(hostRemediationPrimitive)};
const hostResumeHistoryCandidate = ${JSON.stringify(hostResumeHistoryCandidate)};
let turns = Array.from({ length: ${JSON.stringify(turnCount)} }, (_, index) => ({ id: 'turn-' + (index + 1) }));
let pendingInjectedTurn = null;
let delayedInjectVisibilityReads = ${JSON.stringify(delayedInjectVisibilityReads)};
const rl = createInterface({ input: process.stdin });

if (process.argv.includes('generate-json-schema')) {
  const outIndex = process.argv.indexOf('--out');
  const outDir = outIndex >= 0 ? process.argv[outIndex + 1] : null;
  if (!outDir) process.exit(2);
  mkdirSync(outDir + '/v2', { recursive: true });
  const methods = [
    'initialize',
    'thread/read',
    'thread/resume',
    'thread/rollback',
    'thread/inject_items',
    'thread/compact/start',
  ];
  if (hostRemediationPrimitive) methods.push('thread/history/clear');
  writeFileSync(outDir + '/ClientRequest.json', JSON.stringify({ enum: methods }, null, 2));
  writeFileSync(
    outDir + '/v2/ThreadResumeParams.json',
    JSON.stringify(
      {
        properties: {
          history: {
            description: hostResumeHistoryCandidate
              ? 'test-only history candidate'
              : '[UNSTABLE] FOR CODEX CLOUD - DO NOT USE',
          },
        },
      },
      null,
      2,
    ),
  );
  process.exit(0);
}

function send(message) {
  process.stdout.write(JSON.stringify(message) + '\\n');
}

function appendRollout(payload) {
  if (!durableRolloutPath) return;
  const row = JSON.stringify({
    timestamp: '2026-05-06T00:42:00.000Z',
    ...payload,
  }) + '\\n';
  if (durableRolloutAppendDelayMs > 0) {
    const code = 'setTimeout(() => { require("node:fs").appendFileSync('
      + JSON.stringify(durableRolloutPath)
      + ', '
      + JSON.stringify(row)
      + '); }, '
      + String(durableRolloutAppendDelayMs)
      + ');';
    spawn(process.execPath, ['-e', code], { detached: true, stdio: 'ignore' }).unref();
    return;
  }
  appendFileSync(durableRolloutPath, row);
}

rl.on('line', (line) => {
  const msg = JSON.parse(line);
  if (msg.method === 'initialized') return;
  appendFileSync(log, msg.method + '\\n');
  if (msg.method === 'initialize') {
    send({ id: msg.id, result: { userAgent: 'fake-codex', codexHome: '/tmp/codex' } });
  } else if (msg.method === 'thread/read') {
    if (pendingInjectedTurn && delayedInjectVisibilityReads <= 0) {
      turns = [...turns, pendingInjectedTurn];
      pendingInjectedTurn = null;
    } else if (pendingInjectedTurn) {
      delayedInjectVisibilityReads--;
    }
    send({ id: msg.id, result: { thread: { id: threadId, turns } } });
  } else if (msg.method === 'thread/resume') {
    send({ id: msg.id, result: { thread: { id: threadId, turns } } });
  } else if (msg.method === 'thread/rollback') {
    if (!allowMutation) {
      appendFileSync(log, 'UNEXPECTED_MUTATION:' + msg.method + '\\n');
      send({ id: msg.id, error: { code: -32000, message: 'mutation must not be called' } });
      return;
    }
    turns = turns.slice(0, Math.max(0, turns.length - msg.params.numTurns));
    appendRollout({ type: 'event_msg', payload: { type: 'thread_rolled_back', num_turns: msg.params.numTurns } });
    send({ id: msg.id, result: { thread: { id: threadId, turns } } });
  } else if (msg.method === 'thread/inject_items') {
    if (!allowMutation) {
      appendFileSync(log, 'UNEXPECTED_MUTATION:' + msg.method + '\\n');
      send({ id: msg.id, error: { code: -32000, message: 'mutation must not be called' } });
      return;
    }
    const injected = msg.params.items?.[0]?.content?.[0]?.text ?? '';
    appendFileSync(log, 'INJECT_TEXT:' + injected.replace(/\\n/g, ' ') + '\\n');
    appendRollout({
      type: 'response_item',
      payload: {
        type: 'message',
        role: 'developer',
        content: [{ type: 'input_text', text: injected }],
      },
    });
    pendingInjectedTurn = injectCreatesTurn ? { id: 'injected-memory' } : null;
    if (pendingInjectedTurn && delayedInjectVisibilityReads <= 0) {
      turns = [...turns, pendingInjectedTurn];
      pendingInjectedTurn = null;
    }
    const injectResponseTurns =
      injectResponseAdvertisesPendingTurn && pendingInjectedTurn
        ? [...turns, pendingInjectedTurn]
        : turns;
    send({
      id: msg.id,
      result: injectResponseIncludesTurns ? { thread: { id: threadId, turns: injectResponseTurns } } : {},
    });
  } else {
    send({ id: msg.id, error: { code: -32601, message: 'unknown method' } });
  }
});
`,
  );
  chmodSync(script, 0o755);
  return { script, log };
}
181
+
/**
 * Seed the database with one active session (`sess-trim-cli`) for `project`
 * and 22 assistant bodies (turns 1..22).
 *
 * HOME and USERPROFILE are pointed at `home` while the db module is loaded and
 * used, then restored to their previous values (or deleted if they were unset).
 *
 * @param {string} home Directory used as HOME/USERPROFILE while seeding.
 * @param {string} project Project path stored on the session row.
 * @returns {Promise<void>}
 */
async function seedDb(home, project) {
  const originalHome = process.env.HOME;
  const originalUserProfile = process.env.USERPROFILE;
  process.env.HOME = home;
  process.env.USERPROFILE = home;
  try {
    // A unique query string makes each call load a separate module instance.
    // NOTE(review): presumably db.mjs resolves its storage path from HOME at
    // load time or caches its connection; confirm against db.mjs.
    const mod = await import(`./db.mjs?trimCli=${Date.now()}-${Math.random()}`);
    const db = mod.getDb();
    try {
      db.prepare(
        `INSERT INTO sessions (session_id, project_path, status, created_at, updated_at)
         VALUES ('sess-trim-cli', ?, 'active', 1, 2)`,
      ).run(project);
      // Prepare once and reuse the statement for every turn.
      const insertBody = db.prepare(
        `INSERT INTO bodies
         (session_id, origin_session_id, turn_number, role, text, token_count, created_at)
         VALUES ('sess-trim-cli', 'sess-trim-cli', ?, 'assistant', ?, 1, ?)`,
      );
      for (let turn = 1; turn <= 22; turn++) {
        insertBody.run(turn, `assistant body ${turn}`, turn * 1000);
      }
    } finally {
      // Close even when an insert throws, so the temp home holds no open handle
      // when the test removes it.
      db.close();
    }
  } finally {
    if (originalHome === undefined) delete process.env.HOME;
    else process.env.HOME = originalHome;
    if (originalUserProfile === undefined) delete process.env.USERPROFILE;
    else process.env.USERPROFILE = originalUserProfile;
  }
}
209
+
/**
 * Seed the database with one active session (`sess-empty-codex`) for `project`
 * and no bodies.
 *
 * HOME and USERPROFILE are pointed at `home` while the db module is loaded and
 * used, then restored to their previous values (or deleted if they were unset).
 *
 * @param {string} home Directory used as HOME/USERPROFILE while seeding.
 * @param {string} project Project path stored on the session row.
 * @returns {Promise<void>}
 */
async function seedEmptyDb(home, project) {
  const originalHome = process.env.HOME;
  const originalUserProfile = process.env.USERPROFILE;
  process.env.HOME = home;
  process.env.USERPROFILE = home;
  try {
    // A unique query string makes each call load a separate module instance.
    // NOTE(review): presumably db.mjs resolves its storage path from HOME at
    // load time or caches its connection; confirm against db.mjs.
    const mod = await import(`./db.mjs?trimCliEmpty=${Date.now()}-${Math.random()}`);
    const db = mod.getDb();
    try {
      db.prepare(
        `INSERT INTO sessions (session_id, project_path, status, created_at, updated_at)
         VALUES ('sess-empty-codex', ?, 'active', 1, 2)`,
      ).run(project);
    } finally {
      // Close even when the insert throws, so the temp home holds no open
      // handle when the test removes it.
      db.close();
    }
  } finally {
    if (originalHome === undefined) delete process.env.HOME;
    else process.env.HOME = originalHome;
    if (originalUserProfile === undefined) delete process.env.USERPROFILE;
    else process.env.USERPROFILE = originalUserProfile;
  }
}
230
+
/**
 * Run `throughline trim` synchronously in a child Node process.
 *
 * @param {string} home Value for HOME and USERPROFILE in the child.
 * @param {string} project Working directory of the child.
 * @param {string[]} [args=[]] Extra CLI arguments placed after `trim`.
 * @param {string|null} [input=null] Passed through as the spawn `input` option.
 * @param {Record<string, string|undefined>} [extraEnv={}] Environment entries
 *   layered on top of the inherited environment and the HOME overrides.
 * @returns {import('node:child_process').SpawnSyncReturns<string>} Result with
 *   `status`, `stdout` and `stderr` decoded as UTF-8.
 */
function runTrim(home, project, args = [], input = null, extraEnv = {}) {
  return spawnSync(process.execPath, [join(REPO_ROOT, 'bin/throughline.mjs'), 'trim', ...args], {
    cwd: project,
    env: {
      ...process.env,
      HOME: home,
      USERPROFILE: home,
      // Spread last so callers can override any inherited variable.
      ...extraEnv,
    },
    input,
    encoding: 'utf8',
  });
}
244
+
// Dry-run for the Claude host: the plan is reported as JSON for the seeded
// session and automatic execution is not allowed.
test('trim CLI prints JSON dry-run plan for latest project session', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const result = runTrim(home, project, ['--dry-run', '--host', 'claude', '--json']);

    assert.equal(result.status, 0, result.stderr);
    const plan = JSON.parse(result.stdout);
    assert.equal(plan.session.id, 'sess-trim-cli');
    assert.equal(plan.status, 'manual-only');
    assert.equal(plan.trim.capturedTurns, 22);
    assert.equal(plan.trim.rollbackTurns, 2);
    assert.equal(plan.trim.automaticExecutionAllowed, false);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
264
+
// Dry-run for the Codex host with --codex-thread-id: the id is echoed in
// plan.hostIdentity and automatic execution is allowed.
test('trim CLI carries explicit Codex thread id in dry-run JSON', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const result = runTrim(home, project, [
      '--dry-run',
      '--host',
      'codex',
      '--codex-thread-id',
      '019dfabf-thread',
      '--json',
    ]);

    assert.equal(result.status, 0, result.stderr);
    const plan = JSON.parse(result.stdout);
    assert.deepEqual(plan.hostIdentity, {
      host: 'codex',
      codexThreadId: '019dfabf-thread',
      explicit: true,
      reason: 'explicit_codex_thread_id',
    });
    assert.equal(plan.trim.automaticExecutionAllowed, true);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
293
+
// Text-mode dry-run with --preview-max-chars: the report notes the truncation
// and lists the follow-up codex-* commands for the thread.
test('trim CLI accepts --preview-max-chars for text dry-run reports', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const result = runTrim(home, project, [
      '--dry-run',
      '--host',
      'codex',
      '--codex-thread-id',
      '019dfabf-thread',
      '--preview-max-chars',
      '120',
    ]);

    assert.equal(result.status, 0, result.stderr);
    assert.match(result.stdout, /\[preview truncated to 120 chars/);
    assert.match(result.stdout, /throughline codex-handoff-start --session codex:019dfabf-thread/);
    assert.match(result.stdout, /throughline codex-handoff-smoke --session codex:019dfabf-thread/);
    assert.match(result.stdout, /throughline codex-handoff-model-smoke --session codex:019dfabf-thread --dry-run --json/);
    assert.match(result.stdout, /throughline codex-resume --session codex:019dfabf-thread --format handoff/);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
320
+
// Argument validation: a --preview-max-chars of 0 exits with status 1 and an
// explanatory message on stderr. No database seeding is needed.
test('trim CLI rejects invalid --preview-max-chars', () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    const result = runTrim(home, project, ['--dry-run', '--preview-max-chars', '0']);

    assert.equal(result.status, 1);
    assert.match(result.stderr, /--preview-max-chars must be a positive integer/);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
334
+
// The session has no captured bodies in the DB, so the plan must be built from
// the Codex rollout written under CODEX_HOME for the explicit thread id.
test('trim CLI uses explicit Codex rollout source when DB has no captured turns', async () => {
  const home = makeTempHome();
  const codexHome = makeTempHome();
  const project = makeTempProject();
  try {
    await seedEmptyDb(home, project);
    writeCodexRollout(codexHome, {
      project,
      threadId: '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9',
      turnCount: 22,
    });

    const result = runTrim(
      home,
      project,
      [
        '--dry-run',
        '--host',
        'codex',
        '--codex-thread-id',
        '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9',
        '--json',
      ],
      null,
      { CODEX_HOME: codexHome },
    );

    assert.equal(result.status, 0, result.stderr);
    const plan = JSON.parse(result.stdout);
    assert.equal(plan.session.id, 'sess-empty-codex');
    assert.equal(plan.trim.source, 'codex-rollout');
    assert.equal(plan.trim.sourceReason, 'explicit_codex_thread_rollout');
    assert.equal(plan.trim.capturedTurns, 22);
    assert.equal(plan.trim.rollbackTurns, 2);
    assert.match(plan.memoryPreview.text, /Active Work Thread \(Codex Rollout\)/);
    assert.match(plan.memoryPreview.text, /codex user turn 22/);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
    rmSync(codexHome, { recursive: true, force: true });
  }
});
377
+
// No --codex-thread-id flag: the thread id comes from
// THROUGHLINE_CODEX_THREAD_ID and is reported as a non-explicit, env-sourced
// identity.
test('trim CLI uses env Codex thread id when no explicit thread id is passed', async () => {
  const home = makeTempHome();
  const codexHome = makeTempHome();
  const project = makeTempProject();
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(home, project);
    writeCodexRollout(codexHome, {
      project,
      threadId,
      turnCount: 22,
    });

    const result = runTrim(
      home,
      project,
      ['--dry-run', '--host', 'codex', '--json'],
      null,
      {
        CODEX_HOME: codexHome,
        THROUGHLINE_CODEX_THREAD_ID: threadId,
      },
    );

    assert.equal(result.status, 0, result.stderr);
    const plan = JSON.parse(result.stdout);
    assert.deepEqual(plan.hostIdentity, {
      host: 'codex',
      codexThreadId: threadId,
      explicit: false,
      reason: 'env_codex_thread_id',
      source: 'env:THROUGHLINE_CODEX_THREAD_ID',
    });
    assert.equal(plan.trim.source, 'codex-rollout');
    assert.equal(plan.trim.sourceReason, 'env_codex_thread_rollout');
    assert.equal(plan.trim.capturedTurns, 22);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
    rmSync(codexHome, { recursive: true, force: true });
  }
});
420
+
// Two rollouts exist (22 and 30 turns). The flag names the 22-turn thread and
// the env var names the 30-turn thread; the flag must win.
test('trim CLI explicit Codex thread id overrides env thread id', async () => {
  const home = makeTempHome();
  const codexHome = makeTempHome();
  const project = makeTempProject();
  const explicitThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedEmptyDb(home, project);
    writeCodexRollout(codexHome, {
      project,
      threadId: explicitThreadId,
      turnCount: 22,
    });
    writeCodexRollout(codexHome, {
      project,
      threadId: '019dfabb-1111-7111-8111-111111111111',
      turnCount: 30,
    });

    const result = runTrim(
      home,
      project,
      [
        '--dry-run',
        '--host',
        'codex',
        '--codex-thread-id',
        explicitThreadId,
        '--json',
      ],
      null,
      {
        CODEX_HOME: codexHome,
        THROUGHLINE_CODEX_THREAD_ID: '019dfabb-1111-7111-8111-111111111111',
      },
    );

    assert.equal(result.status, 0, result.stderr);
    const plan = JSON.parse(result.stdout);
    assert.deepEqual(plan.hostIdentity, {
      host: 'codex',
      codexThreadId: explicitThreadId,
      explicit: true,
      reason: 'explicit_codex_thread_id',
    });
    assert.equal(plan.trim.capturedTurns, 22);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
    rmSync(codexHome, { recursive: true, force: true });
  }
});
472
+
// Without --dry-run the Claude host has no automatic path: the CLI exits 1 with
// a "not implemented yet" message.
test('trim CLI refuses Claude non-dry-run automatic rollback/inject', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const result = runTrim(home, project, ['--host', 'claude']);

    assert.equal(result.status, 1);
    assert.match(result.stderr, /automatic rollback\/inject is not implemented yet/);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
487
+
// --execute alone must be enough to reach the app-server mutation path; the
// experimental env var is not required.
test('trim CLI guarded execute does not require experimental env once --execute is explicit', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const { script, log } = makeFakeCodexAppServer(project, { allowMutation: true });
    const result = runTrim(
      home,
      project,
      [
        '--host',
        'codex',
        '--codex-thread-id',
        '019dfabf-thread',
        '--execute',
        '--codex-app-server-bin',
        script,
        '--json',
      ],
      null,
      // runTrim inherits the parent environment. Explicitly unset the flag so
      // an ambient value cannot make this test pass vacuously (child_process
      // drops env entries whose value is undefined).
      { THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: undefined },
    );

    assert.equal(result.status, 1);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.status, 'execute-sent-live-only');
    assert.equal(payload.reason, 'rollback_and_inject_sent_live_only');
    assert.equal(payload.execution.rollbackSent, true);
    assert.equal(payload.execution.injectSent, true);
    assert.equal(existsSync(log), true, 'app-server should start for explicit execute');
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
517
+
// The fake server's schema omits 'thread/history/clear'
// (hostRemediationPrimitive: false); execute must still send rollback + inject.
test('trim CLI guarded execute no longer blocks on missing host same-thread repair contract', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const { script, log } = makeFakeCodexAppServer(project, {
      allowMutation: true,
      hostRemediationPrimitive: false,
    });
    const result = runTrim(
      home,
      project,
      [
        '--host',
        'codex',
        '--codex-thread-id',
        '019dfabf-thread',
        '--execute',
        '--codex-app-server-bin',
        script,
        '--json',
      ],
      null,
      { THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1' },
    );

    assert.equal(result.status, 1);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.status, 'execute-sent-live-only');
    assert.equal(payload.reason, 'rollback_and_inject_sent_live_only');
    assert.equal(payload.execution.rollbackSent, true);
    assert.equal(payload.execution.injectSent, true);
    assert.equal(existsSync(log), true, 'app-server mutation path should start without host repair primitive');
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
556
+
// The schema lacks 'thread/history/clear' but describes resume `history` as a
// candidate; execute must not depend on that and still sends rollback + inject.
test('trim CLI guarded execute does not require resume history as current-thread repair', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const { script, log } = makeFakeCodexAppServer(project, {
      allowMutation: true,
      hostRemediationPrimitive: false,
      hostResumeHistoryCandidate: true,
    });
    const result = runTrim(
      home,
      project,
      [
        '--host',
        'codex',
        '--codex-thread-id',
        '019dfabf-thread',
        '--execute',
        '--codex-app-server-bin',
        script,
        '--json',
      ],
      null,
      { THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1' },
    );

    assert.equal(result.status, 1);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.status, 'execute-sent-live-only');
    assert.equal(payload.reason, 'rollback_and_inject_sent_live_only');
    assert.equal(payload.execution.rollbackSent, true);
    assert.equal(payload.execution.injectSent, true);
    assert.equal(existsSync(log), true, 'app-server mutation path should start without resume-history repair');
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
596
+
// Preflight is read-only: the fake server (mutation disallowed by default) must
// see initialize, thread/read and thread/resume, and never a rollback or inject.
test('trim CLI preflight reads and resumes Codex thread without rollback or inject', async () => {
  const home = makeTempHome();
  const project = makeTempProject();
  try {
    await seedDb(home, project);
    const { script, log } = makeFakeCodexAppServer(project);
    const result = runTrim(home, project, [
      '--host',
      'codex',
      '--codex-thread-id',
      '019dfabf-thread',
      '--preflight',
      '--codex-app-server-bin',
      script,
      '--json',
    ]);

    assert.equal(result.status, 0, result.stderr);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.status, 'preflight-ready');
    assert.equal(payload.preflight.rollbackSent, false);
    assert.equal(payload.preflight.injectSent, false);
    assert.equal(payload.preflight.readTurns, 2);
    assert.equal(payload.preflight.resumedTurns, 2);
    assert.equal(payload.preflight.rollbackRequestPreview.method, 'thread/rollback');
    assert.equal(payload.preflight.rollbackRequestPreview.params.numTurns, 2);

    const calledMethods = readFileSync(log, 'utf8');
    assert.match(calledMethods, /initialize/);
    assert.match(calledMethods, /thread\/read/);
    assert.match(calledMethods, /thread\/resume/);
    assert.doesNotMatch(calledMethods, /UNEXPECTED_MUTATION/);
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
  }
});
634
+
// Rollout file and fake app-server both report 22 turns, so the preflight
// turn-count check must come back as a match.
test('trim CLI preflight checks Codex rollout source against app-server turns', async () => {
  const home = makeTempHome();
  const codexHome = makeTempHome();
  const project = makeTempProject();
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(home, project);
    writeCodexRollout(codexHome, {
      project,
      threadId,
      turnCount: 22,
    });
    const { script } = makeFakeCodexAppServer(project, { threadId, turnCount: 22 });
    const result = runTrim(
      home,
      project,
      [
        '--host',
        'codex',
        '--codex-thread-id',
        threadId,
        '--preflight',
        '--codex-app-server-bin',
        script,
        '--json',
      ],
      null,
      { CODEX_HOME: codexHome },
    );

    assert.equal(result.status, 0, result.stderr);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.status, 'preflight-ready');
    assert.deepEqual(payload.preflight.turnCountCheck, {
      status: 'match',
      reason: 'rollout_and_app_server_turn_counts_match',
      expectedTurns: 22,
      readTurns: 22,
      resumedTurns: 22,
    });
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
    rmSync(codexHome, { recursive: true, force: true });
  }
});
681
+
// Preflight without --codex-thread-id: the id is taken from CODEX_THREAD_ID and
// reported as env-sourced.
test('trim CLI preflight accepts env Codex thread id', async () => {
  const home = makeTempHome();
  const codexHome = makeTempHome();
  const project = makeTempProject();
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedEmptyDb(home, project);
    writeCodexRollout(codexHome, {
      project,
      threadId,
      turnCount: 22,
    });
    const { script } = makeFakeCodexAppServer(project, { threadId, turnCount: 22 });
    const result = runTrim(
      home,
      project,
      [
        '--host',
        'codex',
        '--preflight',
        '--codex-app-server-bin',
        script,
        '--json',
      ],
      null,
      {
        CODEX_HOME: codexHome,
        CODEX_THREAD_ID: threadId,
      },
    );

    assert.equal(result.status, 0, result.stderr);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.status, 'preflight-ready');
    assert.equal(payload.plan.hostIdentity.reason, 'env_codex_thread_id');
    assert.equal(payload.plan.hostIdentity.source, 'env:CODEX_THREAD_ID');
    assert.equal(payload.preflight.threadId, threadId);
    assert.equal(payload.preflight.turnCountCheck.status, 'match');
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
    rmSync(codexHome, { recursive: true, force: true });
  }
});
726
+
// A rollout flagged with restoreRisk is reported as restoreSafety 'risk' in the
// plan, but preflight still starts the app-server and stays read-only.
test('trim CLI preflight proceeds when rollout restore safety is risky', async () => {
  const home = makeTempHome();
  const codexHome = makeTempHome();
  const project = makeTempProject();
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(home, project);
    writeCodexRollout(codexHome, {
      project,
      threadId,
      turnCount: 22,
      restoreRisk: true,
    });
    const { script, log } = makeFakeCodexAppServer(project, {
      threadId,
      turnCount: 22,
    });
    const result = runTrim(
      home,
      project,
      [
        '--host',
        'codex',
        '--codex-thread-id',
        threadId,
        '--preflight',
        '--codex-app-server-bin',
        script,
        '--json',
      ],
      null,
      {
        CODEX_HOME: codexHome,
      },
    );

    assert.equal(result.status, 0, result.stderr);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.status, 'preflight-ready');
    assert.equal(payload.plan.trim.restoreSafety.status, 'risk');
    assert.equal(payload.preflight.rollbackSent, false);
    assert.equal(payload.preflight.injectSent, false);
    assert.equal(existsSync(log), true, 'app-server should start despite restore-safety diagnostics');
  } finally {
    rmSync(project, { recursive: true, force: true });
    rmSync(home, { recursive: true, force: true });
    rmSync(codexHome, { recursive: true, force: true });
  }
});
776
+
777
// When compacted replacement history already holds the text a rollback would
// drop, the current restore safety is 'ok' while the planned-rollback safety
// reports a risk; preflight still completes without mutating the thread.
test('trim CLI preflight proceeds when compacted history already retains target text', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(homeDir, projectDir);
    writeCodexRollout(codexHomeDir, {
      project: projectDir,
      threadId: codexThreadId,
      turnCount: 22,
      retainedCompactedText: true,
    });
    const fakeServer = makeFakeCodexAppServer(projectDir, { threadId: codexThreadId, turnCount: 22 });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--preflight',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const run = runTrim(homeDir, projectDir, cliArgs, null, { CODEX_HOME: codexHomeDir });

    assert.equal(run.status, 0, run.stderr);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'preflight-ready');
    assert.equal(report.plan.trim.restoreSafety.status, 'ok');
    assert.equal(report.plan.trim.plannedRollbackRestoreSafety.status, 'risk');
    assert.equal(
      report.plan.trim.plannedRollbackRestoreSafety.risks[0].type,
      'planned_rollback_text_retained_in_compacted_replacement_history',
    );
    assert.equal(report.preflight.rollbackSent, false);
    assert.equal(report.preflight.injectSent, false);
    assert.equal(
      existsSync(fakeServer.log),
      true,
      'app-server should start despite planned restore-safety diagnostics',
    );
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
831
+
832
// The rollout fixture has 22 turns but the fake app-server is configured with
// 21; execute must refuse before any rollback or inject request is logged.
test('trim CLI execute refuses before rollback when rollout and app-server turn counts differ', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(homeDir, projectDir);
    writeCodexRollout(codexHomeDir, { project: projectDir, threadId: codexThreadId, turnCount: 22 });
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      threadId: codexThreadId,
      turnCount: 21,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--keep-recent', '20',
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = {
      CODEX_HOME: codexHomeDir,
      THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1',
    };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-refused');
    assert.equal(report.reason, 'codex_rollout_app_server_turn_mismatch');
    const { execution } = report;
    assert.equal(execution.rollbackSent, false);
    assert.equal(execution.injectSent, false);
    assert.equal(execution.turnCountCheck.status, 'mismatch');

    // Read-only requests appear in the fake server's log; mutating ones must not.
    const methodLog = readFileSync(fakeServer.log, 'utf8');
    assert.match(methodLog, /thread\/read/);
    assert.match(methodLog, /thread\/resume/);
    assert.doesNotMatch(methodLog, /thread\/rollback/);
    assert.doesNotMatch(methodLog, /thread\/inject_items/);
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
887
+
888
// With an empty Throughline DB the memory preview comes from the rollout only;
// execute must refuse and the fake app-server log must never be created.
test('trim CLI execute refuses rollout preview injection when Throughline DB memory is absent', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedEmptyDb(homeDir, projectDir);
    writeCodexRollout(codexHomeDir, { project: projectDir, threadId: codexThreadId, turnCount: 22 });
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      threadId: codexThreadId,
      turnCount: 22,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = {
      CODEX_HOME: codexHomeDir,
      THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1',
    };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-refused');
    assert.equal(report.reason, 'injectable_memory_required');
    assert.equal(report.plan.memoryPreview.stats.source, 'codex-rollout');

    assert.equal(
      existsSync(fakeServer.log),
      false,
      'app-server should not start without DB injectable memory',
    );
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
935
+
936
// Restore-safety risk in the rollout is diagnostic only for execute: rollback
// and inject are both sent, and the run ends as 'execute-unverified'.
test('trim CLI execute proceeds when rollout restore safety is risky', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(homeDir, projectDir);
    writeCodexRollout(codexHomeDir, {
      project: projectDir,
      threadId: codexThreadId,
      turnCount: 22,
      restoreRisk: true,
    });
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      threadId: codexThreadId,
      turnCount: 22,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = {
      CODEX_HOME: codexHomeDir,
      THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1',
    };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-unverified');
    assert.equal(report.reason, 'rollback_marker_not_observed_in_rollout');
    assert.equal(report.plan.trim.restoreSafety.status, 'risk');
    assert.equal(report.execution.rollbackSent, true);
    assert.equal(report.execution.injectSent, true);
    assert.equal(
      existsSync(fakeServer.log),
      true,
      'app-server should start despite restore-safety diagnostics',
    );
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
988
+
989
// Without a rollout file (no CODEX_HOME fixture) the mutation is sent but can
// only be reported as live-only; also pins the order of app-server requests.
test('trim CLI guarded execute rolls back then injects curated memory', async () => {
  const homeDir = makeTempHome();
  const projectDir = makeTempProject();
  try {
    await seedDb(homeDir, projectDir);
    const fakeServer = makeFakeCodexAppServer(projectDir, { allowMutation: true });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', '019dfabf-thread',
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = { THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1' };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-sent-live-only');
    assert.equal(report.reason, 'rollback_and_inject_sent_live_only');
    const expectedDurableVerification = {
      liveMutationSent: true,
      durableVerified: false,
      postInjectVisibilityStatus: 'match',
      restoreSafetyStatus: 'unknown',
      rolloutPath: null,
      rolloutChecked: false,
      postExecuteRestoreSafetyStatus: null,
      observedNewRollbackEvent: false,
      observedInjectedMemory: false,
      reasons: ['rollout_path_unavailable_for_durable_verification'],
    };
    assert.deepEqual(report.durableVerification, expectedDurableVerification);
    const { execution } = report;
    assert.equal(execution.rollbackSent, true);
    assert.equal(execution.injectSent, true);
    assert.equal(execution.injectedItems, 1);
    assert.equal(execution.afterTurns, 1);
    assert.equal(execution.postInjectReadAttempts, 1);
    assert.equal(execution.postInjectVisibilityCheck.status, 'match');
    assert.equal(execution.postInjectVisibilityCheck.expectedTurns, 1);
    assert.equal(report.plan.mode, 'execute');

    const methodLog = readFileSync(fakeServer.log, 'utf8');
    const expectedSequence = [
      'initialize\n',
      'thread/read\n',
      'thread/resume\n',
      'thread/rollback\n',
      'thread/inject_items\n',
      'thread/read\n',
    ];
    assertInOrder(methodLog, expectedSequence);
    assert.match(methodLog, /INJECT_TEXT:## Throughline: Active Work Context/);
    assert.match(methodLog, /Active Work Thread \(Recent L2\)/);
    assert.doesNotMatch(methodLog, /UNEXPECTED_MUTATION/);
  } finally {
    for (const tempDir of [projectDir, homeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1054
+
1055
// When the fake app-server is given the rollout path (durableRolloutPath), the
// CLI should find durable evidence in the rollout and exit 0 as
// 'execute-durable-verified'.
test('trim CLI guarded execute reports durable verified when rollout records rollback and injected memory', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(homeDir, projectDir);
    const rolloutFile = writeCodexRollout(codexHomeDir, {
      project: projectDir,
      threadId: codexThreadId,
      turnCount: 22,
    });
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      threadId: codexThreadId,
      turnCount: 22,
      durableRolloutPath: rolloutFile,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = {
      CODEX_HOME: codexHomeDir,
      THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1',
    };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 0, run.stderr);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-durable-verified');
    assert.equal(report.reason, 'rollback_and_inject_durable_verified');
    const verification = report.durableVerification;
    assert.equal(verification.durableVerified, true);
    assert.equal(verification.rolloutPath, rolloutFile);
    assert.equal(verification.rolloutChecked, true);
    assert.equal(verification.postExecuteRestoreSafetyStatus, 'ok');
    assert.equal(verification.observedNewRollbackEvent, true);
    assert.equal(verification.observedInjectedMemory, true);
    assert.deepEqual(verification.reasons, ['rollout_durable_evidence_verified']);
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1110
+
1111
// The fake server is told that injection neither creates a turn nor returns
// turns; the visibility check is then expected to compare against the 20 turns
// reported after rollback, and the run is still durable-verified.
test('trim CLI guarded execute treats developer memory injection as item-level when it creates no turn', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(homeDir, projectDir);
    const rolloutFile = writeCodexRollout(codexHomeDir, {
      project: projectDir,
      threadId: codexThreadId,
      turnCount: 22,
    });
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      threadId: codexThreadId,
      turnCount: 22,
      durableRolloutPath: rolloutFile,
      injectCreatesTurn: false,
      injectResponseIncludesTurns: false,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = {
      CODEX_HOME: codexHomeDir,
      THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1',
    };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 0, run.stderr);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-durable-verified');
    assert.equal(report.execution.rollbackResultTurns, 20);
    assert.equal(report.execution.injectResultTurns, null);
    const expectedVisibility = {
      status: 'match',
      reason: 'post_inject_turn_count_visible',
      expectedTurns: 20,
      actualTurns: 20,
    };
    assert.deepEqual(report.execution.postInjectVisibilityCheck, expectedVisibility);
    assert.equal(report.durableVerification.observedInjectedMemory, true);
    assert.deepEqual(report.durableVerification.reasons, ['rollout_durable_evidence_verified']);
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1170
+
1171
// The fake server is configured with durableRolloutAppendDelayMs: 50; the CLI
// is still expected to end up durable-verified.
test('trim CLI guarded execute polls rollout until delayed durable evidence appears', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(homeDir, projectDir);
    const rolloutFile = writeCodexRollout(codexHomeDir, {
      project: projectDir,
      threadId: codexThreadId,
      turnCount: 22,
    });
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      threadId: codexThreadId,
      turnCount: 22,
      durableRolloutPath: rolloutFile,
      durableRolloutAppendDelayMs: 50,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = {
      CODEX_HOME: codexHomeDir,
      THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1',
    };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 0, run.stderr);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-durable-verified');
    const verification = report.durableVerification;
    assert.equal(verification.durableVerified, true);
    assert.equal(verification.observedNewRollbackEvent, true);
    assert.equal(verification.observedInjectedMemory, true);
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1222
+
1223
// Runs execute without --json and checks the text report line by line,
// including the statement that L3 bodies are referenced but not injected.
test('trim CLI execute report says L3 bodies are not injected', async () => {
  const homeDir = makeTempHome();
  const projectDir = makeTempProject();
  try {
    await seedDb(homeDir, projectDir);
    const fakeServer = makeFakeCodexAppServer(projectDir, { allowMutation: true });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', '019dfabf-thread',
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
    ];
    const envOverrides = { THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1' };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    // Checked in the same order as the individual assertions they replace.
    const expectedReportLines = [
      /Status: execute-sent-live-only/,
      /Durable verified: no/,
      /Injected items: 1/,
      /Injected memory source: throughline-db/,
      /Memory contract: older L1 \+ latest 20 L2 full bodies \+ L3 references only/,
      /Recent L2 bodies: 20 rows \(latest 20 turns\)/,
      /L3 bodies injected: no \(references only: 0\)/,
    ];
    for (const pattern of expectedReportLines) {
      assert.match(run.stdout, pattern);
    }
  } finally {
    for (const tempDir of [projectDir, homeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1261
+
1262
// The fake server delays inject visibility by one read, so the CLI is expected
// to need two post-inject read attempts before the turn count matches.
test('trim CLI guarded execute waits until injected Codex memory is visible', async () => {
  const homeDir = makeTempHome();
  const projectDir = makeTempProject();
  try {
    await seedDb(homeDir, projectDir);
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      delayedInjectVisibilityReads: 1,
      injectResponseAdvertisesPendingTurn: true,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', '019dfabf-thread',
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = { THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1' };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-sent-live-only');
    assert.equal(report.execution.afterTurns, 1);
    assert.equal(report.execution.postInjectReadAttempts, 2);
    const expectedVisibility = {
      status: 'match',
      reason: 'post_inject_turn_count_visible',
      expectedTurns: 1,
      actualTurns: 1,
    };
    assert.deepEqual(report.execution.postInjectVisibilityCheck, expectedVisibility);

    const methodLog = readFileSync(fakeServer.log, 'utf8');
    const expectedSequence = [
      'initialize\n',
      'thread/read\n',
      'thread/resume\n',
      'thread/rollback\n',
      'thread/inject_items\n',
      'thread/read\n',
      'thread/read\n',
    ];
    assertInOrder(methodLog, expectedSequence);
    // One read before the mutation plus the two post-inject reads.
    const readCalls = [...methodLog.matchAll(/^thread\/read$/gm)];
    assert.equal(readCalls.length, 3);
    assert.match(methodLog, /INJECT_TEXT:## Throughline: Active Work Context/);
    assert.doesNotMatch(methodLog, /UNEXPECTED_MUTATION/);
  } finally {
    for (const tempDir of [projectDir, homeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1319
+
1320
// With inject visibility delayed by 10 reads the visibility check times out;
// the mutation was still sent, so the result is 'execute-unverified'.
test('trim CLI guarded execute reports unverified when injected memory visibility times out', async () => {
  const homeDir = makeTempHome();
  const projectDir = makeTempProject();
  try {
    await seedDb(homeDir, projectDir);
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      delayedInjectVisibilityReads: 10,
      injectResponseAdvertisesPendingTurn: true,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', '019dfabf-thread',
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = { THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1' };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-unverified');
    assert.equal(report.reason, 'post_inject_turn_count_not_visible_after_reads');
    const expectedDurableVerification = {
      liveMutationSent: true,
      durableVerified: false,
      postInjectVisibilityStatus: 'timeout',
      restoreSafetyStatus: 'unknown',
      rolloutPath: null,
      rolloutChecked: false,
      postExecuteRestoreSafetyStatus: null,
      observedNewRollbackEvent: false,
      observedInjectedMemory: false,
      reasons: [
        'post_inject_turn_count_not_visible_after_reads',
        'rollout_path_unavailable_for_durable_verification',
      ],
    };
    assert.deepEqual(report.durableVerification, expectedDurableVerification);
    assert.equal(report.execution.postInjectVisibilityCheck.status, 'timeout');

    const methodLog = readFileSync(fakeServer.log, 'utf8');
    assert.match(methodLog, /thread\/rollback/);
    assert.match(methodLog, /thread\/inject_items/);
  } finally {
    for (const tempDir of [projectDir, homeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1376
+
1377
// Even when the post-inject visibility check times out, the rollout file is
// still inspected: durable evidence fields are populated while the overall
// status stays 'execute-unverified'.
test('trim CLI execute checks durable rollout evidence even when post-inject visibility times out', async () => {
  const homeDir = makeTempHome();
  const codexHomeDir = makeTempHome();
  const projectDir = makeTempProject();
  const codexThreadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  try {
    await seedDb(homeDir, projectDir);
    const rolloutFile = writeCodexRollout(codexHomeDir, {
      project: projectDir,
      threadId: codexThreadId,
      turnCount: 22,
    });
    const fakeServer = makeFakeCodexAppServer(projectDir, {
      allowMutation: true,
      threadId: codexThreadId,
      turnCount: 22,
      durableRolloutPath: rolloutFile,
      delayedInjectVisibilityReads: 10,
      injectResponseAdvertisesPendingTurn: true,
    });
    const cliArgs = [
      '--host', 'codex',
      '--codex-thread-id', codexThreadId,
      '--execute',
      '--codex-app-server-bin', fakeServer.script,
      '--json',
    ];
    const envOverrides = {
      CODEX_HOME: codexHomeDir,
      THROUGHLINE_EXPERIMENTAL_CODEX_TRIM_EXECUTE: '1',
    };
    const run = runTrim(homeDir, projectDir, cliArgs, null, envOverrides);

    assert.equal(run.status, 1);
    const report = JSON.parse(run.stdout);
    assert.equal(report.status, 'execute-unverified');
    assert.equal(report.reason, 'post_inject_turn_count_not_visible_after_reads');
    const verification = report.durableVerification;
    assert.equal(verification.postInjectVisibilityStatus, 'timeout');
    assert.equal(verification.rolloutPath, rolloutFile);
    assert.equal(verification.rolloutChecked, true);
    assert.equal(verification.postExecuteRestoreSafetyStatus, 'ok');
    assert.equal(verification.observedNewRollbackEvent, true);
    assert.equal(verification.observedInjectedMemory, true);
    assert.deepEqual(verification.reasons, ['post_inject_turn_count_not_visible_after_reads']);
  } finally {
    for (const tempDir of [projectDir, homeDir, codexHomeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});
1434
+
1435
/**
 * Asserts that every needle occurs in `text`, each one starting at or after the
 * end of the previous match.
 *
 * @param {string} text - Text to scan.
 * @param {Iterable<string>} needles - Substrings expected in this order.
 * @throws {AssertionError} When a needle is not found past the previous match;
 *   the message names the trimmed needle and the offset searched from.
 */
function assertInOrder(text, needles) {
  [...needles].reduce((searchFrom, expected) => {
    const foundAt = text.indexOf(expected, searchFrom);
    assert.notEqual(foundAt, -1, `missing ${expected.trim()} after offset ${searchFrom}`);
    // Continue after this match so repeated needles need distinct occurrences.
    return foundAt + expected.length;
  }, 0);
}
1443
+
1444
/**
 * Writes a synthetic Codex rollout JSONL fixture under
 * `<codexHome>/sessions/2026/05/06/` and returns its path.
 *
 * The file holds one `session_meta` row, then four `event_msg` rows per turn
 * (user_message, task_started, agent_message, task_complete), followed by the
 * optional compaction / rollback rows selected by the flags.
 *
 * @param {string} codexHome - Directory the `sessions/...` tree is created in.
 * @param {object} options
 * @param {string} options.project - Value stored as `cwd` in the session_meta row.
 * @param {string} options.threadId - Session id; also part of the file name.
 * @param {number} options.turnCount - Number of user/assistant turns to emit.
 * @param {boolean} [options.restoreRisk=false] - Append a compacted row, a
 *   context_compacted event, a thread_rolled_back event, and a response_item
 *   plus user_message that repeat the last turn's user text.
 * @param {boolean} [options.retainedCompactedText=false] - Append a compacted
 *   row whose replacement history holds the last turn's user text.
 * @returns {string} Path of the written `.jsonl` file.
 */
function writeCodexRollout(
  codexHome,
  { project, threadId, turnCount, restoreRisk = false, retainedCompactedText = false },
) {
  const dir = join(codexHome, 'sessions', '2026', '05', '06');
  mkdirSync(dir, { recursive: true });
  const path = join(dir, `rollout-2026-05-06T09-40-50-${threadId}.jsonl`);

  // Turn timestamps are computed rather than formatted by pasting the turn
  // number into the seconds field, which produced invalid values such as
  // "00:41:60" once turnCount reached 60. For turns 1-59 the output is
  // identical to the previous string-built form.
  const turnBaseMs = Date.UTC(2026, 4, 6, 0, 41, 0);
  const turnTimestamp = (turn, offsetMs) =>
    new Date(turnBaseMs + turn * 1000 + offsetMs).toISOString();

  // Row builders keep the key order (timestamp, type, payload) of the fixture.
  const eventRow = (timestamp, payload) => ({ timestamp, type: 'event_msg', payload });
  const userMessageItem = (text) => ({
    type: 'message',
    role: 'user',
    content: [{ type: 'input_text', text }],
  });
  const compactedRow = (timestamp, text) => ({
    timestamp,
    type: 'compacted',
    payload: {
      message: '',
      replacement_history: [userMessageItem(text)],
    },
  });

  const rows = [
    {
      timestamp: '2026-05-06T00:40:50.000Z',
      type: 'session_meta',
      payload: {
        id: threadId,
        timestamp: '2026-05-06T00:40:50.000Z',
        cwd: project,
        source: 'vscode',
        cli_version: '0.128.0-alpha.1',
      },
    },
  ];

  for (let turn = 1; turn <= turnCount; turn++) {
    rows.push(
      eventRow(turnTimestamp(turn, 0), {
        type: 'user_message',
        message: `codex user turn ${turn}`,
      }),
      eventRow(turnTimestamp(turn, 100), { type: 'task_started' }),
      eventRow(turnTimestamp(turn, 200), {
        type: 'agent_message',
        message: `codex assistant turn ${turn}`,
      }),
      eventRow(turnTimestamp(turn, 300), { type: 'task_complete' }),
    );
  }

  // Both optional sections reuse the text of the final user turn.
  const lastUserText = `codex user turn ${turnCount}`;

  if (retainedCompactedText) {
    rows.push(
      compactedRow('2026-05-06T00:41:59.000Z', lastUserText),
      eventRow('2026-05-06T00:41:59.100Z', { type: 'context_compacted' }),
    );
  }

  if (restoreRisk) {
    rows.push(
      compactedRow('2026-05-06T00:42:00.000Z', lastUserText),
      eventRow('2026-05-06T00:42:00.100Z', { type: 'context_compacted' }),
      eventRow('2026-05-06T00:42:00.200Z', { type: 'thread_rolled_back', num_turns: 1 }),
      {
        timestamp: '2026-05-06T00:42:00.300Z',
        type: 'response_item',
        payload: userMessageItem(lastUserText),
      },
      eventRow('2026-05-06T00:42:00.400Z', {
        type: 'user_message',
        message: lastUserText,
      }),
    );
  }

  // JSONL: one JSON document per line, with a trailing newline.
  writeFileSync(path, rows.map((row) => JSON.stringify(row)).join('\n') + '\n');
  return path;
}
1564
+
1565
// Dry-run for the claude host with --memo-stdin: the memo text passed as the
// fourth runTrim argument must show up in the preview output.
test('trim CLI accepts current-work memo on stdin for dry-run preview', async () => {
  const homeDir = makeTempHome();
  const projectDir = makeTempProject();
  try {
    await seedDb(homeDir, projectDir);
    const cliArgs = ['--dry-run', '--host', 'claude', '--memo-stdin'];
    const memoText = '**次の一手**: preserve current work framing';
    const run = runTrim(homeDir, projectDir, cliArgs, memoText);

    assert.equal(run.status, 0, run.stderr);
    for (const pattern of [/In-flight Memo/, /preserve current work framing/]) {
      assert.match(run.stdout, pattern);
    }
  } finally {
    for (const tempDir of [projectDir, homeDir]) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  }
});