throughline 0.3.24 → 0.3.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/tl-trim.md +42 -0
- package/.codex-sidecar.yml +62 -0
- package/CHANGELOG.md +583 -0
- package/README.ja.md +42 -5
- package/README.md +383 -23
- package/bin/throughline.mjs +168 -4
- package/codex/skills/throughline/SKILL.md +157 -0
- package/codex/skills/throughline/agents/openai.yaml +7 -0
- package/docs/INHERITANCE_ON_CLEAR_ONLY.md +146 -0
- package/docs/L1_L2_L3_REDESIGN.md +415 -0
- package/docs/PUBLIC_RELEASE_PLAN.md +184 -0
- package/docs/THROUGHLINE_CODEX_DUAL_SUPPORT.md +249 -0
- package/docs/THROUGHLINE_CODEX_FIRST_ROADMAP.md +555 -0
- package/docs/THROUGHLINE_CODEX_MONITOR_IMPLEMENTATION_PLAN.md +220 -0
- package/docs/THROUGHLINE_CODEX_TRIM_IMPLEMENTATION_PLAN.md +528 -0
- package/docs/THROUGHLINE_CODEX_TRIM_ROLLBACK_FIX_PLAN.md +672 -0
- package/docs/archive/CONCEPT.md +476 -0
- package/docs/archive/EXPERIMENT.md +371 -0
- package/docs/archive/README.md +22 -0
- package/docs/archive/SESSION_LINKING_DESIGN.md +231 -0
- package/docs/archive/THROUGHLINE_NEXT_STEPS.md +134 -0
- package/docs/throughline-codex-trim-rollback-incident-report.md +306 -0
- package/docs/throughline-handoff-context.example.json +57 -0
- package/docs/throughline-rollback-context-trim-insight.md +455 -0
- package/package.json +6 -2
- package/src/cli/codex-capture.mjs +95 -0
- package/src/cli/codex-handoff-model-smoke.mjs +292 -0
- package/src/cli/codex-handoff-model-smoke.test.mjs +262 -0
- package/src/cli/codex-handoff-smoke.mjs +163 -0
- package/src/cli/codex-handoff-smoke.test.mjs +149 -0
- package/src/cli/codex-handoff-start.mjs +291 -0
- package/src/cli/codex-handoff-start.test.mjs +194 -0
- package/src/cli/codex-hook.mjs +276 -0
- package/src/cli/codex-hook.test.mjs +293 -0
- package/src/cli/codex-host-primitive-audit.mjs +110 -0
- package/src/cli/codex-host-primitive-audit.test.mjs +75 -0
- package/src/cli/codex-restore-smoke.mjs +357 -0
- package/src/cli/codex-restore-source-audit.mjs +304 -0
- package/src/cli/codex-resume.mjs +138 -0
- package/src/cli/codex-rollback-model-visible-smoke.mjs +373 -0
- package/src/cli/codex-rollback-model-visible-smoke.test.mjs +255 -0
- package/src/cli/codex-sidecar-diagnostics.mjs +48 -0
- package/src/cli/codex-sidecar-dry-run.mjs +85 -0
- package/src/cli/codex-summarize.mjs +224 -0
- package/src/cli/codex-threads.mjs +89 -0
- package/src/cli/codex-visibility-smoke.mjs +196 -0
- package/src/cli/codex-vscode-restore-smoke.mjs +226 -0
- package/src/cli/codex-vscode-rollback-smoke.mjs +114 -0
- package/src/cli/doctor.mjs +503 -1
- package/src/cli/doctor.test.mjs +542 -3
- package/src/cli/handoff-preview.mjs +78 -0
- package/src/cli/help.test.mjs +64 -0
- package/src/cli/install.mjs +227 -4
- package/src/cli/install.test.mjs +207 -4
- package/src/cli/trim.mjs +564 -0
- package/src/codex-app-server.mjs +1816 -0
- package/src/codex-app-server.test.mjs +512 -0
- package/src/codex-auto-refresh.mjs +194 -0
- package/src/codex-auto-refresh.test.mjs +182 -0
- package/src/codex-capture.mjs +235 -0
- package/src/codex-capture.test.mjs +393 -0
- package/src/codex-handoff-model-smoke.mjs +114 -0
- package/src/codex-handoff-model-smoke.test.mjs +89 -0
- package/src/codex-handoff-smoke.mjs +124 -0
- package/src/codex-handoff-smoke.test.mjs +103 -0
- package/src/codex-handoff.mjs +331 -0
- package/src/codex-handoff.test.mjs +220 -0
- package/src/codex-host-primitive-audit.mjs +374 -0
- package/src/codex-host-primitive-audit.test.mjs +208 -0
- package/src/codex-restore-smoke.test.mjs +639 -0
- package/src/codex-restore-source-audit.mjs +1348 -0
- package/src/codex-restore-source-audit.test.mjs +623 -0
- package/src/codex-resume.test.mjs +242 -0
- package/src/codex-rollout-memory.mjs +711 -0
- package/src/codex-rollout-memory.test.mjs +610 -0
- package/src/codex-sidecar-cli.test.mjs +75 -0
- package/src/codex-sidecar.mjs +246 -0
- package/src/codex-sidecar.test.mjs +172 -0
- package/src/codex-summarize.test.mjs +143 -0
- package/src/codex-thread-identity.mjs +23 -0
- package/src/codex-thread-index.mjs +173 -0
- package/src/codex-thread-index.test.mjs +164 -0
- package/src/codex-usage.mjs +110 -0
- package/src/codex-usage.test.mjs +140 -0
- package/src/codex-visibility-smoke.test.mjs +222 -0
- package/src/codex-vscode-restore-smoke.mjs +206 -0
- package/src/codex-vscode-restore-smoke.test.mjs +325 -0
- package/src/codex-vscode-rollback-smoke.mjs +90 -0
- package/src/codex-vscode-rollback-smoke.test.mjs +290 -0
- package/src/db-schema.test.mjs +97 -0
- package/src/haiku-summarizer.mjs +267 -26
- package/src/haiku-summarizer.test.mjs +282 -0
- package/src/handoff-preview.test.mjs +108 -0
- package/src/handoff-record.mjs +294 -0
- package/src/handoff-record.test.mjs +226 -0
- package/src/hook-entrypoints.test.mjs +326 -0
- package/src/package-files.test.mjs +19 -0
- package/src/prompt-submit.mjs +9 -6
- package/src/resume-context.mjs +44 -140
- package/src/resume-context.test.mjs +172 -0
- package/src/session-start.mjs +8 -5
- package/src/state-file.mjs +50 -6
- package/src/state-file.test.mjs +50 -0
- package/src/token-monitor.mjs +14 -10
- package/src/token-monitor.test.mjs +27 -0
- package/src/trim-cli.test.mjs +1584 -0
- package/src/trim-model.mjs +584 -0
- package/src/trim-model.test.mjs +568 -0
- package/src/turn-processor.mjs +17 -10
- package/src/vscode-task.mjs +33 -10
- package/src/vscode-task.test.mjs +19 -9
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
|
|
3
|
+
import { tmpdir } from 'node:os';
|
|
4
|
+
import { join } from 'node:path';
|
|
5
|
+
import { spawnSync } from 'node:child_process';
|
|
6
|
+
import { test } from 'node:test';
|
|
7
|
+
import { DatabaseSync } from 'node:sqlite';
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
buildDetailRowsFromActiveTurns,
|
|
11
|
+
buildBodyRowsFromActiveTurns,
|
|
12
|
+
buildCodexThroughlineSessionId,
|
|
13
|
+
captureCodexRolloutToDb,
|
|
14
|
+
codexSessionIdToThreadId,
|
|
15
|
+
} from './codex-capture.mjs';
|
|
16
|
+
import { buildHandoffRecord } from './handoff-record.mjs';
|
|
17
|
+
import { toThroughlineHandoffBlock } from './codex-handoff.mjs';
|
|
18
|
+
|
|
19
|
+
function makeDb() {
|
|
20
|
+
const db = new DatabaseSync(':memory:');
|
|
21
|
+
db.exec(`
|
|
22
|
+
CREATE TABLE sessions (
|
|
23
|
+
session_id TEXT PRIMARY KEY,
|
|
24
|
+
project_path TEXT NOT NULL,
|
|
25
|
+
status TEXT NOT NULL DEFAULT 'active',
|
|
26
|
+
created_at INTEGER NOT NULL,
|
|
27
|
+
updated_at INTEGER NOT NULL,
|
|
28
|
+
merged_into TEXT
|
|
29
|
+
);
|
|
30
|
+
CREATE TABLE skeletons (
|
|
31
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
32
|
+
session_id TEXT NOT NULL,
|
|
33
|
+
origin_session_id TEXT,
|
|
34
|
+
turn_number INTEGER NOT NULL,
|
|
35
|
+
role TEXT NOT NULL,
|
|
36
|
+
summary TEXT NOT NULL,
|
|
37
|
+
created_at INTEGER NOT NULL
|
|
38
|
+
);
|
|
39
|
+
CREATE TABLE bodies (
|
|
40
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
41
|
+
session_id TEXT NOT NULL,
|
|
42
|
+
origin_session_id TEXT NOT NULL,
|
|
43
|
+
turn_number INTEGER NOT NULL,
|
|
44
|
+
role TEXT NOT NULL,
|
|
45
|
+
text TEXT NOT NULL,
|
|
46
|
+
token_count INTEGER,
|
|
47
|
+
created_at INTEGER NOT NULL
|
|
48
|
+
);
|
|
49
|
+
CREATE TABLE details (
|
|
50
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
51
|
+
session_id TEXT NOT NULL,
|
|
52
|
+
origin_session_id TEXT,
|
|
53
|
+
turn_number INTEGER,
|
|
54
|
+
tool_name TEXT NOT NULL,
|
|
55
|
+
input_text TEXT,
|
|
56
|
+
output_text TEXT,
|
|
57
|
+
token_count INTEGER NOT NULL DEFAULT 0,
|
|
58
|
+
created_at INTEGER NOT NULL,
|
|
59
|
+
kind TEXT,
|
|
60
|
+
source_id TEXT
|
|
61
|
+
);
|
|
62
|
+
`);
|
|
63
|
+
return db;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
test('buildCodexThroughlineSessionId: namespaces Codex thread ids', () => {
  // The two helpers are expected to be inverses: one adds the "codex:" prefix,
  // the other strips it again.
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  const namespacedId = 'codex:019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';

  assert.equal(buildCodexThroughlineSessionId(threadId), namespacedId);
  assert.equal(codexSessionIdToThreadId(namespacedId), threadId);
});
|
|
76
|
+
|
|
77
|
+
test('buildBodyRowsFromActiveTurns: groups roles and preserves developer injected memory', () => {
  const sessionId = buildCodexThroughlineSessionId('019dfaba-f87e-7f41-a144-d5ca7c6dd7f9');

  // Turn 1 is a normal user/assistant exchange; turn 2 holds only a
  // developer-role message carrying injected Throughline memory.
  const conversationTurn = {
    messages: [
      { role: 'user', text: 'first request', time: '2026-05-06T00:00:01.000Z' },
      { role: 'assistant', text: 'first answer', time: '2026-05-06T00:00:02.000Z' },
    ],
  };
  const injectedMemoryTurn = {
    messages: [
      {
        role: 'developer',
        text: '## Throughline: Active Work Context\nactive memory',
        time: '2026-05-06T00:00:03.000Z',
      },
    ],
  };

  const rows = buildBodyRowsFromActiveTurns([conversationTurn, injectedMemoryTurn], {
    sessionId,
    now: 1,
  });

  const projected = rows.map((row) => [row.turnNumber, row.role, row.text]);
  const expected = [
    [1, 'user', 'first request'],
    [1, 'assistant', 'first answer'],
    [2, 'developer', '## Throughline: Active Work Context\nactive memory'],
  ];
  assert.deepEqual(projected, expected);

  // The first row carries the session id as its origin and takes its
  // timestamp from the message time rather than from `now`.
  const firstRow = rows[0];
  assert.equal(firstRow.originSessionId, sessionId);
  assert.equal(firstRow.createdAt, Date.parse('2026-05-06T00:00:01.000Z'));
});
|
|
111
|
+
|
|
112
|
+
test('buildDetailRowsFromActiveTurns: stores Codex function call input and output', () => {
  const sessionId = buildCodexThroughlineSessionId('019dfaba-f87e-7f41-a144-d5ca7c6dd7f9');

  // One tool invocation is represented as two detail entries: the call input
  // and, under a ":output"-suffixed source id, the call output.
  const toolInput = {
    kind: 'tool_input',
    tool_name: 'exec_command',
    source_id: 'call_123',
    input_text: '{"cmd":"rtk rg TODO"}',
    output_text: null,
    time: '2026-05-06T00:00:02.000Z',
  };
  const toolOutput = {
    kind: 'tool_output',
    tool_name: 'exec_command',
    source_id: 'call_123:output',
    input_text: null,
    output_text: 'TODO item\n',
    time: '2026-05-06T00:00:03.000Z',
  };
  const activeTurns = [
    {
      messages: [{ role: 'assistant', text: 'I will inspect files' }],
      details: [toolInput, toolOutput],
    },
  ];

  const rows = buildDetailRowsFromActiveTurns(activeTurns, { sessionId, now: 1 });

  const projected = rows.map((row) => [
    row.turnNumber,
    row.kind,
    row.toolName,
    row.sourceId,
    row.inputText,
    row.outputText,
  ]);
  assert.deepEqual(projected, [
    [1, 'tool_input', 'exec_command', 'call_123', '{"cmd":"rtk rg TODO"}', null],
    [1, 'tool_output', 'exec_command', 'call_123:output', null, 'TODO item\n'],
  ]);

  // The row timestamp comes from the detail's own time, not from `now`.
  assert.equal(rows[0].createdAt, Date.parse('2026-05-06T00:00:02.000Z'));
});
|
|
157
|
+
|
|
158
|
+
test('captureCodexRolloutToDb: rebuilds namespaced Codex session from active rollout turns', () => {
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  const db = makeDb();
  const home = mkdtempSync(join(tmpdir(), 'tl-codex-home-'));
  const project = mkdtempSync(join(tmpdir(), 'tl-codex-project-'));

  // First turn: survives the rollback and includes one tool call.
  const keptTurn = [
    event('user_message', { message: 'keep request' }),
    event('task_started'),
    responseItem({
      type: 'function_call',
      name: 'exec_command',
      arguments: '{"cmd":"rtk pwd"}',
      call_id: 'call_keep',
    }),
    responseItem({
      type: 'function_call_output',
      call_id: 'call_keep',
      output: 'Output:\n/tmp/project\n',
    }),
    event('agent_message', { message: 'keep answer' }),
    event('task_complete'),
  ];
  // Second turn: undone by the thread_rolled_back event that follows it.
  const rolledBackTurn = [
    event('user_message', { message: 'rolled back request' }),
    event('task_started'),
    event('agent_message', { message: 'rolled back answer' }),
    event('task_complete'),
  ];
  // Rollback marker plus a developer message written after the rollback.
  const afterRollback = [
    event('thread_rolled_back', { num_turns: 1 }),
    responseItem({
      type: 'message',
      role: 'developer',
      content: [{ type: 'input_text', text: '## Throughline Trim Memory Preview\nmemory' }],
    }),
  ];

  try {
    writeRollout(home, {
      id: threadId,
      cwd: project,
      events: [...keptTurn, ...rolledBackTurn, ...afterRollback],
    });

    const result = captureCodexRolloutToDb(db, {
      threadId,
      codexHome: home,
      projectPath: project,
      now: 1234,
    });

    assert.equal(result.status, 'captured');
    assert.equal(result.sessionId, `codex:${threadId}`);
    assert.equal(result.capturedTurns, 1);
    assert.equal(result.capturedRows, 2);
    assert.equal(result.capturedDetails, 2);

    // Only the kept turn's messages may appear in the bodies table.
    const selectBodies = db.prepare(
      'SELECT session_id, origin_session_id, turn_number, role, text FROM bodies ORDER BY id',
    );
    const rows = selectBodies.all();
    assert.deepEqual(
      rows.map((row) => [row.session_id, row.origin_session_id, row.turn_number, row.role, row.text]),
      [
        [`codex:${threadId}`, `codex:${threadId}`, 1, 'user', 'keep request'],
        [`codex:${threadId}`, `codex:${threadId}`, 1, 'assistant', 'keep answer'],
      ],
    );
    assert.equal(rows.some((row) => row.text.includes('rolled back')), false);

    // The tool call is stored as an input row and an output row.
    const selectDetails = db.prepare(
      'SELECT turn_number, kind, tool_name, source_id, input_text, output_text FROM details ORDER BY id',
    );
    const details = selectDetails.all();
    assert.deepEqual(
      details.map((row) => [
        row.turn_number,
        row.kind,
        row.tool_name,
        row.source_id,
        row.input_text,
        row.output_text,
      ]),
      [
        [1, 'tool_input', 'exec_command', 'call_keep', '{"cmd":"rtk pwd"}', null],
        [1, 'tool_output', 'exec_command', 'call_keep:output', null, 'Output:\n/tmp/project\n'],
      ],
    );

    // A handoff built from the captured session identifies Codex as its source.
    const record = buildHandoffRecord(db, { sessionId: `codex:${threadId}` });
    assert.equal(record.source.adapter, 'codex');
    const block = toThroughlineHandoffBlock(record, { hostMode: 'codex-primary' });
    assert.equal(block.data.sourceAgent, 'codex');
  } finally {
    rmSync(home, { recursive: true, force: true });
    rmSync(project, { recursive: true, force: true });
  }
});
|
|
250
|
+
|
|
251
|
+
test('captureCodexRolloutToDb: second capture removes stale rows from previous active tail', () => {
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  const db = makeDb();
  const home = mkdtempSync(join(tmpdir(), 'tl-codex-home-'));
  const project = mkdtempSync(join(tmpdir(), 'tl-codex-project-'));

  // Two completed turns; the second one is rolled back before the second capture.
  const twoTurns = [
    event('user_message', { message: 'stable request' }),
    event('task_started'),
    event('agent_message', { message: 'stable answer' }),
    event('task_complete'),
    event('user_message', { message: 'old tail request' }),
    event('task_started'),
    event('agent_message', { message: 'old tail answer' }),
    event('task_complete'),
  ];

  try {
    const rollout = writeRollout(home, { id: threadId, cwd: project, events: twoTurns });

    // First capture sees both turns: two messages each, four body rows.
    captureCodexRolloutToDb(db, { threadId, codexHome: home, projectPath: project, now: 1 });
    const bodyCount = db.prepare('SELECT COUNT(*) AS c FROM bodies').get().c;
    assert.equal(bodyCount, 4);

    // Rewrite the same rollout file with a trailing rollback of one turn.
    writeRolloutRows(rollout, [
      sessionMeta(threadId, project),
      ...twoTurns,
      event('thread_rolled_back', { num_turns: 1 }),
    ]);

    captureCodexRolloutToDb(db, { threadId, codexHome: home, projectPath: project, now: 2 });

    // The rolled-back tail from the first capture must no longer be stored.
    const remaining = db.prepare('SELECT text FROM bodies ORDER BY id').all();
    const texts = remaining.map((row) => row.text);
    assert.deepEqual(texts, ['stable request', 'stable answer']);
  } finally {
    rmSync(home, { recursive: true, force: true });
    rmSync(project, { recursive: true, force: true });
  }
});
|
|
297
|
+
|
|
298
|
+
test('codex-capture CLI captures an explicit Codex thread as JSON', () => {
  const threadId = '019dfaba-f87e-7f41-a144-d5ca7c6dd7f9';
  const home = mkdtempSync(join(tmpdir(), 'tl-codex-home-'));
  const userHome = mkdtempSync(join(tmpdir(), 'tl-user-home-'));
  const project = mkdtempSync(join(tmpdir(), 'tl-codex-project-'));

  try {
    writeRollout(home, {
      id: threadId,
      cwd: project,
      events: [
        event('user_message', { message: 'cli request' }),
        event('task_started'),
        event('agent_message', { message: 'cli answer' }),
        event('task_complete'),
      ],
    });

    // The CLI entry point is resolved against the test runner's working directory.
    const cliArgs = [
      join(process.cwd(), 'bin/throughline.mjs'),
      'codex-capture',
      '--json',
      '--codex-thread-id',
      threadId,
      '--codex-home',
      home,
    ];
    // HOME and USERPROFILE are pointed at a scratch directory for the child process.
    const childEnv = {
      ...process.env,
      HOME: userHome,
      USERPROFILE: userHome,
    };
    const result = spawnSync(process.execPath, cliArgs, {
      cwd: project,
      encoding: 'utf8',
      env: childEnv,
    });

    assert.equal(result.status, 0, result.stderr);
    const output = JSON.parse(result.stdout);
    assert.equal(output.status, 'captured');
    assert.equal(output.sessionId, `codex:${threadId}`);
    assert.equal(output.capturedTurns, 1);
    assert.equal(output.capturedRows, 2);
  } finally {
    rmSync(home, { recursive: true, force: true });
    rmSync(userHome, { recursive: true, force: true });
    rmSync(project, { recursive: true, force: true });
  }
});
|
|
349
|
+
|
|
350
|
+
/**
 * Writes a rollout JSONL fixture under `<home>/sessions/2026/05/06/`, with a
 * session_meta row first followed by the supplied events.
 *
 * @param {string} home - directory used as the Codex home
 * @param {{ id: string, cwd: string, events: object[] }} options - thread id,
 *   working directory recorded in the session meta, and the rows to append
 * @returns {string} path of the rollout file that was written
 */
function writeRollout(home, { id, cwd, events }) {
  const sessionDir = join(home, 'sessions', '2026', '05', '06');
  mkdirSync(sessionDir, { recursive: true });

  const rolloutPath = join(sessionDir, `rollout-2026-05-06T09-40-50-${id}.jsonl`);
  const rows = [sessionMeta(id, cwd), ...events];
  writeRolloutRows(rolloutPath, rows);
  return rolloutPath;
}
|
|
357
|
+
|
|
358
|
+
/**
 * Serializes rows as JSON Lines (one JSON document per line, with a trailing
 * newline) and writes them to `path`, replacing any existing file content.
 *
 * @param {string} path - destination file
 * @param {object[]} rows - values to serialize, one per line
 */
function writeRolloutRows(path, rows) {
  const lines = [];
  for (const row of rows) {
    lines.push(JSON.stringify(row));
  }
  writeFileSync(path, `${lines.join('\n')}\n`);
}
|
|
361
|
+
|
|
362
|
+
/**
 * Builds the `session_meta` row that opens a rollout fixture.
 *
 * @param {string} id - thread id stored in the payload
 * @param {string} cwd - working directory stored in the payload
 * @returns {object} a row with fixed timestamps, source and CLI version
 */
function sessionMeta(id, cwd) {
  // The same fixed instant is used for the row and for its payload.
  const startedAt = '2026-05-06T00:40:50.000Z';
  const payload = {
    id,
    timestamp: startedAt,
    cwd,
    source: 'vscode',
    cli_version: '0.128.0-alpha.1',
  };
  return { timestamp: startedAt, type: 'session_meta', payload };
}
|
|
375
|
+
|
|
376
|
+
/**
 * Builds an `event_msg` rollout row with a fixed timestamp.
 *
 * @param {string} type - event type placed first in the payload
 * @param {object} [payload={}] - extra payload fields, spread after `type`
 * @returns {object} the rollout row
 */
function event(type, payload = {}) {
  // Spread after `type`, so a `type` key inside `payload` would override it.
  const body = { type, ...payload };
  return {
    timestamp: '2026-05-06T00:40:51.000Z',
    type: 'event_msg',
    payload: body,
  };
}
|
|
386
|
+
|
|
387
|
+
/**
 * Wraps a payload in a `response_item` rollout row with a fixed timestamp.
 *
 * @param {object} payload - stored on the row as-is (not copied)
 * @returns {object} the rollout row
 */
function responseItem(payload) {
  const row = {
    timestamp: '2026-05-06T00:40:52.000Z',
    type: 'response_item',
    payload,
  };
  return row;
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { spawnSync } from 'node:child_process';
|
|
2
|
+
|
|
3
|
+
export const CODEX_HANDOFF_MODEL_SMOKE_ENV = 'THROUGHLINE_EXPERIMENTAL_CODEX_HANDOFF_MODEL_SMOKE';
|
|
4
|
+
export const DEFAULT_CODEX_HANDOFF_MODEL_SMOKE_TIMEOUT_MS = 120_000;
|
|
5
|
+
|
|
6
|
+
/**
 * Converts captured stderr to a string and shortens it when it is long.
 *
 * Falsy input yields ''. Text of up to 4,000 characters is returned whole;
 * anything longer is reduced to its first 1,500 and last 2,000 characters
 * around a truncation marker.
 *
 * @param {unknown} stderr - captured stderr, stringified with String()
 * @returns {string} the full or shortened stderr text
 */
function compactStderr(stderr) {
  if (!stderr) {
    return '';
  }

  const raw = String(stderr);
  if (raw.length > 4_000) {
    const head = raw.slice(0, 1_500);
    const tail = raw.slice(-2_000);
    return `${head}\n...[stderr truncated]...\n${tail}`;
  }
  return raw;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Throws unless `value` is an integer greater than or equal to 1.
 *
 * @param {unknown} value - candidate value
 * @param {string} label - name used in the error message
 * @throws {Error} `<label> must be a positive integer`
 */
function assertPositiveInteger(value, label) {
  const isPositiveInteger = Number.isInteger(value) && value >= 1;
  if (isPositiveInteger) {
    return;
  }
  throw new Error(`${label} must be a positive integer`);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Throws unless `value` is a string containing at least one
 * non-whitespace character.
 *
 * @param {unknown} value - candidate value
 * @param {string} label - name used in the error message
 * @throws {Error} `<label> is required`
 */
function assertNonEmptyString(value, label) {
  const hasContent = typeof value === 'string' && value.trim().length > 0;
  if (hasContent) {
    return;
  }
  throw new Error(`${label} is required`);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Builds the smoke-test prompt: the handoff text, a blank line, a heading,
 * and an instruction to reply with exactly the given marker.
 *
 * @param {{ handoffPrompt: string, marker: string }} options - handoff text to
 *   embed and the marker the model is asked to echo
 * @returns {string} the newline-joined prompt
 * @throws {Error} when `handoffPrompt` or `marker` is not a non-empty string
 */
export function buildCodexHandoffModelSmokePrompt({ handoffPrompt, marker }) {
  assertNonEmptyString(handoffPrompt, 'handoffPrompt');
  assertNonEmptyString(marker, 'marker');

  const heading = '### Throughline Fresh-Thread Handoff Model Smoke';
  const framing = 'Read the handoff above as the initial context for a fresh Codex thread.';
  const instruction = `Reply exactly with this marker and nothing else: ${marker}`;

  return `${handoffPrompt}\n\n${heading}\n${framing}\n${instruction}`;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Runs `<command> exec` once with the given prompt and reports whether the
 * marker appears in the command's stdout.
 *
 * The child is started synchronously with `--ephemeral`,
 * `--ignore-user-config`, `--ignore-rules`, `--skip-git-repo-check` and
 * `--sandbox read-only`, using `cwd` both as the `-C` argument and as the
 * process working directory.
 *
 * @param {object} [options]
 * @param {string} options.prompt - full prompt passed as the last argument
 * @param {string} options.marker - text searched for in stdout
 * @param {string} options.cwd - working directory for the child process
 * @param {string} [options.command='codex'] - executable to run
 * @param {number} [options.timeoutMs] - positive integer timeout in
 *   milliseconds; defaults to DEFAULT_CODEX_HANDOFF_MODEL_SMOKE_TIMEOUT_MS
 * @param {object} [options.env=process.env] - environment for the child
 * @returns {object} result with `status` ('visible', 'not-visible' or
 *   'error'), `reason`, `marker`, `markerVisible`, `exitCode`, `signal`,
 *   `stdout`, compacted `stderr`, and `error` (message) when spawning failed
 *   or timed out
 * @throws {Error} when a string option is empty or `timeoutMs` is not a
 *   positive integer
 */
export function runCodexHandoffModelSmoke({
  prompt,
  marker,
  cwd,
  command = 'codex',
  timeoutMs = DEFAULT_CODEX_HANDOFF_MODEL_SMOKE_TIMEOUT_MS,
  env = process.env,
} = {}) {
  assertNonEmptyString(prompt, 'prompt');
  assertNonEmptyString(marker, 'marker');
  assertNonEmptyString(cwd, 'cwd');
  assertNonEmptyString(command, 'command');
  assertPositiveInteger(timeoutMs, 'timeoutMs');

  const result = spawnSync(
    command,
    [
      'exec',
      '--ephemeral',
      '--ignore-user-config',
      '--ignore-rules',
      '--skip-git-repo-check',
      '--sandbox',
      'read-only',
      '-C',
      cwd,
      prompt,
    ],
    {
      encoding: 'utf8',
      timeout: timeoutMs,
      // NOTE(review): with `shell: true` Node joins the arguments into one
      // command line without escaping, so on Windows shell metacharacters in
      // `prompt` or `cwd` reach the shell unquoted. Left unchanged here;
      // confirm whether the prompt should be passed another way on win32.
      shell: process.platform === 'win32',
      env,
      cwd,
    },
  );

  const stdout = result.stdout ?? '';
  const stderr = compactStderr(result.stderr);
  if (result.error) {
    // spawnSync reports an expired `timeout` as a plain Error whose `code` is
    // 'ETIMEDOUT'; its `name` stays 'Error'. The name check is kept so any
    // error that does identify itself as a TimeoutError is still classified.
    const timedOut =
      result.error.code === 'ETIMEDOUT' || result.error.name === 'TimeoutError';
    return {
      status: 'error',
      reason: timedOut ? 'codex_cli_timeout' : 'codex_cli_spawn_error',
      marker,
      markerVisible: false,
      exitCode: result.status,
      signal: result.signal,
      stdout,
      stderr,
      error: result.error.message,
    };
  }

  // The process ran but did not exit cleanly; stdout is not searched.
  if (result.status !== 0) {
    return {
      status: 'error',
      reason: 'codex_cli_failed',
      marker,
      markerVisible: false,
      exitCode: result.status,
      signal: result.signal,
      stdout,
      stderr,
    };
  }

  // Plain substring match: the marker may appear anywhere in stdout.
  const markerVisible = stdout.includes(marker);
  return {
    status: markerVisible ? 'visible' : 'not-visible',
    reason: markerVisible ? 'marker_found_in_codex_exec_output' : 'marker_missing_from_codex_exec_output',
    marker,
    markerVisible,
    exitCode: result.status,
    signal: result.signal,
    stdout,
    stderr,
  };
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { test } from 'node:test';
|
|
2
|
+
import assert from 'node:assert/strict';
|
|
3
|
+
import { chmodSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
|
|
4
|
+
import { tmpdir } from 'node:os';
|
|
5
|
+
import { join } from 'node:path';
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
buildCodexHandoffModelSmokePrompt,
|
|
9
|
+
runCodexHandoffModelSmoke,
|
|
10
|
+
} from './codex-handoff-model-smoke.mjs';
|
|
11
|
+
|
|
12
|
+
/**
 * Writes an executable stand-in for the Codex CLI into `dir`.
 *
 * The generated script exits with a distinct non-zero code when an expected
 * argument or the handoff heading is missing from the prompt. Otherwise it
 * prints the `TL_FAKE_HANDOFF_*` marker found in the prompt, or the text
 * "no marker" when `visible` is false.
 *
 * @param {string} dir - directory that receives the script
 * @param {{ visible?: boolean }} [options] - whether the script echoes the marker
 * @returns {string} path of the generated script
 */
function makeFakeCodexCli(dir, { visible = true } = {}) {
  const scriptPath = join(dir, 'fake-codex-cli.mjs');
  // `visible` is baked into the script source as a literal true/false.
  const source = `#!/usr/bin/env node
const args = process.argv.slice(2);
const prompt = args.at(-1) ?? '';
if (args[0] !== 'exec') process.exit(7);
if (!args.includes('--ephemeral')) process.exit(8);
if (!args.includes('--ignore-user-config')) process.exit(9);
if (!args.includes('--ignore-rules')) process.exit(10);
if (!args.includes('--sandbox') || !args.includes('read-only')) process.exit(11);
if (!prompt.includes('Throughline: New Codex Thread Handoff')) process.exit(12);
const marker = (prompt.match(/TL_FAKE_HANDOFF_[A-Z]+/) ?? [''])[0];
process.stdout.write(${JSON.stringify(visible)} ? marker + '\\n' : 'no marker\\n');
`;
  writeFileSync(scriptPath, source);
  chmodSync(scriptPath, 0o755);
  return scriptPath;
}
|
|
32
|
+
|
|
33
|
+
test('buildCodexHandoffModelSmokePrompt: appends exact marker instruction', () => {
  const prompt = buildCodexHandoffModelSmokePrompt({
    handoffPrompt: '## Throughline: New Codex Thread Handoff\nbody',
    marker: 'TL_FAKE_HANDOFF_READY',
  });

  // The prompt must contain the handoff heading, the smoke heading, and the
  // reply instruction naming the marker.
  const expectedPatterns = [
    /Throughline: New Codex Thread Handoff/,
    /Throughline Fresh-Thread Handoff Model Smoke/,
    /Reply exactly with this marker and nothing else: TL_FAKE_HANDOFF_READY/,
  ];
  for (const pattern of expectedPatterns) {
    assert.match(prompt, pattern);
  }
});
|
|
43
|
+
|
|
44
|
+
test('runCodexHandoffModelSmoke: detects marker from ephemeral Codex exec output', () => {
  const marker = 'TL_FAKE_HANDOFF_READY';
  const dir = mkdtempSync(join(tmpdir(), 'tl-codex-handoff-model-smoke-'));

  try {
    // The fake CLI echoes the marker it finds in the prompt.
    const fake = makeFakeCodexCli(dir);
    const prompt = buildCodexHandoffModelSmokePrompt({
      handoffPrompt: '## Throughline: New Codex Thread Handoff\nbody',
      marker,
    });

    const result = runCodexHandoffModelSmoke({ prompt, marker, cwd: dir, command: fake });

    assert.equal(result.status, 'visible');
    assert.equal(result.reason, 'marker_found_in_codex_exec_output');
    assert.equal(result.markerVisible, true);
    assert.match(result.stdout, /TL_FAKE_HANDOFF_READY/);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
|
|
67
|
+
|
|
68
|
+
test('runCodexHandoffModelSmoke: reports not-visible when marker is absent', () => {
  const marker = 'TL_FAKE_HANDOFF_MISSING';
  const dir = mkdtempSync(join(tmpdir(), 'tl-codex-handoff-model-smoke-'));

  try {
    // With visible: false the fake CLI prints "no marker" instead of the marker.
    const fake = makeFakeCodexCli(dir, { visible: false });
    const prompt = buildCodexHandoffModelSmokePrompt({
      handoffPrompt: '## Throughline: New Codex Thread Handoff\nbody',
      marker,
    });

    const result = runCodexHandoffModelSmoke({ prompt, marker, cwd: dir, command: fake });

    assert.equal(result.status, 'not-visible');
    assert.equal(result.reason, 'marker_missing_from_codex_exec_output');
    assert.equal(result.markerVisible, false);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
});
|