throughline 0.3.24 → 0.3.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/tl-trim.md +42 -0
- package/.codex-sidecar.yml +62 -0
- package/CHANGELOG.md +583 -0
- package/README.ja.md +42 -5
- package/README.md +383 -23
- package/bin/throughline.mjs +168 -4
- package/codex/skills/throughline/SKILL.md +157 -0
- package/codex/skills/throughline/agents/openai.yaml +7 -0
- package/docs/INHERITANCE_ON_CLEAR_ONLY.md +146 -0
- package/docs/L1_L2_L3_REDESIGN.md +415 -0
- package/docs/PUBLIC_RELEASE_PLAN.md +184 -0
- package/docs/THROUGHLINE_CODEX_DUAL_SUPPORT.md +249 -0
- package/docs/THROUGHLINE_CODEX_FIRST_ROADMAP.md +555 -0
- package/docs/THROUGHLINE_CODEX_MONITOR_IMPLEMENTATION_PLAN.md +220 -0
- package/docs/THROUGHLINE_CODEX_TRIM_IMPLEMENTATION_PLAN.md +528 -0
- package/docs/THROUGHLINE_CODEX_TRIM_ROLLBACK_FIX_PLAN.md +672 -0
- package/docs/archive/CONCEPT.md +476 -0
- package/docs/archive/EXPERIMENT.md +371 -0
- package/docs/archive/README.md +22 -0
- package/docs/archive/SESSION_LINKING_DESIGN.md +231 -0
- package/docs/archive/THROUGHLINE_NEXT_STEPS.md +134 -0
- package/docs/throughline-codex-trim-rollback-incident-report.md +306 -0
- package/docs/throughline-handoff-context.example.json +57 -0
- package/docs/throughline-rollback-context-trim-insight.md +455 -0
- package/package.json +6 -2
- package/src/cli/codex-capture.mjs +95 -0
- package/src/cli/codex-handoff-model-smoke.mjs +292 -0
- package/src/cli/codex-handoff-model-smoke.test.mjs +262 -0
- package/src/cli/codex-handoff-smoke.mjs +163 -0
- package/src/cli/codex-handoff-smoke.test.mjs +149 -0
- package/src/cli/codex-handoff-start.mjs +291 -0
- package/src/cli/codex-handoff-start.test.mjs +194 -0
- package/src/cli/codex-hook.mjs +276 -0
- package/src/cli/codex-hook.test.mjs +293 -0
- package/src/cli/codex-host-primitive-audit.mjs +110 -0
- package/src/cli/codex-host-primitive-audit.test.mjs +75 -0
- package/src/cli/codex-restore-smoke.mjs +357 -0
- package/src/cli/codex-restore-source-audit.mjs +304 -0
- package/src/cli/codex-resume.mjs +138 -0
- package/src/cli/codex-rollback-model-visible-smoke.mjs +373 -0
- package/src/cli/codex-rollback-model-visible-smoke.test.mjs +255 -0
- package/src/cli/codex-sidecar-diagnostics.mjs +48 -0
- package/src/cli/codex-sidecar-dry-run.mjs +85 -0
- package/src/cli/codex-summarize.mjs +224 -0
- package/src/cli/codex-threads.mjs +89 -0
- package/src/cli/codex-visibility-smoke.mjs +196 -0
- package/src/cli/codex-vscode-restore-smoke.mjs +226 -0
- package/src/cli/codex-vscode-rollback-smoke.mjs +114 -0
- package/src/cli/doctor.mjs +503 -1
- package/src/cli/doctor.test.mjs +542 -3
- package/src/cli/handoff-preview.mjs +78 -0
- package/src/cli/help.test.mjs +64 -0
- package/src/cli/install.mjs +227 -4
- package/src/cli/install.test.mjs +207 -4
- package/src/cli/trim.mjs +564 -0
- package/src/codex-app-server.mjs +1816 -0
- package/src/codex-app-server.test.mjs +512 -0
- package/src/codex-auto-refresh.mjs +194 -0
- package/src/codex-auto-refresh.test.mjs +182 -0
- package/src/codex-capture.mjs +235 -0
- package/src/codex-capture.test.mjs +393 -0
- package/src/codex-handoff-model-smoke.mjs +114 -0
- package/src/codex-handoff-model-smoke.test.mjs +89 -0
- package/src/codex-handoff-smoke.mjs +124 -0
- package/src/codex-handoff-smoke.test.mjs +103 -0
- package/src/codex-handoff.mjs +331 -0
- package/src/codex-handoff.test.mjs +220 -0
- package/src/codex-host-primitive-audit.mjs +374 -0
- package/src/codex-host-primitive-audit.test.mjs +208 -0
- package/src/codex-restore-smoke.test.mjs +639 -0
- package/src/codex-restore-source-audit.mjs +1348 -0
- package/src/codex-restore-source-audit.test.mjs +623 -0
- package/src/codex-resume.test.mjs +242 -0
- package/src/codex-rollout-memory.mjs +711 -0
- package/src/codex-rollout-memory.test.mjs +610 -0
- package/src/codex-sidecar-cli.test.mjs +75 -0
- package/src/codex-sidecar.mjs +246 -0
- package/src/codex-sidecar.test.mjs +172 -0
- package/src/codex-summarize.test.mjs +143 -0
- package/src/codex-thread-identity.mjs +23 -0
- package/src/codex-thread-index.mjs +173 -0
- package/src/codex-thread-index.test.mjs +164 -0
- package/src/codex-usage.mjs +110 -0
- package/src/codex-usage.test.mjs +140 -0
- package/src/codex-visibility-smoke.test.mjs +222 -0
- package/src/codex-vscode-restore-smoke.mjs +206 -0
- package/src/codex-vscode-restore-smoke.test.mjs +325 -0
- package/src/codex-vscode-rollback-smoke.mjs +90 -0
- package/src/codex-vscode-rollback-smoke.test.mjs +290 -0
- package/src/db-schema.test.mjs +97 -0
- package/src/haiku-summarizer.mjs +267 -26
- package/src/haiku-summarizer.test.mjs +282 -0
- package/src/handoff-preview.test.mjs +108 -0
- package/src/handoff-record.mjs +294 -0
- package/src/handoff-record.test.mjs +226 -0
- package/src/hook-entrypoints.test.mjs +326 -0
- package/src/package-files.test.mjs +19 -0
- package/src/prompt-submit.mjs +9 -6
- package/src/resume-context.mjs +44 -140
- package/src/resume-context.test.mjs +172 -0
- package/src/session-start.mjs +8 -5
- package/src/state-file.mjs +50 -6
- package/src/state-file.test.mjs +50 -0
- package/src/token-monitor.mjs +14 -10
- package/src/token-monitor.test.mjs +27 -0
- package/src/trim-cli.test.mjs +1584 -0
- package/src/trim-model.mjs +584 -0
- package/src/trim-model.test.mjs +568 -0
- package/src/turn-processor.mjs +17 -10
- package/src/vscode-task.mjs +33 -10
- package/src/vscode-task.test.mjs +19 -9
|
@@ -0,0 +1,512 @@
|
|
|
1
|
+
import assert from 'node:assert/strict';
|
|
2
|
+
import { chmodSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
|
|
3
|
+
import { tmpdir } from 'node:os';
|
|
4
|
+
import { join } from 'node:path';
|
|
5
|
+
import test from 'node:test';
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
CODEX_APP_SERVER_METHODS,
|
|
9
|
+
buildDeveloperMessageItem,
|
|
10
|
+
buildInitializeRequest,
|
|
11
|
+
buildInitializedNotification,
|
|
12
|
+
buildTextInputItem,
|
|
13
|
+
buildThreadInjectItemsRequest,
|
|
14
|
+
buildThreadReadRequest,
|
|
15
|
+
buildThreadResumeRequest,
|
|
16
|
+
buildThreadRollbackRequest,
|
|
17
|
+
buildThreadTurnsListRequest,
|
|
18
|
+
buildTurnStartRequest,
|
|
19
|
+
compareTurnCounts,
|
|
20
|
+
encodeAppServerMessage,
|
|
21
|
+
parseAppServerLine,
|
|
22
|
+
runCodexModelVisibilitySmoke,
|
|
23
|
+
runCodexRollbackModelVisiblePrepare,
|
|
24
|
+
runCodexRollbackModelVisibleVerify,
|
|
25
|
+
runCodexTrimPreflight,
|
|
26
|
+
summarizeAppServerStderr,
|
|
27
|
+
} from './codex-app-server.mjs';
|
|
28
|
+
|
|
29
|
+
// Wire format check: one compact JSON document followed by exactly one newline.
test('encodeAppServerMessage writes one newline-delimited JSON object', () => {
  const message = { id: 1, method: 'initialize', params: { ok: true } };
  const encoded = encodeAppServerMessage(message);

  assert.equal(encoded, '{"id":1,"method":"initialize","params":{"ok":true}}\n');
});
|
|
35
|
+
|
|
36
|
+
// Each row pairs a raw app-server line with the classified message it must parse into.
test('parseAppServerLine parses responses, errors, notifications, and server requests', () => {
  const cases = [
    [
      '{"id":1,"result":{"ok":true}}',
      { kind: 'response', id: 1, result: { ok: true } },
    ],
    [
      '{"id":"x","error":{"code":-32600,"message":"bad"}}',
      { kind: 'error', id: 'x', error: { code: -32600, message: 'bad', data: undefined } },
    ],
    [
      '{"method":"thread/status/changed","params":{}}',
      { kind: 'notification', method: 'thread/status/changed', params: {} },
    ],
    [
      '{"id":2,"method":"client/request","params":{"a":1}}',
      { kind: 'request', id: 2, method: 'client/request', params: { a: 1 } },
    ],
  ];

  for (const [rawLine, expected] of cases) {
    assert.deepEqual(parseAppServerLine(rawLine), expected);
  }
});
|
|
62
|
+
|
|
63
|
+
// Covers both halves of the handshake: the initialize request and the initialized notification.
test('buildInitializeRequest opts into the experimental app-server API', () => {
  const initializeRequest = buildInitializeRequest({ id: 'init-1', version: '1.2.3' });
  const expectedClientInfo = { name: 'throughline', title: 'Throughline', version: '1.2.3' };
  const expectedCapabilities = { experimentalApi: true, optOutNotificationMethods: [] };

  assert.deepEqual(initializeRequest, {
    id: 'init-1',
    method: CODEX_APP_SERVER_METHODS.initialize,
    params: { clientInfo: expectedClientInfo, capabilities: expectedCapabilities },
  });

  const initializedNotification = buildInitializedNotification();
  assert.deepEqual(initializedNotification, { method: CODEX_APP_SERVER_METHODS.initialized });
});
|
|
84
|
+
|
|
85
|
+
// Walks the read -> resume -> turns/list -> rollback -> inject request builders in flow order.
test('thread request builders encode the verified rollback/inject flow', () => {
  const threadId = 'thread-1';
  const methods = CODEX_APP_SERVER_METHODS;

  const readRequest = buildThreadReadRequest({ id: 1, threadId });
  assert.deepEqual(readRequest, {
    id: 1,
    method: methods.threadRead,
    params: { threadId, includeTurns: true },
  });

  const resumeOptions = {
    cwd: '/repo',
    approvalPolicy: 'never',
    sandbox: 'danger-full-access',
    model: 'gpt-5.5',
  };
  const resumeRequest = buildThreadResumeRequest({ id: 2, threadId, ...resumeOptions });
  assert.deepEqual(resumeRequest, {
    id: 2,
    method: methods.threadResume,
    params: { threadId, ...resumeOptions, excludeTurns: false },
  });

  const turnsListRequest = buildThreadTurnsListRequest({ id: 'turns-list', threadId, limit: 50 });
  assert.deepEqual(turnsListRequest, {
    id: 'turns-list',
    method: methods.threadTurnsList,
    params: { threadId, limit: 50, sortDirection: 'asc' },
  });

  const rollbackRequest = buildThreadRollbackRequest({ id: 3, threadId, numTurns: 1 });
  assert.deepEqual(rollbackRequest, {
    id: 3,
    method: methods.threadRollback,
    params: { threadId, numTurns: 1 },
  });

  const memoryItem = buildDeveloperMessageItem('active work marker');
  const injectRequest = buildThreadInjectItemsRequest({ id: 4, threadId, items: [memoryItem] });
  assert.deepEqual(injectRequest, {
    id: 4,
    method: methods.threadInjectItems,
    params: { threadId, items: [memoryItem] },
  });
});
|
|
135
|
+
|
|
136
|
+
// Pins the literal item and turn/start payload shapes produced by the builders.
test('turn and item builders match the app-server shapes observed in the spike', () => {
  const textItem = buildTextInputItem('hello');
  assert.deepEqual(textItem, { type: 'text', text: 'hello', text_elements: [] });

  const developerItem = buildDeveloperMessageItem('remember this');
  assert.deepEqual(developerItem, {
    type: 'message',
    role: 'developer',
    content: [{ type: 'input_text', text: 'remember this' }],
  });

  const turnStart = buildTurnStartRequest({ id: 1, threadId: 'thread-1', text: 'continue' });
  assert.deepEqual(turnStart, {
    id: 1,
    method: CODEX_APP_SERVER_METHODS.turnStart,
    params: {
      threadId: 'thread-1',
      input: [{ type: 'text', text: 'continue', text_elements: [] }],
    },
  });
});
|
|
158
|
+
|
|
159
|
+
// A rollback of zero turns must be rejected before any request is built.
test('buildThreadRollbackRequest rejects numTurns below the documented minimum', () => {
  const buildWithZeroTurns = () =>
    buildThreadRollbackRequest({ id: 1, threadId: 'thread-1', numTurns: 0 });

  assert.throws(buildWithZeroTurns, /numTurns must be an integer >= 1/);
});
|
|
165
|
+
|
|
166
|
+
// One full-shape assertion for the match case, then status-only checks for the other states.
test('compareTurnCounts: reports match, mismatch, unchecked, and unknown states', () => {
  const matching = compareTurnCounts({ expectedTurns: 2, readTurns: 2, resumedTurns: 2 });
  assert.deepEqual(matching, {
    status: 'match',
    reason: 'rollout_and_app_server_turn_counts_match',
    expectedTurns: 2,
    readTurns: 2,
    resumedTurns: 2,
  });

  const mismatched = compareTurnCounts({ expectedTurns: 3, readTurns: 2, resumedTurns: 2 });
  assert.equal(mismatched.status, 'mismatch');

  const withoutExpectation = compareTurnCounts({ readTurns: 2, resumedTurns: 2 });
  assert.equal(withoutExpectation.status, 'unchecked');

  const withUnknownRead = compareTurnCounts({ expectedTurns: 2, readTurns: null, resumedTurns: 2 });
  assert.equal(withUnknownRead.status, 'unknown');
});
|
|
193
|
+
|
|
194
|
+
// A negative expected turn count is a caller error and must throw.
test('compareTurnCounts: rejects invalid expected turn count', () => {
  const invalidInput = { expectedTurns: -1, readTurns: 2, resumedTurns: 2 };

  assert.throws(() => compareTurnCounts(invalidInput), /expectedTurns must be a non-negative integer/);
});
|
|
205
|
+
|
|
206
|
+
// Repeated unknown-turn warnings for the same turn collapse to the first occurrence plus a
// single suppression notice; unrelated lines pass through untouched.
test('summarizeAppServerStderr: compacts repeated unknown-turn item warnings', () => {
  const unknownTurnWarning = (timestamp, itemId) =>
    timestamp +
    ' WARN codex_app_server_protocol::protocol::thread_history: dropping turn-scoped item for unknown turn id `turn-a` item_id="' +
    itemId +
    '"';

  const inputLines = [
    unknownTurnWarning('2026-05-06T00:00:00Z', 'call_1'),
    unknownTurnWarning('2026-05-06T00:00:01Z', 'call_2'),
    'unrelated warning',
    unknownTurnWarning('2026-05-06T00:00:02Z', 'call_3'),
    '',
  ];
  const expectedLines = [
    unknownTurnWarning('2026-05-06T00:00:00Z', 'call_1'),
    'unrelated warning',
    '[throughline] suppressed 2 repeated Codex app-server unknown-turn item warnings for turn turn-a',
    '',
  ];

  const summarized = summarizeAppServerStderr(inputLines.join('\n'));

  assert.equal(summarized, expectedLines.join('\n'));
});
|
|
225
|
+
|
|
226
|
+
// Oversized stderr must come back shorter and carry the truncation notice.
test('summarizeAppServerStderr: caps very large app-server stderr', () => {
  const oversized = 'warning ' + 'x'.repeat(5000);
  const summarized = summarizeAppServerStderr(oversized);

  assert.ok(summarized.length < oversized.length);
  assert.match(summarized, /\[throughline\] truncated \d+ chars of Codex app-server stderr/);
});
|
|
234
|
+
|
|
235
|
+
// Points the preflight at a binary path that should not exist and expects an explicit
// start-failure / unavailable error rather than a hang or a generic rejection.
test('runCodexTrimPreflight reports app-server spawn failure explicitly', async () => {
  const missingBinary = `/tmp/throughline-missing-codex-app-server-${process.pid}`;
  const preflightOptions = {
    threadId: 'thread-1',
    cwd: process.cwd(),
    rollbackTurns: 1,
    command: missingBinary,
    requestTimeoutMs: 1_000,
  };

  await assert.rejects(
    () => runCodexTrimPreflight(preflightOptions),
    /codex app-server failed to start|codex app-server is unavailable/,
  );
});
|
|
248
|
+
|
|
249
|
+
// Runs the smoke against a scripted fake app-server that echoes the marker in an agent delta.
test('runCodexModelVisibilitySmoke injects developer memory and detects marker in agent delta', async () => {
  const sandboxDir = mkdtempSync(join(tmpdir(), 'tl-codex-visible-smoke-'));
  try {
    const fakeServerPath = join(sandboxDir, 'fake-codex-app-server.mjs');
    const fakeServerSource = `#!/usr/bin/env node
import { createInterface } from 'node:readline';
const rl = createInterface({ input: process.stdin });
function send(message) { process.stdout.write(JSON.stringify(message) + '\\n'); }
rl.on('line', (line) => {
const msg = JSON.parse(line);
if (msg.method === 'initialized') return;
if (msg.method === 'initialize') {
send({ id: msg.id, result: { userAgent: 'fake-codex' } });
} else if (msg.method === 'thread/read' || msg.method === 'thread/resume') {
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns: [{ id: 'turn-1' }] } } });
} else if (msg.method === 'thread/inject_items') {
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns: [{ id: 'turn-1' }, { id: 'memory' }] } } });
} else if (msg.method === 'turn/start') {
send({ method: 'item/agentMessage/delta', params: { threadId: msg.params.threadId, turnId: 'turn-2', itemId: 'item-1', delta: 'TL_VISIBLE_MARKER' } });
send({ method: 'turn/completed', params: { threadId: msg.params.threadId, turn: { id: 'turn-2' } } });
send({ id: msg.id, result: { turn: { id: 'turn-2' } } });
} else {
send({ id: msg.id, error: { code: -32601, message: 'unknown method' } });
}
});
`;
    writeFileSync(fakeServerPath, fakeServerSource);
    // The script is spawned directly as the app-server command, so it must be executable.
    chmodSync(fakeServerPath, 0o755);

    const outcome = await runCodexModelVisibilitySmoke({
      threadId: 'thread-visible',
      cwd: process.cwd(),
      memoryText: 'developer memory containing TL_VISIBLE_MARKER',
      marker: 'TL_VISIBLE_MARKER',
      command: fakeServerPath,
    });

    const expectedFields = {
      status: 'visible',
      reason: 'marker_found_in_agent_message',
      injectSent: true,
      turnStartSent: true,
    };
    for (const [field, expected] of Object.entries(expectedFields)) {
      assert.equal(outcome[field], expected);
    }
    assert.match(outcome.agentText, /TL_VISIBLE_MARKER/);
    assert.deepEqual(outcome.notifications, ['item/agentMessage/delta', 'turn/completed']);
  } finally {
    rmSync(sandboxDir, { recursive: true, force: true });
  }
});
|
|
298
|
+
|
|
299
|
+
// The fake server only emits the marker when thread/resume arrives after thread/inject_items,
// so a 'visible' result proves the resume-after-inject ordering was used.
test('runCodexModelVisibilitySmoke can resume after inject before marker turn', async () => {
  const sandboxDir = mkdtempSync(join(tmpdir(), 'tl-codex-visible-resume-smoke-'));
  try {
    const fakeServerPath = join(sandboxDir, 'fake-codex-app-server.mjs');
    const fakeServerSource = `#!/usr/bin/env node
import { createInterface } from 'node:readline';
const rl = createInterface({ input: process.stdin });
let injected = false;
let resumedAfterInject = false;
function send(message) { process.stdout.write(JSON.stringify(message) + '\\n'); }
rl.on('line', (line) => {
const msg = JSON.parse(line);
if (msg.method === 'initialized') return;
if (msg.method === 'initialize') {
send({ id: msg.id, result: { userAgent: 'fake-codex' } });
} else if (msg.method === 'thread/read') {
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns: [{ id: 'turn-1' }] } } });
} else if (msg.method === 'thread/resume') {
if (injected) resumedAfterInject = true;
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns: [{ id: 'turn-1' }, { id: 'resume' }] } } });
} else if (msg.method === 'thread/inject_items') {
injected = true;
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns: [{ id: 'turn-1' }, { id: 'memory' }] } } });
} else if (msg.method === 'turn/start') {
const delta = resumedAfterInject ? 'TL_VISIBLE_AFTER_RESUME' : 'missing';
send({ method: 'item/agentMessage/delta', params: { threadId: msg.params.threadId, turnId: 'turn-2', itemId: 'item-1', delta } });
send({ id: msg.id, result: { turn: { id: 'turn-2' } } });
} else {
send({ id: msg.id, error: { code: -32601, message: 'unknown method' } });
}
});
`;
    writeFileSync(fakeServerPath, fakeServerSource);
    chmodSync(fakeServerPath, 0o755);

    const outcome = await runCodexModelVisibilitySmoke({
      threadId: 'thread-visible',
      cwd: process.cwd(),
      memoryText: 'developer memory containing TL_VISIBLE_AFTER_RESUME',
      marker: 'TL_VISIBLE_AFTER_RESUME',
      command: fakeServerPath,
      resumeAfterInject: true,
    });

    const { status, resumeAfterInject, postInjectResumedTurns, agentText } = outcome;
    assert.equal(status, 'visible');
    assert.equal(resumeAfterInject, true);
    assert.equal(postInjectResumedTurns, 2);
    assert.match(agentText, /TL_VISIBLE_AFTER_RESUME/);
  } finally {
    rmSync(sandboxDir, { recursive: true, force: true });
  }
});
|
|
353
|
+
|
|
354
|
+
// The fake server keeps a mutable turn list: turn/start appends a marker turn and
// thread/rollback drops the requested number of trailing turns.
test('runCodexRollbackModelVisiblePrepare starts a marker turn and rolls it back', async () => {
  const sandboxDir = mkdtempSync(join(tmpdir(), 'tl-codex-rollback-model-visible-prepare-'));
  try {
    const fakeServerPath = join(sandboxDir, 'fake-codex-app-server.mjs');
    const fakeServerSource = `#!/usr/bin/env node
import { createInterface } from 'node:readline';
const rl = createInterface({ input: process.stdin });
let turns = [{ id: 'turn-1' }];
function send(message) { process.stdout.write(JSON.stringify(message) + '\\n'); }
rl.on('line', (line) => {
const msg = JSON.parse(line);
if (msg.method === 'initialized') return;
if (msg.method === 'initialize') {
send({ id: msg.id, result: { userAgent: 'fake-codex' } });
} else if (msg.method === 'thread/read' || msg.method === 'thread/resume') {
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns } } });
} else if (msg.method === 'turn/start') {
turns = [...turns, { id: 'marker-turn' }];
send({ method: 'turn/completed', params: { threadId: msg.params.threadId, turn: { id: 'marker-turn' } } });
send({ id: msg.id, result: { turn: { id: 'marker-turn' } } });
} else if (msg.method === 'thread/rollback') {
turns = turns.slice(0, Math.max(0, turns.length - msg.params.numTurns));
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns } } });
} else {
send({ id: msg.id, error: { code: -32601, message: 'unknown method' } });
}
});
`;
    writeFileSync(fakeServerPath, fakeServerSource);
    chmodSync(fakeServerPath, 0o755);

    const outcome = await runCodexRollbackModelVisiblePrepare({
      threadId: 'thread-rollback-visible',
      cwd: process.cwd(),
      marker: 'TL_ROLLBACK_MODEL_VISIBLE_PREPARE',
      command: fakeServerPath,
    });

    const expectedFields = {
      status: 'prepared',
      reason: 'controlled_marker_turn_started_and_rolled_back',
      restartSafe: false,
      setupTurnStartSent: true,
      setupTurnCompletedObserved: true,
      rollbackSent: true,
      beforeTurns: 1,
      afterRollbackTurns: 1,
    };
    for (const [field, expected] of Object.entries(expectedFields)) {
      assert.equal(outcome[field], expected);
    }
    assert.deepEqual(outcome.notifications, ['turn/completed']);
  } finally {
    rmSync(sandboxDir, { recursive: true, force: true });
  }
});
|
|
407
|
+
|
|
408
|
+
// The fake server errors out if the full marker appears in the turn/start input, so a
// successful run also proves the verify prompt never contains the complete marker.
test('runCodexRollbackModelVisibleVerify reports not-reproduced without putting full marker in prompt', async () => {
  const sandboxDir = mkdtempSync(join(tmpdir(), 'tl-codex-rollback-model-visible-verify-hidden-'));
  try {
    const fakeServerPath = join(sandboxDir, 'fake-codex-app-server.mjs');
    const fakeServerSource = `#!/usr/bin/env node
import { createInterface } from 'node:readline';
const rl = createInterface({ input: process.stdin });
function send(message) { process.stdout.write(JSON.stringify(message) + '\\n'); }
rl.on('line', (line) => {
const msg = JSON.parse(line);
if (msg.method === 'initialized') return;
if (msg.method === 'initialize') {
send({ id: msg.id, result: { userAgent: 'fake-codex' } });
} else if (msg.method === 'thread/read' || msg.method === 'thread/resume') {
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns: [{ id: 'turn-1' }] } } });
} else if (msg.method === 'turn/start') {
const prompt = JSON.stringify(msg.params.input);
if (prompt.includes('TL_ROLLBACK_MODEL_VISIBLE_SECRET')) {
send({ id: msg.id, error: { code: -32000, message: 'full marker leaked into prompt' } });
return;
}
send({ method: 'item/agentMessage/delta', params: { threadId: msg.params.threadId, turnId: 'turn-2', itemId: 'item-1', delta: 'TL_ROLLBACK_MODEL_VISIBLE_NOT_VISIBLE' } });
send({ method: 'turn/completed', params: { threadId: msg.params.threadId, turn: { id: 'turn-2' } } });
send({ id: msg.id, result: { turn: { id: 'turn-2' } } });
} else {
send({ id: msg.id, error: { code: -32601, message: 'unknown method' } });
}
});
`;
    writeFileSync(fakeServerPath, fakeServerSource);
    chmodSync(fakeServerPath, 0o755);

    const outcome = await runCodexRollbackModelVisibleVerify({
      threadId: 'thread-rollback-visible',
      cwd: process.cwd(),
      marker: 'TL_ROLLBACK_MODEL_VISIBLE_SECRET',
      command: fakeServerPath,
    });

    const expectedFields = {
      status: 'not-reproduced',
      reason: 'model_reported_rolled_back_marker_not_visible',
      promptIncludesMarker: false,
      rolledBackMarkerModelVisible: false,
      modelReportedNotVisible: true,
    };
    for (const [field, expected] of Object.entries(expectedFields)) {
      assert.equal(outcome[field], expected);
    }
    assert.deepEqual(outcome.observedMarkers, ['TL_ROLLBACK_MODEL_VISIBLE_NOT_VISIBLE']);
    assert.match(outcome.agentText, /TL_ROLLBACK_MODEL_VISIBLE_NOT_VISIBLE/);
  } finally {
    rmSync(sandboxDir, { recursive: true, force: true });
  }
});
|
|
460
|
+
|
|
461
|
+
// Same leak guard as the not-reproduced case, but here the fake model answers with the
// full hidden marker, which must be classified as a reproduction.
test('runCodexRollbackModelVisibleVerify reports reproduced when model returns the hidden marker', async () => {
  const sandboxDir = mkdtempSync(join(tmpdir(), 'tl-codex-rollback-model-visible-verify-visible-'));
  try {
    const fakeServerPath = join(sandboxDir, 'fake-codex-app-server.mjs');
    const fakeServerSource = `#!/usr/bin/env node
import { createInterface } from 'node:readline';
const rl = createInterface({ input: process.stdin });
function send(message) { process.stdout.write(JSON.stringify(message) + '\\n'); }
rl.on('line', (line) => {
const msg = JSON.parse(line);
if (msg.method === 'initialized') return;
if (msg.method === 'initialize') {
send({ id: msg.id, result: { userAgent: 'fake-codex' } });
} else if (msg.method === 'thread/read' || msg.method === 'thread/resume') {
send({ id: msg.id, result: { thread: { id: msg.params.threadId, turns: [{ id: 'turn-1' }] } } });
} else if (msg.method === 'turn/start') {
const prompt = JSON.stringify(msg.params.input);
if (prompt.includes('TL_ROLLBACK_MODEL_VISIBLE_SECRET')) {
send({ id: msg.id, error: { code: -32000, message: 'full marker leaked into prompt' } });
return;
}
send({ method: 'item/agentMessage/delta', params: { threadId: msg.params.threadId, turnId: 'turn-2', itemId: 'item-1', delta: 'TL_ROLLBACK_MODEL_VISIBLE_SECRET' } });
send({ method: 'turn/completed', params: { threadId: msg.params.threadId, turn: { id: 'turn-2' } } });
send({ id: msg.id, result: { turn: { id: 'turn-2' } } });
} else {
send({ id: msg.id, error: { code: -32601, message: 'unknown method' } });
}
});
`;
    writeFileSync(fakeServerPath, fakeServerSource);
    chmodSync(fakeServerPath, 0o755);

    const outcome = await runCodexRollbackModelVisibleVerify({
      threadId: 'thread-rollback-visible',
      cwd: process.cwd(),
      marker: 'TL_ROLLBACK_MODEL_VISIBLE_SECRET',
      command: fakeServerPath,
    });

    const expectedFields = {
      status: 'reproduced',
      reason: 'rolled_back_marker_returned_by_model',
      promptIncludesMarker: false,
      rolledBackMarkerModelVisible: true,
      modelReportedNotVisible: false,
    };
    for (const [field, expected] of Object.entries(expectedFields)) {
      assert.equal(outcome[field], expected);
    }
    assert.deepEqual(outcome.observedMarkers, ['TL_ROLLBACK_MODEL_VISIBLE_SECRET']);
    assert.match(outcome.agentText, /TL_ROLLBACK_MODEL_VISIBLE_SECRET/);
  } finally {
    rmSync(sandboxDir, { recursive: true, force: true });
  }
});
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import { runCodexTrimExecution } from './codex-app-server.mjs';
|
|
2
|
+
import { buildCodexRolloutTrimSource } from './codex-rollout-memory.mjs';
|
|
3
|
+
import { buildTrimPlan } from './trim-model.mjs';
|
|
4
|
+
|
|
5
|
+
/**
 * Default usage ratio (tokens / contextWindowSize) at or above which
 * evaluateCodexAutoRefreshUsage reports that a refresh should happen.
 * Callers can override it through the `threshold` option.
 */
export const CODEX_AUTO_REFRESH_THRESHOLD = 0.9;
|
|
6
|
+
|
|
7
|
+
/**
 * Decide whether a usage snapshot is high enough to warrant an auto refresh.
 *
 * A refresh is only ever recommended from measured numbers: a missing snapshot, a
 * snapshot flagged `estimated`, or a context window flagged `contextWindowEstimated`
 * always yields `shouldRefresh: false` with a reason naming the cause.
 *
 * @param {object|null|undefined} usage - Usage snapshot. Reads `estimated`,
 *   `contextWindowEstimated`, `tokens` and `contextWindowSize`.
 * @param {{ threshold?: number }} [options] - `threshold` is the ratio
 *   (tokens / contextWindowSize) at or above which a refresh is recommended.
 * @returns {{
 *   shouldRefresh: boolean,
 *   reason: string,
 *   threshold: number,
 *   ratio: number|null,
 *   tokens?: number,
 *   contextWindowSize?: number,
 * }} Decision object. `tokens` and `contextWindowSize` are only present when a ratio
 *   could be computed.
 */
export function evaluateCodexAutoRefreshUsage(usage, { threshold = CODEX_AUTO_REFRESH_THRESHOLD } = {}) {
  // Every "cannot decide" outcome shares the same shape; only the reason differs.
  const skip = (reason) => ({
    shouldRefresh: false,
    reason,
    threshold,
    ratio: null,
  });

  if (!usage) return skip('usage_unavailable');
  if (usage.estimated) return skip('estimated_usage_not_allowed');
  if (usage.contextWindowEstimated) return skip('estimated_context_window_not_allowed');

  const tokens = coerceUsageNumber(usage.tokens);
  const contextWindowSize = coerceUsageNumber(usage.contextWindowSize);
  if (!Number.isFinite(tokens) || tokens < 0) return skip('invalid_usage_tokens');
  if (!Number.isFinite(contextWindowSize) || contextWindowSize <= 0) {
    return skip('invalid_context_window');
  }

  const ratio = tokens / contextWindowSize;
  const shouldRefresh = ratio >= threshold;
  return {
    shouldRefresh,
    reason: shouldRefresh ? 'threshold_reached' : 'below_threshold',
    threshold,
    ratio,
    tokens,
    contextWindowSize,
  };
}

/**
 * Convert a usage field to a number without the silent coercions of `Number()`.
 *
 * `Number(null)`, `Number('')`, `Number(false)` and `Number([])` are all 0, which would
 * make a missing token count look like a valid measurement of zero tokens. Only real
 * numbers, bigints and non-blank numeric strings are accepted; anything else is NaN so
 * the caller reports it as invalid.
 *
 * @param {unknown} value
 * @returns {number} The numeric value, or NaN when the input is not numeric.
 */
function coerceUsageNumber(value) {
  if (typeof value === 'number') return value;
  if (typeof value === 'bigint') return Number(value);
  if (typeof value === 'string' && value.trim() !== '') return Number(value);
  return Number.NaN;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Evaluate usage for a Codex thread and, when the threshold is reached, build a trim
 * plan and execute a rollback-and-inject refresh.
 *
 * Pipeline (each step can end the run early):
 *   1. usage decision        -> 'skipped' with the decision's reason
 *   2. build trim source     -> 'skipped' / 'codex_rollout_unavailable'
 *   3. build trim plan       -> 'skipped' with the plan's reason, 'nothing_to_trim',
 *                               or 'injectable_memory_required'
 *   4. execute trim          -> 'refused' with the execution's reason
 *   5. post-inject check     -> 'unverified' unless the check status is 'match'
 *   6. otherwise             -> 'refreshed-live'
 *
 * @param {object} [options]
 * @param {object} options.db - Handle passed through to the plan builder (required).
 * @param {string} options.threadId - Codex thread id (required, non-empty).
 * @param {string|null} [options.codexThreadIdSource] - Provenance of the thread id; a
 *   value starting with 'payload:' selects the 'payload_codex_thread_rollout' source reason.
 * @param {string} [options.codexHome] - Forwarded to the trim-source builder.
 * @param {string} [options.projectPath] - Project directory; also used as the execution cwd.
 * @param {string|null} [options.sessionId] - Defaults to `codex:<threadId>` when null.
 * @param {object|null} [options.usage] - Usage snapshot for the threshold decision.
 * @param {number} [options.threshold] - Usage ratio that triggers a refresh.
 * @param {string} [options.command] - App-server command forwarded to the executor.
 * @param {object} [options.deps] - Optional overrides: `buildTrimSource`, `buildTrimPlan`,
 *   `runTrimExecution`.
 * @returns {Promise<object>} `{ status, reason, decision }`, plus `plan` once a plan was
 *   built and `execution` once the trim was executed.
 * @throws {Error} When `db`, `threadId` or `projectPath` is missing or empty.
 */
export async function runCodexAutoRefresh({
  db,
  threadId,
  codexThreadIdSource = null,
  codexHome = undefined,
  projectPath = process.cwd(),
  sessionId = null,
  usage = null,
  threshold = CODEX_AUTO_REFRESH_THRESHOLD,
  command = process.env.THROUGHLINE_CODEX_APP_SERVER_BIN ?? 'codex',
  deps = {},
} = {}) {
  if (!db) {
    throw new Error('runCodexAutoRefresh: db is required');
  }
  const isNonEmptyString = (value) => typeof value === 'string' && value.length !== 0;
  if (!isNonEmptyString(threadId)) {
    throw new Error('runCodexAutoRefresh: threadId is required');
  }
  if (!isNonEmptyString(projectPath)) {
    throw new Error('runCodexAutoRefresh: projectPath is required');
  }

  const decision = evaluateCodexAutoRefreshUsage(usage, { threshold });
  // All early exits before execution share this shape; `plan` is appended once it exists.
  const skipped = (reason, extras = {}) => ({ status: 'skipped', reason, decision, ...extras });
  if (!decision.shouldRefresh) {
    return skipped(decision.reason);
  }

  const resolveTrimSource = deps.buildTrimSource ?? buildCodexRolloutTrimSource;
  const planTrim = deps.buildTrimPlan ?? buildTrimPlan;
  const executeTrim = deps.runTrimExecution ?? runCodexTrimExecution;

  let sourceReason = 'auto_refresh_codex_thread_rollout';
  if (codexThreadIdSource && codexThreadIdSource.startsWith('payload:')) {
    sourceReason = 'payload_codex_thread_rollout';
  }
  const trimSource = resolveTrimSource({ threadId, codexHome, projectPath, sourceReason });
  if (!trimSource) {
    return skipped('codex_rollout_unavailable');
  }

  const plan = planTrim(db, {
    sessionId: sessionId ?? `codex:${threadId}`,
    projectPath,
    host: 'codex',
    trimAll: true,
    codexThreadId: threadId,
    codexThreadIdSource,
    trimSource,
  });
  if (plan.status === 'unavailable') {
    return skipped(plan.reason, { plan });
  }
  if (plan.trim.rollbackTurns < 1) {
    return skipped('nothing_to_trim', { plan });
  }
  if (!hasInjectableMemory(plan.memoryPreview)) {
    return skipped('injectable_memory_required', { plan });
  }

  // Only a rollout-derived plan carries a turn count to hand to the executor.
  const expectedTurns = plan.trim.source === 'codex-rollout' ? plan.trim.capturedTurns : null;
  const execution = await executeTrim({
    threadId,
    cwd: projectPath,
    rollbackTurns: plan.trim.rollbackTurns,
    memoryText: plan.memoryPreview.text,
    expectedTurns,
    command,
  });

  const finished = (status, reason) => ({ status, reason, decision, plan, execution });
  if (execution.status === 'refused') {
    return finished('refused', execution.reason);
  }

  const visibilityCheck = execution.postInjectVisibilityCheck;
  const visibilityStatus = visibilityCheck?.status ?? 'unchecked';
  if (visibilityStatus !== 'match') {
    return finished('unverified', visibilityCheck?.reason ?? 'post_inject_visibility_unverified');
  }

  return finished('refreshed-live', 'rollback_and_inject_sent_live');
}
|
|
185
|
+
|
|
186
|
+
/**
 * Report whether a plan's memory preview holds text that is worth injecting.
 *
 * The preview qualifies only when its stats name 'throughline-db' as the source and its
 * text is a non-blank string other than the '(no captured memory available)' placeholder.
 *
 * @param {object|null|undefined} memoryPreview - Preview with optional `stats.source` and `text`.
 * @returns {boolean} True when the preview text can be injected.
 */
function hasInjectableMemory(memoryPreview) {
  if (memoryPreview?.stats?.source !== 'throughline-db') {
    return false;
  }

  const { text } = memoryPreview;
  if (typeof text !== 'string') {
    return false;
  }
  if (text.trim().length === 0) {
    return false;
  }
  return text !== '(no captured memory available)';
}
|