@excitedjs/agent-runtime-codex 0.2.0-alpha.g0ddd418597ca
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +44 -0
- package/dist/approval.d.ts +30 -0
- package/dist/approval.d.ts.map +1 -0
- package/dist/approval.js +42 -0
- package/dist/approval.js.map +1 -0
- package/dist/args.d.ts +55 -0
- package/dist/args.d.ts.map +1 -0
- package/dist/args.js +113 -0
- package/dist/args.js.map +1 -0
- package/dist/bin.d.ts +14 -0
- package/dist/bin.d.ts.map +1 -0
- package/dist/bin.js +18 -0
- package/dist/bin.js.map +1 -0
- package/dist/codex-home.d.ts +42 -0
- package/dist/codex-home.d.ts.map +1 -0
- package/dist/codex-home.js +112 -0
- package/dist/codex-home.js.map +1 -0
- package/dist/config.d.ts +76 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +106 -0
- package/dist/config.js.map +1 -0
- package/dist/diagnostic.d.ts +14 -0
- package/dist/diagnostic.d.ts.map +1 -0
- package/dist/diagnostic.js +58 -0
- package/dist/diagnostic.js.map +1 -0
- package/dist/events.d.ts +88 -0
- package/dist/events.d.ts.map +1 -0
- package/dist/events.js +225 -0
- package/dist/events.js.map +1 -0
- package/dist/handshake.d.ts +44 -0
- package/dist/handshake.d.ts.map +1 -0
- package/dist/handshake.js +85 -0
- package/dist/handshake.js.map +1 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/internal/completion-body.d.ts +38 -0
- package/dist/internal/completion-body.d.ts.map +1 -0
- package/dist/internal/completion-body.js +62 -0
- package/dist/internal/completion-body.js.map +1 -0
- package/dist/internal/config-validate.d.ts +23 -0
- package/dist/internal/config-validate.d.ts.map +1 -0
- package/dist/internal/config-validate.js +122 -0
- package/dist/internal/config-validate.js.map +1 -0
- package/dist/internal/os.d.ts +30 -0
- package/dist/internal/os.d.ts.map +1 -0
- package/dist/internal/os.js +81 -0
- package/dist/internal/os.js.map +1 -0
- package/dist/internal/socket.d.ts +23 -0
- package/dist/internal/socket.d.ts.map +1 -0
- package/dist/internal/socket.js +74 -0
- package/dist/internal/socket.js.map +1 -0
- package/dist/internal/turn-render.d.ts +22 -0
- package/dist/internal/turn-render.d.ts.map +1 -0
- package/dist/internal/turn-render.js +40 -0
- package/dist/internal/turn-render.js.map +1 -0
- package/dist/mcp-config.d.ts +9 -0
- package/dist/mcp-config.d.ts.map +1 -0
- package/dist/mcp-config.js +21 -0
- package/dist/mcp-config.js.map +1 -0
- package/dist/paths.d.ts +7 -0
- package/dist/paths.d.ts.map +1 -0
- package/dist/paths.js +26 -0
- package/dist/paths.js.map +1 -0
- package/dist/provider-ref.d.ts +8 -0
- package/dist/provider-ref.d.ts.map +1 -0
- package/dist/provider-ref.js +8 -0
- package/dist/provider-ref.js.map +1 -0
- package/dist/provider.d.ts +71 -0
- package/dist/provider.d.ts.map +1 -0
- package/dist/provider.js +137 -0
- package/dist/provider.js.map +1 -0
- package/dist/rpc.d.ts +65 -0
- package/dist/rpc.d.ts.map +1 -0
- package/dist/rpc.js +200 -0
- package/dist/rpc.js.map +1 -0
- package/dist/runtime-support.d.ts +27 -0
- package/dist/runtime-support.d.ts.map +1 -0
- package/dist/runtime-support.js +57 -0
- package/dist/runtime-support.js.map +1 -0
- package/dist/runtime.d.ts +246 -0
- package/dist/runtime.d.ts.map +1 -0
- package/dist/runtime.js +648 -0
- package/dist/runtime.js.map +1 -0
- package/dist/supervisor.d.ts +55 -0
- package/dist/supervisor.d.ts.map +1 -0
- package/dist/supervisor.js +183 -0
- package/dist/supervisor.js.map +1 -0
- package/dist/turn-manager.d.ts +92 -0
- package/dist/turn-manager.d.ts.map +1 -0
- package/dist/turn-manager.js +271 -0
- package/dist/turn-manager.js.map +1 -0
- package/dist/types.d.ts +143 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +10 -0
- package/dist/types.js.map +1 -0
- package/dist/version.d.ts +16 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +33 -0
- package/dist/version.js.map +1 -0
- package/package.json +56 -0
package/dist/runtime.js
ADDED
|
@@ -0,0 +1,648 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CodexRuntime — one running Codex-backed AgentRuntime instance.
|
|
3
|
+
*
|
|
4
|
+
* Owns:
|
|
5
|
+
* - CodexProcess (child app-server)
|
|
6
|
+
* - CodexWsClient (WS connection)
|
|
7
|
+
* - thread_id (lazily created via thread/start or resumed)
|
|
8
|
+
* - TurnManager (FIFO worker for this dispatcher)
|
|
9
|
+
*
|
|
10
|
+
* Lifecycle: declared → starting → ready → (degraded) → stopping → stopped.
|
|
11
|
+
*
|
|
12
|
+
* Current MVP:
|
|
13
|
+
* - accepted inbound work is process-local and is dropped on restart;
|
|
14
|
+
* - thread/resume failure does not degrade the whole dispatcher; we
|
|
15
|
+
* start a fresh thread, record the lost one in last_lost_thread_id,
|
|
16
|
+
* and post a visible warning to the next source chat.
|
|
17
|
+
*/
|
|
18
|
+
import { dirname, join } from 'node:path';
|
|
19
|
+
import { CodexProcess, } from './supervisor.js';
|
|
20
|
+
import { CodexWsClient } from './rpc.js';
|
|
21
|
+
import { performInitializeHandshake } from './handshake.js';
|
|
22
|
+
import { TurnManager, } from './turn-manager.js';
|
|
23
|
+
import { injectThreadItems } from './events.js';
|
|
24
|
+
import { renderChannelInput } from '@excitedjs/dreamux-utils';
|
|
25
|
+
import { createFailFastApprovalHandler } from './approval.js';
|
|
26
|
+
import { BUILTIN_CODEX_PROVIDER_REF } from './provider-ref.js';
|
|
27
|
+
import { CODEX_AGENT_RUNTIME_CAPABILITIES } from './provider.js';
|
|
28
|
+
import { buildCodexCompletionItem, CODEX_COMPLETION_TRIGGER_TEXT, codexProcessEnv, } from './runtime-support.js';
|
|
29
|
+
/**
 * The only skill-source `layout` the codex mapping understands. A source with
 * this layout has a `path` that points at a single skill's own directory (the
 * one holding `SKILL.md`). codex's `skills/extraRoots/set` expects a skills
 * *root* — a directory whose immediate children are skill dirs (verified
 * against codex 0.137's app-server schema) — so the *parent* of `path` is what
 * gets registered. Sources with any other layout are ignored.
 */
const CODEX_SKILL_DIR_LAYOUT = 'skill-dir';
/** Restart backoff base delay (ms) used when `deps.restartBackoffBaseMs` is unset. */
const DEFAULT_RESTART_BACKOFF_BASE_MS = 1_000;
/** Restart backoff ceiling (ms) used when `deps.restartBackoffMaxMs` is unset. */
const DEFAULT_RESTART_BACKOFF_MAX_MS = 30 * 1_000;
/** Maximum number of completion ids kept in each of the injected/accepted dedup caches. */
const COMPLETION_ID_CACHE_LIMIT = 256;
|
|
40
|
+
/**
 * One running Codex-backed runtime instance: owns the app-server child, the
 * WS client, the thread id and the turn manager, and supervises restarts.
 */
export class CodexRuntime {
    /** Host-supplied identity; `runtime_id` is used as the dispatcher id. */
    identity;
    /** Injected collaborators and options (state store, paths, factories, logger, …). */
    deps;
    providerRef = BUILTIN_CODEX_PROVIDER_REF;
    /** Child app-server handle, or null while none is owned. */
    process = null;
    /** WS client connected to the child, or null while none is owned. */
    client = null;
    /** Turn manager for the live client, or null while the runtime is down. */
    turnManager = null;
    /** Current Codex thread id; seeded from `identity.checkpoint_id`. */
    threadId = null;
    /**
     * Whether the most recent thread resolution resumed an existing Codex thread
     * (true) rather than starting a fresh one or recovering from a failed resume.
     * Consulted by the server right after the slot is ready to decide whether a
     * `daemon restart` notice should be injected (issue #78).
     */
    threadResumed = false;
    status = 'declared';
    /** Monotonic per-attempt suffix for TeamMate delivery turn dedup ids (#110 PR8). */
    teammateDeliverySeq = 0;
    /**
     * Completion deliveries currently being processed. Duplicate settled events can
     * race into `completionInput`; coalescing by completion id keeps one logical
     * completion from injecting or triggering more than once concurrently.
     */
    inFlightCompletionDeliveries = new Map();
    /**
     * Completion ids whose trigger turn has already been accepted. A later replay
     * of the same settled teammate turn is an idempotent success, not a new wake-up.
     */
    acceptedCompletionIds = new Set();
    acceptedCompletionOrder = [];
    /**
     * Completion ids whose item has already been injected into the thread. The
     * Dispatcher Service retries `completionInput` on `failed`; if the inject
     * succeeded but the trigger turn failed, the retry must NOT re-inject the same
     * item (that would persist a duplicate completion to the rollout). Bounded so
     * a long-lived dispatcher does not grow this set without limit.
     */
    injectedCompletionIds = new Set();
    injectedCompletionOrder = [];
    /** `(level, message, err?) => void` logging function built in the constructor. */
    log;
    /** True while a stop (or failure cleanup) is in progress; gates restarts. */
    stopping = false;
    /** True while `restartCodexRuntime` is running; gates re-entrant restarts. */
    restarting = false;
    /** Consecutive restart attempts since the last successful restart. */
    restartAttempts = 0;
    /** Pending backoff timer handle, or null when no restart is scheduled. */
    restartTimer = null;
    /** Text of the last non-empty agent message seen on a completed turn. */
    lastResult = null;
    state;
    paths;
    /**
     * @param identity Runtime identity; `runtime_id` and optional `checkpoint_id` are read.
     * @param deps Collaborators/options; `state` and `paths` are required, `logger` is optional.
     */
    constructor(identity, deps) {
        this.identity = identity;
        this.deps = deps;
        const logger = deps.logger;
        // Prefer the injected logger; otherwise fall back to stderr with a
        // dispatcher-scoped prefix.
        this.log =
            logger !== undefined
                ? (lvl, msg, err) => logger[lvl](msg, err !== undefined ? { err } : undefined)
                : (lvl, msg, err) => {
                    const prefix = `[dispatcher ${identity.runtime_id}] ${lvl}`;
                    if (err !== undefined)
                        console.error(prefix, msg, err);
                    else
                        console.error(prefix, msg);
                };
        this.threadId = identity.checkpoint_id ?? null;
        this.state = deps.state;
        this.paths = deps.paths;
    }
    /** The dispatcher id, i.e. `identity.runtime_id`. */
    get dispatcherId() {
        return this.identity.runtime_id;
    }
    /** Current in-memory lifecycle status. */
    getStatus() {
        return this.status;
    }
    getCapabilities() {
        return CODEX_AGENT_RUNTIME_CAPABILITIES;
    }
    /** Current thread id, or null when none has been resolved yet. */
    getThreadId() {
        return this.threadId;
    }
    /** True when the live thread was resumed (not freshly started/recovered). */
    wasThreadResumed() {
        return this.threadResumed;
    }
    /** Last recorded agent message (`{ text }`), or null if none was recorded. */
    async getLast() {
        return this.lastResult;
    }
    /** This runtime exposes no context object; always resolves to null. */
    async getContext() {
        return null;
    }
    /**
     * Start the runtime, optionally adopting a checkpoint's thread id first.
     *
     * @param input Optional `{ checkpoint }`; a non-null checkpoint must have
     *   `kind === 'codexThread'`, and its `id` becomes the thread to resume.
     * @throws Error when the checkpoint kind is not `codexThread`, or when `start()` fails.
     */
    async resume(input = {}) {
        if (input.checkpoint !== undefined && input.checkpoint !== null) {
            if (input.checkpoint.kind !== 'codexThread') {
                throw new Error(`unsupported resume checkpoint for Codex runtime: ${input.checkpoint.kind}`);
            }
            this.threadId = input.checkpoint.id;
        }
        await this.start();
    }
    /**
     * Inject a restart notice through the turn manager.
     *
     * @returns `{ status: 'stopped' }` when no turn manager is installed,
     *   otherwise whatever `turnManager.injectNotice` resolves to.
     */
    async submitRestartNotice(text) {
        if (this.turnManager === null)
            return { status: 'stopped' };
        const result = await this.turnManager.injectNotice(text);
        if (result.status === 'submitted') {
            this.log('info', 'restart notice injected into resumed thread');
        }
        else if (result.status === 'skipped') {
            this.log('info', 'restart notice skipped; a live inbound already arrived');
        }
        return result;
    }
    /**
     * Bring the dispatcher up. Order:
     *   1. spawn codex app-server child
     *   2. open WS client
     *   3. install fail-fast approval handler
     *   4. thread/start (new) or thread/resume (existing)
     *   5. install turn manager
     *   6. status = ready
     *
     * On failure the runtime is marked degraded, whatever was started is torn
     * down, and the ORIGINAL start error is rethrown. Persisting the degraded
     * status and the cleanup are both best-effort so that a secondary failure
     * can neither leak the child process nor mask the root cause.
     *
     * @throws the error that made startup fail.
     */
    async start() {
        this.stopping = false;
        this.restarting = false;
        this.clearRestartTimer();
        this.setStatus('starting');
        try {
            await this.state.setStatus(this.dispatcherId, 'starting', {
                last_started_at: Date.now(),
            });
            await this.startCodexRuntime();
            await this.markReady();
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            this.log('error', `start failed: ${msg}`, err);
            this.setStatus('degraded');
            try {
                await this.state.setStatus(this.dispatcherId, 'degraded', {
                    last_error: msg,
                });
            }
            catch (persistErr) {
                this.log('warn', 'failed to persist degraded status', persistErr);
            }
            try {
                await this.cleanupOnFailure();
            }
            catch (cleanupErr) {
                this.log('warn', 'cleanup after failed start did not complete', cleanupErr);
            }
            throw err;
        }
    }
    /**
     * Spawn the child, connect the client, run the initialize handshake, apply
     * skill roots, resolve the thread and install a fresh turn manager. Runs on
     * first start and again on every restart.
     */
    async startCodexRuntime() {
        const cwd = this.deps.cwd;
        // Fresh random rendezvous socket per start (issue #182): held in memory
        // only — never persisted to durable state, never derived from state paths.
        const socketPath = this.deps.allocateSocketPath(this.dispatcherId);
        const extraArgs = this.deps.resolveExtraArgs?.() ?? [];
        if (this.deps.codexHomeDoctor !== undefined) {
            await this.deps.codexHomeDoctor({ runtimeId: this.dispatcherId, cwd });
        }
        // Compose the codex app-server log subpaths under the neutral central logs
        // root (B2): core no longer names a per-runtime log file. The host supplies
        // a unique, filesystem-safe `runtime_id`, so `<logsDir>/codex-app-server/
        // <id>.log` is collision-free across dispatchers and teammates.
        const codexLogDir = join(this.paths.logsDir(), 'codex-app-server');
        const factory = this.deps.codexProcessFactory ?? ((o) => new CodexProcess(o));
        // Named `child` (not `process`) so the Node global is not shadowed.
        const child = factory({
            socketPath,
            cwd,
            stdoutLogPath: join(codexLogDir, `${this.dispatcherId}.log`),
            stderrLogPath: join(codexLogDir, `${this.dispatcherId}.stderr.log`),
            binPath: this.deps.codexBinPath,
            extraArgs,
            env: codexProcessEnv(this.deps.injectEnv, this.deps.extraEnv),
        });
        this.process = child;
        child.onExit((exit) => {
            // Ignore exits of a child this runtime no longer owns.
            if (this.process !== child)
                return;
            this.handleChildExit(exit);
        });
        await child.start();
        const clientFactory = this.deps.codexClientFactory ?? ((sock) => new CodexWsClient({ socketPath: sock }));
        const client = clientFactory(socketPath);
        this.client = client;
        client.onClose((reason) => {
            // Ignore closes of a client this runtime no longer owns.
            if (this.client !== client)
                return;
            this.handleClientClose(reason);
        });
        await client.ready();
        const approvalHandler = createFailFastApprovalHandler({
            onReject: async (req) => {
                this.log('warn', `rejected Codex approval request '${req.method}'; Feishu outbound is MCP reply-only`);
            },
        });
        this.client.setServerRequestHandler(approvalHandler);
        // codex 0.134+ LSP-style handshake — must precede thread/start or
        // any other RPC, otherwise codex answers everything with
        // `Not initialized` (see src/codex/handshake.ts).
        const initResponse = await performInitializeHandshake(this.client, {
            ...(this.deps.handshakeTimeoutMs !== undefined
                ? { timeoutMs: this.deps.handshakeTimeoutMs }
                : {}),
        });
        this.log('info', `codex initialized: ${initResponse.userAgent} (home=${initResponse.codexHome}, ${initResponse.platformOs})`);
        // Role-gated bundled skills (issue #209 slice 6): set the extra skill roots
        // AFTER initialize and BEFORE thread start/resume, and on every restart this
        // method runs again so the roots are reapplied to the fresh app-server.
        await this.applySkillExtraRoots();
        await this.resolveThread();
        this.turnManager = new TurnManager({
            dispatcherId: this.dispatcherId,
            getThreadId: () => this.threadId,
            client: this.client,
            onTurnCompleted: (turn) => this.recordCollectedTurn(turn),
            onTurnSettled: this.deps.onTurnSettled,
            log: this.log,
        });
    }
    /**
     * Apply the role-gated bundled skill sources to the live app-server via
     * `skills/extraRoots/set`. Codex treats each extra root as a directory whose
     * immediate children are skill dirs, so a `skill-dir` source maps to the
     * *parent* of its own directory; roots are deduped (the bundled Dreamux skills
     * share one parent). Empty input skips the RPC entirely (a fresh per-runtime
     * app-server starts with no extra roots, so nothing to clear).
     *
     * Error handling distinguishes two failure modes (issue #209 slice 6 repair):
     *   1. The app-server does not implement `skills/extraRoots/set` at all — a
     *      capability/version skew against an older codex backend (it answers with
     *      an `unknown variant`/method-not-found error). This is NOT a real
     *      failure: fail open, warn, and continue skill-blind rather than bricking
     *      startup against every backend that predates the RPC.
     *   2. The RPC exists but applying the given roots genuinely failed — fail
     *      loud, exactly as before, so real misconfiguration is not masked.
     *
     * @throws Error when no client is installed, or on a genuine RPC failure.
     */
    async applySkillExtraRoots() {
        if (this.client === null)
            throw new Error('client not initialized');
        const sources = this.deps.skillSources ?? [];
        if (sources.length === 0)
            return;
        const extraRoots = [
            ...new Set(sources
                .filter((s) => s.layout === CODEX_SKILL_DIR_LAYOUT)
                .map((s) => dirname(s.path))),
        ];
        if (extraRoots.length === 0)
            return;
        try {
            await this.client.request('skills/extraRoots/set', { extraRoots });
        }
        catch (err) {
            if (isUnsupportedRpcMethodError(err)) {
                this.log('warn', `skills/extraRoots/set unsupported by this app-server; continuing skill-blind (${extraRoots.length} extra root(s) not applied)`, err);
                return;
            }
            throw err;
        }
        this.log('info', `applied ${extraRoots.length} skill extra root(s): ${extraRoots.join(', ')}`);
    }
    /**
     * Resolve the thread for the live client: start a fresh thread when none is
     * known, otherwise try `thread/resume` and fall back to a fresh thread
     * (recording the lost one) when the resume fails.
     *
     * @throws Error when no client is installed, or when `thread/start` fails.
     */
    async resolveThread() {
        if (this.client === null)
            throw new Error('client not initialized');
        // Each resolution recomputes whether we resumed; a fresh start or a
        // resume-failure recovery must not look like a resume to the notice gate.
        this.threadResumed = false;
        const existing = this.threadId ?? this.identity.checkpoint_id ?? null;
        if (existing === null) {
            // Fresh thread.
            const params = {
                baseInstructions: this.deps.systemPromptContent,
            };
            const res = await this.client.request('thread/start', params);
            this.threadId = res.thread.id;
            await this.state.setThreadId(this.dispatcherId, this.threadId);
            this.log('info', `started fresh thread ${this.threadId}`);
            return;
        }
        try {
            const params = {
                threadId: existing,
                baseInstructions: this.deps.systemPromptContent,
            };
            await this.client.request('thread/resume', params);
            this.threadId = existing;
            this.threadResumed = true;
            this.log('info', `resumed thread ${this.threadId}`);
        }
        catch (err) {
            // Visible degradation (issue #2 Q11): start a fresh thread, record loss.
            const msg = err instanceof Error ? err.message : String(err);
            this.log('warn', `thread/resume failed for ${existing}: ${msg}; starting fresh thread`);
            const res = await this.client.request('thread/start', { baseInstructions: this.deps.systemPromptContent });
            this.threadId = res.thread.id;
            // The injected-completion cache describes the thread that was just
            // lost. The replacement thread holds none of those items, so forget
            // them; otherwise a retried completion would skip injection and wake
            // the dispatcher with nothing to read.
            this.injectedCompletionIds.clear();
            this.injectedCompletionOrder.length = 0;
            if (this.state.recordLostThread !== undefined) {
                await this.state.recordLostThread(this.dispatcherId, existing, this.threadId, `thread/resume failed: ${msg}`);
            }
            else {
                await this.state.setThreadId(this.dispatcherId, this.threadId);
                await this.state.setStatus(this.dispatcherId, 'degraded', {
                    last_error: `thread/resume failed: ${msg}`,
                });
            }
            // Park a warning to be delivered with the next outbound — best-effort
            // queue note. For MVP we just log; full user-visible delivery on next
            // inbound is a follow-up (see PR review).
        }
    }
    /**
     * Submit any accepted inbound message arriving for this dispatcher. Called by
     * the Feishu inbound layer.
     *
     * @returns the turn manager's enqueue result, or `{ status: 'failed', error }`
     *   when no turn manager is installed.
     */
    async channelInput(input, hooks = {}) {
        if (this.turnManager === null) {
            return { status: 'failed', error: new Error('turn manager not initialized') };
        }
        // This runtime owns wrapping the channel input into its delivery shape: a
        // structured channel turn becomes the native `<channel source="…">` block
        // (same envelope claude renders); a plain turn (e.g. the completion trigger)
        // passes through unchanged.
        return this.turnManager.enqueue({ ...input, text: renderChannelInput(input) }, hooks);
    }
    /** Inject a system-originated notice (e.g. a restart notice). */
    async systemInput(notice) {
        return this.submitRestartNotice(notice.text);
    }
    /**
     * Codex TeamMate completion delivery — the native inbox-then-trigger idiom.
     *
     * Two steps, in order:
     *   1. `thread/inject_items` appends the completion to the dispatcher thread's
     *      model-visible history as a developer-role message (no fake user turn).
     *      codex folds the item onto the active turn when one is running and never
     *      rejects on a busy thread, so a failure here is a genuine RPC error.
     *   2. a minimal trigger turn through the public `channelInput` seam wakes the
     *      idle dispatcher so it reads the just-injected notification and acts.
     *
     * The trigger turn uses a fresh, non-routable source id per attempt. The turn
     * manager commits its dedup id before `turn/start` and does not roll it back
     * on failure, so a retry that reused one id would come back `duplicate` and be
     * mis-counted as delivered when nothing was submitted. The Dispatcher Service
     * only retries on `failed` (definitely not submitted), so a unique id per
     * attempt re-submits the trigger safely.
     *
     * Calls for an already-accepted id short-circuit to `accepted`; concurrent
     * calls for the same id share one in-flight delivery promise.
     */
    async completionInput(completion) {
        if (this.acceptedCompletionIds.has(completion.id)) {
            return { status: 'accepted' };
        }
        const inFlight = this.inFlightCompletionDeliveries.get(completion.id);
        if (inFlight !== undefined)
            return inFlight;
        const delivery = this.deliverCompletionInput(completion);
        this.inFlightCompletionDeliveries.set(completion.id, delivery);
        try {
            const outcome = await delivery;
            if (outcome.status === 'accepted') {
                this.rememberAcceptedCompletion(completion.id);
            }
            return outcome;
        }
        finally {
            this.inFlightCompletionDeliveries.delete(completion.id);
        }
    }
    /**
     * Perform one delivery attempt for `completion`: inject the item (at most
     * once per id) and submit the trigger turn.
     *
     * @returns `{ status: 'accepted' }`, `{ status: 'unsupported', reason }` or
     *   `{ status: 'failed', error }`; never resolves to `undefined`.
     */
    async deliverCompletionInput(completion) {
        if (this.client === null || this.turnManager === null || this.stopping) {
            return { status: 'unsupported', reason: 'dispatcher runtime stopped' };
        }
        const threadId = this.threadId;
        if (threadId === null) {
            return {
                status: 'failed',
                error: new Error('teammate completion delivery has no thread id'),
            };
        }
        // Inject the completion item at most once per completion id. On a retry
        // (trigger turn failed last time) the item is already in the thread, so we
        // skip straight to re-triggering instead of persisting a duplicate.
        if (!this.injectedCompletionIds.has(completion.id)) {
            try {
                await injectThreadItems(this.client, threadId, [
                    await buildCodexCompletionItem(completion, this.paths.completionSpillDir(this.dispatcherId)),
                ]);
            }
            catch (err) {
                const cause = err instanceof Error ? err.message : String(err);
                // `thread/inject_items` exists only on codex 0.137+. On an older codex
                // it RPC-fails here, so surface the version requirement loudly rather
                // than letting the dispatcher silently never see the completion.
                return {
                    status: 'failed',
                    error: new Error(`teammate completion thread/inject_items failed (requires codex 0.137+): ${cause}`),
                };
            }
            this.rememberInjectedCompletion(completion.id);
        }
        const deliverySeq = ++this.teammateDeliverySeq;
        const delivery = await this.channelInput({
            sourceId: `teammate:${completion.id}#${deliverySeq}`,
            text: CODEX_COMPLETION_TRIGGER_TEXT,
        });
        switch (delivery.status) {
            case 'submitted':
                return { status: 'accepted' };
            case 'stopped':
                return { status: 'unsupported', reason: 'dispatcher runtime stopped' };
            case 'failed':
                return { status: 'failed', error: delivery.error };
            case 'duplicate':
                // Unreachable with the per-attempt id above; if it ever happens, the
                // turn was NOT freshly submitted, so do not report it as delivered.
                return {
                    status: 'failed',
                    error: new Error('teammate completion trigger unexpectedly deduplicated'),
                };
            case 'skipped':
                return {
                    status: 'failed',
                    error: new Error('teammate completion trigger unexpectedly skipped'),
                };
            default:
                // An unrecognised status cannot be counted as delivered; report a
                // failure instead of falling through and resolving to `undefined`.
                return {
                    status: 'failed',
                    error: new Error(`teammate completion trigger returned unexpected status: ${String(delivery.status)}`),
                };
        }
    }
    /**
     * Graceful stop: stop accepting work, reap codex child.
     *
     * Teardown runs even when persisting the `stopping` status fails, so a
     * broken state store cannot leave the child process running; the
     * persistence error still propagates to the caller.
     */
    async stop() {
        this.stopping = true;
        this.clearRestartTimer();
        this.setStatus('stopping');
        try {
            await this.state.setStatus(this.dispatcherId, 'stopping');
        }
        finally {
            await this.teardownCodexRuntime();
        }
        this.setStatus('stopped');
        await this.state.setStatus(this.dispatcherId, 'stopped');
    }
    /**
     * Tear down after a failed start. `stopping` is raised for the duration so
     * exit/close events fired by the teardown do not schedule a restart, then
     * restored to its previous value.
     */
    async cleanupOnFailure() {
        this.clearRestartTimer();
        const wasStopping = this.stopping;
        this.stopping = true;
        try {
            await this.teardownCodexRuntime();
        }
        finally {
            this.stopping = wasStopping;
        }
    }
    /**
     * Release the turn manager, the client and the child process, in that
     * order. Each handle is detached from `this` before it is released. The
     * client is closed and the child reaped even when `turnManager.stop()`
     * rejects; that rejection (or a later `reap()` rejection) still propagates.
     */
    async teardownCodexRuntime() {
        const turnManager = this.turnManager;
        this.turnManager = null;
        try {
            if (turnManager !== null)
                await turnManager.stop();
        }
        finally {
            const client = this.client;
            this.client = null;
            if (client !== null) {
                try {
                    client.close();
                }
                catch {
                    /* best-effort close; the child is reaped below regardless */
                }
            }
            const child = this.process;
            this.process = null;
            if (child !== null) {
                await child.reap();
            }
        }
    }
    /** Child-exit handler: schedule a restart, describing the exit signal or code. */
    handleChildExit(exit) {
        const details = exit.signal !== null ? `signal=${exit.signal}` : `code=${exit.code ?? 'null'}`;
        this.scheduleRestart(`codex app-server child exited (${details})`);
    }
    /** Client-close handler: schedule a restart, quoting `reason.message`. */
    handleClientClose(reason) {
        this.scheduleRestart(`codex app-server websocket closed: ${reason.message}`);
    }
    /**
     * Mark the runtime degraded and arm a backoff timer that restarts it.
     * No-op while stopping, while a restart is running, or when a timer is
     * already pending.
     */
    scheduleRestart(reason) {
        if (this.stopping || this.restartTimer !== null || this.restarting)
            return;
        const attempt = this.restartAttempts + 1;
        this.restartAttempts = attempt;
        const delay = this.restartDelayMs(attempt);
        this.log('warn', `${reason}; restarting in ${delay}ms`);
        this.setStatus('degraded');
        // scheduleRestart runs from synchronous event handlers (ws close, child
        // exit); the durable status write is best-effort here — persist it without
        // blocking, logging (never throwing) on failure. The restart timer's later
        // 'starting'/'ready' writes are awaited, so they cannot be reordered behind
        // this one within the backoff delay.
        void this.state
            .setStatus(this.dispatcherId, 'degraded', { last_error: reason })
            .catch((err) => this.log('warn', 'failed to persist degraded status', err));
        this.restartTimer = setTimeout(() => {
            this.restartTimer = null;
            void this.restartCodexRuntime(reason);
        }, delay);
    }
    /**
     * Tear down and re-run `startCodexRuntime`; on failure, mark degraded and
     * schedule another attempt.
     *
     * This method is launched fire-and-forget from the restart timer, so it
     * must never reject: every await sits inside the try/catch, `restarting`
     * is always reset, and the failure path's persistence and teardown are
     * best-effort so the retry is always scheduled.
     */
    async restartCodexRuntime(reason) {
        if (this.stopping)
            return;
        this.restarting = true;
        let retryReason = null;
        try {
            this.setStatus('starting');
            await this.state.setStatus(this.dispatcherId, 'starting', {
                last_started_at: Date.now(),
            });
            await this.teardownCodexRuntime();
            if (this.stopping)
                return;
            await this.startCodexRuntime();
            if (this.stopping) {
                // stop() arrived while we were starting: do not leave a live child.
                await this.teardownCodexRuntime();
                return;
            }
            this.restartAttempts = 0;
            await this.markReady();
            this.log('info', `restarted codex app-server after: ${reason}`);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            this.log('error', `restart failed: ${msg}`, err);
            // Decide on the retry first so nothing below can cancel it.
            retryReason = `codex app-server restart failed: ${msg}`;
            this.setStatus('degraded');
            try {
                await this.state.setStatus(this.dispatcherId, 'degraded', {
                    last_error: msg,
                });
            }
            catch (persistErr) {
                this.log('warn', 'failed to persist degraded status', persistErr);
            }
            try {
                await this.teardownCodexRuntime();
            }
            catch (teardownErr) {
                this.log('warn', 'teardown after failed restart did not complete', teardownErr);
            }
        }
        finally {
            this.restarting = false;
        }
        if (retryReason !== null)
            this.scheduleRestart(retryReason);
    }
    /**
     * Exponential backoff: `base * 2^(attempt-1)`, capped at `max`. `base` and
     * `max` come from `deps` or the module defaults; `max` is never below `base`.
     */
    restartDelayMs(attempt) {
        const base = Math.max(0, this.deps.restartBackoffBaseMs ?? DEFAULT_RESTART_BACKOFF_BASE_MS);
        const max = Math.max(base, this.deps.restartBackoffMaxMs ?? DEFAULT_RESTART_BACKOFF_MAX_MS);
        return Math.min(max, base * 2 ** Math.max(0, attempt - 1));
    }
    /** Cancel the pending restart timer, if any. */
    clearRestartTimer() {
        if (this.restartTimer === null)
            return;
        clearTimeout(this.restartTimer);
        this.restartTimer = null;
    }
    /** Set status to `ready` in memory and in the state store, clearing `last_error`. */
    async markReady() {
        this.setStatus('ready');
        await this.state.setStatus(this.dispatcherId, 'ready', {
            last_ready_at: Date.now(),
            last_error: null,
        });
    }
    /**
     * Turn-completed hook: remember the last non-empty `agentMessage` text and
     * report the turn as settled with status `completed`.
     */
    recordCollectedTurn(turn) {
        const messages = turn.items.filter((item) => item.type === 'agentMessage');
        const last = messages[messages.length - 1];
        if (typeof last?.text === 'string' && last.text.length > 0) {
            this.lastResult = { text: last.text };
        }
        // A turn reaching `turn/completed` is the `completed` terminal state. The
        // `stopped` settlement for interrupted turns is emitted by the turn manager
        // on `stop()`.
        this.deps.onTurnSettled?.({ turnId: turn.turnId, status: 'completed' });
    }
    /** Record a completion id as injected, evicting the oldest past a small cap. */
    rememberInjectedCompletion(id) {
        if (this.injectedCompletionIds.has(id))
            return;
        this.injectedCompletionIds.add(id);
        this.injectedCompletionOrder.push(id);
        while (this.injectedCompletionOrder.length > COMPLETION_ID_CACHE_LIMIT) {
            const evicted = this.injectedCompletionOrder.shift();
            if (evicted !== undefined)
                this.injectedCompletionIds.delete(evicted);
        }
    }
    /** Record a completion id as fully accepted, evicting the oldest past a cap. */
    rememberAcceptedCompletion(id) {
        if (this.acceptedCompletionIds.has(id))
            return;
        this.acceptedCompletionIds.add(id);
        this.acceptedCompletionOrder.push(id);
        while (this.acceptedCompletionOrder.length > COMPLETION_ID_CACHE_LIMIT) {
            const evicted = this.acceptedCompletionOrder.shift();
            if (evicted !== undefined)
                this.acceptedCompletionIds.delete(evicted);
        }
    }
    /** Update the in-memory status only (the state store is written separately). */
    setStatus(s) {
        this.status = s;
    }
}
|
|
618
|
+
/**
 * Decide whether an RPC rejection means "the app-server does not implement
 * this method at all" (a capability/version gap) as opposed to a genuine
 * failure of a method that does exist.
 *
 * Only the error *message* is inspected, because the rpc layer collapses
 * codex's structured error to `Error(message)` and drops the JSON-RPC error
 * code. codex reports an unimplemented method as a serde enum-deserialization
 * failure of the request's `method` field — `unknown variant \`<method>\`,
 * expected one of …` — while a spec-compliant JSON-RPC peer answers
 * method-not-found (-32601). Matching is case-insensitive and limited to those
 * canonical phrasings, so a real error from an *existing* method (a bad root
 * path, a permission failure) is not swallowed and still fails loud.
 *
 * Known residual false positive: a server that *implements* the method but
 * rejects a bad *param value* with an "unknown variant `<value>`" serde error.
 * That is harmless for the sole caller — `skills/extraRoots/set` takes a
 * `string[]` of paths, which codex never enum-rejects — but a future caller
 * passing an enum-typed param should not reuse this classifier blindly.
 *
 * @param err The rejection value; non-Error values are stringified.
 * @returns true when the message contains one of the "unsupported method" phrasings.
 */
export function isUnsupportedRpcMethodError(err) {
    const rawText = err instanceof Error ? err.message : String(err);
    const haystack = rawText.toLowerCase();
    const unsupportedMarkers = [
        'unknown variant',
        'method not found',
        'unknown method',
        'no such method',
        'unsupported method',
    ];
    return unsupportedMarkers.some((marker) => haystack.includes(marker));
}
|
|
648
|
+
//# sourceMappingURL=runtime.js.map
|