mcp-codex-worker 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/app.js +35 -9
- package/dist/src/app.js.map +1 -1
- package/dist/src/execution/base-adapter.d.ts +1 -0
- package/dist/src/execution/base-adapter.js.map +1 -1
- package/dist/src/execution/codex-adapter.js +29 -4
- package/dist/src/execution/codex-adapter.js.map +1 -1
- package/dist/src/mcp/resource-renderers.js +4 -1
- package/dist/src/mcp/resource-renderers.js.map +1 -1
- package/dist/src/mcp/tool-definitions.d.ts +6 -6
- package/dist/src/mcp/tool-definitions.js +83 -54
- package/dist/src/mcp/tool-definitions.js.map +1 -1
- package/dist/src/services/codex-runtime.d.ts +5 -1
- package/dist/src/services/codex-runtime.js +5 -4
- package/dist/src/services/codex-runtime.js.map +1 -1
- package/dist/src/services/reasoning-options.d.ts +20 -0
- package/dist/src/services/reasoning-options.js +42 -0
- package/dist/src/services/reasoning-options.js.map +1 -0
- package/dist/src/task/task-manager.d.ts +1 -0
- package/dist/src/task/task-manager.js +2 -0
- package/dist/src/task/task-manager.js.map +1 -1
- package/dist/src/task/task-state.d.ts +1 -0
- package/package.json +1 -1
- package/src/app.ts +37 -7
- package/src/execution/base-adapter.ts +1 -0
- package/src/execution/codex-adapter.ts +31 -4
- package/src/mcp/resource-renderers.ts +4 -1
- package/src/mcp/tool-definitions.ts +86 -54
- package/src/services/codex-runtime.ts +13 -4
- package/src/services/reasoning-options.ts +57 -0
- package/src/task/task-manager.ts +2 -0
- package/src/task/task-state.ts +1 -0
package/package.json
CHANGED
package/src/app.ts
CHANGED
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
renderVerboseLog,
|
|
24
24
|
} from './mcp/resource-renderers.js';
|
|
25
25
|
import { validateResponseAgainstCapabilities } from './execution/provider-capabilities.js';
|
|
26
|
+
import { parseReasoning, type ParsedReasoning } from './services/reasoning-options.js';
|
|
26
27
|
// Persistence utilities — re-exported for external use; not called directly in this module.
|
|
27
28
|
import { saveState, loadState, persistenceDir, applyRecovery } from './task/task-persistence.js';
|
|
28
29
|
|
|
@@ -195,6 +196,14 @@ export class CodexWorkerApp {
|
|
|
195
196
|
const provider = input.provider ?? 'codex';
|
|
196
197
|
const taskType = input.task_type ?? 'coder';
|
|
197
198
|
|
|
199
|
+
// Split `gpt-5.4(effort)` into model id + reasoning effort level. The
|
|
200
|
+
// two travel as separate fields through the adapter chain so Codex can
|
|
201
|
+
// receive them as `model` + `reasoningEffort`/`effort`.
|
|
202
|
+
let parsedReasoning: ParsedReasoning | undefined;
|
|
203
|
+
if (input.reasoning !== undefined) {
|
|
204
|
+
parsedReasoning = parseReasoning(input.reasoning);
|
|
205
|
+
}
|
|
206
|
+
|
|
198
207
|
// 1. Create the task
|
|
199
208
|
const createInput: Parameters<TaskManager['createTask']>[0] = {
|
|
200
209
|
prompt: input.prompt,
|
|
@@ -202,7 +211,10 @@ export class CodexWorkerApp {
|
|
|
202
211
|
provider,
|
|
203
212
|
taskType,
|
|
204
213
|
};
|
|
205
|
-
if (
|
|
214
|
+
if (parsedReasoning !== undefined) {
|
|
215
|
+
createInput.model = parsedReasoning.model;
|
|
216
|
+
createInput.effort = parsedReasoning.effort;
|
|
217
|
+
}
|
|
206
218
|
if (input.timeout_ms !== undefined) createInput.timeoutMs = input.timeout_ms;
|
|
207
219
|
if (input.labels !== undefined) createInput.labels = input.labels;
|
|
208
220
|
if (input.depends_on !== undefined) createInput.dependsOn = input.depends_on;
|
|
@@ -227,7 +239,10 @@ export class CodexWorkerApp {
|
|
|
227
239
|
cwd: input.cwd ?? process.cwd(),
|
|
228
240
|
timeout: input.timeout_ms ?? 0,
|
|
229
241
|
};
|
|
230
|
-
if (
|
|
242
|
+
if (parsedReasoning !== undefined) {
|
|
243
|
+
spawnOptions.model = parsedReasoning.model;
|
|
244
|
+
spawnOptions.effort = parsedReasoning.effort;
|
|
245
|
+
}
|
|
231
246
|
|
|
232
247
|
// 5. Dispatch asynchronously (don't block MCP response)
|
|
233
248
|
setImmediate(() => {
|
|
@@ -352,8 +367,12 @@ export class CodexWorkerApp {
|
|
|
352
367
|
}
|
|
353
368
|
}
|
|
354
369
|
|
|
355
|
-
//
|
|
356
|
-
|
|
370
|
+
// Peek at the head of the pending question queue — do NOT dequeue yet.
|
|
371
|
+
// We only remove it after the response is successfully forwarded to
|
|
372
|
+
// Codex, so a transient client crash leaves the question in place for
|
|
373
|
+
// the orchestrator to retry.
|
|
374
|
+
const questions = handle.getPendingQuestions();
|
|
375
|
+
const question = questions[0];
|
|
357
376
|
if (!question) {
|
|
358
377
|
throw new Error(`No pending question for task ${taskId}`);
|
|
359
378
|
}
|
|
@@ -387,8 +406,11 @@ export class CodexWorkerApp {
|
|
|
387
406
|
break;
|
|
388
407
|
}
|
|
389
408
|
|
|
390
|
-
// Forward the response to
|
|
409
|
+
// Forward the response to Codex. Only dequeue the question AFTER this
|
|
410
|
+
// succeeds — if it throws (client crash, process exit), the question
|
|
411
|
+
// stays in the queue so the orchestrator can retry.
|
|
391
412
|
await this.runtime.respondToServerRequest(question.requestId, payload);
|
|
413
|
+
handle.dequeuePendingQuestion();
|
|
392
414
|
|
|
393
415
|
// If queue is now empty and task was WAITING_ANSWER, resume tracking
|
|
394
416
|
// The adapter's executeSession will continue automatically
|
|
@@ -417,13 +439,21 @@ export class CodexWorkerApp {
|
|
|
417
439
|
throw new Error(`Task ${taskId} is in terminal status: ${task.status}`);
|
|
418
440
|
}
|
|
419
441
|
|
|
442
|
+
// Parse `gpt-5.4(effort)` → { model, effort } so the runtime can forward
|
|
443
|
+
// them as separate fields. Fall back to the task's stored values when
|
|
444
|
+
// the caller did not override reasoning for this follow-up turn.
|
|
445
|
+
const parsed = input.reasoning ? parseReasoning(input.reasoning) : undefined;
|
|
446
|
+
const turnModel = parsed?.model ?? task.model;
|
|
447
|
+
const turnEffort = parsed?.effort ?? task.effort;
|
|
448
|
+
|
|
420
449
|
// Start a new turn on the existing thread
|
|
421
450
|
const built = await this.runtime.buildTurnStartParams({
|
|
422
451
|
threadId: task.sessionId,
|
|
423
452
|
userInput: input.message,
|
|
424
|
-
model:
|
|
453
|
+
model: turnModel,
|
|
454
|
+
effort: turnEffort,
|
|
425
455
|
});
|
|
426
|
-
await this.runtime.ensureThreadLoaded(task.sessionId,
|
|
456
|
+
await this.runtime.ensureThreadLoaded(task.sessionId, turnModel, turnEffort);
|
|
427
457
|
const bridged = await this.runtime.requestWithBridge('turn/start', built.params, {
|
|
428
458
|
threadId: task.sessionId,
|
|
429
459
|
});
|
|
@@ -67,11 +67,21 @@ export class CodexAdapter extends BaseProviderAdapter {
|
|
|
67
67
|
): Promise<void> {
|
|
68
68
|
const runtime = this.getRuntime();
|
|
69
69
|
let detachPauseFlow: (() => void) | undefined;
|
|
70
|
+
let removeExitListener: (() => void) | undefined;
|
|
71
|
+
|
|
72
|
+
// Clean up listeners only once, regardless of which path triggers it.
|
|
73
|
+
const cleanup = () => {
|
|
74
|
+
detachPauseFlow?.();
|
|
75
|
+
detachPauseFlow = undefined;
|
|
76
|
+
removeExitListener?.();
|
|
77
|
+
removeExitListener = undefined;
|
|
78
|
+
};
|
|
70
79
|
|
|
71
80
|
try {
|
|
72
81
|
// 1. Create a new thread
|
|
73
82
|
const { params: threadParams } = await runtime.buildThreadStartParams({
|
|
74
83
|
model: options.model,
|
|
84
|
+
effort: options.effort,
|
|
75
85
|
cwd: options.cwd,
|
|
76
86
|
});
|
|
77
87
|
const threadResult = await runtime.request('thread/start', threadParams) as {
|
|
@@ -92,11 +102,24 @@ export class CodexAdapter extends BaseProviderAdapter {
|
|
|
92
102
|
.getCurrentClient();
|
|
93
103
|
detachPauseFlow = attachPauseFlow(client, handle, threadId);
|
|
94
104
|
|
|
105
|
+
// 3b. Listen for app-server crashes. If the process exits while the
|
|
106
|
+
// task is paused (WAITING_ANSWER), mark it failed so the
|
|
107
|
+
// orchestrator doesn't wait on a ghost.
|
|
108
|
+
const onExit = () => {
|
|
109
|
+
if (handle.isAlive()) {
|
|
110
|
+
handle.markFailed('Codex app-server process exited unexpectedly');
|
|
111
|
+
}
|
|
112
|
+
cleanup();
|
|
113
|
+
};
|
|
114
|
+
client.on('exit', onExit);
|
|
115
|
+
removeExitListener = () => client.off('exit', onExit);
|
|
116
|
+
|
|
95
117
|
// 4. Build turn params and start the turn via bridged request
|
|
96
118
|
const { params: turnParams } = await runtime.buildTurnStartParams({
|
|
97
119
|
threadId,
|
|
98
120
|
userInput: prompt,
|
|
99
121
|
model: options.model,
|
|
122
|
+
effort: options.effort,
|
|
100
123
|
});
|
|
101
124
|
const bridgeResult = await runtime.requestWithBridge(
|
|
102
125
|
'turn/start',
|
|
@@ -107,13 +130,15 @@ export class CodexAdapter extends BaseProviderAdapter {
|
|
|
107
130
|
// 5. Handle bridge result
|
|
108
131
|
if (bridgeResult.status === 'pending_request') {
|
|
109
132
|
// Pause flow already queued the question on the handle and
|
|
110
|
-
// markInputRequired was called.
|
|
111
|
-
//
|
|
133
|
+
// markInputRequired was called. Keep listeners attached —
|
|
134
|
+
// they will be cleaned up when the task reaches a terminal
|
|
135
|
+
// state via message-task, or when the client exits.
|
|
112
136
|
return;
|
|
113
137
|
}
|
|
114
138
|
|
|
115
139
|
if (bridgeResult.status === 'completed') {
|
|
116
140
|
handle.markCompleted();
|
|
141
|
+
cleanup();
|
|
117
142
|
return;
|
|
118
143
|
}
|
|
119
144
|
|
|
@@ -131,8 +156,10 @@ export class CodexAdapter extends BaseProviderAdapter {
|
|
|
131
156
|
} else {
|
|
132
157
|
handle.markFailed(op.error ?? 'Turn failed');
|
|
133
158
|
}
|
|
134
|
-
|
|
135
|
-
|
|
159
|
+
cleanup();
|
|
160
|
+
} catch (err) {
|
|
161
|
+
cleanup();
|
|
162
|
+
throw err;
|
|
136
163
|
}
|
|
137
164
|
}
|
|
138
165
|
|
|
@@ -127,7 +127,10 @@ export function renderTaskDetail(task: TaskState): string {
|
|
|
127
127
|
}
|
|
128
128
|
|
|
129
129
|
if (task.model) {
|
|
130
|
-
|
|
130
|
+
const reasoningCell = task.effort
|
|
131
|
+
? `\`${task.model}(${task.effort})\``
|
|
132
|
+
: `\`${task.model}\``;
|
|
133
|
+
lines.push(`| **Reasoning** | ${reasoningCell} |`);
|
|
131
134
|
}
|
|
132
135
|
|
|
133
136
|
lines.push(`| **Task type** | ${task.taskType} |`);
|
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
|
|
3
|
+
import { REASONING_OPTIONS } from '../services/reasoning-options.js';
|
|
4
|
+
|
|
3
5
|
// ---------------------------------------------------------------------------
|
|
4
6
|
// Unified task tool schemas (provider-agnostic)
|
|
5
7
|
// ---------------------------------------------------------------------------
|
|
6
8
|
|
|
9
|
+
const reasoningEnum = z.enum(REASONING_OPTIONS);
|
|
10
|
+
|
|
7
11
|
const spawnTaskSchema = z.object({
|
|
8
12
|
prompt: z.string().min(1),
|
|
9
13
|
task_type: z.enum(['coder', 'planner', 'tester', 'researcher', 'general']).default('coder'),
|
|
10
14
|
provider: z.enum(['codex', 'copilot', 'claude-cli']).optional(),
|
|
11
|
-
|
|
15
|
+
reasoning: reasoningEnum.optional(),
|
|
12
16
|
cwd: z.string().optional(),
|
|
13
17
|
timeout_ms: z.number().int().min(1000).max(3_600_000).optional(),
|
|
14
18
|
keep_alive: z.number().optional(),
|
|
@@ -60,7 +64,7 @@ const respondTaskSchema = z.discriminatedUnion('type', [
|
|
|
60
64
|
const messageTaskSchema = z.object({
|
|
61
65
|
task_id: z.string().min(1),
|
|
62
66
|
message: z.string().min(1),
|
|
63
|
-
|
|
67
|
+
reasoning: reasoningEnum.optional(),
|
|
64
68
|
});
|
|
65
69
|
|
|
66
70
|
const cancelTaskSchema = z.object({
|
|
@@ -89,69 +93,94 @@ function objectSchema(
|
|
|
89
93
|
};
|
|
90
94
|
}
|
|
91
95
|
|
|
96
|
+
const REASONING_DESCRIPTION = [
|
|
97
|
+
'Model + reasoning effort for this task. The value is a single literal — the server splits it into a model id and a reasoning-effort level and passes them to Codex separately.',
|
|
98
|
+
'',
|
|
99
|
+
'Picking the level:',
|
|
100
|
+
'- `gpt-5.4(medium)` — default workhorse. Use for most coding, refactors, and focused debugging.',
|
|
101
|
+
'- `gpt-5.4(high)` — harder tasks: multi-file reasoning, subtle bugs, non-trivial design decisions.',
|
|
102
|
+
'- `gpt-5.4(xhigh)` — reserved for exceptional deep research, novel architecture work, or problems where you have already tried `high` and it was not enough.',
|
|
103
|
+
'- `gpt-5.4(low)` — rare; only for trivial mechanical edits where latency matters more than quality.',
|
|
104
|
+
'',
|
|
105
|
+
'Omit to use the server default from config.',
|
|
106
|
+
].join('\n');
|
|
107
|
+
|
|
92
108
|
export function createToolDefinitions(): ToolDefinition[] {
|
|
93
109
|
return [
|
|
94
110
|
{
|
|
95
111
|
name: 'spawn-task',
|
|
96
112
|
description: [
|
|
97
|
-
'Create and start a provider-agnostic task.',
|
|
113
|
+
'Create and start a provider-agnostic task, returning a task_id you can track.',
|
|
114
|
+
'',
|
|
115
|
+
'Dispatches the prompt to the provider registered for the given `task_type` (Codex, Copilot, Claude CLI) and returns immediately with a task_id. Use `wait-task` to block until the task reaches a terminal state or needs input, `respond-task` to unblock it, and `message-task` to send follow-ups on the same session.',
|
|
98
116
|
'',
|
|
99
|
-
'
|
|
100
|
-
'Returns immediately with a task_id for tracking. Use wait-task to block until completion or input_required.',
|
|
117
|
+
'PARALLEL EXECUTION: Spawn multiple tasks in the same message to fan out work — each task runs in its own isolated agent workspace and reports back independently. Prefer parallel spawns over sequential ones whenever the subtasks do not depend on each other.',
|
|
101
118
|
'',
|
|
102
|
-
'
|
|
103
|
-
'
|
|
119
|
+
'AFTER SPAWNING: Always follow up with `wait-task`. The agent may pause almost immediately to request a command/file approval or structured user input; the bridge window surfaces that pending question so you can answer it without polling forever.',
|
|
120
|
+
'',
|
|
121
|
+
'WRITING A GOOD PROMPT: Name the exact files, functions, or symbols involved, state the expected behavior or acceptance criteria, and mention anything the agent must NOT touch. Vague prompts produce vague work.',
|
|
104
122
|
].join('\n'),
|
|
105
123
|
inputSchema: objectSchema({
|
|
106
|
-
prompt: {
|
|
124
|
+
prompt: {
|
|
125
|
+
type: 'string',
|
|
126
|
+
minLength: 1,
|
|
127
|
+
description: 'What the task should do. Be specific: include file paths, function or symbol names, the expected outcome, and any constraints. The agent only sees this prompt — treat it as the full brief.',
|
|
128
|
+
},
|
|
107
129
|
task_type: {
|
|
108
130
|
type: 'string',
|
|
109
131
|
enum: ['coder', 'planner', 'tester', 'researcher', 'general'],
|
|
110
132
|
default: 'coder',
|
|
111
|
-
description: '
|
|
133
|
+
description: 'Routing hint that picks a provider and default prompt shape. `coder` for writing/editing code, `planner` for decomposing work, `tester` for writing or running tests, `researcher` for investigation, `general` for anything else. Defaults to `coder`.',
|
|
112
134
|
},
|
|
113
135
|
provider: {
|
|
114
136
|
type: 'string',
|
|
115
137
|
enum: ['codex', 'copilot', 'claude-cli'],
|
|
116
|
-
description: '
|
|
138
|
+
description: 'Force a specific backend instead of the one registered for the `task_type`. Leave unset in almost all cases — only override when you need a particular provider for capability reasons.',
|
|
139
|
+
},
|
|
140
|
+
reasoning: {
|
|
141
|
+
type: 'string',
|
|
142
|
+
enum: [...REASONING_OPTIONS],
|
|
143
|
+
description: REASONING_DESCRIPTION,
|
|
144
|
+
},
|
|
145
|
+
cwd: {
|
|
146
|
+
type: 'string',
|
|
147
|
+
description: 'Absolute working directory the task runs in. Defaults to the server process cwd; set this when the work is scoped to a specific repo or subfolder.',
|
|
117
148
|
},
|
|
118
|
-
model: { type: 'string', description: 'Model to use. Omit to use the provider default.' },
|
|
119
|
-
cwd: { type: 'string', description: 'Working directory for the task. Defaults to server process cwd.' },
|
|
120
149
|
timeout_ms: {
|
|
121
150
|
type: 'integer',
|
|
122
151
|
minimum: 1000,
|
|
123
152
|
maximum: 3600000,
|
|
124
|
-
description: '
|
|
153
|
+
description: 'Hard time limit for the task in milliseconds. The task is marked `timed_out` if it exceeds this. Defaults to the provider default when omitted.',
|
|
125
154
|
},
|
|
126
155
|
keep_alive: {
|
|
127
156
|
type: 'number',
|
|
128
|
-
description: 'SEP-1686
|
|
157
|
+
description: 'SEP-1686 retention window (ms). How long the server keeps the completed task result available for follow-up queries after the task finishes.',
|
|
129
158
|
},
|
|
130
159
|
labels: {
|
|
131
160
|
type: 'array',
|
|
132
161
|
items: { type: 'string' },
|
|
133
|
-
description: '
|
|
162
|
+
description: 'Free-form tags for filtering and grouping in the task scoreboard. Purely organizational — they do not affect execution.',
|
|
134
163
|
},
|
|
135
164
|
depends_on: {
|
|
136
165
|
type: 'array',
|
|
137
166
|
items: { type: 'string' },
|
|
138
|
-
description: 'Task IDs that must
|
|
167
|
+
description: 'Task IDs that must reach a terminal state before this task is allowed to start. Use this to chain dependent work.',
|
|
139
168
|
},
|
|
140
169
|
developer_instructions: {
|
|
141
170
|
type: 'string',
|
|
142
|
-
description: 'System-level instructions injected
|
|
171
|
+
description: 'System-level instructions injected ahead of the user prompt. Use this for hard constraints (coding style, allowed directories, forbidden actions) rather than folding them into the prompt.',
|
|
143
172
|
},
|
|
144
173
|
context_files: {
|
|
145
174
|
type: 'array',
|
|
146
175
|
items: {
|
|
147
176
|
type: 'object',
|
|
148
177
|
properties: {
|
|
149
|
-
path: { type: 'string', description: '
|
|
150
|
-
description: { type: 'string', description: 'Optional
|
|
178
|
+
path: { type: 'string', description: 'Absolute path of a file to include as context for the task.' },
|
|
179
|
+
description: { type: 'string', description: 'Optional note explaining why this file is relevant so the agent knows what to look at.' },
|
|
151
180
|
},
|
|
152
181
|
required: ['path'],
|
|
153
182
|
},
|
|
154
|
-
description: 'Files to
|
|
183
|
+
description: 'Files to prepend as additional context. Use sparingly — context is not free; prefer pointing at files from the prompt when the agent can open them itself.',
|
|
155
184
|
},
|
|
156
185
|
}, ['prompt']),
|
|
157
186
|
validate: (value) => spawnTaskSchema.parse(value),
|
|
@@ -159,27 +188,27 @@ export function createToolDefinitions(): ToolDefinition[] {
|
|
|
159
188
|
{
|
|
160
189
|
name: 'wait-task',
|
|
161
190
|
description: [
|
|
162
|
-
'Block until a task
|
|
191
|
+
'Block until a task settles or asks for input. This is how you synchronously track progress after `spawn-task` or `message-task`.',
|
|
192
|
+
'',
|
|
193
|
+
'Returns as soon as the task reaches a terminal state (`completed`, `failed`, `cancelled`, `timed_out`) or enters `waiting_answer` because the agent needs an approval or structured input. If `timeout_ms` elapses first, it returns the current status and you decide whether to wait again.',
|
|
163
194
|
'',
|
|
164
|
-
'
|
|
165
|
-
'If timeout_ms elapses, returns the current status — the caller decides whether to wait again.',
|
|
166
|
-
'Use after spawn-task to synchronously track task progress.',
|
|
195
|
+
'PATTERN: loop `wait-task` → if `waiting_answer`, call `respond-task` with the matching question payload → loop back to `wait-task`. Do not busy-poll with short timeouts; give each call enough time to catch meaningful progress.',
|
|
167
196
|
].join('\n'),
|
|
168
197
|
inputSchema: objectSchema({
|
|
169
|
-
task_id: { type: 'string', minLength: 1, description: 'ID of the task to wait on
|
|
198
|
+
task_id: { type: 'string', minLength: 1, description: 'ID of the task to wait on, as returned by `spawn-task`.' },
|
|
170
199
|
timeout_ms: {
|
|
171
200
|
type: 'integer',
|
|
172
201
|
minimum: 1,
|
|
173
202
|
maximum: 300000,
|
|
174
203
|
default: 30000,
|
|
175
|
-
description: '
|
|
204
|
+
description: 'Maximum time to block in milliseconds. Returns early on terminal state or `waiting_answer`. Defaults to 30,000 (30 seconds).',
|
|
176
205
|
},
|
|
177
206
|
poll_interval_ms: {
|
|
178
207
|
type: 'integer',
|
|
179
208
|
minimum: 250,
|
|
180
209
|
maximum: 30000,
|
|
181
210
|
default: 1000,
|
|
182
|
-
description: 'Internal poll interval in
|
|
211
|
+
description: 'Internal poll interval in milliseconds. Keep the default (1,000) unless you have a specific reason to tune it.',
|
|
183
212
|
},
|
|
184
213
|
}, ['task_id']),
|
|
185
214
|
validate: (value) => waitTaskSchema.parse(value),
|
|
@@ -187,49 +216,49 @@ export function createToolDefinitions(): ToolDefinition[] {
|
|
|
187
216
|
{
|
|
188
217
|
name: 'respond-task',
|
|
189
218
|
description: [
|
|
190
|
-
'
|
|
219
|
+
'Unblock a task that is in `waiting_answer` because the agent requested input or an approval.',
|
|
191
220
|
'',
|
|
192
|
-
'The
|
|
193
|
-
'- user_input
|
|
194
|
-
'- command_approval: accept
|
|
195
|
-
'- file_approval: accept
|
|
196
|
-
'- elicitation: accept
|
|
197
|
-
'- dynamic_tool
|
|
221
|
+
'The payload shape is discriminated by `type` and must match the pending question surfaced by `wait-task`:',
|
|
222
|
+
'- `user_input` — reply with `answers` as a map of question id → string.',
|
|
223
|
+
'- `command_approval` — `decision: "accept" | "reject"` for a proposed shell command.',
|
|
224
|
+
'- `file_approval` — `decision: "accept" | "reject"` for a proposed file edit.',
|
|
225
|
+
'- `elicitation` — `action: "accept" | "decline"`, plus optional structured `content` for MCP elicitation prompts.',
|
|
226
|
+
'- `dynamic_tool` — return a tool call result via `result`, or an `error` string on failure.',
|
|
198
227
|
'',
|
|
199
|
-
'After responding
|
|
228
|
+
'After responding the task resumes automatically. Follow up with `wait-task` to track the next step.',
|
|
200
229
|
].join('\n'),
|
|
201
230
|
inputSchema: objectSchema({
|
|
202
|
-
task_id: { type: 'string', minLength: 1, description: 'ID of the paused task.' },
|
|
231
|
+
task_id: { type: 'string', minLength: 1, description: 'ID of the paused task. Must currently be in `waiting_answer`.' },
|
|
203
232
|
type: {
|
|
204
233
|
type: 'string',
|
|
205
234
|
enum: ['user_input', 'command_approval', 'file_approval', 'elicitation', 'dynamic_tool'],
|
|
206
|
-
description: '
|
|
235
|
+
description: 'Which pending-question variant you are answering. Must match the `type` of the question returned by `wait-task` exactly.',
|
|
207
236
|
},
|
|
208
237
|
answers: {
|
|
209
238
|
type: 'object',
|
|
210
|
-
description: 'For user_input
|
|
239
|
+
description: 'For `user_input`: map of each question id (as returned in the pending question) to the user-facing answer string.',
|
|
211
240
|
},
|
|
212
241
|
decision: {
|
|
213
242
|
type: 'string',
|
|
214
243
|
enum: ['accept', 'reject'],
|
|
215
|
-
description: 'For command_approval or file_approval
|
|
244
|
+
description: 'For `command_approval` or `file_approval`: whether the agent may run the proposed command / apply the proposed edit.',
|
|
216
245
|
},
|
|
217
246
|
action: {
|
|
218
247
|
type: 'string',
|
|
219
248
|
enum: ['accept', 'decline'],
|
|
220
|
-
description: 'For elicitation
|
|
249
|
+
description: 'For `elicitation`: accept the MCP server\'s elicitation request or decline it.',
|
|
221
250
|
},
|
|
222
251
|
content: {
|
|
223
252
|
type: 'object',
|
|
224
|
-
description: 'For elicitation
|
|
253
|
+
description: 'For `elicitation`: optional structured payload that satisfies the requested schema when accepting.',
|
|
225
254
|
},
|
|
226
255
|
result: {
|
|
227
256
|
type: 'string',
|
|
228
|
-
description: 'For dynamic_tool
|
|
257
|
+
description: 'For `dynamic_tool`: the tool call result string returned to the agent.',
|
|
229
258
|
},
|
|
230
259
|
error: {
|
|
231
260
|
type: 'string',
|
|
232
|
-
description: 'For dynamic_tool
|
|
261
|
+
description: 'For `dynamic_tool`: error string if the tool call failed. Sets `success=false` on the response.',
|
|
233
262
|
},
|
|
234
263
|
}, ['task_id', 'type']),
|
|
235
264
|
validate: (value) => respondTaskSchema.parse(value),
|
|
@@ -237,15 +266,20 @@ export function createToolDefinitions(): ToolDefinition[] {
|
|
|
237
266
|
{
|
|
238
267
|
name: 'message-task',
|
|
239
268
|
description: [
|
|
240
|
-
'Send a follow-up message to an existing task.',
|
|
269
|
+
'Send a follow-up message to an existing task on its original session.',
|
|
241
270
|
'',
|
|
242
|
-
'
|
|
243
|
-
'
|
|
271
|
+
'Use this to add instructions to a still-running task, ask a completed task to refine or extend its work, or steer the agent after reviewing partial results. If the task is idle, the session is resumed first; if it is actively running, the message is queued as the next turn.',
|
|
272
|
+
'',
|
|
273
|
+
'After calling, follow up with `wait-task` exactly like after `spawn-task`.',
|
|
244
274
|
].join('\n'),
|
|
245
275
|
inputSchema: objectSchema({
|
|
246
|
-
task_id: { type: 'string', minLength: 1, description: 'ID of the task
|
|
247
|
-
message: { type: 'string', minLength: 1, description: '
|
|
248
|
-
|
|
276
|
+
task_id: { type: 'string', minLength: 1, description: 'ID of the task whose session should receive the follow-up.' },
|
|
277
|
+
message: { type: 'string', minLength: 1, description: 'The follow-up instruction or question. Be as specific as the original prompt — reference files and expected behavior.' },
|
|
278
|
+
reasoning: {
|
|
279
|
+
type: 'string',
|
|
280
|
+
enum: [...REASONING_OPTIONS],
|
|
281
|
+
description: `${REASONING_DESCRIPTION}\n\nOverrides the reasoning used for this follow-up turn only.`,
|
|
282
|
+
},
|
|
249
283
|
}, ['task_id', 'message']),
|
|
250
284
|
validate: (value) => messageTaskSchema.parse(value),
|
|
251
285
|
},
|
|
@@ -254,17 +288,15 @@ export function createToolDefinitions(): ToolDefinition[] {
|
|
|
254
288
|
description: [
|
|
255
289
|
'Cancel one or more running tasks.',
|
|
256
290
|
'',
|
|
257
|
-
'Accepts a single task_id
|
|
258
|
-
'For running tasks, signals the provider to abort execution.',
|
|
259
|
-
'Tasks already in a terminal state are counted as already_terminal in the response.',
|
|
291
|
+
'Accepts a single task_id or an array. For each running task, asks the provider to abort execution and marks the task `cancelled`. Tasks already in a terminal state are returned under `already_terminal`; unknown ids are returned under `not_found`. Safe to call on a batch — each id is handled independently.',
|
|
260
292
|
].join('\n'),
|
|
261
293
|
inputSchema: objectSchema({
|
|
262
294
|
task_id: {
|
|
263
295
|
oneOf: [
|
|
264
296
|
{ type: 'string', minLength: 1, description: 'Single task ID to cancel.' },
|
|
265
|
-
{ type: 'array', items: { type: 'string', minLength: 1 }, minItems: 1, description: 'Array of task IDs to cancel.' },
|
|
297
|
+
{ type: 'array', items: { type: 'string', minLength: 1 }, minItems: 1, description: 'Array of task IDs to cancel in one call.' },
|
|
266
298
|
],
|
|
267
|
-
description: 'Task ID or array of task IDs to cancel.',
|
|
299
|
+
description: 'Task ID or array of task IDs to cancel. Accepts both a single string and an array for convenience.',
|
|
268
300
|
},
|
|
269
301
|
}, ['task_id']),
|
|
270
302
|
validate: (value) => cancelTaskSchema.parse(value),
|
|
@@ -22,6 +22,7 @@ import {
|
|
|
22
22
|
ProfileManager,
|
|
23
23
|
type CodexProfile,
|
|
24
24
|
} from './profile-manager.js';
|
|
25
|
+
import type { ReasoningEffortLevel } from './reasoning-options.js';
|
|
25
26
|
|
|
26
27
|
interface RuntimeRequestOptions {
|
|
27
28
|
timeoutMs?: number | undefined;
|
|
@@ -299,6 +300,7 @@ export class CodexRuntime {
|
|
|
299
300
|
|
|
300
301
|
buildThreadStartParams(input: {
|
|
301
302
|
model?: string | undefined;
|
|
303
|
+
effort?: ReasoningEffortLevel | undefined;
|
|
302
304
|
cwd?: string | undefined;
|
|
303
305
|
developerInstructions?: string | undefined;
|
|
304
306
|
}): Promise<{ params: Record<string, unknown>; remappedFrom?: string | undefined; }> {
|
|
@@ -308,7 +310,7 @@ export class CodexRuntime {
|
|
|
308
310
|
cwd: input.cwd ?? process.cwd(),
|
|
309
311
|
approvalPolicy: 'on-request',
|
|
310
312
|
sandbox: 'workspace-write',
|
|
311
|
-
reasoningEffort:
|
|
313
|
+
...(input.effort ? { reasoningEffort: input.effort } : {}),
|
|
312
314
|
developerInstructions: appendFleetDeveloperInstructions(input.developerInstructions),
|
|
313
315
|
experimentalRawEvents: false,
|
|
314
316
|
persistExtendedHistory: false,
|
|
@@ -323,6 +325,7 @@ export class CodexRuntime {
|
|
|
323
325
|
buildThreadResumeParams(input: {
|
|
324
326
|
threadId: string;
|
|
325
327
|
model?: string | undefined;
|
|
328
|
+
effort?: ReasoningEffortLevel | undefined;
|
|
326
329
|
cwd?: string | undefined;
|
|
327
330
|
developerInstructions?: string | undefined;
|
|
328
331
|
}): Promise<{ params: Record<string, unknown>; remappedFrom?: string | undefined; }> {
|
|
@@ -333,7 +336,7 @@ export class CodexRuntime {
|
|
|
333
336
|
cwd: input.cwd ?? process.cwd(),
|
|
334
337
|
approvalPolicy: 'on-request',
|
|
335
338
|
sandbox: 'workspace-write',
|
|
336
|
-
reasoningEffort:
|
|
339
|
+
...(input.effort ? { reasoningEffort: input.effort } : {}),
|
|
337
340
|
developerInstructions: appendFleetDeveloperInstructions(input.developerInstructions),
|
|
338
341
|
persistExtendedHistory: false,
|
|
339
342
|
};
|
|
@@ -348,11 +351,13 @@ export class CodexRuntime {
|
|
|
348
351
|
threadId: string;
|
|
349
352
|
userInput: string;
|
|
350
353
|
model?: string | undefined;
|
|
354
|
+
effort?: ReasoningEffortLevel | undefined;
|
|
351
355
|
}): Promise<{ params: Record<string, unknown>; remappedFrom?: string | undefined; }> {
|
|
352
356
|
return this.resolveModelIfRequested(input.model).then((resolved) => ({
|
|
353
357
|
params: {
|
|
354
358
|
threadId: input.threadId,
|
|
355
359
|
...(resolved ? { model: resolved.resolvedModel } : {}),
|
|
360
|
+
...(input.effort ? { effort: input.effort } : {}),
|
|
356
361
|
input: [{
|
|
357
362
|
type: 'text',
|
|
358
363
|
text: input.userInput,
|
|
@@ -407,12 +412,16 @@ export class CodexRuntime {
|
|
|
407
412
|
return [...this.knownThreadIds].sort();
|
|
408
413
|
}
|
|
409
414
|
|
|
410
|
-
async ensureThreadLoaded(
|
|
415
|
+
async ensureThreadLoaded(
|
|
416
|
+
threadId: string,
|
|
417
|
+
model?: string | undefined,
|
|
418
|
+
effort?: ReasoningEffortLevel | undefined,
|
|
419
|
+
): Promise<void> {
|
|
411
420
|
if (!this.knownThreadIds.has(threadId) || this.loadedThreadIds.has(threadId)) {
|
|
412
421
|
return;
|
|
413
422
|
}
|
|
414
423
|
|
|
415
|
-
const built = await this.buildThreadResumeParams({ threadId, model });
|
|
424
|
+
const built = await this.buildThreadResumeParams({ threadId, model, effort });
|
|
416
425
|
await this.request('thread/resume', built.params);
|
|
417
426
|
}
|
|
418
427
|
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
// Reasoning option parsing
//
// The MCP `reasoning` parameter is a hardcoded allow-list of `gpt-5.4` model
// variants paired with a Codex reasoning-effort level. Everything flowing
// through the tools is one of the exact strings in REASONING_OPTIONS — we
// split it into `{ model, effort }` at the boundary so the adapter chain can
// pass the two fields independently to Codex (`reasoningEffort` on
// thread/start, `effort` on turn/start).
//
// REASONING_OPTIONS is the single source of truth: the parse table below is
// derived from it at module load, so the allow-list and the parser cannot
// drift apart.
// ---------------------------------------------------------------------------

/** Every effort level a reasoning option may carry. */
const EFFORT_LEVELS = ['low', 'medium', 'high', 'xhigh'] as const;

/** Reasoning-effort level forwarded to Codex. */
export type ReasoningEffortLevel = (typeof EFFORT_LEVELS)[number];

/** The only model we expose. Hardcoded — do not add variants without intent. */
export const ALLOWED_MODEL = 'gpt-5.4';

/**
 * The full set of accepted `reasoning` values. Order matters for display:
 * medium/high first (the common cases), xhigh next (exceptional research),
 * low last (rare, kept for completeness).
 *
 * Kept as a literal tuple (not computed) so it can be handed directly to
 * schema builders that need a non-empty tuple of string literals.
 */
export const REASONING_OPTIONS = [
  'gpt-5.4(medium)',
  'gpt-5.4(high)',
  'gpt-5.4(xhigh)',
  'gpt-5.4(low)',
] as const;

/** Union of the exact strings listed in {@link REASONING_OPTIONS}. */
export type ReasoningOption = (typeof REASONING_OPTIONS)[number];

/** A `reasoning` value split into the two fields Codex receives separately. */
export interface ParsedReasoning {
  model: string;
  effort: ReasoningEffortLevel;
}

/** Narrow an arbitrary string to a known effort level. */
function isEffortLevel(value: string): value is ReasoningEffortLevel {
  return (EFFORT_LEVELS as readonly string[]).includes(value);
}

/**
 * Split one allow-list entry of the form `<model>(<effort>)`.
 *
 * Only ever called on entries of REASONING_OPTIONS while the module loads, so
 * a throw here means the allow-list itself was edited into an invalid state
 * (wrong shape, a model other than ALLOWED_MODEL, or an unknown effort).
 */
function splitOption(option: string): ParsedReasoning {
  const open = option.indexOf('(');
  const model = option.slice(0, open);
  const effort = option.slice(open + 1, -1);
  if (
    open <= 0 ||
    !option.endsWith(')') ||
    model !== ALLOWED_MODEL ||
    !isEffortLevel(effort)
  ) {
    throw new Error(`Malformed REASONING_OPTIONS entry "${option}"`);
  }
  return { model, effort };
}

/** Parse table built once from the allow-list; keys are the exact option strings. */
const PARSED_BY_OPTION: ReadonlyMap<string, ParsedReasoning> = new Map(
  REASONING_OPTIONS.map((option): [string, ParsedReasoning] => [option, splitOption(option)]),
);

/**
 * Type guard: true only when `value` is one of the exact strings in
 * {@link REASONING_OPTIONS}.
 */
export function isReasoningOption(value: unknown): value is ReasoningOption {
  return typeof value === 'string' && PARSED_BY_OPTION.has(value);
}

/**
 * Parse a `reasoning` value such as `gpt-5.4(high)` into its model id and
 * reasoning-effort level.
 *
 * @param value - Candidate reasoning string; matched exactly (no trimming,
 *   no case folding).
 * @returns A fresh `{ model, effort }` object the caller may mutate freely.
 * @throws Error if `value` is not in {@link REASONING_OPTIONS}; the message
 *   lists the allowed values.
 */
export function parseReasoning(value: string): ParsedReasoning {
  const parsed = PARSED_BY_OPTION.get(value);
  if (parsed === undefined) {
    throw new Error(
      `Invalid reasoning option "${value}". Allowed: ${REASONING_OPTIONS.join(', ')}`,
    );
  }
  // Copy so callers can never mutate the shared parse table.
  return { ...parsed };
}
|