@skelm/codex 0.4.2 → 0.4.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -58,7 +58,7 @@ export default defineConfig({
58
58
  ```
59
59
 
60
60
  ```ts
61
- // codex-smoke.pipeline.ts
61
+ // codex-smoke.pipeline.mts
62
62
  import { agent, pipeline } from 'skelm'
63
63
  import { z } from 'zod'
64
64
 
@@ -84,7 +84,7 @@ export default pipeline({
84
84
  ```
85
85
 
86
86
  ```bash
87
- skelm run codex-smoke.pipeline.ts --input '{"task":"say ok"}'
87
+ skelm run codex-smoke.pipeline.mts --input '{"task":"say ok"}'
88
88
  ```
89
89
 
90
90
  ## Permission mapping
package/dist/backend.js CHANGED
@@ -1,5 +1,9 @@
1
- import { buildSystemPromptFromRequest, loadSkillBodies, resolvePermissions } from '@skelm/core';
1
+ import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
2
+ import { tmpdir } from 'node:os';
3
+ import { join } from 'node:path';
4
+ import { BackendConfigError, buildSystemPromptFromRequest, extractPromptText, loadSkillBodies, resolvePermissions, } from '@skelm/core';
2
5
  import { buildCodexOptions, buildMcpServerConfig, buildThreadOptions, consumeStream, makeCodexClient, } from './client.js';
6
+ import { toCodexOutputSchema } from './output-schema.js';
3
7
  import { mapPermissionsToCodex } from './permission-mapper.js';
4
8
  /**
5
9
  * SkelmBackend for OpenAI Codex via the official `@openai/codex-sdk`.
@@ -30,6 +34,12 @@ export function createCodexBackend(options = {}) {
30
34
  // Skelm checks at the boundary (refusing unsafe combinations before any
31
35
  // Codex call); Codex enforces at runtime.
32
36
  toolPermissions: 'native',
37
+ // Image content is forwarded as `{type:'local_image', path}` per the
38
+ // codex-sdk schema; bytes are materialized to a temp file for the turn
39
+ // and cleaned up afterwards. Whether the configured codex model can
40
+ // actually process images is up to the model — non-vision models will
41
+ // surface a provider error that propagates as a step failure.
42
+ vision: options.vision ?? true,
33
43
  };
34
44
  const backend = {
35
45
  id: options.id ?? 'codex',
@@ -51,18 +61,10 @@ export function createCodexBackend(options = {}) {
51
61
  policy,
52
62
  ...(request.cwd !== undefined && { workingDirectory: request.cwd }),
53
63
  });
54
- // Filter requested MCP servers through the allowlist.
64
+ // Filter requested MCP servers through the allowlist; the runner's
65
+ // audit writer is the single durable record for denials.
55
66
  const allowed = filterAllowedMcp(request.mcpServers, policy.allowedMcpServers);
56
67
  const mcpConfig = buildMcpServerConfig(allowed.allowed);
57
- const deniedMcp = allowed.denied.map((s) => s.id);
58
- // Audit-only for now; the runner's audit writer is the durable record.
59
- const logDenial = (dimension, ids, reason) => console.warn(JSON.stringify({ event: 'permission.denied', dimension, ids, reason, backend: 'codex' }));
60
- if (deniedMcp.length > 0) {
61
- logDenial('mcp', deniedMcp, 'not-in-allowlist');
62
- }
63
- if (mcpConfig !== null && mcpConfig.dropped.length > 0) {
64
- logDenial('mcp', mcpConfig.dropped, 'transport-unsupported');
65
- }
66
68
  // Construct the SDK client with config + proxy env. Only forward
67
69
  // `mcp_servers` to Codex — never leak the `dropped` bookkeeping field.
68
70
  const codexOpts = buildCodexOptions(options, {
@@ -73,7 +75,19 @@ export function createCodexBackend(options = {}) {
73
75
  // Compose the system prompt via @skelm/core's shared builder so
74
76
  // systemPromptMode / systemPromptIncludeAgentDef take effect here.
75
77
  const systemPrompt = await composeSystemPrompt(request, context, options.model);
76
- const userPrompt = systemPrompt === undefined ? request.prompt : `${systemPrompt}\n\n---\n\n${request.prompt}`;
78
+ // Codex SDK accepts string OR Array<{type:'text'}|{type:'local_image',path}>.
79
+ // For image-bearing prompts we materialize each image to a temp file
80
+ // (Codex requires filesystem paths, not data URLs) and clean up after
81
+ // the turn. Pure-text prompts keep the prior compact "<system>\n\n---\n\n<text>" shape.
82
+ const imageRoots = [];
83
+ const userPrompt = typeof request.prompt === 'string' || extractImageParts(request.prompt).length === 0
84
+ ? (() => {
85
+ const promptText = extractPromptText(request.prompt);
86
+ return systemPrompt === undefined
87
+ ? promptText
88
+ : `${systemPrompt}\n\n---\n\n${promptText}`;
89
+ })()
90
+ : buildCodexMultimodalInput(request.prompt, systemPrompt, imageRoots);
77
91
  // Build the thread (resume vs fresh) honoring per-step sandbox/approval.
78
92
  const threadOpts = buildThreadOptions(options, {
79
93
  sandboxMode: mapped.sandboxMode,
@@ -96,8 +110,13 @@ export function createCodexBackend(options = {}) {
96
110
  // backend's `timeoutMs` (defensive ceiling). The SDK honors a single
97
111
  // AbortSignal on TurnOptions natively.
98
112
  const turnSignal = composeAbortSignal(context.signal, options.timeoutMs ?? 300_000);
113
+ // The SDK writes outputSchema verbatim to the --output-schema file, and
114
+ // the OpenAI structured-output API requires strict JSON Schema
115
+ // (additionalProperties:false + required). Convert the step's schema here
116
+ // rather than handing the SDK a raw standard-schema object.
117
+ const codexOutputSchema = await toCodexOutputSchema(request.outputSchema);
99
118
  const { events } = await thread.runStreamed(userPrompt, {
100
- ...(request.outputSchema !== undefined && { outputSchema: request.outputSchema }),
119
+ ...(codexOutputSchema !== undefined && { outputSchema: codexOutputSchema }),
101
120
  signal: turnSignal.signal,
102
121
  });
103
122
  let result;
@@ -108,6 +127,7 @@ export function createCodexBackend(options = {}) {
108
127
  }
109
128
  finally {
110
129
  turnSignal.cancel();
130
+ cleanupTempImageRoots(imageRoots);
111
131
  }
112
132
  const response = {
113
133
  text: result.finalText,
@@ -129,6 +149,74 @@ export function createCodexBackend(options = {}) {
129
149
  };
130
150
  return backend;
131
151
  }
152
+ function extractImageParts(prompt) {
153
+ if (typeof prompt === 'string')
154
+ return [];
155
+ return prompt.filter((p) => p.type === 'image');
156
+ }
157
+ function mimeToExt(mime) {
158
+ switch (mime) {
159
+ case 'image/png':
160
+ return '.png';
161
+ case 'image/jpeg':
162
+ return '.jpg';
163
+ case 'image/webp':
164
+ return '.webp';
165
+ case 'image/gif':
166
+ return '.gif';
167
+ default:
168
+ return '.bin';
169
+ }
170
+ }
171
+ function buildCodexMultimodalInput(prompt, systemPrompt, imageRoots) {
172
+ const tmp = mkdtempSync(join(tmpdir(), 'skelm-codex-img-'));
173
+ imageRoots.push(tmp);
174
+ const parts = [];
175
+ let imgIdx = 0;
176
+ // Seed the text buffer with the system prompt block so it always lands as
177
+ // the FIRST text part — even when the prompt is `[image, text, ...]` and we
178
+ // would otherwise flush a `local_image` before seeing any user text.
179
+ // Mirrors the pure-text fallback `"<system>\n\n---\n\n<text>"` higher up,
180
+ // so callers see consistent ordering regardless of which path the request
181
+ // takes.
182
+ let textBuf = systemPrompt !== undefined ? `${systemPrompt}\n\n---\n\n` : '';
183
+ if (typeof prompt === 'string') {
184
+ parts.push({ type: 'text', text: `${textBuf}${prompt}` });
185
+ return parts;
186
+ }
187
+ for (const part of prompt) {
188
+ if (part.type === 'text') {
189
+ textBuf += part.text;
190
+ }
191
+ else if (part.type === 'image') {
192
+ if (textBuf.length > 0) {
193
+ parts.push({ type: 'text', text: textBuf });
194
+ textBuf = '';
195
+ }
196
+ const file = join(tmp, `img${imgIdx++}${mimeToExt(part.mimeType)}`);
197
+ try {
198
+ writeFileSync(file, Buffer.from(part.data, 'base64'));
199
+ }
200
+ catch (err) {
201
+ throw new BackendConfigError(`codex backend: failed to materialize image to ${file}: ${err.message}`, 'codex');
202
+ }
203
+ parts.push({ type: 'local_image', path: file });
204
+ }
205
+ }
206
+ if (textBuf.length > 0)
207
+ parts.push({ type: 'text', text: textBuf });
208
+ return parts;
209
+ }
210
+ function cleanupTempImageRoots(roots) {
211
+ for (const root of roots) {
212
+ try {
213
+ rmSync(root, { recursive: true, force: true });
214
+ }
215
+ catch {
216
+ // Best-effort cleanup; OS will eventually reap /tmp anyway.
217
+ }
218
+ }
219
+ }
132
220
  function filterAllowedMcp(servers, allowlist) {
133
221
  if (servers === undefined || servers.length === 0)
134
222
  return { allowed: [], denied: [] };
@@ -170,12 +258,6 @@ async function composeSystemPrompt(req, ctx, model) {
170
258
  return undefined;
171
259
  return parts.join('\n\n---\n\n');
172
260
  }
173
- /**
174
- * AgentRequest doesn't have a typed sessionId field at the moment, but
175
- * runners may attach one through structural typing. Read defensively.
176
- * TODO(@skelm/core): promote `sessionId?: string` to AgentRequest so this
177
- * cast goes away.
178
- */
179
261
  function readSessionId(request) {
180
262
  const sid = request.sessionId;
181
263
  return typeof sid === 'string' && sid.length > 0 ? sid : undefined;
package/dist/client.js CHANGED
@@ -8,6 +8,7 @@
8
8
  * can publish onto skelm's event bus and `onPartial` callback.
9
9
  */
10
10
  import { Codex } from '@openai/codex-sdk';
11
+ import { BackendUpstreamError } from '@skelm/core';
11
12
  /** Build CodexOptions from CodexBackendOptions + per-run overrides. */
12
13
  export function buildCodexOptions(opts, overrides = {}) {
13
14
  const out = {};
@@ -133,11 +134,11 @@ export async function consumeStream(events, callbacks) {
133
134
  case 'turn.failed':
134
135
  stopReason = 'turn.failed';
135
136
  callbacks.onError?.(ev.error.message);
136
- throw new Error(`codex turn failed: ${ev.error.message}`);
137
+ throw new BackendUpstreamError(`codex turn failed: ${ev.error.message}`, 'codex');
137
138
  case 'error':
138
139
  stopReason = 'error';
139
140
  callbacks.onError?.(ev.message);
140
- throw new Error(`codex stream error: ${ev.message}`);
141
+ throw new BackendUpstreamError(`codex stream error: ${ev.message}`, 'codex');
141
142
  // thread.started, turn.started, item.started, item.updated: not
142
143
  // material to the final response; surface via onItem if the caller
143
144
  // wants per-item audit (it doesn't, by default).
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Convert a step's output schema into the strict JSON Schema the Codex SDK
3
+ * requires. Codex writes `outputSchema` to a file and passes it to the OpenAI
4
+ * structured-output API via `--output-schema`; that API rejects any object
5
+ * schema that omits `additionalProperties: false` or doesn't list every
6
+ * property in `required`. The schema reaching this point is a standard-schema
7
+ * (Zod is the documented default) or an already-plain JSON Schema object.
8
+ *
9
+ * Returns `undefined` when no schema was requested.
10
+ */
11
+ export declare function toCodexOutputSchema(schema: unknown): Promise<Record<string, unknown> | undefined>;
@@ -0,0 +1,60 @@
1
+ import { BackendConfigError } from '@skelm/core';
2
+ /**
3
+ * Convert a step's output schema into the strict JSON Schema the Codex SDK
4
+ * requires. Codex writes `outputSchema` to a file and passes it to the OpenAI
5
+ * structured-output API via `--output-schema`; that API rejects any object
6
+ * schema that omits `additionalProperties: false` or doesn't list every
7
+ * property in `required`. The schema reaching this point is a standard-schema
8
+ * (Zod is the documented default) or an already-plain JSON Schema object.
9
+ *
10
+ * Returns `undefined` when no schema was requested.
11
+ */
12
+ export async function toCodexOutputSchema(schema) {
13
+ if (schema === undefined)
14
+ return undefined;
15
+ const json = await toJsonSchema(schema);
16
+ return enforceStrict(json);
17
+ }
18
+ async function toJsonSchema(schema) {
19
+ if (typeof schema !== 'object' || schema === null) {
20
+ throw new BackendConfigError('codex outputSchema must be a Zod schema or a JSON Schema object', 'codex');
21
+ }
22
+ const standard = schema['~standard'];
23
+ if (standard !== undefined) {
24
+ if (standard.vendor !== 'zod') {
25
+ throw new BackendConfigError(`codex outputSchema: cannot convert a '${String(standard.vendor)}' schema to JSON Schema; use Zod or pass a JSON Schema object`, 'codex');
26
+ }
27
+ const z = (await import('zod'));
28
+ const fn = z.toJSONSchema ?? z.default?.toJSONSchema;
29
+ if (typeof fn !== 'function') {
30
+ throw new BackendConfigError('codex outputSchema: installed zod lacks toJSONSchema', 'codex');
31
+ }
32
+ return fn(schema);
33
+ }
34
+ // Already a plain JSON Schema object.
35
+ return schema;
36
+ }
37
+ /**
38
+ * Deep-clone the JSON Schema, forcing `additionalProperties: false` and
39
+ * `required: <all keys>` on every object node (recursing through properties,
40
+ * items, $defs/definitions, and the anyOf/oneOf/allOf combinators). This is
41
+ * what OpenAI strict structured output demands.
42
+ */
43
+ function enforceStrict(node) {
44
+ if (Array.isArray(node))
45
+ return node.map(enforceStrict);
46
+ if (node === null || typeof node !== 'object')
47
+ return node;
48
+ const out = {};
49
+ for (const [key, value] of Object.entries(node)) {
50
+ out[key] = enforceStrict(value);
51
+ }
52
+ const props = out.properties;
53
+ if (out.type === 'object' || (props !== null && typeof props === 'object')) {
54
+ out.additionalProperties = false;
55
+ if (props !== null && typeof props === 'object') {
56
+ out.required = Object.keys(props);
57
+ }
58
+ }
59
+ return out;
60
+ }
package/dist/types.d.ts CHANGED
@@ -38,6 +38,14 @@ export interface CodexBackendOptions {
38
38
  * cancellation; this is a defensive ceiling. Default: 300_000 (5 min).
39
39
  */
40
40
  timeoutMs?: number;
41
+ /**
42
+ * Advertise `capabilities.vision`. Defaults to `true`: image content is
43
+ * materialized to a temp file and forwarded as `{type:'local_image', path}`
44
+ * per the codex-sdk Input schema. Set `false` for codex configurations
45
+ * pinned to a known text-only model — the framework gate then rejects
46
+ * image prompts at step start with no codex turn ever started.
47
+ */
48
+ vision?: boolean;
41
49
  }
42
50
  /**
43
51
  * Resolved Codex SDK options after skelm permissions are applied. Produced
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@skelm/codex",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "description": "OpenAI Codex backend for skelm via the official @openai/codex-sdk",
5
5
  "license": "MIT",
6
6
  "author": "Scott Glover <scottgl@gmail.com>",
@@ -49,10 +49,10 @@
49
49
  "@openai/codex-sdk": "^0.130.0"
50
50
  },
51
51
  "peerDependencies": {
52
- "@skelm/core": "^0.4.2"
52
+ "@skelm/core": "^0.4.4"
53
53
  },
54
54
  "devDependencies": {
55
- "@skelm/core": "^0.4.2",
55
+ "@skelm/core": "^0.4.4",
56
56
  "@types/node": "^20.10.0",
57
57
  "typescript": "^5.3.0",
58
58
  "vitest": "^2.1.5"