@janole/ai-sdk-provider-codex-asp 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -147,6 +147,7 @@ See [`src/provider.ts`](src/provider.ts) for full type definitions.
147
147
  See the [`examples/`](examples/) directory:
148
148
 
149
149
  - [`generate-text.ts`](examples/generate-text.ts) — Non-streaming text generation
150
+ - [`generate-object.ts`](examples/generate-object.ts) — Structured output with `generateText` + `Output.object`
150
151
  - [`stream-text.ts`](examples/stream-text.ts) — Streaming text generation
151
152
  - [`cross-call-tools.ts`](examples/cross-call-tools.ts) — Standard AI SDK tools via Codex
152
153
  - [`dynamic-tools.ts`](examples/dynamic-tools.ts) — Provider-level dynamic tools
package/dist/index.cjs CHANGED
@@ -1,6 +1,11 @@
1
1
  'use strict';
2
2
 
3
3
  var child_process = require('child_process');
4
+ var crypto = require('crypto');
5
+ var promises = require('fs/promises');
6
+ var os = require('os');
7
+ var path = require('path');
8
+ var url = require('url');
4
9
  var provider = require('@ai-sdk/provider');
5
10
 
6
11
  // src/utils/object.ts
@@ -930,7 +935,7 @@ var DynamicToolsDispatcher = class {
930
935
  // package.json
931
936
  var package_default = {
932
937
  name: "@janole/ai-sdk-provider-codex-asp",
933
- version: "0.2.3"};
938
+ version: "0.3.0"};
934
939
 
935
940
  // src/package-info.ts
936
941
  var PACKAGE_NAME = package_default.name;
@@ -938,14 +943,14 @@ var PACKAGE_VERSION = package_default.version;
938
943
 
939
944
  // src/protocol/provider-metadata.ts
940
945
  var CODEX_PROVIDER_ID = "@janole/ai-sdk-provider-codex-asp";
941
- function codexProviderMetadata(threadId) {
946
+ function codexProviderMetadata(threadId, turnId) {
942
947
  if (!threadId) {
943
948
  return void 0;
944
949
  }
945
- return { [CODEX_PROVIDER_ID]: { threadId } };
950
+ return { [CODEX_PROVIDER_ID]: stripUndefined({ threadId, turnId }) };
946
951
  }
947
- function withProviderMetadata(part, threadId) {
948
- const meta = codexProviderMetadata(threadId);
952
+ function withProviderMetadata(part, threadId, turnId) {
953
+ const meta = codexProviderMetadata(threadId, turnId);
949
954
  return meta ? { ...part, providerMetadata: meta } : part;
950
955
  }
951
956
 
@@ -984,6 +989,7 @@ var CodexEventMapper = class {
984
989
  openToolCalls = /* @__PURE__ */ new Map();
985
990
  planSequenceByTurnId = /* @__PURE__ */ new Map();
986
991
  threadId;
992
+ turnId;
987
993
  latestUsage;
988
994
  constructor(options) {
989
995
  this.options = {
@@ -993,6 +999,12 @@ var CodexEventMapper = class {
993
999
  setThreadId(threadId) {
994
1000
  this.threadId = threadId;
995
1001
  }
1002
+ setTurnId(turnId) {
1003
+ this.turnId = turnId;
1004
+ }
1005
+ getTurnId() {
1006
+ return this.turnId;
1007
+ }
996
1008
  nextPlanSequence(turnId) {
997
1009
  const next = (this.planSequenceByTurnId.get(turnId) ?? 0) + 1;
998
1010
  this.planSequenceByTurnId.set(turnId, next);
@@ -1000,7 +1012,7 @@ var CodexEventMapper = class {
1000
1012
  }
1001
1013
  map(event) {
1002
1014
  const parts = [];
1003
- const withMeta = (part) => withProviderMetadata(part, this.threadId);
1015
+ const withMeta = (part) => withProviderMetadata(part, this.threadId, this.turnId);
1004
1016
  const pushStreamStart = () => {
1005
1017
  if (!this.streamStarted) {
1006
1018
  parts.push({ type: "stream-start", warnings: [] });
@@ -1020,6 +1032,10 @@ var CodexEventMapper = class {
1020
1032
  };
1021
1033
  switch (event.method) {
1022
1034
  case "turn/started": {
1035
+ const turnStartedParams = event.params;
1036
+ if (turnStartedParams?.turn?.id) {
1037
+ this.turnId = turnStartedParams.turn.id;
1038
+ }
1023
1039
  pushStreamStart();
1024
1040
  break;
1025
1041
  }
@@ -1318,7 +1334,74 @@ var CodexEventMapper = class {
1318
1334
  }
1319
1335
  };
1320
1336
 
1321
- // src/protocol/prompt-mapper.ts
1337
+ // src/session.ts
1338
+ var CodexSessionImpl = class {
1339
+ _threadId;
1340
+ _turnId;
1341
+ _active = true;
1342
+ client;
1343
+ interruptTimeoutMs;
1344
+ constructor(opts) {
1345
+ this.client = opts.client;
1346
+ this._threadId = opts.threadId;
1347
+ this._turnId = opts.turnId;
1348
+ this.interruptTimeoutMs = opts.interruptTimeoutMs;
1349
+ }
1350
+ get threadId() {
1351
+ return this._threadId;
1352
+ }
1353
+ get turnId() {
1354
+ return this._turnId;
1355
+ }
1356
+ /** @internal Called by the model when turn/started arrives with a turnId. */
1357
+ setTurnId(turnId) {
1358
+ this._turnId = turnId;
1359
+ }
1360
+ /** @internal Called by the model when the turn completes or the stream closes. */
1361
+ markInactive() {
1362
+ this._active = false;
1363
+ }
1364
+ isActive() {
1365
+ return this._active;
1366
+ }
1367
+ /**
1368
+ * Inject follow-up input into the current thread.
1369
+ *
1370
+ * Uses turn/start which the app-server routes through steer_input when a
1371
+ * turn is already active, or starts a new turn otherwise. This avoids the
1372
+ * strict timing requirements of turn/steer (which needs codex/event/task_started
1373
+ * before it accepts input). We may revisit turn/steer in the future.
1374
+ */
1375
+ async injectMessage(input) {
1376
+ if (!this._active) {
1377
+ throw new Error("Session is no longer active.");
1378
+ }
1379
+ const userInput = typeof input === "string" ? [{ type: "text", text: input, text_elements: [] }] : input;
1380
+ const turnStartParams = {
1381
+ threadId: this._threadId,
1382
+ input: userInput
1383
+ };
1384
+ const result = await this.client.request("turn/start", turnStartParams);
1385
+ const newTurnId = result.turnId ?? result.turn?.id;
1386
+ if (newTurnId) {
1387
+ this._turnId = newTurnId;
1388
+ }
1389
+ }
1390
+ async interrupt() {
1391
+ if (!this._active || !this._turnId) {
1392
+ return;
1393
+ }
1394
+ const interruptParams = {
1395
+ threadId: this._threadId,
1396
+ turnId: this._turnId
1397
+ };
1398
+ await this.client.request(
1399
+ "turn/interrupt",
1400
+ interruptParams,
1401
+ this.interruptTimeoutMs
1402
+ );
1403
+ }
1404
+ };
1322
1405
  function mapSystemPrompt(prompt) {
1323
1406
  const chunks = [];
1324
1407
  for (const message of prompt) {
@@ -1331,8 +1414,111 @@ function mapSystemPrompt(prompt) {
1331
1414
  }
1332
1415
  return chunks.length > 0 ? chunks.join("\n\n") : void 0;
1333
1416
  }
1334
- function mapPromptToTurnInput(prompt, isResume = false) {
1335
- if (isResume) {
1417
+ function textItem(text) {
1418
+ return { type: "text", text, text_elements: [] };
1419
+ }
1420
+ var MEDIA_TYPE_TO_EXT = {
1421
+ "image/png": ".png",
1422
+ "image/jpeg": ".jpg",
1423
+ "image/gif": ".gif",
1424
+ "image/webp": ".webp",
1425
+ "image/svg+xml": ".svg",
1426
+ "image/bmp": ".bmp",
1427
+ "image/tiff": ".tiff"
1428
+ };
1429
+ function extensionForMediaType(mediaType) {
1430
+ return MEDIA_TYPE_TO_EXT[mediaType] ?? ".bin";
1431
+ }
1432
+ var LocalFileWriter = class {
1433
+ async write(data, mediaType) {
1434
+ const ext = extensionForMediaType(mediaType);
1435
+ const filename = `codex-ai-sdk-${crypto.randomUUID()}${ext}`;
1436
+ const filepath = path.join(os.tmpdir(), filename);
1437
+ const buffer = typeof data === "string" ? Buffer.from(data, "base64") : data;
1438
+ await promises.writeFile(filepath, buffer);
1439
+ return url.pathToFileURL(filepath);
1440
+ }
1441
+ async cleanup(urls) {
1442
+ await Promise.allSettled(
1443
+ urls.filter((u) => u.protocol === "file:").map((u) => promises.unlink(u))
1444
+ );
1445
+ }
1446
+ };
1447
+ var PromptFileResolver = class {
1448
+ writer;
1449
+ written = [];
1450
+ constructor(writer) {
1451
+ this.writer = writer ?? new LocalFileWriter();
1452
+ }
1453
+ /**
1454
+ * Resolve inline file data and map user content to Codex input items.
1455
+ *
1456
+ * - Inline image data (base64 / Uint8Array) is written via the
1457
+ * {@link FileWriter} and converted to `localImage` or `image` items.
1458
+ * - URL-based image file parts are converted directly.
1459
+ * - Inline text file data is decoded and inlined as text.
1460
+ * - Unsupported media types are silently skipped.
1461
+ *
1462
+ * @param isResume - When true only the last user message is extracted.
1463
+ * When false (fresh thread) all user text is accumulated with images
1464
+ * flushing the text buffer to preserve ordering.
1465
+ */
1466
+ async resolve(prompt, isResume = false) {
1467
+ if (isResume) {
1468
+ return this.resolveResumed(prompt);
1469
+ }
1470
+ return this.resolveFresh(prompt);
1471
+ }
1472
+ /**
1473
+ * Remove all files created by previous {@link resolve} calls.
1474
+ * Best-effort — never throws.
1475
+ */
1476
+ async cleanup() {
1477
+ const urls = this.written.splice(0);
1478
+ if (urls.length > 0) {
1479
+ await this.writer.cleanup(urls);
1480
+ }
1481
+ }
1482
+ /**
1483
+ * Convert a resolved image URL to a Codex input item.
1484
+ */
1485
+ mapImageUrl(mediaType, data) {
1486
+ if (!mediaType.startsWith("image/")) {
1487
+ return null;
1488
+ }
1489
+ if (data.protocol === "file:") {
1490
+ return { type: "localImage", path: url.fileURLToPath(data) };
1491
+ }
1492
+ return { type: "image", url: data.href };
1493
+ }
1494
+ /**
1495
+ * Resolve a single file part: write inline data via the writer, then
1496
+ * convert to a Codex input item. Text files are decoded and returned
1497
+ * as text items. Returns `null` for unsupported media types.
1498
+ */
1499
+ async resolveFilePart(part) {
1500
+ const { mediaType, data } = part;
1501
+ if (mediaType.startsWith("text/")) {
1502
+ if (data instanceof URL) {
1503
+ return textItem(data.href);
1504
+ }
1505
+ const text = typeof data === "string" ? Buffer.from(data, "base64").toString("utf-8") : new TextDecoder().decode(data);
1506
+ return textItem(text);
1507
+ }
1508
+ if (mediaType.startsWith("image/") && !(data instanceof URL)) {
1509
+ const url = await this.writer.write(data, mediaType);
1510
+ this.written.push(url);
1511
+ return this.mapImageUrl(mediaType, url);
1512
+ }
1513
+ if (data instanceof URL) {
1514
+ return this.mapImageUrl(mediaType, data);
1515
+ }
1516
+ return null;
1517
+ }
1518
+ /**
1519
+ * Resume path: extract parts from the last user message individually.
1520
+ */
1521
+ async resolveResumed(prompt) {
1336
1522
  for (let i = prompt.length - 1; i >= 0; i--) {
1337
1523
  const message = prompt[i];
1338
1524
  if (message?.role === "user") {
@@ -1341,7 +1527,12 @@ function mapPromptToTurnInput(prompt, isResume = false) {
1341
1527
  if (part.type === "text") {
1342
1528
  const text = part.text.trim();
1343
1529
  if (text.length > 0) {
1344
- items.push({ type: "text", text, text_elements: [] });
1530
+ items.push(textItem(text));
1531
+ }
1532
+ } else if (part.type === "file") {
1533
+ const mapped = await this.resolveFilePart(part);
1534
+ if (mapped) {
1535
+ items.push(mapped);
1345
1536
  }
1346
1537
  }
1347
1538
  }
@@ -1350,21 +1541,45 @@ function mapPromptToTurnInput(prompt, isResume = false) {
1350
1541
  }
1351
1542
  return [];
1352
1543
  }
1353
- const chunks = [];
1354
- for (const message of prompt) {
1355
- if (message.role === "user") {
1356
- for (const part of message.content) {
1357
- if (part.type === "text") {
1358
- const text = part.text.trim();
1359
- if (text.length > 0) {
1360
- chunks.push(text);
1544
+ /**
1545
+ * Fresh thread path: accumulate text chunks across all user messages,
1546
+ * flushing before each image to preserve ordering.
1547
+ */
1548
+ async resolveFresh(prompt) {
1549
+ const items = [];
1550
+ const textChunks = [];
1551
+ const flushText = () => {
1552
+ if (textChunks.length > 0) {
1553
+ items.push(textItem(textChunks.join("\n\n")));
1554
+ textChunks.length = 0;
1555
+ }
1556
+ };
1557
+ for (const message of prompt) {
1558
+ if (message.role === "user") {
1559
+ for (const part of message.content) {
1560
+ if (part.type === "text") {
1561
+ const text = part.text.trim();
1562
+ if (text.length > 0) {
1563
+ textChunks.push(text);
1564
+ }
1565
+ } else if (part.type === "file") {
1566
+ const mapped = await this.resolveFilePart(part);
1567
+ if (mapped) {
1568
+ if (mapped.type === "text") {
1569
+ textChunks.push(mapped.text);
1570
+ } else {
1571
+ flushText();
1572
+ items.push(mapped);
1573
+ }
1574
+ }
1361
1575
  }
1362
1576
  }
1363
1577
  }
1364
1578
  }
1579
+ flushText();
1580
+ return items;
1365
1581
  }
1366
- return [{ type: "text", text: chunks.join("\n\n"), text_elements: [] }];
1367
- }
1582
+ };
1368
1583
 
1369
1584
  // src/model.ts
1370
1585
  function createEmptyUsage() {
@@ -1619,6 +1834,7 @@ var CodexLanguageModel = class {
1619
1834
  }));
1620
1835
  let activeThreadId;
1621
1836
  let activeTurnId;
1837
+ let session;
1622
1838
  const interruptTimeoutMs = this.config.providerSettings.interruptTimeoutMs ?? 1e4;
1623
1839
  const interruptTurnIfPossible = async () => {
1624
1840
  if (!activeThreadId || !activeTurnId) {
@@ -1631,6 +1847,7 @@ var CodexLanguageModel = class {
1631
1847
  debugLog?.("outbound", "turn/interrupt", interruptParams);
1632
1848
  await client.request("turn/interrupt", interruptParams, interruptTimeoutMs);
1633
1849
  };
1850
+ const fileResolver = new PromptFileResolver();
1634
1851
  const stream = new ReadableStream({
1635
1852
  start: (controller) => {
1636
1853
  let closed = false;
@@ -1638,11 +1855,13 @@ var CodexLanguageModel = class {
1638
1855
  if (closed) {
1639
1856
  return;
1640
1857
  }
1858
+ session?.markInactive();
1641
1859
  controller.enqueue({ type: "error", error });
1642
1860
  closed = true;
1643
1861
  try {
1644
1862
  controller.close();
1645
1863
  } finally {
1864
+ await fileResolver.cleanup();
1646
1865
  await client.disconnect();
1647
1866
  }
1648
1867
  };
@@ -1650,10 +1869,12 @@ var CodexLanguageModel = class {
1650
1869
  if (closed) {
1651
1870
  return;
1652
1871
  }
1872
+ session?.markInactive();
1653
1873
  closed = true;
1654
1874
  try {
1655
1875
  controller.close();
1656
1876
  } finally {
1877
+ await fileResolver.cleanup();
1657
1878
  await client.disconnect();
1658
1879
  }
1659
1880
  };
@@ -1755,6 +1976,11 @@ var CodexLanguageModel = class {
1755
1976
  approvalsDispatcher.attach(client);
1756
1977
  client.onAnyNotification((method, params) => {
1757
1978
  const parts = mapper.map({ method, params });
1979
+ const mappedTurnId = mapper.getTurnId();
1980
+ if (mappedTurnId && mappedTurnId !== activeTurnId) {
1981
+ activeTurnId = mappedTurnId;
1982
+ session?.setTurnId(mappedTurnId);
1983
+ }
1758
1984
  for (const part of parts) {
1759
1985
  controller.enqueue(part);
1760
1986
  if (part.type === "finish") {
@@ -1852,10 +2078,13 @@ var CodexLanguageModel = class {
1852
2078
  }
1853
2079
  }
1854
2080
  } else {
2081
+ const mcpServers = this.config.providerSettings.mcpServers;
2082
+ const config = mcpServers ? { mcp_servers: mcpServers } : void 0;
1855
2083
  const threadStartParams = stripUndefined({
1856
2084
  model: this.config.providerSettings.defaultModel ?? this.modelId,
1857
2085
  dynamicTools,
1858
2086
  developerInstructions,
2087
+ config,
1859
2088
  cwd: this.config.providerSettings.defaultThreadSettings?.cwd,
1860
2089
  approvalPolicy: this.config.providerSettings.defaultThreadSettings?.approvalPolicy,
1861
2090
  sandbox: this.config.providerSettings.defaultThreadSettings?.sandbox
@@ -1878,7 +2107,7 @@ var CodexLanguageModel = class {
1878
2107
  closeSuccessfully
1879
2108
  );
1880
2109
  }
1881
- const turnInput = mapPromptToTurnInput(options.prompt, !!resumeThreadId);
2110
+ const turnInput = await fileResolver.resolve(options.prompt, !!resumeThreadId);
1882
2111
  const turnStartParams = stripUndefined({
1883
2112
  threadId,
1884
2113
  input: turnInput,
@@ -1887,21 +2116,31 @@ var CodexLanguageModel = class {
1887
2116
  sandboxPolicy: this.config.providerSettings.defaultTurnSettings?.sandboxPolicy,
1888
2117
  model: this.config.providerSettings.defaultTurnSettings?.model,
1889
2118
  effort: this.config.providerSettings.defaultTurnSettings?.effort,
1890
- summary: this.config.providerSettings.defaultTurnSettings?.summary
2119
+ summary: this.config.providerSettings.defaultTurnSettings?.summary,
2120
+ outputSchema: options.responseFormat?.type === "json" ? options.responseFormat.schema : void 0
1891
2121
  });
1892
2122
  debugLog?.("outbound", "turn/start", turnStartParams);
1893
2123
  const turnStartResult = await client.request("turn/start", turnStartParams);
1894
2124
  activeTurnId = extractTurnId(turnStartResult);
2125
+ session = new CodexSessionImpl({
2126
+ client,
2127
+ threadId: activeThreadId,
2128
+ turnId: activeTurnId,
2129
+ interruptTimeoutMs
2130
+ });
2131
+ this.config.providerSettings.onSessionCreated?.(session);
1895
2132
  } catch (error) {
1896
2133
  await closeWithError(error);
1897
2134
  }
1898
2135
  })();
1899
2136
  },
1900
2137
  cancel: async () => {
2138
+ session?.markInactive();
1901
2139
  try {
1902
2140
  await interruptTurnIfPossible();
1903
2141
  } catch {
1904
2142
  }
2143
+ await fileResolver.cleanup();
1905
2144
  await client.disconnect();
1906
2145
  }
1907
2146
  });
@@ -2031,12 +2270,15 @@ function createCodexAppServer(settings = {}) {
2031
2270
  defaultTurnSettings: settings.defaultTurnSettings ? { ...settings.defaultTurnSettings } : void 0,
2032
2271
  compaction: settings.compaction ? { ...settings.compaction } : void 0,
2033
2272
  transportFactory: effectiveTransportFactory,
2273
+ mcpServers: settings.mcpServers ? { ...settings.mcpServers } : void 0,
2034
2274
  tools: settings.tools ? { ...settings.tools } : void 0,
2035
2275
  toolHandlers: settings.toolHandlers ? { ...settings.toolHandlers } : void 0,
2036
2276
  toolTimeoutMs: settings.toolTimeoutMs,
2037
2277
  interruptTimeoutMs: settings.interruptTimeoutMs,
2038
2278
  approvals: settings.approvals ? { ...settings.approvals } : void 0,
2039
- debug: settings.debug ? { ...settings.debug } : void 0
2279
+ debug: settings.debug ? { ...settings.debug } : void 0,
2280
+ emitPlanUpdates: settings.emitPlanUpdates,
2281
+ onSessionCreated: settings.onSessionCreated
2040
2282
  }));
2041
2283
  const createLanguageModel = (modelId, modelSettings = {}) => new CodexLanguageModel(modelId, modelSettings, {
2042
2284
  provider: CODEX_PROVIDER_ID,
@@ -2058,6 +2300,31 @@ function createCodexAppServer(settings = {}) {
2058
2300
  imageModel(modelId) {
2059
2301
  throw createNoSuchModelError(modelId, "imageModel");
2060
2302
  },
2303
+ async listModels(params) {
2304
+ const transport = effectiveTransportFactory ? effectiveTransportFactory() : resolvedSettings.transport?.type === "websocket" ? new WebSocketTransport(resolvedSettings.transport.websocket) : new StdioTransport(resolvedSettings.transport?.stdio);
2305
+ const client = new AppServerClient(transport);
2306
+ try {
2307
+ await client.connect();
2308
+ const initializeParams = stripUndefined({
2309
+ clientInfo: resolvedSettings.clientInfo ?? {
2310
+ name: PACKAGE_NAME,
2311
+ version: PACKAGE_VERSION
2312
+ }
2313
+ });
2314
+ await client.request("initialize", initializeParams);
2315
+ await client.notification("initialized");
2316
+ const models = [];
2317
+ let cursor;
2318
+ do {
2319
+ const response = await client.request("model/list", stripUndefined({ ...params, cursor }));
2320
+ models.push(...response.data);
2321
+ cursor = response.nextCursor ?? void 0;
2322
+ } while (cursor);
2323
+ return models;
2324
+ } finally {
2325
+ await client.disconnect();
2326
+ }
2327
+ },
2061
2328
  async shutdown() {
2062
2329
  if (!persistentPoolHandle) {
2063
2330
  return;
@@ -2083,16 +2350,17 @@ exports.CodexWorker = CodexWorker;
2083
2350
  exports.CodexWorkerPool = CodexWorkerPool;
2084
2351
  exports.DynamicToolsDispatcher = DynamicToolsDispatcher;
2085
2352
  exports.JsonRpcError = JsonRpcError;
2353
+ exports.LocalFileWriter = LocalFileWriter;
2086
2354
  exports.PACKAGE_NAME = PACKAGE_NAME;
2087
2355
  exports.PACKAGE_VERSION = PACKAGE_VERSION;
2088
2356
  exports.PersistentTransport = PersistentTransport;
2357
+ exports.PromptFileResolver = PromptFileResolver;
2089
2358
  exports.StdioTransport = StdioTransport;
2090
2359
  exports.WebSocketTransport = WebSocketTransport;
2091
2360
  exports.codexAppServer = codexAppServer;
2092
2361
  exports.codexProviderMetadata = codexProviderMetadata;
2093
2362
  exports.createCodexAppServer = createCodexAppServer;
2094
2363
  exports.createCodexProvider = createCodexProvider;
2095
- exports.mapPromptToTurnInput = mapPromptToTurnInput;
2096
2364
  exports.mapSystemPrompt = mapSystemPrompt;
2097
2365
  exports.withProviderMetadata = withProviderMetadata;
2098
2366
  //# sourceMappingURL=index.cjs.map