@opengeni/runtime 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/chunk-2PO56VAL.js +3478 -0
  2. package/dist/chunk-2PO56VAL.js.map +1 -0
  3. package/dist/index.d.ts +912 -0
  4. package/dist/index.js +3663 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/sandbox/index.d.ts +1738 -0
  7. package/dist/sandbox/index.js +187 -0
  8. package/dist/sandbox/index.js.map +1 -0
  9. package/package.json +49 -0
  10. package/src/bundled_hashicorp_terraform_skills/LICENSE +373 -0
  11. package/src/bundled_hashicorp_terraform_skills/README.md +18 -0
  12. package/src/bundled_hashicorp_terraform_skills/UPSTREAM_GIT_SHA +1 -0
  13. package/src/bundled_hashicorp_terraform_skills/azure-verified-modules/SKILL.md +613 -0
  14. package/src/bundled_hashicorp_terraform_skills/checkov/SKILL.md +43 -0
  15. package/src/bundled_hashicorp_terraform_skills/refactor-module/SKILL.md +538 -0
  16. package/src/bundled_hashicorp_terraform_skills/social-media-marketing/SKILL.md +35 -0
  17. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/SKILL.md +372 -0
  18. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/references/MANUAL-IMPORT.md +113 -0
  19. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/scripts/list_resources.sh +38 -0
  20. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/SKILL.md +480 -0
  21. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/api-monitoring.md +543 -0
  22. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/component-blocks.md +476 -0
  23. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/deployment-blocks.md +391 -0
  24. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/examples.md +1529 -0
  25. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/linked-stacks.md +187 -0
  26. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/troubleshooting.md +671 -0
  27. package/src/bundled_hashicorp_terraform_skills/terraform-style-guide/SKILL.md +353 -0
  28. package/src/bundled_hashicorp_terraform_skills/terraform-test/SKILL.md +451 -0
  29. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/CI_CD.md +80 -0
  30. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/EXAMPLES.md +314 -0
  31. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/MOCK_PROVIDERS.md +171 -0
  32. package/src/codex-tool-search.ts +267 -0
  33. package/src/context-compaction.ts +538 -0
  34. package/src/history-sanitizer.ts +719 -0
  35. package/src/index.ts +3299 -0
  36. package/src/sandbox/capabilities.ts +69 -0
  37. package/src/sandbox/channel-a.ts +1031 -0
  38. package/src/sandbox/display-stack.ts +231 -0
  39. package/src/sandbox/errors.ts +34 -0
  40. package/src/sandbox/index.ts +832 -0
  41. package/src/sandbox/providers/blaxel.ts +35 -0
  42. package/src/sandbox/providers/cloudflare.ts +24 -0
  43. package/src/sandbox/providers/daytona.ts +34 -0
  44. package/src/sandbox/providers/docker.ts +17 -0
  45. package/src/sandbox/providers/e2b.ts +36 -0
  46. package/src/sandbox/providers/index.ts +107 -0
  47. package/src/sandbox/providers/local.ts +13 -0
  48. package/src/sandbox/providers/modal.ts +55 -0
  49. package/src/sandbox/providers/none.ts +13 -0
  50. package/src/sandbox/providers/runloop.ts +32 -0
  51. package/src/sandbox/providers/selfhosted.ts +96 -0
  52. package/src/sandbox/providers/types.ts +38 -0
  53. package/src/sandbox/providers/vercel.ts +29 -0
  54. package/src/sandbox/recording.ts +286 -0
  55. package/src/sandbox/routing/backend-resolver.ts +189 -0
  56. package/src/sandbox/routing/routing-session.ts +455 -0
  57. package/src/sandbox/select.ts +371 -0
  58. package/src/sandbox/selfhosted/capabilities.ts +255 -0
  59. package/src/sandbox/selfhosted/control-rpc.ts +351 -0
  60. package/src/sandbox/selfhosted/session.ts +930 -0
  61. package/src/sandbox/selfhosted/testing.ts +230 -0
  62. package/src/sandbox/stream-port.ts +185 -0
  63. package/src/sandbox/stream-token.ts +90 -0
  64. package/src/sandbox/terminal-server.ts +203 -0
  65. package/src/sandbox-computer.ts +835 -0
@@ -0,0 +1,719 @@
1
+ /**
2
+ * Read-path sanitizer for replayed conversation history (issue: orphaned
3
+ * tool outputs brick a session).
4
+ *
5
+ * Conversation truth is persisted as a flat list of SDK history items in
6
+ * `session_history_items` and replayed verbatim into the model on every turn.
7
+ * The OpenAI Responses API rejects the whole request (HTTP 400) when that list
8
+ * violates its tool-call pairing rules — most destructively:
9
+ *
10
+ * `400 No tool call found for function call output with call_id <X>`
11
+ *
12
+ * when a `function_call_result` (a.k.a. function_call_output) has no matching
13
+ * `function_call` earlier in the list. Because the corrupt item is replayed on
14
+ * every subsequent turn, one orphaned output permanently bricks the session
15
+ * across revival — it stays dead until the row is hand-deleted.
16
+ *
17
+ * This module is the reliability net: before history items are sent to the
18
+ * model they pass through `sanitizeHistoryItemsForModel`, which removes any
19
+ * item that would make the request invalid. It mirrors the SDK's own
20
+ * `dropOrphanToolCalls` continuation logic (which only runs over the SDK's
21
+ * in-memory `state.history`, not over rows we reload from the database) so a
22
+ * reloaded history is shaped exactly like a freshly-generated one.
23
+ *
24
+ * It is a pure function over plain JSON item shapes (no SDK import, no I/O) so
25
+ * it is cheap to unit-test exhaustively. It NEVER mutates its input items and
26
+ * NEVER touches the stored rows — only the in-memory copy sent to the model is
27
+ * filtered, keeping the persisted audit trail intact.
28
+ */
29
+
30
/**
 * A history item is any JSON object; this module only inspects a few
 * discriminator fields on it (`type`, `callId` / `call_id`, `providerData`,
 * `encrypted_content`, `action` / `actions`) and otherwise treats it as opaque.
 */
export type HistoryItem = Record<string, unknown>;
32
+
33
/**
 * Tool-call item types and the result-item type that settles them. Kept in
 * sync with the SDK's `TOOL_CALL_RESULT_TYPE_BY_CALL_TYPE`; `function_call` is
 * the one observed live, the rest are included so the same pairing logic holds
 * for every tool-call kind the SDK can emit.
 *
 * The table is built on a NULL prototype: it is indexed with `type` strings
 * read from persisted rows, and a plain object literal would answer lookups
 * such as `"constructor"` or `"toString"` with inherited (truthy)
 * `Object.prototype` members, misclassifying that item as a tool call.
 */
const RESULT_TYPE_BY_CALL_TYPE: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    function_call: "function_call_result",
    computer_call: "computer_call_result",
    shell_call: "shell_call_output",
    apply_patch_call: "apply_patch_call_output",
    // Progressive connector disclosure (codex tool_search): a replayed
    // `tool_search_call` must be settled by its `tool_search_output` exactly like a
    // function call — an unpaired one 400s the store:false replay. The SDK pairs
    // these OUTSIDE its own TOOL_CALL_RESULT_TYPE_BY_CALL_TYPE (sessionPersistence's
    // hasToolSearchCallId), so we mirror the semantics here; the correlation id can
    // additionally ride providerData (see callIdOf).
    tool_search_call: "tool_search_output",
  },
);

/** Every item type that acts as the settling RESULT of some tool call. */
const RESULT_TYPES = new Set(Object.values(RESULT_TYPE_BY_CALL_TYPE));
54
+
55
/**
 * Read the `type` discriminator of a history item.
 *
 * @returns the `type` field when `item` is a non-null object whose `type` is a
 *   string (including the empty string); `undefined` otherwise.
 */
function itemType(item: unknown): string | undefined {
  if (typeof item !== "object" || item === null) {
    return undefined;
  }
  const discriminator = (item as { type?: unknown }).type;
  if (typeof discriminator !== "string") {
    return undefined;
  }
  return discriminator;
}
62
+
63
/**
 * Correlation id for a tool call / result.
 *
 * The SDK's canonical history shape uses camelCase `callId`; the raw Responses
 * wire shape uses snake_case `call_id`. Persisted rows are the SDK shape, but
 * either is accepted so a row written by any code path (or hand-repaired)
 * still correlates. tool_search items may carry their id ONLY inside
 * `providerData`, so that is consulted as a fallback; this is harmless for
 * other item kinds, whose ids never live there.
 *
 * Lookup order (first non-empty string wins):
 *   1. `item.callId`
 *   2. `item.call_id`
 *   3. `item.providerData.call_id`
 *   4. `item.providerData.callId`
 *
 * @returns the id, or `undefined` when `item` is not an object or carries no
 *   non-empty string id in any of those places.
 */
function callIdOf(item: unknown): string | undefined {
  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === "string" && value.length > 0 ? value : undefined;

  if (typeof item !== "object" || item === null) {
    return undefined;
  }
  const fields = item as { callId?: unknown; call_id?: unknown; providerData?: unknown };

  const topLevelId = nonEmptyString(fields.callId) ?? nonEmptyString(fields.call_id);
  if (topLevelId !== undefined) {
    return topLevelId;
  }

  const nested = fields.providerData;
  if (typeof nested !== "object" || nested === null) {
    return undefined;
  }
  const providerFields = nested as { call_id?: unknown; callId?: unknown };
  return nonEmptyString(providerFields.call_id) ?? nonEmptyString(providerFields.callId);
}
95
+
96
/**
 * Sanitize a replayed history item list into a sequence the Responses API
 * accepts. Pure: returns a new array of the same item references in order,
 * with invalid items omitted. Valid histories come back byte-identical
 * (same references, same order). The input array and its items are never
 * mutated.
 *
 * Rules, each motivated by a concrete 400 the API raises:
 *
 *  1. Drop every tool-call RESULT whose matching tool CALL does not appear
 *     earlier in the list. This is the session-bricking orphan: a
 *     `function_call_result` with no preceding `function_call` of the same
 *     `call_id`. ("No tool call found for function call output…")
 *
 *  2. Drop every tool CALL that has no matching RESULT anywhere after it.
 *     The Responses API requires each tool call to be settled by its output
 *     before the conversation can continue; a dangling call left in replayed
 *     history 400s with "No tool output found for function call…". Dropping
 *     the dangling call (rather than synthesizing a fake output) is what the
 *     SDK itself does for in-memory continuation. When several calls share
 *     one id (e.g. duplicated rows) and none is followed by a result, ALL of
 *     them are dropped — not just the last one.
 *
 *  3. Drop any `reasoning` item whose next non-reasoning item (looking across
 *     a run of reasoning items) was dropped by rule 1 or 2. The Responses API
 *     ties an encrypted reasoning item to the item it produced; a reasoning
 *     item left stranded trips "Item 'rs_…' of type 'reasoning' was provided
 *     without its required following item". Mirrors the SDK's
 *     `dropReasoningItemsPrecedingDroppedCalls`.
 *
 * A `call_id` is paired only when BOTH a call and a result of the matching
 * types exist with that id, the call appearing before the result. A result
 * settles every call of that (type, id) seen since the previous result; calls
 * and results that satisfy that survive untouched.
 *
 * @param items replayed history items, in conversation order.
 * @returns a new array holding the surviving items (same references).
 */
export function sanitizeHistoryItemsForModel<T extends HistoryItem>(items: readonly T[]): T[] {
  if (items.length === 0) {
    return [];
  }

  const dropped = new Set<number>();

  // result-type -> call_id -> indices of CALLs seen so far that are still
  // waiting to be settled by a following result. Indices (not just ids) are
  // tracked so that every unsettled call can be dropped, even when several
  // calls share one id.
  const pendingCallsByResultType = new Map<string, Map<string, number[]>>();

  items.forEach((item, index) => {
    const type = itemType(item);
    const callId = callIdOf(item);
    if (!type || !callId) {
      return;
    }

    // Own-property check: `type` comes from persisted JSON and must not match
    // inherited Object.prototype members such as "constructor".
    const settlingType = Object.prototype.hasOwnProperty.call(RESULT_TYPE_BY_CALL_TYPE, type)
      ? RESULT_TYPE_BY_CALL_TYPE[type]
      : undefined;
    if (settlingType) {
      let pending = pendingCallsByResultType.get(settlingType);
      if (!pending) {
        pending = new Map<string, number[]>();
        pendingCallsByResultType.set(settlingType, pending);
      }
      const indices = pending.get(callId);
      if (indices) {
        indices.push(index);
      } else {
        pending.set(callId, [index]);
      }
      return;
    }

    if (RESULT_TYPES.has(type)) {
      const pending = pendingCallsByResultType.get(type);
      if (pending?.has(callId)) {
        // Settles the call(s) we have already seen — keep them, close the id.
        pending.delete(callId);
      } else {
        // Rule 1: result whose call is absent or appears later — the orphan.
        dropped.add(index);
      }
    }
  });

  // Rule 2: every call still pending after the full scan has no result after
  // it — a dangling call the API rejects.
  for (const pending of pendingCallsByResultType.values()) {
    for (const indices of pending.values()) {
      for (const index of indices) {
        dropped.add(index);
      }
    }
  }

  if (dropped.size === 0) {
    return items.slice();
  }

  // Rule 3: a reasoning item is stranded when the next non-reasoning item
  // after it is dropped. Reasoning items added to `dropped` here never affect
  // later iterations because the inner scan skips reasoning items outright.
  for (let index = 0; index < items.length; index += 1) {
    if (dropped.has(index) || itemType(items[index]) !== "reasoning") {
      continue;
    }
    for (let next = index + 1; next < items.length; next += 1) {
      if (itemType(items[next]) === "reasoning") {
        continue;
      }
      if (dropped.has(next)) {
        dropped.add(index);
      }
      break;
    }
  }

  return items.filter((_item, index) => !dropped.has(index));
}
224
+
225
/**
 * Remove the account/org-bound `encrypted_content` blob from ONE history item
 * while preserving everything else (the visible chain-of-thought text in
 * `summary`/`content`, and every other field).
 *
 * Pure and non-mutating: when there is nothing to strip the SAME reference is
 * returned (so the common, same-account path stays byte-identical); otherwise a
 * shallow clone is returned (with a shallow-cloned `providerData` when the blob
 * lived there).
 *
 * WHY. A codex-subscription turn round-trips `reasoning.encrypted_content` — an
 * opaque blob minted by the ChatGPT/Codex backend and bound to the account
 * (org) that produced it. After a manual switch from codex account A to B, the
 * carried history still holds A-minted blobs; replaying them into a turn
 * running on B is rejected (400). The blob is purely a chain-of-thought
 * continuity optimization — dropping it costs at most one turn of lost CoT
 * continuity and never any message content.
 *
 * USED FOR `compaction` items only on the history-items read path: a foreign
 * `compaction` summary carries account-bound `encrypted_content` but its
 * summary is real conversation content that must be preserved, so only the blob
 * is stripped (the item is NOT dropped). Foreign `reasoning` items are instead
 * dropped WHOLESALE by the caller (id + blob), because the Responses backend
 * validates the foreign `rs_…` id and rejects a reasoning item that has a
 * foreign id and no encrypted_content (see {@link applyCodexHistoryStrip}).
 *
 * What is removed: `providerData.encryptedContent` and
 * `providerData.encrypted_content` (the SDK's Responses converter reads either
 * casing; persisted rows use the snake form), plus a top-level
 * `encrypted_content` (the `compaction`-item shape). Only `reasoning` and
 * `compaction` items are touched; every other item type is returned by
 * reference.
 *
 * @param item a single history item.
 * @returns `item` itself when unchanged, otherwise a stripped shallow clone.
 */
export function stripReasoningEncryptedContent<T extends HistoryItem>(item: T): T {
  const kind = itemType(item);
  if (!(kind === "reasoning" || kind === "compaction")) {
    return item;
  }

  const fields: Record<string, unknown> = item;
  const nested = fields.providerData;
  const nestedFields =
    typeof nested === "object" && nested !== null ? (nested as Record<string, unknown>) : undefined;
  const blobInProviderData =
    nestedFields !== undefined &&
    ("encryptedContent" in nestedFields || "encrypted_content" in nestedFields);
  const blobAtTopLevel = "encrypted_content" in fields;

  if (!blobInProviderData && !blobAtTopLevel) {
    // Nothing encrypted to strip — hand back the very same reference.
    return item;
  }

  const copy: Record<string, unknown> = { ...fields };
  if (nestedFields !== undefined && blobInProviderData) {
    // Rest-destructuring yields a shallow copy of providerData minus both casings.
    const {
      encryptedContent: _camelBlob,
      encrypted_content: _snakeBlob,
      ...providerCopy
    } = nestedFields;
    copy.providerData = providerCopy;
  }
  if (blobAtTopLevel) {
    delete copy.encrypted_content;
  }
  return copy as unknown as T;
}
283
+
284
/**
 * Neutralize the account/org-bound identity of EVERY `reasoning` item embedded
 * in a serialized RunState JSON string and return the re-serialized string.
 * A parse failure, a non-object payload, or a no-op returns the input string
 * unchanged (so an unchanged or non-codex run-state replays byte-for-byte).
 *
 * WHY (HOLE C — the run-state REPLAY paths). The approval-decision resume and
 * the items-mode run-state fallback replay the serialized RunState blob
 * verbatim. That blob round-trips `reasoning.encrypted_content` minted by the
 * ChatGPT/Codex backend (bound to the freezing account/org — a foreign account
 * 400s it) AND the foreign `rs_…` reasoning ids the Responses backend validates
 * (rejected once the blob is gone). Unlike `session_history_items`, the blob
 * carries NO per-item producer tag, so foreign-ness cannot be decided per item;
 * the worker instead records the FREEZING codex account on the run-state row
 * and calls this only when the resuming turn's codex account DIFFERS from it.
 *
 * For each reasoning item this deletes `id`, `providerData.encryptedContent`,
 * `providerData.encrypted_content`, and a top-level `encrypted_content`. The
 * visible reasoning `content`/`summary` and every message / tool-call /
 * tool-output item are left intact. A reasoning item with no id and no
 * encrypted_content is exactly the shape the production Azure path already
 * sends (see `stripProviderItemIdsFilter`), so it deserializes and replays
 * cleanly.
 *
 * Locations scrubbed: `originalInput` (when an array), the `rawItem` of each
 * `generatedItems` wrapper, each `modelResponses[].output`, and
 * `lastModelResponse.output`. `compaction` items are deliberately left
 * untouched: their `encrypted_content` is a protocol-REQUIRED field whose
 * removal would fail the SDK's run-state schema validation on deserialize.
 *
 * @param serialized the serialized RunState (expected to be JSON).
 * @returns the scrubbed JSON, or `serialized` itself when nothing changed.
 */
export function stripReasoningIdentityFromSerializedRunState(serialized: string): string {
  let state: unknown;
  try {
    state = JSON.parse(serialized);
  } catch {
    // Not JSON (e.g. a cleared-state sentinel handled elsewhere): forward as-is.
    return serialized;
  }
  if (typeof state !== "object" || state === null) {
    return serialized;
  }

  let mutated = false;

  const asRecord = (value: unknown): Record<string, unknown> | undefined =>
    typeof value === "object" && value !== null ? (value as Record<string, unknown>) : undefined;

  // Delete `key` from `target` when present, remembering that the state changed.
  const removeKey = (target: Record<string, unknown>, key: string): void => {
    if (key in target) {
      delete target[key];
      mutated = true;
    }
  };

  const neutralizeReasoning = (candidate: unknown): void => {
    const entry = asRecord(candidate);
    if (entry === undefined || entry.type !== "reasoning") {
      return;
    }
    removeKey(entry, "id");
    const provider = asRecord(entry.providerData);
    if (provider !== undefined) {
      removeKey(provider, "encryptedContent");
      removeKey(provider, "encrypted_content");
    }
    removeKey(entry, "encrypted_content");
  };

  const neutralizeEach = (list: unknown): void => {
    if (!Array.isArray(list)) {
      return;
    }
    for (const entry of list) {
      neutralizeReasoning(entry);
    }
  };

  const root = state as Record<string, unknown>;

  // 1. originalInput is either a string (no items) or an array of protocol items.
  neutralizeEach(root.originalInput);

  // 2. generatedItems are SDK run-item wrappers; a `reasoning_item` carries the
  //    protocol reasoning shape under `rawItem`.
  if (Array.isArray(root.generatedItems)) {
    for (const wrapper of root.generatedItems) {
      const wrapperFields = asRecord(wrapper);
      if (wrapperFields !== undefined && "rawItem" in wrapperFields) {
        neutralizeReasoning(wrapperFields.rawItem);
      }
    }
  }

  // 3. modelResponses[].output and lastModelResponse.output hold protocol items.
  const responses: unknown[] = Array.isArray(root.modelResponses)
    ? [...root.modelResponses, root.lastModelResponse]
    : [root.lastModelResponse];
  for (const response of responses) {
    neutralizeEach(asRecord(response)?.output);
  }

  return mutated ? JSON.stringify(state) : serialized;
}
390
+
391
/**
 * Neutralize tool_search items IN PLACE inside a serialized RunState blob for a
 * cross-account codex resume — the run-state sibling of
 * `applyCodexHistoryStrip`'s tool_search rule, but COUNT-PRESERVING (HOLE E:
 * the blob path's reconcile watermark counts the blob's history length, so
 * items must never be removed — only mutated).
 *
 * The hazard: on deserialize, the SDK re-runs the registered CLIENT
 * tool_search execute callback per frozen pair
 * (`rehydrateToolSearchRuntimeTools`) and THROWS a UserError when the re-run's
 * runtime-tool keys mismatch the serialized expectation — which is exactly
 * what happens when the RESUMING account's connector pool differs from the
 * FREEZING account's. The SDK skips that rehydration entirely for
 * `execution === 'server'` calls, so setting `execution` to `"server"` on every
 * `tool_search_call` / `tool_search_output` defuses the throw without touching
 * counts, ids, pairing, or content. The flipped shape is wire-safe:
 * LIVE-VERIFIED against /codex/responses — a replayed server-execution pair is
 * accepted (200) and its disclosure still holds. The account-bound `tsc_…` id
 * is separately stripped by the codex transport normalizer (all input item ids).
 *
 * Locations walked: `originalInput` (array form), the `rawItem` of each
 * `generatedItems` wrapper, every `modelResponses[].output`, and
 * `lastModelResponse.output`.
 *
 * @param serialized the serialized RunState (expected to be JSON).
 * @returns the re-serialized JSON, or `serialized` itself when it is not JSON,
 *   not an object, or nothing needed flipping.
 */
export function neutralizeToolSearchItemsInSerializedRunState(serialized: string): string {
  let state: unknown;
  try {
    state = JSON.parse(serialized);
  } catch {
    return serialized;
  }
  if (typeof state !== "object" || state === null) {
    return serialized;
  }

  let flipped = 0;

  const asRecord = (value: unknown): Record<string, unknown> | undefined =>
    typeof value === "object" && value !== null ? (value as Record<string, unknown>) : undefined;

  const forceServerExecution = (candidate: unknown): void => {
    const entry = asRecord(candidate);
    if (entry === undefined) {
      return;
    }
    const isToolSearch = entry.type === "tool_search_call" || entry.type === "tool_search_output";
    if (isToolSearch && entry.execution !== "server") {
      entry.execution = "server";
      flipped += 1;
    }
  };

  const forceEach = (list: unknown): void => {
    if (!Array.isArray(list)) {
      return;
    }
    for (const entry of list) {
      forceServerExecution(entry);
    }
  };

  const root = state as Record<string, unknown>;

  forceEach(root.originalInput);

  if (Array.isArray(root.generatedItems)) {
    for (const wrapper of root.generatedItems) {
      const wrapperFields = asRecord(wrapper);
      if (wrapperFields !== undefined && "rawItem" in wrapperFields) {
        forceServerExecution(wrapperFields.rawItem);
      }
    }
  }

  const responses: unknown[] = Array.isArray(root.modelResponses)
    ? [...root.modelResponses, root.lastModelResponse]
    : [root.lastModelResponse];
  for (const response of responses) {
    forceEach(asRecord(response)?.output);
  }

  return flipped > 0 ? JSON.stringify(state) : serialized;
}
473
+
474
/**
 * Resolve the both-present conflict on `computer_call` items so each carries at
 * most one of the two mutually-exclusive action fields the provider accepts.
 *
 * The OpenAI Agents SDK 0.11.6 `computer_call` schema (protocol.mjs) carries
 * BOTH the legacy singular `action` and the GA batched `actions`, each
 * `.optional()`, and only requires "at least one". The Azure computer-use
 * endpoint is stricter and rejects the whole request with
 *
 *   `400 Computer call input must include exactly one of `action` or `actions`.`
 *
 * when an emitted `computer_call` carries both (observed live: a screenshot
 * call carrying `action:{type:"screenshot"}` AND `actions:[{type:"screenshot"}]`).
 *
 * Which one is kept? LIVE-PROVEN against the deployed Azure deployment
 * (gpt-5.5-2026-04-24): the SDK serializes the GA computer tool as
 * `{type:"computer"}`, and that GA tool accepts ONLY the batched plural:
 *   - `action`-only  -> 400 "exactly one of action or actions"
 *   - `actions`-only -> passes the action/actions structural validation
 *   - both           -> 400 "exactly one …"
 * So when an item has a non-null `action` AND a non-empty `actions` array,
 * `actions` is KEPT and `action` is DROPPED. Every other `computer_call`
 * (exactly one field, the legacy `action`-only form, or an empty `actions`
 * array) passes through untouched — this transform does not rewrite singulars.
 *
 * Pure and non-mutating: only conflicting items are cloned (without `action`);
 * every other item passes through by reference. Unlike
 * {@link sanitizeHistoryItemsForModel} (which only *filters* items), this is a
 * read-path *transform* of a single item's shape.
 *
 * @param items history items in order.
 * @returns a new array of the same length.
 */
export function normalizeComputerCallActions<T extends HistoryItem>(items: readonly T[]): T[] {
  return items.map((item): T => {
    if (itemType(item) !== "computer_call") {
      return item;
    }
    const fields: Record<string, unknown> = item;
    const batched = fields.actions;
    const carriesBatched = Array.isArray(batched) && batched.length > 0;
    const carriesSingular = fields.action != null;
    if (!(carriesSingular && carriesBatched)) {
      return item;
    }
    // Clone everything except the legacy singular field.
    const { action: _discardedAction, ...withoutSingular } = fields;
    return withoutSingular as unknown as T;
  });
}
525
+
526
/**
 * Rewrite EVERY `computer_call` item in the `input` array of a parsed Responses
 * request body to the ACTIONS-ONLY shape the GA Azure computer tool accepts.
 * Mutates the parsed object in place and reports whether anything changed.
 *
 * WHY THIS LIVES AT THE WIRE LEVEL (not the input-item filter). The input-item
 * normalizer ({@link normalizeComputerCallActions}, wired as a
 * callModelInputFilter) runs BEFORE the SDK's responses converter
 * (`convertAgentItemToResponsesInput`). That converter re-derives the wire
 * payload from the item: when `actions` is present it emits BOTH
 * `{action: ..., actions: [...]}`, and when only `action` is present it emits
 * `action`-only. It can NEVER emit actions-only. Probed live against the
 * deployed Azure gpt-5.5-2026-04-24 GA computer tool (`{type:"computer"}`):
 *   - `action`-only  -> 400 "Computer call input must include exactly one of
 *                       `action` or `actions`."  (rejected)
 *   - both           -> 400 same message           (rejected)
 *   - `actions`-only -> passes the action/actions structural validation
 * So the only seam that can rewrite the final serialized JSON is a custom
 * `fetch` on the OpenAI client; this function is that rewriter's core.
 *
 * Per `computer_call`:
 *   - a non-empty `actions` array wins; otherwise a non-null `action` is
 *     wrapped as `[action]`; then the `action` key is deleted;
 *   - an item that is already actions-only (non-empty array, no `action` key)
 *     is left byte-identical;
 *   - an item with neither usable field is left untouched (nothing to derive;
 *     let the provider report it).
 *
 * @param body the caller's private, already-JSON.parsed request body.
 * @returns `true` iff at least one `computer_call` was changed; `false` also
 *   when `body` is not an object or `body.input` is not an array.
 */
export function rewriteComputerCallsToActionsOnly(body: unknown): boolean {
  if (typeof body !== "object" || body === null) {
    return false;
  }
  const entries = (body as Record<string, unknown>).input;
  if (!Array.isArray(entries)) {
    return false;
  }

  let rewrites = 0;
  for (const entry of entries) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const call = entry as Record<string, unknown>;
    if (call.type !== "computer_call") {
      continue;
    }

    const batched: unknown = call.actions;
    const hasBatched = Array.isArray(batched) && batched.length > 0;
    const hasSingularValue = call.action != null;

    if (!hasBatched && !hasSingularValue) {
      // Neither action nor actions present: nothing to normalize.
      continue;
    }
    if (hasBatched && !("action" in call)) {
      // Already actions-only with a non-empty array — leave byte-identical.
      continue;
    }

    const nextActions: unknown = hasBatched ? batched : [call.action];
    delete call.action;
    call.actions = nextActions;
    rewrites += 1;
  }
  return rewrites > 0;
}
595
+
596
/**
 * 1×1 transparent PNG data URI substituted for unusable screenshot URLs.
 *
 * This is the same placeholder the SDK uses for tool-approval-rejection
 * screenshots (`TOOL_APPROVAL_REJECTION_SCREENSHOT_DATA_URL` in agents-core
 * `toolExecution.mjs`). It is a valid data URI the provider accepts, so the turn
 * continues and the model receives the next real screenshot on its following step.
 */
const EMPTY_IMAGE_URL_PLACEHOLDER =
  "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";

/**
 * Backstop for the action-timeout 400: walk the `input` array of a serialized
 * Responses request body and repair every `computer_call_output` item whose
 * `output.image_url` is not a non-empty string (`""`, `null`, or missing).
 *
 * WHY THIS IS NEEDED. When a computer ACTION (click/type/scroll/drag) times out
 * at the 15-second yield window, `SandboxComputer.x()` throws
 * `ComputerActionError`. The agents-core SDK `toolExecution.mjs` catch block sets
 * `output = ''` and then builds the wire item:
 *
 *   `{type:"computer_call_output", output:{type:"computer_screenshot", image_url:""}}`
 *
 * Azure rejects the whole request with:
 *
 *   `400 Invalid 'input[N].output.image_url'. Expected a valid URL, but got a
 *   value with an invalid format.`
 *
 * The screenshot() fail-loud guard (which throws on empty frames) only runs when
 * the SDK calls screenshot() on a SUCCESS path — not on this action-error catch
 * path. This wire-level rewrite catches both paths regardless of how the empty
 * `image_url` was produced.
 *
 * Repair rules, per item:
 * - usable `image_url` -> untouched;
 * - no usable `image_url` but a non-empty `file_id` -> the screenshot is supplied
 *   by reference, so the placeholder is NOT injected (it would mask the real
 *   image); an unusable `image_url` key, if present, is removed instead;
 * - otherwise -> `image_url` is set to the 1×1 transparent PNG placeholder.
 *
 * Mutates `body` in place (the caller has already JSON.parsed a private copy).
 *
 * @param body - Parsed request body; anything without an `input` array is ignored.
 * @returns `true` iff at least one `computer_call_output` item was modified.
 */
export function rewriteEmptyComputerCallOutputImageUrls(body: unknown): boolean {
  if (typeof body !== "object" || body === null) {
    return false;
  }
  const input = (body as Record<string, unknown>).input;
  if (!Array.isArray(input)) {
    return false;
  }

  let changed = false;
  for (const item of input) {
    if (typeof item !== "object" || item === null) {
      continue;
    }
    const record = item as Record<string, unknown>;
    if (record.type !== "computer_call_output") {
      continue;
    }

    const output = record.output;
    // Arrays are objects too, but a property set on one is dropped by
    // JSON.stringify — patching it would report a change that never ships.
    if (typeof output !== "object" || output === null || Array.isArray(output)) {
      continue;
    }
    const screenshot = output as Record<string, unknown>;

    const imageUrl = screenshot.image_url;
    if (typeof imageUrl === "string" && imageUrl.length > 0) {
      continue;
    }

    const fileId = screenshot.file_id;
    if (typeof fileId === "string" && fileId.length > 0) {
      // The image is referenced by file id: only strip the unusable URL so the
      // provider does not trip over it, and never overlay the placeholder.
      if ("image_url" in screenshot) {
        delete screenshot.image_url;
        changed = true;
      }
      continue;
    }

    screenshot.image_url = EMPTY_IMAGE_URL_PLACEHOLDER;
    changed = true;
  }
  return changed;
}
668
+
669
/**
 * Structural `fetch` call signature.
 *
 * Typed as the bare `(input, init) => Promise<Response>` signature rather than
 * the DOM `typeof fetch` so it omits the `preconnect` static the global type
 * carries; this matches the OpenAI SDK's `Fetch` option, which only needs the
 * call signature.
 */
type FetchLike = (
  input: Parameters<typeof fetch>[0],
  init?: Parameters<typeof fetch>[1],
) => Promise<Response>;

/** The non-optional request-init object accepted by {@link FetchLike}. */
type FetchLikeInit = NonNullable<Parameters<typeof fetch>[1]>;

/**
 * Return `headers` without any `Content-Length` entry.
 *
 * A rewritten body almost always has a different byte length, so a length
 * computed for the original body would be stale. When no such header is present
 * the ORIGINAL `headers` reference is returned so the caller can detect "nothing
 * to change" by identity.
 */
function dropStaleContentLength(headers: FetchLikeInit["headers"]): FetchLikeInit["headers"] {
  if (headers == null) {
    return headers;
  }
  const normalized = new Headers(headers);
  if (!normalized.has("content-length")) {
    return headers;
  }
  normalized.delete("content-length");
  return normalized;
}

/**
 * Wrap a `fetch` so every outbound OpenAI Responses request body that contains a
 * `computer_call` is rewritten to the ACTIONS-ONLY shape (see
 * {@link rewriteComputerCallsToActionsOnly}) before it reaches the network, AND
 * any `computer_call_output` item with an empty/missing `output.image_url` is
 * patched (see {@link rewriteEmptyComputerCallOutputImageUrls}).
 *
 * Installed as the `fetch:` option on the Azure OpenAI client, this is the
 * lowest reachable seam — below the agents-core input filter and below the SDK's
 * responses converter — so it neutralizes the converter's both-fields synthesis
 * regardless of what the input item carried, and backstops the action-timeout
 * empty image_url regardless of how it was produced.
 *
 * Surgical and cheap: the body is parsed only when it is a string containing the
 * prefix `"computer_call` (which matches both `"computer_call"` action items and
 * `"computer_call_output"` result items). Every other request — non-computer-use
 * turns, non-string bodies — is forwarded with the SAME `init` reference. A JSON
 * parse failure or a no-op rewrite also forwards the original `init` unchanged.
 *
 * When the body IS rewritten, a copy of `init` is forwarded with the new body and
 * with any `Content-Length` header removed, since the old length no longer
 * matches and the underlying fetch recomputes it for string bodies.
 *
 * `base` is invoked exactly once per request, outside the rewrite's `try`, so an
 * error thrown by `base` itself propagates to the caller instead of being
 * mistaken for a parse failure and triggering a second send.
 *
 * @param base - The underlying fetch implementation to delegate to.
 * @returns A fetch-compatible function that normalizes computer-use bodies.
 */
export function computerCallNormalizingFetch(base: FetchLike): FetchLike {
  return (input, init) => {
    let outgoing = init;
    // Both `"computer_call"` and `"computer_call_output"` begin with
    // `"computer_call`, so one prefix-substring check covers both item kinds.
    if (init && typeof init.body === "string" && init.body.includes("\"computer_call")) {
      try {
        const parsed: unknown = JSON.parse(init.body);
        const collapsedCalls = rewriteComputerCallsToActionsOnly(parsed);
        const patchedOutputs = rewriteEmptyComputerCallOutputImageUrls(parsed);
        if (collapsedCalls || patchedOutputs) {
          const rewritten: FetchLikeInit = { ...init, body: JSON.stringify(parsed) };
          const headers = dropStaleContentLength(init.headers);
          if (headers !== init.headers) {
            rewritten.headers = headers;
          }
          // Assigned last so any failure above leaves the original `init` in place.
          outgoing = rewritten;
        }
      } catch {
        // Non-JSON body or rewrite failure: forward the request unchanged.
        outgoing = init;
      }
    }
    return base(input, outgoing);
  };
}