@opengeni/runtime 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opengeni/runtime",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.js",
@@ -28,14 +28,14 @@
28
28
  "typecheck": "tsc --noEmit"
29
29
  },
30
30
  "dependencies": {
31
- "@opengeni/agent-proto": "^0.2.0",
32
- "@opengeni/config": "^0.2.1",
31
+ "@opengeni/agent-proto": "^0.2.1",
32
+ "@opengeni/config": "^0.2.2",
33
33
  "@opengeni/contracts": "^0.4.0",
34
34
  "@openai/agents": "^0.11.6",
35
35
  "@openai/agents-extensions": "^0.11.6",
36
36
  "modal": "^0.7.4",
37
37
  "openai": "6.36.0",
38
- "@opengeni/codex": "^0.2.0"
38
+ "@opengeni/codex": "^0.2.1"
39
39
  },
40
40
  "devDependencies": {
41
41
  "tsup": "^8.5.0",
package/src/index.ts CHANGED
@@ -91,7 +91,7 @@ import {
91
91
  restoredSandboxSessionStateFromEntry,
92
92
  setSelfhostedApplyDiff,
93
93
  } from "./sandbox";
94
- import { computerUse } from "./sandbox-computer";
94
+ import { computerUse, type ComputerToolMode } from "./sandbox-computer";
95
95
 
96
96
  // P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
97
97
  // so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
@@ -106,6 +106,7 @@ export {
106
106
  ComputerActionError,
107
107
  type SandboxComputerOptions,
108
108
  type ComputerUseArgs,
109
+ type ComputerToolMode,
109
110
  } from "./sandbox-computer";
110
111
 
111
112
  // The agent-loop-free sandbox leaf (createSandboxClient + resume/recovery
@@ -643,6 +644,12 @@ export type BuildAgentOptions = {
643
644
  encryptedReasoning?: boolean;
644
645
  contextWindowTokens?: number;
645
646
  structuredToolTransport?: boolean;
647
+ // EXPLICIT computer-use tool transport, decided where provider identity is
648
+ // authoritative (the worker's model resolution — agent-turn.ts). Threaded into
649
+ // buildAgentCapabilities → computerUse({toolMode}) so tool selection never rests
650
+ // on the SDK's constructor-name sniff. When omitted, the legacy sniff +
651
+ // `structuredToolTransport` neutralize path is preserved byte-for-byte.
652
+ computerToolMode?: ComputerToolMode;
646
653
  // The LIVE, by-reference connector-namespace Set from prepareAgentTools
647
654
  // (codexConnectorNamespaces): fills during each turn's codex_apps tools/list,
648
655
  // read per model call by the codex tool_search description so the model sees
@@ -864,6 +871,7 @@ export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[],
864
871
  compactionMode,
865
872
  contextWindowTokens,
866
873
  ...(options.structuredToolTransport !== undefined ? { structuredToolTransport: options.structuredToolTransport } : {}),
874
+ ...(options.computerToolMode !== undefined ? { computerToolMode: options.computerToolMode } : {}),
867
875
  }),
868
876
  });
869
877
  agentFileDownloads.set(agent, normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter((download) => !download.content));
@@ -961,7 +969,16 @@ function neutralizeStructuredToolTransport(capability: ReturnType<typeof filesys
961
969
  export function buildAgentCapabilities(
962
970
  settings: Settings,
963
971
  packSkills: PackSkill[],
964
- options: { compactionMode?: ContextCompactionMode; contextWindowTokens?: number; structuredToolTransport?: boolean } = {},
972
+ options: {
973
+ compactionMode?: ContextCompactionMode;
974
+ contextWindowTokens?: number;
975
+ structuredToolTransport?: boolean;
976
+ // EXPLICIT computer-use transport (see BuildAgentOptions.computerToolMode). When
977
+ // present, computerUse() is handed the mode directly and its tools() obeys it
978
+ // without the constructor-name sniff. When absent, the legacy neutralize +
979
+ // imageFunctionResults path (driven by structuredToolTransport) is unchanged.
980
+ computerToolMode?: ComputerToolMode;
981
+ } = {},
965
982
  ): ReturnType<typeof Capabilities.default> {
966
983
  const mode = options.compactionMode ?? resolveContextCompactionMode(settings);
967
984
  const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
@@ -996,25 +1013,37 @@ export function buildAgentCapabilities(
996
1013
  && settings.sandboxDesktopEnabled
997
1014
  && desktopCapableBackend(settings.sandboxBackend)
998
1015
  ) {
999
- // computer-use is now transport-aware, exactly like filesystem: its `tools()`
1000
- // emits the HOSTED `computer_use_preview` tool on the structured transport and a
1001
- // set of FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex
1002
- // backend rejects hosted tool types (only function/custom/web_search accepted),
1003
- // so on the codex path (structuredToolTransport === false) we neutralize the
1004
- // capability's model binding — the SAME trick used for filesystem above — so
1005
- // `tools()` sees no model instance and emits the function tools the backend can
1006
- // call, instead of suppressing the desktop tier entirely.
1016
+ // computer-use is transport-aware, exactly like filesystem: `tools()` emits the
1017
+ // HOSTED `computer_use_preview` tool on the structured transport and a set of
1018
+ // FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex backend
1019
+ // rejects hosted tool types (only function/custom/web_search accepted).
1020
+ //
1021
+ // HARDENING: when the caller declares an EXPLICIT `computerToolMode` (the worker
1022
+ // does, from its authoritative model resolution), thread it straight through
1023
+ // tool selection then never depends on the SDK's model-instance constructor-name
1024
+ // sniff (which a wrapped/proxied model would defeat, silently 400ing a
1025
+ // chat-completions provider handed the hosted tool). When ABSENT, the legacy path
1026
+ // is preserved byte-for-byte: on the codex path (structuredToolTransport === false)
1027
+ // we set imageFunctionResults and neutralize the capability's model binding — the
1028
+ // SAME trick used for filesystem above — so `tools()` sees no model instance and
1029
+ // emits the function tools the backend can call, instead of suppressing the tier.
1030
+ const explicitMode = options.computerToolMode;
1007
1031
  const computerCapability = computerUse({
1008
1032
  dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
1009
1033
  readOnly: settings.computerUseReadOnly,
1010
- // On the codex path the function tools deliver screenshots as a real image the
1011
- // model can see. The ChatGPT/Codex backend rejects HOSTED tool types but DOES
1012
- // accept `input_image` content items inside a `function_call_output` (proven by
1013
- // openai/codex codex-rs, whose view_image tool ships exactly that shape) so a
1014
- // structured image tool result is seen, where a text data-URL would be unreadable.
1015
- ...(options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
1034
+ ...(explicitMode
1035
+ ? { toolMode: explicitMode }
1036
+ // Legacy (no explicit mode): on the codex path the function tools deliver
1037
+ // screenshots as a real image the model can see. The ChatGPT/Codex backend
1038
+ // rejects HOSTED tool types but DOES accept `input_image` content items inside a
1039
+ // `function_call_output` (proven by openai/codex codex-rs, whose view_image tool
1040
+ // ships exactly that shape) — so a structured image tool result is seen, where a
1041
+ // text data-URL would be unreadable.
1042
+ : options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
1016
1043
  });
1017
- if (options.structuredToolTransport === false) {
1044
+ // Neutralize ONLY on the legacy sniff path. With an explicit toolMode the mode
1045
+ // already forces the function tools, so the constructor-name override is moot.
1046
+ if (!explicitMode && options.structuredToolTransport === false) {
1018
1047
  neutralizeStructuredToolTransport(computerCapability);
1019
1048
  }
1020
1049
  caps.push(computerCapability as unknown as ReturnType<typeof Capabilities.default>[number]);
@@ -1088,19 +1117,19 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
1088
1117
  // device-code login may lack the connector scopes, and the backend can
1089
1118
  // reject the bearer at the initialize/tools-list handshake, so a 401/403
1090
1119
  // (or a missing/failed token) drops the server.
1091
- // - an AUTO-ATTACHED workspace-default capability MCP (ToolRef.optional):
1092
- // the caller never explicitly requested it, so a broken/expired
1093
- // capability credential must SKIP the server with a warning, never kill
1094
- // the turn before the model runs. An EXPLICITLY-requested tool omits
1095
- // `optional` and stays strict (below), preserving the fail-loud contract.
1120
+ // - an optional ToolRef: either an auto-attached workspace-default
1121
+ // capability MCP or a client/pack-selected portable ref. A
1122
+ // broken/expired credential or unavailable endpoint skips the server
1123
+ // with a warning, never killing the turn before the model runs. Bare
1124
+ // refs stay strict (below), preserving the fail-loud default.
1096
1125
  const optional = tool.optional === true;
1097
1126
  return { server, bestEffort: isCodexAppsMcpServer(config) || optional, optional };
1098
1127
  }));
1099
1128
  const requiredServers = servers.filter((entry) => !entry.bestEffort).map((entry) => entry.server);
1100
1129
  const bestEffortServers = servers.filter((entry) => entry.bestEffort).map((entry) => entry.server);
1101
- // Names of the OPTIONAL capability servers (not codex_apps) so a drop is
1102
- // surfaced as a warning; codex_apps keeps its historically-quiet drop (a
1103
- // not-logged-in ChatGPT plan is a normal, non-noteworthy state).
1130
+ // Names of the OPTIONAL servers (not codex_apps) so a drop is surfaced as a
1131
+ // warning; codex_apps keeps its historically-quiet drop (a not-logged-in
1132
+ // ChatGPT plan is a normal, non-noteworthy state).
1104
1133
  const optionalServerNames = new Set(
1105
1134
  servers.filter((entry) => entry.optional).map((entry) => entry.server.name),
1106
1135
  );
@@ -1121,7 +1150,7 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
1121
1150
  }
1122
1151
  const error = connectedBestEffort.errors.get(failed);
1123
1152
  console.warn(
1124
- `[mcp] optional capability server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
1153
+ `[mcp] optional server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
1125
1154
  error instanceof Error ? error.message : error,
1126
1155
  );
1127
1156
  }
@@ -761,6 +761,25 @@ export function computerFunctionTools(
761
761
 
762
762
  // ── The capability (the SDK seam) ────────────────────────────────────────────
763
763
 
764
+ /**
765
+ * EXPLICIT tool-transport selection, decided by the caller that knows the
766
+ * provider's true wire identity (the worker's model resolution — see agent-turn.ts),
767
+ * NOT inferred from the bound model instance's constructor name. This is the
768
+ * HARDENING seam: `supportsStructuredToolOutputTransport` string-sniffs the
769
+ * constructor for "ChatCompletions", which a wrapped / proxied / minified model
770
+ * instance would defeat — silently handing a chat-completions provider the HOSTED
771
+ * `computer_use_preview` tool it 400s on every turn. When `toolMode` is set, tools()
772
+ * OBEYS it and never consults the sniff:
773
+ * • "hosted" → the single hosted `computer_use_preview` tool (Responses backends).
774
+ * • "function-image" → the FUNCTION `computer_*` tools with screenshots delivered as a
775
+ * structured `{type:'image'}` output (the codex/ChatGPT backend,
776
+ * which rejects hosted tool types but SEES structured image results).
777
+ * • "function-text" → the FUNCTION tools with screenshots rendered as a text
778
+ * `data:…;base64` URL (chat-completions providers, which can't read
779
+ * structured image tool results).
780
+ */
781
+ export type ComputerToolMode = "hosted" | "function-image" | "function-text";
782
+
764
783
  export type ComputerUseArgs = {
765
784
  dimensions?: [number, number];
766
785
  readOnly?: boolean;
@@ -771,8 +790,14 @@ export type ComputerUseArgs = {
771
790
  // `input_image` content item inside the function_call_output) instead of the text
772
791
  // data-URL string. Only the codex/ChatGPT backend can read structured image tool
773
792
  // results; chat-completions providers cannot, so this stays OFF (text rendering)
774
- // by default and is turned on only on the codex path (see index.ts).
793
+ // by default and is turned on only on the codex path (see index.ts). Ignored when
794
+ // `toolMode` is set (the mode carries its own image-delivery choice).
775
795
  imageFunctionResults?: boolean;
796
+ // EXPLICIT transport selection (see {@link ComputerToolMode}). When present, tools()
797
+ // obeys it directly — the constructor-name sniff is NOT consulted. When ABSENT, the
798
+ // legacy sniff behaviour is preserved byte-for-byte (back-compat for any embedder
799
+ // that constructs the capability without threading a mode).
800
+ toolMode?: ComputerToolMode;
776
801
  };
777
802
 
778
803
  export function computerUse(args: ComputerUseArgs = {}): ComputerUseCapability {
@@ -820,16 +845,36 @@ export class ComputerUseCapability extends Capability {
820
845
  // The SDK base exposes the bound runAs as a protected field.
821
846
  ...(typeof this._runAs === "string" ? { runAs: this._runAs } : {}),
822
847
  });
823
- // Structured transport keeps the HOSTED computer tool (unchanged); the codex /
824
- // text backend gets the FUNCTION tools it can actually call.
848
+ // HARDENING: when the caller declares an EXPLICIT toolMode, obey it and NEVER
849
+ // consult `supportsStructuredToolOutputTransport` — tool selection must not
850
+ // depend on the model instance's constructor name (a wrapped/proxied/minified
851
+ // instance would defeat the "ChatCompletions" string-sniff and silently hand a
852
+ // chat-completions provider the hosted tool it 400s on). The mode is decided by
853
+ // the worker, where provider identity is authoritative (see agent-turn.ts).
854
+ switch (this.args.toolMode) {
855
+ case "hosted":
856
+ return [this.hostedComputerTool(computer)];
857
+ case "function-image":
858
+ return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, true);
859
+ case "function-text":
860
+ return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, false);
861
+ case undefined:
862
+ break; // fall through to the legacy sniff (back-compat), preserved byte-for-byte
863
+ }
864
+ // Legacy (no toolMode): structured transport keeps the HOSTED computer tool
865
+ // (unchanged); the codex / text backend gets the FUNCTION tools it can call.
825
866
  if (supportsStructuredToolOutputTransport(this._modelInstance)) {
826
- return [
827
- computerTool({
828
- computer,
829
- ...(this.args.needsApproval !== undefined ? { needsApproval: this.args.needsApproval as never } : {}),
830
- }) as unknown as Tool<unknown>,
831
- ];
867
+ return [this.hostedComputerTool(computer)];
832
868
  }
833
869
  return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, this.args.imageFunctionResults ?? false);
834
870
  }
871
+
872
+ /** The single HOSTED `computer_use_preview` tool bound to `computer` — identical
873
+ * construction for the explicit "hosted" mode and the legacy structured-sniff path. */
874
+ private hostedComputerTool(computer: Computer): Tool<unknown> {
875
+ return computerTool({
876
+ computer,
877
+ ...(this.args.needsApproval !== undefined ? { needsApproval: this.args.needsApproval as never } : {}),
878
+ }) as unknown as Tool<unknown>;
879
+ }
835
880
  }