@opengeni/runtime 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +25 -1
- package/dist/index.js +25 -15
- package/dist/index.js.map +1 -1
- package/package.json +4 -4
- package/src/index.ts +55 -26
- package/src/sandbox-computer.ts +54 -9
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opengeni/runtime",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.js",
|
|
@@ -28,14 +28,14 @@
|
|
|
28
28
|
"typecheck": "tsc --noEmit"
|
|
29
29
|
},
|
|
30
30
|
"dependencies": {
|
|
31
|
-
"@opengeni/agent-proto": "^0.2.
|
|
32
|
-
"@opengeni/config": "^0.2.
|
|
31
|
+
"@opengeni/agent-proto": "^0.2.1",
|
|
32
|
+
"@opengeni/config": "^0.2.2",
|
|
33
33
|
"@opengeni/contracts": "^0.4.0",
|
|
34
34
|
"@openai/agents": "^0.11.6",
|
|
35
35
|
"@openai/agents-extensions": "^0.11.6",
|
|
36
36
|
"modal": "^0.7.4",
|
|
37
37
|
"openai": "6.36.0",
|
|
38
|
-
"@opengeni/codex": "^0.2.
|
|
38
|
+
"@opengeni/codex": "^0.2.1"
|
|
39
39
|
},
|
|
40
40
|
"devDependencies": {
|
|
41
41
|
"tsup": "^8.5.0",
|
package/src/index.ts
CHANGED
|
@@ -91,7 +91,7 @@ import {
|
|
|
91
91
|
restoredSandboxSessionStateFromEntry,
|
|
92
92
|
setSelfhostedApplyDiff,
|
|
93
93
|
} from "./sandbox";
|
|
94
|
-
import { computerUse } from "./sandbox-computer";
|
|
94
|
+
import { computerUse, type ComputerToolMode } from "./sandbox-computer";
|
|
95
95
|
|
|
96
96
|
// P4.3 computer-use surface (the agent's :0 driver). Re-exported from the barrel
|
|
97
97
|
// so callers (the worker, live proofs) reach SandboxComputer/ComputerUseCapability
|
|
@@ -106,6 +106,7 @@ export {
|
|
|
106
106
|
ComputerActionError,
|
|
107
107
|
type SandboxComputerOptions,
|
|
108
108
|
type ComputerUseArgs,
|
|
109
|
+
type ComputerToolMode,
|
|
109
110
|
} from "./sandbox-computer";
|
|
110
111
|
|
|
111
112
|
// The agent-loop-free sandbox leaf (createSandboxClient + resume/recovery
|
|
@@ -643,6 +644,12 @@ export type BuildAgentOptions = {
|
|
|
643
644
|
encryptedReasoning?: boolean;
|
|
644
645
|
contextWindowTokens?: number;
|
|
645
646
|
structuredToolTransport?: boolean;
|
|
647
|
+
// EXPLICIT computer-use tool transport, decided where provider identity is
|
|
648
|
+
// authoritative (the worker's model resolution — agent-turn.ts). Threaded into
|
|
649
|
+
// buildAgentCapabilities → computerUse({toolMode}) so tool selection never rests
|
|
650
|
+
// on the SDK's constructor-name sniff. When omitted, the legacy sniff +
|
|
651
|
+
// `structuredToolTransport` neutralize path is preserved byte-for-byte.
|
|
652
|
+
computerToolMode?: ComputerToolMode;
|
|
646
653
|
// The LIVE, by-reference connector-namespace Set from prepareAgentTools
|
|
647
654
|
// (codexConnectorNamespaces): fills during each turn's codex_apps tools/list,
|
|
648
655
|
// read per model call by the codex tool_search description so the model sees
|
|
@@ -864,6 +871,7 @@ export function buildOpenGeniAgent(settings: Settings, resources: ResourceRef[],
|
|
|
864
871
|
compactionMode,
|
|
865
872
|
contextWindowTokens,
|
|
866
873
|
...(options.structuredToolTransport !== undefined ? { structuredToolTransport: options.structuredToolTransport } : {}),
|
|
874
|
+
...(options.computerToolMode !== undefined ? { computerToolMode: options.computerToolMode } : {}),
|
|
867
875
|
}),
|
|
868
876
|
});
|
|
869
877
|
agentFileDownloads.set(agent, normalizeSandboxFileDownloads(options.fileResourceDownloads ?? []).filter((download) => !download.content));
|
|
@@ -961,7 +969,16 @@ function neutralizeStructuredToolTransport(capability: ReturnType<typeof filesys
|
|
|
961
969
|
export function buildAgentCapabilities(
|
|
962
970
|
settings: Settings,
|
|
963
971
|
packSkills: PackSkill[],
|
|
964
|
-
options: {
|
|
972
|
+
options: {
|
|
973
|
+
compactionMode?: ContextCompactionMode;
|
|
974
|
+
contextWindowTokens?: number;
|
|
975
|
+
structuredToolTransport?: boolean;
|
|
976
|
+
// EXPLICIT computer-use transport (see BuildAgentOptions.computerToolMode). When
|
|
977
|
+
// present, computerUse() is handed the mode directly and its tools() obeys it
|
|
978
|
+
// without the constructor-name sniff. When absent, the legacy neutralize +
|
|
979
|
+
// imageFunctionResults path (driven by structuredToolTransport) is unchanged.
|
|
980
|
+
computerToolMode?: ComputerToolMode;
|
|
981
|
+
} = {},
|
|
965
982
|
): ReturnType<typeof Capabilities.default> {
|
|
966
983
|
const mode = options.compactionMode ?? resolveContextCompactionMode(settings);
|
|
967
984
|
const contextWindowTokens = options.contextWindowTokens ?? settings.contextWindowTokens;
|
|
@@ -996,25 +1013,37 @@ export function buildAgentCapabilities(
|
|
|
996
1013
|
&& settings.sandboxDesktopEnabled
|
|
997
1014
|
&& desktopCapableBackend(settings.sandboxBackend)
|
|
998
1015
|
) {
|
|
999
|
-
// computer-use is
|
|
1000
|
-
//
|
|
1001
|
-
//
|
|
1002
|
-
//
|
|
1003
|
-
//
|
|
1004
|
-
//
|
|
1005
|
-
//
|
|
1006
|
-
//
|
|
1016
|
+
// computer-use is transport-aware, exactly like filesystem: `tools()` emits the
|
|
1017
|
+
// HOSTED `computer_use_preview` tool on the structured transport and a set of
|
|
1018
|
+
// FUNCTION `computer_*` tools on the text transport. The ChatGPT/Codex backend
|
|
1019
|
+
// rejects hosted tool types (only function/custom/web_search accepted).
|
|
1020
|
+
//
|
|
1021
|
+
// HARDENING: when the caller declares an EXPLICIT `computerToolMode` (the worker
|
|
1022
|
+
// does, from its authoritative model resolution), thread it straight through —
|
|
1023
|
+
// tool selection then never depends on the SDK's model-instance constructor-name
|
|
1024
|
+
// sniff (which a wrapped/proxied model would defeat, silently 400ing a
|
|
1025
|
+
// chat-completions provider handed the hosted tool). When ABSENT, the legacy path
|
|
1026
|
+
// is preserved byte-for-byte: on the codex path (structuredToolTransport === false)
|
|
1027
|
+
// we set imageFunctionResults and neutralize the capability's model binding — the
|
|
1028
|
+
// SAME trick used for filesystem above — so `tools()` sees no model instance and
|
|
1029
|
+
// emits the function tools the backend can call, instead of suppressing the tier.
|
|
1030
|
+
const explicitMode = options.computerToolMode;
|
|
1007
1031
|
const computerCapability = computerUse({
|
|
1008
1032
|
dimensions: [settings.streamResolutionWidth, settings.streamResolutionHeight],
|
|
1009
1033
|
readOnly: settings.computerUseReadOnly,
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1034
|
+
...(explicitMode
|
|
1035
|
+
? { toolMode: explicitMode }
|
|
1036
|
+
// Legacy (no explicit mode): on the codex path the function tools deliver
|
|
1037
|
+
// screenshots as a real image the model can see. The ChatGPT/Codex backend
|
|
1038
|
+
// rejects HOSTED tool types but DOES accept `input_image` content items inside a
|
|
1039
|
+
// `function_call_output` (proven by openai/codex codex-rs, whose view_image tool
|
|
1040
|
+
// ships exactly that shape) — so a structured image tool result is seen, where a
|
|
1041
|
+
// text data-URL would be unreadable.
|
|
1042
|
+
: options.structuredToolTransport === false ? { imageFunctionResults: true } : {}),
|
|
1016
1043
|
});
|
|
1017
|
-
|
|
1044
|
+
// Neutralize ONLY on the legacy sniff path. With an explicit toolMode the mode
|
|
1045
|
+
// already forces the function tools, so the constructor-name override is moot.
|
|
1046
|
+
if (!explicitMode && options.structuredToolTransport === false) {
|
|
1018
1047
|
neutralizeStructuredToolTransport(computerCapability);
|
|
1019
1048
|
}
|
|
1020
1049
|
caps.push(computerCapability as unknown as ReturnType<typeof Capabilities.default>[number]);
|
|
@@ -1088,19 +1117,19 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
|
|
|
1088
1117
|
// device-code login may lack the connector scopes, and the backend can
|
|
1089
1118
|
// reject the bearer at the initialize/tools-list handshake, so a 401/403
|
|
1090
1119
|
// (or a missing/failed token) drops the server.
|
|
1091
|
-
// - an
|
|
1092
|
-
//
|
|
1093
|
-
//
|
|
1094
|
-
// the turn before the model runs.
|
|
1095
|
-
//
|
|
1120
|
+
// - an optional ToolRef: either an auto-attached workspace-default
|
|
1121
|
+
// capability MCP or a client/pack-selected portable ref. A
|
|
1122
|
+
// broken/expired credential or unavailable endpoint skips the server
|
|
1123
|
+
// with a warning, never killing the turn before the model runs. Bare
|
|
1124
|
+
// refs stay strict (below), preserving the fail-loud default.
|
|
1096
1125
|
const optional = tool.optional === true;
|
|
1097
1126
|
return { server, bestEffort: isCodexAppsMcpServer(config) || optional, optional };
|
|
1098
1127
|
}));
|
|
1099
1128
|
const requiredServers = servers.filter((entry) => !entry.bestEffort).map((entry) => entry.server);
|
|
1100
1129
|
const bestEffortServers = servers.filter((entry) => entry.bestEffort).map((entry) => entry.server);
|
|
1101
|
-
// Names of the OPTIONAL
|
|
1102
|
-
//
|
|
1103
|
-
//
|
|
1130
|
+
// Names of the OPTIONAL servers (not codex_apps) so a drop is surfaced as a
|
|
1131
|
+
// warning; codex_apps keeps its historically-quiet drop (a not-logged-in
|
|
1132
|
+
// ChatGPT plan is a normal, non-noteworthy state).
|
|
1104
1133
|
const optionalServerNames = new Set(
|
|
1105
1134
|
servers.filter((entry) => entry.optional).map((entry) => entry.server.name),
|
|
1106
1135
|
);
|
|
@@ -1121,7 +1150,7 @@ export async function prepareAgentTools(settings: Settings, tools: ToolRef[], op
|
|
|
1121
1150
|
}
|
|
1122
1151
|
const error = connectedBestEffort.errors.get(failed);
|
|
1123
1152
|
console.warn(
|
|
1124
|
-
`[mcp] optional
|
|
1153
|
+
`[mcp] optional server "${failed.name}" failed to connect/list tools; skipping it for this turn`,
|
|
1125
1154
|
error instanceof Error ? error.message : error,
|
|
1126
1155
|
);
|
|
1127
1156
|
}
|
package/src/sandbox-computer.ts
CHANGED
|
@@ -761,6 +761,25 @@ export function computerFunctionTools(
|
|
|
761
761
|
|
|
762
762
|
// ── The capability (the SDK seam) ────────────────────────────────────────────
|
|
763
763
|
|
|
764
|
+
/**
|
|
765
|
+
* EXPLICIT tool-transport selection, decided by the caller that knows the
|
|
766
|
+
* provider's true wire identity (the worker's model resolution — see agent-turn.ts),
|
|
767
|
+
* NOT inferred from the bound model instance's constructor name. This is the
|
|
768
|
+
* HARDENING seam: `supportsStructuredToolOutputTransport` string-sniffs the
|
|
769
|
+
* constructor for "ChatCompletions", which a wrapped / proxied / minified model
|
|
770
|
+
* instance would defeat — silently handing a chat-completions provider the HOSTED
|
|
771
|
+
* `computer_use_preview` tool it 400s on every turn. When `toolMode` is set, tools()
|
|
772
|
+
* OBEYS it and never consults the sniff:
|
|
773
|
+
* • "hosted" → the single hosted `computer_use_preview` tool (Responses backends).
|
|
774
|
+
* • "function-image" → the FUNCTION `computer_*` tools with screenshots delivered as a
|
|
775
|
+
* structured `{type:'image'}` output (the codex/ChatGPT backend,
|
|
776
|
+
* which rejects hosted tool types but SEES structured image results).
|
|
777
|
+
* • "function-text" → the FUNCTION tools with screenshots rendered as a text
|
|
778
|
+
* `data:…;base64` URL (chat-completions providers, which can't read
|
|
779
|
+
* structured image tool results).
|
|
780
|
+
*/
|
|
781
|
+
export type ComputerToolMode = "hosted" | "function-image" | "function-text";
|
|
782
|
+
|
|
764
783
|
export type ComputerUseArgs = {
|
|
765
784
|
dimensions?: [number, number];
|
|
766
785
|
readOnly?: boolean;
|
|
@@ -771,8 +790,14 @@ export type ComputerUseArgs = {
|
|
|
771
790
|
// `input_image` content item inside the function_call_output) instead of the text
|
|
772
791
|
// data-URL string. Only the codex/ChatGPT backend can read structured image tool
|
|
773
792
|
// results; chat-completions providers cannot, so this stays OFF (text rendering)
|
|
774
|
-
// by default and is turned on only on the codex path (see index.ts).
|
|
793
|
+
// by default and is turned on only on the codex path (see index.ts). Ignored when
|
|
794
|
+
// `toolMode` is set (the mode carries its own image-delivery choice).
|
|
775
795
|
imageFunctionResults?: boolean;
|
|
796
|
+
// EXPLICIT transport selection (see {@link ComputerToolMode}). When present, tools()
|
|
797
|
+
// obeys it directly — the constructor-name sniff is NOT consulted. When ABSENT, the
|
|
798
|
+
// legacy sniff behaviour is preserved byte-for-byte (back-compat for any embedder
|
|
799
|
+
// that constructs the capability without threading a mode).
|
|
800
|
+
toolMode?: ComputerToolMode;
|
|
776
801
|
};
|
|
777
802
|
|
|
778
803
|
export function computerUse(args: ComputerUseArgs = {}): ComputerUseCapability {
|
|
@@ -820,16 +845,36 @@ export class ComputerUseCapability extends Capability {
|
|
|
820
845
|
// The SDK base exposes the bound runAs as a protected field.
|
|
821
846
|
...(typeof this._runAs === "string" ? { runAs: this._runAs } : {}),
|
|
822
847
|
});
|
|
823
|
-
//
|
|
824
|
-
//
|
|
848
|
+
// HARDENING: when the caller declares an EXPLICIT toolMode, obey it and NEVER
|
|
849
|
+
// consult `supportsStructuredToolOutputTransport` — tool selection must not
|
|
850
|
+
// depend on the model instance's constructor name (a wrapped/proxied/minified
|
|
851
|
+
// instance would defeat the "ChatCompletions" string-sniff and silently hand a
|
|
852
|
+
// chat-completions provider the hosted tool it 400s on). The mode is decided by
|
|
853
|
+
// the worker, where provider identity is authoritative (see agent-turn.ts).
|
|
854
|
+
switch (this.args.toolMode) {
|
|
855
|
+
case "hosted":
|
|
856
|
+
return [this.hostedComputerTool(computer)];
|
|
857
|
+
case "function-image":
|
|
858
|
+
return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, true);
|
|
859
|
+
case "function-text":
|
|
860
|
+
return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, false);
|
|
861
|
+
case undefined:
|
|
862
|
+
break; // fall through to the legacy sniff (back-compat), preserved byte-for-byte
|
|
863
|
+
}
|
|
864
|
+
// Legacy (no toolMode): structured transport keeps the HOSTED computer tool
|
|
865
|
+
// (unchanged); the codex / text backend gets the FUNCTION tools it can call.
|
|
825
866
|
if (supportsStructuredToolOutputTransport(this._modelInstance)) {
|
|
826
|
-
return [
|
|
827
|
-
computerTool({
|
|
828
|
-
computer,
|
|
829
|
-
...(this.args.needsApproval !== undefined ? { needsApproval: this.args.needsApproval as never } : {}),
|
|
830
|
-
}) as unknown as Tool<unknown>,
|
|
831
|
-
];
|
|
867
|
+
return [this.hostedComputerTool(computer)];
|
|
832
868
|
}
|
|
833
869
|
return computerFunctionTools(computer, this.args.readOnly ?? false, this.args.needsApproval, this.args.imageFunctionResults ?? false);
|
|
834
870
|
}
|
|
871
|
+
|
|
872
|
+
/** The single HOSTED `computer_use_preview` tool bound to `computer` — identical
|
|
873
|
+
* construction for the explicit "hosted" mode and the legacy structured-sniff path. */
|
|
874
|
+
private hostedComputerTool(computer: Computer): Tool<unknown> {
|
|
875
|
+
return computerTool({
|
|
876
|
+
computer,
|
|
877
|
+
...(this.args.needsApproval !== undefined ? { needsApproval: this.args.needsApproval as never } : {}),
|
|
878
|
+
}) as unknown as Tool<unknown>;
|
|
879
|
+
}
|
|
835
880
|
}
|