@cuylabs/physical-capx-agent-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ import {
2
+ createCapxPhysicalTools
3
+ } from "./chunk-57TF3E2Q.js";
4
+
5
+ // src/session.ts
6
+ import { CapxSession, createCapxSession } from "@cuylabs/physical-capx";
7
+ export {
8
+ CapxSession,
9
+ createCapxPhysicalTools,
10
+ createCapxSession
11
+ };
12
+ //# sourceMappingURL=session.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/session.ts"],"sourcesContent":["export { CapxSession, createCapxSession } from \"@cuylabs/physical-capx\";\nexport type { CapxSessionOptions } from \"@cuylabs/physical-capx\";\nexport { createCapxPhysicalTools } from \"./tools.js\";\n"],"mappings":";;;;;AAAA,SAAS,aAAa,yBAAyB;","names":[]}
@@ -0,0 +1,8 @@
1
+ import { CapxSession } from '@cuylabs/physical-capx';
2
+ import { Tool } from '@cuylabs/agent-core/tool';
3
+ import { PhysicalToolOptions } from '@cuylabs/physical-agent-core';
4
+ import { PhysicalSession } from '@cuylabs/physical-core';
5
+
6
+ declare function createCapxPhysicalTools(session: PhysicalSession | CapxSession, options?: PhysicalToolOptions): Tool.AnyInfo[];
7
+
8
+ export { createCapxPhysicalTools };
package/dist/tools.js ADDED
@@ -0,0 +1,7 @@
1
+ import {
2
+ createCapxPhysicalTools
3
+ } from "./chunk-57TF3E2Q.js";
4
+ export {
5
+ createCapxPhysicalTools
6
+ };
7
+ //# sourceMappingURL=tools.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/docs/README.md ADDED
@@ -0,0 +1,23 @@
1
+ # @cuylabs/physical-capx-agent-core Docs
2
+
3
+ `@cuylabs/physical-capx-agent-core` binds CaP-X physical sessions to
4
+ `@cuylabs/agent-core`.
5
+
6
+ Use it for CaP-X tool creation, packaged CaP-X model guidance, and
7
+ `createCapxAgent(...)`.
8
+
9
+ Start here:
10
+
11
+ - [Agent-Core Integration](./agent-core-integration.md) explains how
12
+ `agent-core` supervises a CaP-X session and owns the live observe/execute
13
+ loop.
14
+ - [`../../physical-capx/docs/how-it-works.md`](../../physical-capx/docs/how-it-works.md)
15
+ explains the runtime/session adapter.
16
+ - [`../../physical-capx/docs/limitations.md`](../../physical-capx/docs/limitations.md)
17
+ documents runtime-service and hardware-safety boundaries.
18
+ - [`../examples/README.md`](../examples/README.md) is the operational runbook for
19
+ running the single-turn and autosolve examples.
20
+
21
+ This adapter exists to integrate CaP-X, a Python robotics Code-as-Policies
22
+ framework, with `@cuylabs/agent-core`. It is not a fork or TypeScript port of
23
+ CaP-X.
@@ -0,0 +1,76 @@
1
+ # Agent-Core Integration
2
+
3
+ `@cuylabs/physical-capx-agent-core` uses `@cuylabs/agent-core` as the
4
+ application-level harness for a running `capx-agent-runtime` service.
5
+
6
+ ## Ownership
7
+
8
+ `agent-core` owns conversation turns, tool choice, approvals, tracing, policy
9
+ code authoring, and the live observe/execute/reset loop.
10
+
11
+ `capx-agent-runtime` owns the CaP-X checkout, YAML config, simulator/API server
12
+ lifecycle, Python execution namespace, rewards, task completion, and artifacts.
13
+
14
+ The TypeScript adapter does not launch CaP-X and does not pass `repoPath` or
15
+ `configPath`. It also omits `outputDir` and `skillLibraryPath` by default.
16
+ Those path choices are runtime-service startup concerns unless a trusted server
17
+ operator explicitly enables client path overrides.
18
+
19
+ ## Recommended Wiring
20
+
21
+ ```typescript
22
+ import { createCapxAgent } from "@cuylabs/physical-capx-agent-core";
23
+
24
+ const { agent, session } = await createCapxAgent({
25
+ model,
26
+ startSession: true,
27
+ toolExecutionMode: "plan",
28
+ sessionOptions: {
29
+ mode: "runtime",
30
+ runtimeServerUrl: "http://127.0.0.1:8210",
31
+ physicalMode: "simulation",
32
+ enablePolicyCodeExecution: true,
33
+ },
34
+ });
35
+ ```
36
+
37
+ The helper defaults to `toolExecutionMode: "plan"` so `agent-core` executes
38
+ tool batches through its capability-aware runner. This is not a prompt-level
39
+ plan or a plan-only agent. The model still reasons and chooses tools, but the
40
+ tool call is deferred to `agent-core`, which then applies approval policy,
41
+ capability metadata, and safe dispatch before committing the tool result back
42
+ into the conversation.
43
+
44
+ `toolExecutionMode: "auto"` is the simpler AI SDK path: executable tools are
45
+ given directly to the SDK and execute inline while the model stream is running.
46
+ That is fine for many text tools. For physical sessions, `plan` is the safer
47
+ default because tools like `capx_run_policy_code` and `capx_stop` are
48
+ side-effecting and should stay under the host harness' approval and scheduling
49
+ policy.
50
+
51
+ The helper also installs a default approval policy with `defaultAction: "deny"`.
52
+ Read-only tools can pass through host policy; mutating tools such as policy
53
+ execution and stop should remain approval-gated. Runtime skill-library mutation
54
+ is available through programmatic APIs for deliberate workflows, but it is not
55
+ part of the default agent tool surface.
56
+
57
+ ## Runtime Tools
58
+
59
+ `capx_run_policy_code` sends Python code to the active runtime session and
60
+ returns stdout, stderr, reward, task-completion metadata, diagnostics, and
61
+ artifacts.
62
+
63
+ `capx_observe` returns prompt, observation, and policy-code context. That
64
+ context can include reusable Python helper summaries and typed affordances from
65
+ the runtime skill library.
66
+
67
+ The active runtime session is durable across tool calls. The agent should use
68
+ `capx_observe`, then submit one selected Python policy step with
69
+ `capx_run_policy_code`, then observe again in the same session. Starting,
70
+ resetting, and replacing sessions are host/runtime orchestration decisions, not
71
+ default model-facing actions.
72
+
73
+ That gives an external agent the core Code-as-Policies boundary: prompt/API
74
+ context comes from CaP-X, generated code runs in CaP-X's Python namespace, and
75
+ reusable helper functions stay inside that runtime instead of being copied into
76
+ TypeScript.
@@ -0,0 +1,37 @@
1
+ # Required agent model config. `_setup.ts` loads this file automatically when
2
+ # either example runs.
3
+ OPENAI_API_KEY=
4
+ OPENAI_MODEL=gpt-4o-mini
5
+ # Optional: set only for OpenAI-compatible providers that are not the default
6
+ # OpenAI endpoint.
7
+ OPENAI_BASE_URL=
8
+
9
+ # Required runtime service URL. Start capx-agent-runtime first, then point this
10
+ # example at the service or SSH tunnel URL. The TypeScript examples do not
11
+ # start CaP-X or select a CaP-X config.
12
+ CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210
13
+
14
+ # Safety switches for the example host. Set CAPX_ALLOW_DESTRUCTIVE=1 to allow
15
+ # capx_run_policy_code; otherwise the examples can observe and propose code but
16
+ # execution is denied.
17
+ CAPX_PHYSICAL_MODE=simulation
18
+ CAPX_ALLOW_DESTRUCTIVE=0
19
+ CAPX_ALLOW_HARDWARE_POLICY_EXECUTION=0
20
+
21
+ # Optional runtime/client tuning.
22
+ CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS=120000
23
+ CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS=1000000
24
+ CAPX_POLICY_EXECUTION_TIMEOUT_MS=1000000
25
+ CAPX_POLICY_EXECUTION_TRIAL=
26
+ # Leave blank to use the CaP-X YAML record_video setting. Set 1 or 0 only to
27
+ # override the server/YAML value for this example run.
28
+ CAPX_POLICY_EXECUTION_RECORD_VIDEO=
29
+ CAPX_STOP_ON_EXIT=0
30
+ CAPX_MAX_SOLVER_TURNS=6
31
+
32
+ # Privileged runtime session path overrides. Leave blank for normal use.
33
+ # capx-agent-runtime rejects these unless the server was started with
34
+ # --allow-client-path-overrides and matching allowed roots.
35
+ CAPX_SESSION_OUTPUT_DIR=
36
+ CAPX_SESSION_SKILL_LIBRARY_PATH=
37
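+ # Optional (not a path override): replaces the built-in user prompt of the
+ # single-turn solver example. Leave blank to use the default prompt.
+ CAPX_AGENT_PROMPT=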
@@ -0,0 +1,165 @@
1
+ /**
2
+ * 01 - CaP-X runtime solver
3
+ *
4
+ * This is the default bring-your-own-agent flow:
5
+ * 1. Start capx-agent-runtime on the GPU workstation.
6
+ * 2. Point CAPX_RUNTIME_SERVER_URL at that service or SSH tunnel.
7
+ * 3. Run this TypeScript agent locally.
8
+ *
9
+ * The runtime service owns the CaP-X checkout, YAML config, simulator, and API
10
+ * servers. This agent-core example owns the reasoning loop: observe, inspect
11
+ * task context, write policy code, execute one step, observe again, and report.
12
+ *
13
+ * Run:
14
+ * npx tsx examples/01-capx-runtime-solver.ts
15
+ */
16
+
17
+ import { createCapxAgent } from "@cuylabs/physical-capx-agent-core";
18
+ import { createEventPrinter } from "@cuylabs/agent-core";
19
+ import { exampleOpenAIModel } from "./_setup.js";
20
+
21
/**
 * Normalizes an optional environment-style string.
 *
 * @param value Raw value, typically read from `process.env`.
 * @returns The value with surrounding whitespace removed, or `undefined`
 *   when the input is missing or contains only whitespace.
 */
function optionalString(value: string | undefined): string | undefined {
  if (value === undefined) {
    return undefined;
  }
  const text = value.trim();
  return text.length > 0 ? text : undefined;
}
25
+
26
/**
 * Parses an optional boolean flag from an environment-style string.
 *
 * The value is trimmed before it is compared, so a padded value such as
 * `" 1 "` or `"true\n"` is read the same way as the unpadded form instead of
 * silently becoming `false`.
 *
 * @param value Raw value, typically read from `process.env`.
 * @returns `undefined` when the value is missing or blank, `true` for `"1"`
 *   or a case-insensitive `"true"`, and `false` for anything else.
 */
function optionalBoolean(value: string | undefined): boolean | undefined {
  const normalized = value?.trim().toLowerCase();
  if (!normalized) {
    // Missing or whitespace-only: the caller falls back to its own default.
    return undefined;
  }
  return normalized === "1" || normalized === "true";
}
32
+
33
/**
 * Parses an optional finite number from an environment-style string.
 *
 * The value is trimmed first because `Number("   ")` evaluates to `0`; without
 * the trim a whitespace-only variable would be read as a real zero (for
 * example a zero timeout) instead of "not set".
 *
 * @param value Raw value, typically read from `process.env`.
 * @returns The parsed number, or `undefined` when the value is missing, blank,
 *   or does not parse to a finite number.
 */
function optionalNumber(value: string | undefined): number | undefined {
  const trimmed = value?.trim();
  if (!trimmed) {
    return undefined;
  }
  const parsed = Number(trimmed);
  return Number.isFinite(parsed) ? parsed : undefined;
}
40
+
41
+ const runtimeServerUrl = optionalString(process.env.CAPX_RUNTIME_SERVER_URL);
42
+
43
+ if (!runtimeServerUrl) {
44
+ throw new Error(
45
+ [
46
+ "CAPX_RUNTIME_SERVER_URL is required.",
47
+ "Start capx-agent-runtime on the CaP-X workstation, tunnel the port if needed,",
48
+ "then set CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210.",
49
+ ].join(" "),
50
+ );
51
+ }
52
+
53
+ const allowDestructive = process.env.CAPX_ALLOW_DESTRUCTIVE === "1";
54
+ const toolExecutionMode = "plan" as const;
55
+ const runId = Date.now();
56
+ const sessionId =
57
+ optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
58
+ `capx-runtime-solver-${runId}`;
59
+ const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
60
+ const sessionSkillLibraryPath = optionalString(
61
+ process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
62
+ );
63
+ const recordVideo =
64
+ optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
65
+ "runtime-default";
66
+
67
/**
 * Approval decision used by this example's `onRequest` handler.
 *
 * The tool named `"skill"` is always allowed. Every other tool is allowed only
 * when `allowDestructive` is set (CAPX_ALLOW_DESTRUCTIVE=1) and denied
 * otherwise.
 *
 * @param tool Name of the tool that requested approval.
 * @returns `"allow"` or `"deny"`.
 */
function approveExampleTool(tool: string): "allow" | "deny" {
  const permitted = tool === "skill" || allowDestructive;
  return permitted ? "allow" : "deny";
}
73
+
74
+ const userPrompt =
75
+ optionalString(process.env.CAPX_AGENT_PROMPT) ??
76
+ [
77
+ "You are the external agent solving one CaP-X runtime simulation.",
78
+ "Check capx_status first, then call capx_observe with includeImages=true.",
79
+ "Use the CaP-X task prompt, full prompt, observations, API context, skill library, and turn history as the source of truth.",
80
+ "Propose one concise Python Code-as-Policy action toward the task.",
81
+ "If capx_run_policy_code is available and approval allows it, execute the action, observe again, inspect reward/stdout/stderr/task completion, and summarize the result.",
82
+ "If skill extraction or injection tools are available, only use them after useful successful code and with approval.",
83
+ "If execution is denied, explain the exact policy code you would run and why.",
84
+ ].join(" ");
85
+
86
+ console.log(
87
+ [
88
+ "CaP-X agent mode=runtime",
89
+ "startSession=true",
90
+ "policyExecution=live-runtime",
91
+ `toolDispatch=${toolExecutionMode}`,
92
+ `approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
93
+ `recordVideo=${recordVideo}`,
94
+ `agentSessionId=${sessionId}`,
95
+ `sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
96
+ `sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
97
+ `pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
98
+ `runtimeServerUrl=${runtimeServerUrl}`,
99
+ ].join(" "),
100
+ );
101
+
102
+ const { agent, session } = await createCapxAgent({
103
+ model: exampleOpenAIModel(),
104
+ startSession: true,
105
+ toolExecutionMode,
106
+ sessionOptions: {
107
+ mode: "runtime",
108
+ physicalMode:
109
+ process.env.CAPX_PHYSICAL_MODE === "hardware" ? "hardware" : "simulation",
110
+
111
+ // Normal service-first path: connect to an already-running runtime server
112
+ // and let that server's --config-path/--repo-path defaults define the
113
+ // simulation.
114
+ runtimeServerUrl,
115
+ runtimeServerStartupTimeoutMs: optionalNumber(
116
+ process.env.CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS,
117
+ ),
118
+ runtimeServerRequestTimeoutMs: optionalNumber(
119
+ process.env.CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS,
120
+ ),
121
+
122
+ enablePolicyCodeExecution: true,
123
+ policyExecutionMode: "live-runtime",
124
+ allowHardwarePolicyExecution:
125
+ process.env.CAPX_ALLOW_HARDWARE_POLICY_EXECUTION === "1",
126
+ policyExecutionTimeoutMs: optionalNumber(
127
+ process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
128
+ ),
129
+ policyExecutionTrial: optionalNumber(
130
+ process.env.CAPX_POLICY_EXECUTION_TRIAL,
131
+ ),
132
+ policyExecutionRecordVideo: optionalBoolean(
133
+ process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
134
+ ),
135
+
136
+ outputDir: sessionOutputDir,
137
+ skillLibraryPath: sessionSkillLibraryPath,
138
+ },
139
+ approval: {
140
+ defaultAction: "ask",
141
+ onRequest: async (request) => {
142
+ console.log(
143
+ `approval requested: ${request.tool} risk=${request.risk} description=${request.description}`,
144
+ );
145
+ return approveExampleTool(request.tool);
146
+ },
147
+ },
148
+ });
149
+
150
// Run the single solver turn and stream every agent event to the console.
try {
  const printEvent = createEventPrinter({
    steps: true,
    completion: true,
    toolResultMaxChars:
      optionalNumber(process.env.CAPX_TOOL_RESULT_MAX_CHARS) ?? 2_000,
  });
  for await (const event of agent.chat(sessionId, userPrompt)) {
    printEvent(event);
  }
} finally {
  try {
    await agent.close();
  } finally {
    // Nested so the requested runtime stop still happens when agent.close()
    // rejects; otherwise the physical session would be left running.
    if (process.env.CAPX_STOP_ON_EXIT === "1") {
      await session.stop("example exit");
    }
  }
}
@@ -0,0 +1,314 @@
1
+ /**
2
+ * 02 - CaP-X runtime autosolve loop
3
+ *
4
+ * This example keeps the same agent-core session open across multiple user
5
+ * turns. Each turn lets the agent observe, write policy code, execute it when
6
+ * approval allows, and inspect the new result. The outer loop stops when CaP-X
7
+ * reports task completion or when CAPX_MAX_SOLVER_TURNS is reached.
8
+ *
9
+ * Run:
10
+ * npx tsx examples/02-capx-runtime-autosolve.ts
11
+ */
12
+
13
+ import { createEventPrinter } from "@cuylabs/agent-core";
14
+ import { createCapxAgent } from "@cuylabs/physical-capx-agent-core";
15
+ import type { PhysicalObservation } from "@cuylabs/physical-core";
16
+ import { exampleOpenAIModel } from "./_setup.js";
17
+
18
/**
 * Normalizes an optional environment-style string.
 *
 * @param value Raw value, typically read from `process.env`.
 * @returns The value with surrounding whitespace removed, or `undefined`
 *   when the input is missing or contains only whitespace.
 */
function optionalString(value: string | undefined): string | undefined {
  if (value === undefined) {
    return undefined;
  }
  const text = value.trim();
  return text.length > 0 ? text : undefined;
}
22
+
23
/**
 * Parses an optional boolean flag from an environment-style string.
 *
 * The value is trimmed before it is compared, so a padded value such as
 * `" 1 "` or `"true\n"` is read the same way as the unpadded form instead of
 * silently becoming `false`.
 *
 * @param value Raw value, typically read from `process.env`.
 * @returns `undefined` when the value is missing or blank, `true` for `"1"`
 *   or a case-insensitive `"true"`, and `false` for anything else.
 */
function optionalBoolean(value: string | undefined): boolean | undefined {
  const normalized = value?.trim().toLowerCase();
  if (!normalized) {
    // Missing or whitespace-only: the caller falls back to its own default.
    return undefined;
  }
  return normalized === "1" || normalized === "true";
}
29
+
30
/**
 * Parses an optional finite number from an environment-style string.
 *
 * The value is trimmed first because `Number("   ")` evaluates to `0`; without
 * the trim a whitespace-only variable (for example CAPX_MAX_SOLVER_TURNS)
 * would be read as a real zero instead of "not set".
 *
 * @param value Raw value, typically read from `process.env`.
 * @returns The parsed number, or `undefined` when the value is missing, blank,
 *   or does not parse to a finite number.
 */
function optionalNumber(value: string | undefined): number | undefined {
  const trimmed = value?.trim();
  if (!trimmed) {
    return undefined;
  }
  const parsed = Number(trimmed);
  return Number.isFinite(parsed) ? parsed : undefined;
}
37
+
38
/** Diagnostic flags that may accompany a runtime step. */
interface LastRuntimeStepDiagnostics {
  /** Failure phase label; `"policy_execution"` is never treated as an observation failure. */
  failurePhase?: string;
  observationPipeline?: boolean;
  depthAssertion?: boolean;
}

/**
 * Shape this example expects in the JSON text of the
 * `capx:runtime:last-step` observation item.
 *
 * Some fields are accepted in both camelCase and snake_case spellings;
 * `normalizeStep` copies the snake_case value into the camelCase field when
 * the camelCase one is absent.
 */
interface LastRuntimeStep {
  success?: boolean;
  taskCompleted?: boolean | null;
  /** snake_case spelling of `taskCompleted`. */
  task_completed?: boolean | null;
  terminated?: boolean;
  truncated?: boolean;
  reward?: number | null;
  /** Sandbox return code; any defined non-zero value is treated as a failure. */
  sandboxRc?: number;
  /** snake_case spelling of `sandboxRc`. */
  sandbox_rc?: number;
  stderr?: string;
  error?: string | null;
  diagnostics?: LastRuntimeStepDiagnostics | null;
}
55
+
56
/**
 * Returns a copy of `step` whose camelCase `taskCompleted` and `sandboxRc`
 * fields fall back to their snake_case counterparts when the camelCase value
 * is `null` or `undefined`. All other fields are copied unchanged.
 *
 * @param step Step record as parsed from the runtime observation.
 * @returns A new object; the input is not mutated.
 */
function normalizeStep(step: LastRuntimeStep): LastRuntimeStep {
  const taskCompleted = step.taskCompleted ?? step.task_completed;
  const sandboxRc = step.sandboxRc ?? step.sandbox_rc;
  return { ...step, taskCompleted, sandboxRc };
}
63
+
64
/**
 * Extracts the most recent runtime step record from an observation.
 *
 * Searches the observation items from last to first for the entry whose
 * `source` is `"capx:runtime:last-step"`, parses its text as JSON, and
 * normalizes the field spellings.
 *
 * @param observation Observation returned by the session.
 * @returns The normalized step, or `null` when no such item exists, the item
 *   is not a text item, the text is not valid JSON, or the JSON value is not a
 *   plain object.
 */
function lastStep(observation: PhysicalObservation): LastRuntimeStep | null {
  const item = [...observation.items]
    .reverse()
    .find((entry) => entry.source === "capx:runtime:last-step");
  if (!item || item.kind !== "text") {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(item.text);
  } catch {
    // Malformed payload: treat it as "no step information available".
    return null;
  }

  // JSON.parse can also yield primitives, arrays, or null. Reject them
  // explicitly instead of spreading them into a meaningless step object.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  return normalizeStep(parsed as LastRuntimeStep);
}
77
+
78
/**
 * Decides whether the last runtime step failed in a way this example treats
 * as an observation/depth failure rather than a policy-code error.
 *
 * All of the following must hold for a `true` result:
 * - the step exists and its diagnostics do not report the
 *   `"policy_execution"` failure phase;
 * - `sandboxRc` is defined and non-zero;
 * - the step carries an error message or a diagnostics object;
 * - the step is truncated;
 * - either the diagnostics flag the observation pipeline or a depth assertion,
 *   or (without diagnostics) stderr mentions the observation/depth code path,
 *   or the error text contains `"AssertionError"`.
 *
 * @param step Normalized last step, or `null` when none is available.
 * @returns `true` when the failure matches the pattern above.
 */
function isUnrecoverableObservationFailure(
  step: LastRuntimeStep | null,
): boolean {
  if (!step) {
    return false;
  }

  const diagnostics = step.diagnostics ?? null;
  if (diagnostics?.failurePhase === "policy_execution") {
    return false;
  }

  const errorText = step.error ?? "";
  const sandboxFailed = step.sandboxRc !== undefined && step.sandboxRc !== 0;
  const runtimeRaised = !!errorText || !!diagnostics;
  if (!(sandboxFailed && runtimeRaised)) {
    return false;
  }

  // Structured diagnostics take precedence over stderr text matching.
  if (diagnostics) {
    return Boolean(
      step.truncated &&
        (diagnostics.observationPipeline ||
          diagnostics.depthAssertion ||
          errorText.includes("AssertionError")),
    );
  }

  // No diagnostics: fall back to looking for observation/depth markers in stderr.
  const stderrText = step.stderr ?? "";
  const observationMarkers = [
    "_get_observation",
    "get_observation",
    "get_real_depth_map",
  ];
  const stderrImplicatesObservation =
    observationMarkers.some((marker) => stderrText.includes(marker)) ||
    (stderrText.includes("AssertionError") && stderrText.includes("depth"));

  return Boolean(
    step.truncated &&
      (stderrImplicatesObservation || errorText.includes("AssertionError")),
  );
}
118
+
119
/**
 * Formats the completion-related fields of a step as a single log line of
 * space-separated `name=value` pairs. Fields that are `null` or `undefined`
 * are rendered as `n/a`.
 *
 * @param step Normalized runtime step.
 * @returns The formatted summary line.
 */
function completionSummary(step: LastRuntimeStep): string {
  const fields: Array<[string, boolean | number | null | undefined]> = [
    ["taskCompleted", step.taskCompleted],
    ["terminated", step.terminated],
    ["truncated", step.truncated],
    ["sandboxRc", step.sandboxRc],
    ["reward", step.reward],
  ];
  return fields
    .map(([label, value]) => `${label}=${value ?? "n/a"}`)
    .join(" ");
}
128
+
129
+ const runtimeServerUrl = optionalString(process.env.CAPX_RUNTIME_SERVER_URL);
130
+
131
+ if (!runtimeServerUrl) {
132
+ throw new Error(
133
+ [
134
+ "CAPX_RUNTIME_SERVER_URL is required.",
135
+ "Start capx-agent-runtime on the CaP-X workstation, tunnel the port if needed,",
136
+ "then set CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210.",
137
+ ].join(" "),
138
+ );
139
+ }
140
+
141
+ const allowDestructive = process.env.CAPX_ALLOW_DESTRUCTIVE === "1";
142
+ const maxTurns = optionalNumber(process.env.CAPX_MAX_SOLVER_TURNS) ?? 6;
143
+ const recoverOnRuntimeError =
144
+ optionalString(process.env.CAPX_RECOVER_ON_RUNTIME_ERROR) === "reset";
145
+ const maxRuntimeResets =
146
+ optionalNumber(process.env.CAPX_MAX_RUNTIME_RESETS) ??
147
+ (recoverOnRuntimeError ? 1 : 0);
148
+ const initialPolicyExecutionTrial =
149
+ optionalNumber(process.env.CAPX_POLICY_EXECUTION_TRIAL) ?? 1;
150
+ const runId = Date.now();
151
+ const sessionId =
152
+ optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
153
+ `capx-runtime-autosolve-${runId}`;
154
+ const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
155
+ const sessionSkillLibraryPath = optionalString(
156
+ process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
157
+ );
158
+ const toolExecutionMode = "plan" as const;
159
+ const recordVideo =
160
+ optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
161
+ "runtime-default";
162
+ const printEvent = createEventPrinter({
163
+ steps: true,
164
+ completion: true,
165
+ toolResultMaxChars:
166
+ optionalNumber(process.env.CAPX_TOOL_RESULT_MAX_CHARS) ?? 2_000,
167
+ });
168
+
169
/**
 * Approval decision used by this example's `onRequest` handler.
 *
 * The tool named `"skill"` is always allowed. Every other tool is allowed only
 * when `allowDestructive` is set (CAPX_ALLOW_DESTRUCTIVE=1) and denied
 * otherwise.
 *
 * @param tool Name of the tool that requested approval.
 * @returns `"allow"` or `"deny"`.
 */
function approveExampleTool(tool: string): "allow" | "deny" {
  const permitted = tool === "skill" || allowDestructive;
  return permitted ? "allow" : "deny";
}
175
+
176
+ console.error(
177
+ [
178
+ "CaP-X agent mode=runtime",
179
+ `maxTurns=${maxTurns}`,
180
+ "policyExecution=live-runtime",
181
+ `toolDispatch=${toolExecutionMode}`,
182
+ `approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
183
+ `recordVideo=${recordVideo}`,
184
+ `agentSessionId=${sessionId}`,
185
+ `sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
186
+ `sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
187
+ `pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
188
+ `trial=${initialPolicyExecutionTrial}`,
189
+ `recoverOnRuntimeError=${recoverOnRuntimeError ? "reset" : "off"}`,
190
+ `maxRuntimeResets=${maxRuntimeResets}`,
191
+ `runtimeServerUrl=${runtimeServerUrl}`,
192
+ ].join(" "),
193
+ );
194
+
195
+ const { agent, session } = await createCapxAgent({
196
+ model: exampleOpenAIModel(),
197
+ startSession: true,
198
+ toolExecutionMode,
199
+ sessionOptions: {
200
+ mode: "runtime",
201
+ runtimeServerUrl,
202
+ physicalMode:
203
+ process.env.CAPX_PHYSICAL_MODE === "hardware" ? "hardware" : "simulation",
204
+ runtimeServerStartupTimeoutMs: optionalNumber(
205
+ process.env.CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS,
206
+ ),
207
+ runtimeServerRequestTimeoutMs: optionalNumber(
208
+ process.env.CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS,
209
+ ),
210
+ enablePolicyCodeExecution: true,
211
+ policyExecutionMode: "live-runtime",
212
+ allowHardwarePolicyExecution:
213
+ process.env.CAPX_ALLOW_HARDWARE_POLICY_EXECUTION === "1",
214
+ policyExecutionTimeoutMs: optionalNumber(
215
+ process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
216
+ ),
217
+ policyExecutionTrial: initialPolicyExecutionTrial,
218
+ policyExecutionRecordVideo: optionalBoolean(
219
+ process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
220
+ ),
221
+ outputDir: sessionOutputDir,
222
+ skillLibraryPath: sessionSkillLibraryPath,
223
+ },
224
+ approval: {
225
+ defaultAction: "ask",
226
+ onRequest: async (request) => {
227
+ console.error(
228
+ `approval requested: ${request.tool} risk=${request.risk} description=${request.description}`,
229
+ );
230
+ return approveExampleTool(request.tool);
231
+ },
232
+ },
233
+ });
234
+
235
// Outer autosolve loop: one agent chat turn per iteration, followed by a host
// side observation that decides whether to stop, reset, or continue.
try {
  let runtimeResetCount = 0;
  // True only for the turn that immediately follows a runtime reset, so the
  // agent is told to disregard the broken previous state.
  let resetBeforeTurn = false;

  for (let turn = 1; turn <= maxTurns; turn += 1) {
    console.error(`\n--- solver turn ${turn}/${maxTurns} ---`);
    const prompt = resetBeforeTurn
      ? [
          "The previous CaP-X runtime session hit an observation/depth failure, so the example reset the runtime before this turn.",
          "Treat the current runtime state as fresh. Ignore any broken previous physical state.",
          "Call capx_status and capx_observe with includeImages=true, then solve from the current task prompt and observations.",
          "Execute one useful Python Code-as-Policy step if approval allows it, then observe again.",
        ].join(" ")
      : turn === 1
        ? [
            "Solve the active CaP-X task.",
            "First call capx_status, then capx_observe with includeImages=true.",
            "Use CaP-X's task prompt, API context, observations, skill library, and turn history as source of truth.",
            "Execute one useful Python Code-as-Policy step if approval allows it.",
            "After execution, observe again and report whether the task appears complete.",
          ].join(" ")
        : [
            "Continue solving the same CaP-X task from the current runtime state.",
            "Inspect capx_turn_history and capx_observe before choosing the next action.",
            "If the last step completed the task, say TASK_COMPLETE and do not execute more code.",
            "Otherwise execute one more useful Python Code-as-Policy step if approval allows it, then observe again.",
          ].join(" ");

    for await (const event of agent.chat(sessionId, prompt)) {
      printEvent(event);
    }

    // Inspect the runtime directly rather than trusting the model's own report.
    const observation = await session.observe({ includeArtifacts: true });
    const step = lastStep(observation);
    if (isUnrecoverableObservationFailure(step)) {
      if (recoverOnRuntimeError && runtimeResetCount < maxRuntimeResets) {
        runtimeResetCount += 1;
        const nextTrial = initialPolicyExecutionTrial + runtimeResetCount;
        session.options.policyExecutionTrial = nextTrial;
        console.error(
          [
            "CaP-X reported an observation/depth failure.",
            `Resetting runtime session to trial ${nextTrial} before the next solver turn (${runtimeResetCount}/${maxRuntimeResets}).`,
            step ? `Last step: ${completionSummary(step)}` : "",
          ]
            .filter(Boolean)
            .join(" "),
        );
        // NOTE(review): reset is optional on the session; when it is absent
        // this call is a no-op and the next turn still assumes a fresh state.
        await session.reset?.();
        resetBeforeTurn = true;
        continue;
      }
      console.error(
        [
          "Stopping autosolve: CaP-X reported an unrecoverable observation/depth failure.",
          "The current runtime session cannot execute further policy code because env.step() fails while collecting observations.",
          recoverOnRuntimeError
            ? "Runtime reset budget is exhausted. Restart capx-agent-runtime serve or retry with a fresh session."
            : "Set CAPX_RECOVER_ON_RUNTIME_ERROR=reset to let this example reset once and continue.",
          step ? `Last step: ${completionSummary(step)}` : "",
        ]
          .filter(Boolean)
          .join(" "),
      );
      break;
    }
    if (step?.taskCompleted || step?.terminated) {
      console.error(
        `CaP-X reported completion state: ${completionSummary(step)}`,
      );
      break;
    }
    resetBeforeTurn = false;
  }
} finally {
  try {
    await agent.close();
  } finally {
    // Nested so the requested runtime stop still happens when agent.close()
    // rejects; otherwise the physical session would be left running.
    if (process.env.CAPX_STOP_ON_EXIT === "1") {
      await session.stop("example exit");
    }
  }
}