@cuylabs/agent-physical-capx 5.0.2 → 5.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,6 @@
11
11
  * task context, write policy code, execute one step, observe again, and report.
12
12
  *
13
13
  * Run:
14
- * pnpm --filter @cuylabs/agent-physical-capx build
15
14
  * npx tsx examples/01-capx-runtime-solver.ts
16
15
  */
17
16
 
@@ -57,9 +56,10 @@ const runId = Date.now();
57
56
  const sessionId =
58
57
  optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
59
58
  `capx-runtime-solver-${runId}`;
60
- const outputDir =
61
- optionalString(process.env.CAPX_OUTPUT_DIR) ??
62
- `outputs/capx-agent-runtime/${sessionId}`;
59
+ const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
60
+ const sessionSkillLibraryPath = optionalString(
61
+ process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
62
+ );
63
63
  const recordVideo =
64
64
  optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
65
65
  "runtime-default";
@@ -92,10 +92,11 @@ console.log(
92
92
  `approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
93
93
  `recordVideo=${recordVideo}`,
94
94
  `agentSessionId=${sessionId}`,
95
- `outputDir=${outputDir}`,
95
+ `sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
96
+ `sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
97
+ `pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
96
98
  `runtimeServerUrl=${runtimeServerUrl}`,
97
- ]
98
- .join(" "),
99
+ ].join(" "),
99
100
  );
100
101
 
101
102
  const { agent, session } = await createCapxAgent({
@@ -125,13 +126,15 @@ const { agent, session } = await createCapxAgent({
125
126
  policyExecutionTimeoutMs: optionalNumber(
126
127
  process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
127
128
  ),
128
- policyExecutionTrial: optionalNumber(process.env.CAPX_POLICY_EXECUTION_TRIAL),
129
+ policyExecutionTrial: optionalNumber(
130
+ process.env.CAPX_POLICY_EXECUTION_TRIAL,
131
+ ),
129
132
  policyExecutionRecordVideo: optionalBoolean(
130
133
  process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
131
134
  ),
132
135
 
133
- outputDir,
134
- skillLibraryPath: optionalString(process.env.CAPX_SKILL_LIBRARY_PATH),
136
+ outputDir: sessionOutputDir,
137
+ skillLibraryPath: sessionSkillLibraryPath,
135
138
  },
136
139
  approval: {
137
140
  defaultAction: "ask",
@@ -7,7 +7,6 @@
7
7
  * reports task completion or when CAPX_MAX_SOLVER_TURNS is reached.
8
8
  *
9
9
  * Run:
10
- * pnpm --filter @cuylabs/agent-physical-capx build
11
10
  * npx tsx examples/02-capx-runtime-autosolve.ts
12
11
  */
13
12
 
@@ -76,7 +75,9 @@ function lastStep(observation: PhysicalObservation): LastRuntimeStep | null {
76
75
  }
77
76
  }
78
77
 
79
- function isUnrecoverableObservationFailure(step: LastRuntimeStep | null): boolean {
78
+ function isUnrecoverableObservationFailure(
79
+ step: LastRuntimeStep | null,
80
+ ): boolean {
80
81
  if (!step) {
81
82
  return false;
82
83
  }
@@ -94,9 +95,9 @@ function isUnrecoverableObservationFailure(step: LastRuntimeStep | null): boolea
94
95
  if (diagnostics) {
95
96
  return Boolean(
96
97
  step.truncated &&
97
- (diagnostics.observationPipeline ||
98
- diagnostics.depthAssertion ||
99
- error.includes("AssertionError")),
98
+ (diagnostics.observationPipeline ||
99
+ diagnostics.depthAssertion ||
100
+ error.includes("AssertionError")),
100
101
  );
101
102
  }
102
103
  const mentionsObservationPath =
@@ -108,8 +109,10 @@ function isUnrecoverableObservationFailure(step: LastRuntimeStep | null): boolea
108
109
  (stderr.includes("AssertionError") && stderr.includes("depth"));
109
110
  return Boolean(
110
111
  sandboxFailed &&
111
- step.truncated &&
112
- (mentionsObservationPath || mentionsDepthAssertion || error.includes("AssertionError")),
112
+ step.truncated &&
113
+ (mentionsObservationPath ||
114
+ mentionsDepthAssertion ||
115
+ error.includes("AssertionError")),
113
116
  );
114
117
  }
115
118
 
@@ -148,9 +151,10 @@ const runId = Date.now();
148
151
  const sessionId =
149
152
  optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
150
153
  `capx-runtime-autosolve-${runId}`;
151
- const outputDir =
152
- optionalString(process.env.CAPX_OUTPUT_DIR) ??
153
- `outputs/capx-agent-runtime/${sessionId}`;
154
+ const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
155
+ const sessionSkillLibraryPath = optionalString(
156
+ process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
157
+ );
154
158
  const toolExecutionMode = "plan" as const;
155
159
  const recordVideo =
156
160
  optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
@@ -178,7 +182,9 @@ console.error(
178
182
  `approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
179
183
  `recordVideo=${recordVideo}`,
180
184
  `agentSessionId=${sessionId}`,
181
- `outputDir=${outputDir}`,
185
+ `sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
186
+ `sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
187
+ `pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
182
188
  `trial=${initialPolicyExecutionTrial}`,
183
189
  `recoverOnRuntimeError=${recoverOnRuntimeError ? "reset" : "off"}`,
184
190
  `maxRuntimeResets=${maxRuntimeResets}`,
@@ -212,8 +218,8 @@ const { agent, session } = await createCapxAgent({
212
218
  policyExecutionRecordVideo: optionalBoolean(
213
219
  process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
214
220
  ),
215
- outputDir,
216
- skillLibraryPath: optionalString(process.env.CAPX_SKILL_LIBRARY_PATH),
221
+ outputDir: sessionOutputDir,
222
+ skillLibraryPath: sessionSkillLibraryPath,
217
223
  },
218
224
  approval: {
219
225
  defaultAction: "ask",
@@ -232,15 +238,14 @@ try {
232
238
 
233
239
  for (let turn = 1; turn <= maxTurns; turn += 1) {
234
240
  console.error(`\n--- solver turn ${turn}/${maxTurns} ---`);
235
- const prompt =
236
- resetBeforeTurn
237
- ? [
238
- "The previous CaP-X runtime session hit an observation/depth failure, so the example reset the runtime before this turn.",
239
- "Treat the current runtime state as fresh. Ignore any broken previous physical state.",
240
- "Call capx_status and capx_observe with includeImages=true, then solve from the current task prompt and observations.",
241
- "Execute one useful Python Code-as-Policy step if approval allows it, then observe again.",
242
- ].join(" ")
243
- : turn === 1
241
+ const prompt = resetBeforeTurn
242
+ ? [
243
+ "The previous CaP-X runtime session hit an observation/depth failure, so the example reset the runtime before this turn.",
244
+ "Treat the current runtime state as fresh. Ignore any broken previous physical state.",
245
+ "Call capx_status and capx_observe with includeImages=true, then solve from the current task prompt and observations.",
246
+ "Execute one useful Python Code-as-Policy step if approval allows it, then observe again.",
247
+ ].join(" ")
248
+ : turn === 1
244
249
  ? [
245
250
  "Solve the active CaP-X task.",
246
251
  "First call capx_status, then capx_observe with includeImages=true.",
@@ -294,7 +299,9 @@ try {
294
299
  break;
295
300
  }
296
301
  if (step?.taskCompleted || step?.terminated) {
297
- console.error(`CaP-X reported completion state: ${completionSummary(step)}`);
302
+ console.error(
303
+ `CaP-X reported completion state: ${completionSummary(step)}`,
304
+ );
298
305
  break;
299
306
  }
300
307
  resetBeforeTurn = false;
@@ -8,28 +8,35 @@ runtime by calling its HTTP API directly.
8
8
 
9
9
  ## Main Examples
10
10
 
11
- `01-capx-runtime-solver.ts` is the default single-turn bring-your-own-agent
12
- example. It creates an `agent-core` agent, wires in the CaP-X physical tools,
13
- and gives the agent one user turn. Inside that turn, `agent-core` may still run
14
- multiple model/tool steps, but the example prompt asks for one useful
15
- Code-as-Policy action and a summary:
16
-
17
- 1. observe the CaP-X task and rendered simulator state,
18
- 2. inspect runtime turn history and the CaP-X skill library when available,
19
- 3. write one Python Code-as-Policy step,
20
- 4. execute it through `capx-agent-runtime`,
21
- 5. observe again and summarize reward, stdout/stderr, and task completion.
11
+ There are two solver examples. Both connect an `agent-core` agent to an
12
+ already-running `capx-agent-runtime` service and expose the CaP-X session
13
+ through the `capx_*` tools.
14
+
15
+ `01-capx-runtime-solver.ts` is the default single-turn example. It creates one
16
+ agent, starts one runtime session, and gives the model one user turn. Inside
17
+ that turn, `agent-core` may still run multiple model/tool steps, but the prompt
18
+ asks for one useful Code-as-Policy action and a short result summary.
22
19
 
23
- `02-capx-runtime-autosolve.ts` keeps the same agent session open across several
24
- user turns. After each turn, the script observes the runtime and stops when
25
- CaP-X reports task completion or when `CAPX_MAX_SOLVER_TURNS` is reached. Use
26
- this when you want the harness to continue attempting the task instead of
27
- exiting after one scripted turn.
20
+ That flow is:
28
21
 
29
- The example enables the packaged `capx-code-as-policy` agent-core skill by
30
- default. That skill teaches the model how to use the CaP-X tools. It is not the
31
- same as CaP-X's runtime-side Python skill library, which is exposed dynamically
32
- through observation `codeContext` and programmatic runtime APIs.
22
+ 1. observe the CaP-X task, simulator state, and rendered frame,
23
+ 2. inspect runtime turn history and available policy-code context,
24
+ 3. write one Python Code-as-Policy step,
25
+ 4. execute that Python through `capx-agent-runtime`,
26
+ 5. observe again and summarize reward, stdout/stderr, artifacts, and task
27
+ completion.
28
+
29
+ `02-capx-runtime-autosolve.ts` is the multi-turn example. It keeps the same
30
+ agent and runtime session open across several user turns. After each turn, the
31
+ script observes the runtime result and stops when CaP-X reports task completion
32
+ or when `CAPX_MAX_SOLVER_TURNS` is reached. Use it when the harness should keep
33
+ trying the task instead of exiting after one solver turn.
34
+
35
+ Both examples enable the packaged `capx-code-as-policy` agent-core skill by
36
+ default. That skill teaches the model how to use the `capx_*` tools and how to
37
+ write policy code for the runtime. It is separate from CaP-X's runtime-side
38
+ Python skill library, which appears dynamically in observation `codeContext`
39
+ and programmatic runtime APIs.
33
40
 
34
41
  ## Service-First Setup
35
42
 
@@ -37,9 +44,12 @@ The normal path is to start the runtime service first, usually on a Linux GPU
37
44
  workstation, then run the TypeScript agent from your local machine or another
38
45
  client.
39
46
 
40
- Follow the runtime workstation setup first:
47
+ Follow the runtime project docs first:
41
48
 
42
- [capx-agent-runtime workstation setup](../../../repos/capx-agent-runtime/docs/workstation-setup.md)
49
+ 1. Prepare the GPU workstation with
50
+ [Workstation Setup](https://github.com/cuylabs-ai/capx-agent-runtime/blob/main/docs/workstation-setup.md).
51
+ 2. Start and validate the runtime server with
52
+ [Runtime Server](https://github.com/cuylabs-ai/capx-agent-runtime/blob/main/docs/runtime-server.md).
43
53
 
44
54
  The runtime server is typically started from the CaP-X checkout like this:
45
55
 
@@ -79,19 +89,25 @@ the service:
79
89
  ssh -L 8210:127.0.0.1:8210 <user>@<gpu-host>
80
90
  ```
81
91
 
82
- ## Local Workspace Setup
92
+ ## Client Setup
83
93
 
84
- These examples are meant to run from a local `agents-ts` checkout. The
85
- `@cuylabs/agent-physical` and `@cuylabs/agent-physical-capx` packages are
86
- workspace-linked here, so source changes can be tested before publishing.
94
+ In an application that consumes the released packages, install the TypeScript
95
+ client packages and the example runner dependencies:
87
96
 
88
- From the repo root, install dependencies and build this package plus its local
89
- workspace dependencies:
97
+ ```bash
98
+ npm install @cuylabs/agent-core @cuylabs/agent-physical @cuylabs/agent-physical-capx
99
+ npm install --save-dev @ai-sdk/openai dotenv tsx
100
+ ```
101
+
102
+ The released package already includes its built `dist/` files, so there is no
103
+ workspace build step in the normal install path.
104
+
105
+ If you are running the examples from a local `agents-ts` monorepo checkout
106
+ while changing package source, install workspace dependencies first:
90
107
 
91
108
  ```bash
92
109
  cd /path/to/agents-ts
93
110
  pnpm install
94
- pnpm --filter @cuylabs/agent-physical-capx... build
95
111
  ```
96
112
 
97
113
  Use the `pnpm` already available on your machine. If `pnpm` is missing and your
@@ -99,11 +115,8 @@ Node install includes Corepack, you can enable it with `corepack enable`; if
99
115
  `corepack` is not available, install `pnpm` directly with your normal Node
100
116
  package-manager setup.
101
117
 
102
- The trailing `...` is intentional. It includes the dependency chain, so
103
- `@cuylabs/agent-core`, `@cuylabs/agent-physical`, and
104
- `@cuylabs/agent-physical-capx` are built together.
105
-
106
- Then configure the example environment:
118
+ For the checked-in examples, configure the local example environment from this
119
+ package directory:
107
120
 
108
121
  ```bash
109
122
  cd packages/agent-physical-capx
@@ -129,60 +142,63 @@ gateway or hosted inference endpoint.
129
142
 
130
143
  ## Run Modes
131
144
 
132
- By default, the examples are observe-only. The agent can inspect the runtime
133
- state, propose policy code, and summarize what it would do, but it cannot call
134
- `capx_run_policy_code`:
145
+ The examples default to observe-only mode. In that mode, the agent can inspect
146
+ the task, frame, runtime state, and policy-code context, but it cannot call
147
+ `capx_run_policy_code`.
148
+
149
+ ### Observe Only
135
150
 
136
151
  ```bash
137
- pnpm exec tsx examples/01-capx-runtime-solver.ts
152
+ npx tsx examples/01-capx-runtime-solver.ts
138
153
  ```
139
154
 
140
- Allow the single-turn solver to execute one Python Code-as-Policy action in
141
- simulation:
155
+ Use this first to confirm that the runtime URL, model provider, session
156
+ creation, observation, and tool wiring are working.
157
+
158
+ ### Single Policy Step
159
+
160
+ Allow the single-turn example to execute one Python Code-as-Policy action in
161
+ simulation.
142
162
 
143
163
  ```bash
144
164
  CAPX_ALLOW_DESTRUCTIVE=1 \
145
- pnpm exec tsx examples/01-capx-runtime-solver.ts
165
+ npx tsx examples/01-capx-runtime-solver.ts
146
166
  ```
147
167
 
148
- The startup line should include `approval=policy-code-enabled`. If it still
149
- shows `approval=observe-only`, the environment variable did not reach the Node
150
- process. In that case, use a single-line command:
168
+ The startup line should show `approval=policy-code-enabled`. If it still shows
169
+ `approval=observe-only`, the environment variable did not reach the Node
170
+ process. Use a single-line command to verify:
151
171
 
152
172
  ```bash
153
- env CAPX_ALLOW_DESTRUCTIVE=1 pnpm exec tsx examples/01-capx-runtime-solver.ts
173
+ env CAPX_ALLOW_DESTRUCTIVE=1 npx tsx examples/01-capx-runtime-solver.ts
154
174
  ```
155
175
 
156
- When execution is enabled, the log should also show an approval request for
157
- `capx_run_policy_code` followed by an approval resolution. If the startup line
158
- shows `approval=policy-code-enabled` but the tool result still says
159
- `Approval denied for capx_run_policy_code`, rebuild and rerun the local
160
- workspace; older example code used a hard default deny policy before the
161
- example callback could approve the tool.
162
-
163
- Allow execution and force video recording for that policy-code turn:
176
+ ### Single Policy Step With Video
164
177
 
165
178
  ```bash
166
179
  CAPX_ALLOW_DESTRUCTIVE=1 \
167
180
  CAPX_POLICY_EXECUTION_RECORD_VIDEO=1 \
168
- pnpm exec tsx examples/01-capx-runtime-solver.ts
181
+ npx tsx examples/01-capx-runtime-solver.ts
169
182
  ```
170
183
 
171
- Run the multi-turn solver in observe-only mode:
184
+ ### Multi-Turn Autosolve
185
+
186
+ Run the autosolver in observe-only mode.
172
187
 
173
188
  ```bash
174
- CAPX_MAX_SOLVER_TURNS=6 pnpm exec tsx examples/02-capx-runtime-autosolve.ts
189
+ CAPX_MAX_SOLVER_TURNS=6 npx tsx examples/02-capx-runtime-autosolve.ts
175
190
  ```
176
191
 
177
- Allow the multi-turn solver to execute policy code:
192
+ Allow policy-code execution across the autosolver loop.
178
193
 
179
194
  ```bash
180
195
  CAPX_ALLOW_DESTRUCTIVE=1 \
181
196
  CAPX_MAX_SOLVER_TURNS=6 \
182
- pnpm exec tsx examples/02-capx-runtime-autosolve.ts
197
+ npx tsx examples/02-capx-runtime-autosolve.ts
183
198
  ```
184
199
 
185
- Allow multi-turn execution and force video recording for each policy-code turn:
200
+ For the most complete demo, enable execution, video recording, one runtime
201
+ recovery reset, and stop-on-exit so the combined video artifact is flushed.
186
202
 
187
203
  ```bash
188
204
  CAPX_ALLOW_DESTRUCTIVE=1 \
@@ -191,21 +207,14 @@ CAPX_MAX_SOLVER_TURNS=6 \
191
207
  CAPX_RECOVER_ON_RUNTIME_ERROR=reset \
192
208
  CAPX_MAX_RUNTIME_RESETS=1 \
193
209
  CAPX_STOP_ON_EXIT=1 \
194
- pnpm exec tsx examples/02-capx-runtime-autosolve.ts
210
+ npx tsx examples/02-capx-runtime-autosolve.ts
195
211
  ```
196
212
 
197
- For the video mode, the startup line should include
198
- `approval=policy-code-enabled` and `recordVideo=1`.
199
- `CAPX_STOP_ON_EXIT=1` stops the runtime session at the end of the example so
200
- `capx-agent-runtime` can flush the combined session video artifact. Stopped
201
- sessions still keep their artifacts available through the console and HTTP API;
202
- the session may remain listed there, but the live simulator environment has
203
- been stopped. This does not shut down the top-level `capx-agent-runtime serve`
204
- process.
213
+ ## Expected Output
205
214
 
206
215
  For the default Franka cube-stack config, a healthy run usually finishes after
207
- one useful policy-code turn. The exact sampled poses and artifact URLs vary by
208
- trial, but the important terminal lines look like this:
216
+ one useful policy-code turn. Exact sampled poses and artifact paths vary, but
217
+ the important terminal lines look like this:
209
218
 
210
219
  ```text
211
220
  executionOk=true, taskCompleted=true, reward=1
@@ -224,47 +233,26 @@ Saved interaction video to .../video_session_combined.mp4
224
233
  POST /sessions/<id>/stop ... 200 OK
225
234
  ```
226
235
 
227
- The `video_..._turn_00.mp4` file is the per-policy-turn recording. The
228
- `video_session_combined.mp4` file is written when the session stops, which is
229
- why `CAPX_STOP_ON_EXIT=1` is recommended for video examples. In the console,
230
- the combined session video is shown at the top of the artifact list; per-turn
231
- videos remain linked as individual artifact files.
232
-
233
- By default, each example run writes to a unique remote CaP-X output directory
234
- under `outputs/capx-agent-runtime/<agent-session-id>`. That keeps artifacts
235
- from separate runs from being mixed together in the console. Set
236
- `CAPX_OUTPUT_DIR` only when you intentionally want a specific remote output
237
- directory.
238
-
239
- The autosolver also stops early if CaP-X reports a persistent observation or
240
- depth-rendering failure. This is different from ordinary Python policy-code
241
- failure. CaP-X can ask an agent to regenerate code when `env.step(code)`
242
- returns a normal `info_step` with `stderr`. But if the Robosuite observation
243
- pipeline raises while collecting camera/depth observations, `env.step(code)`
244
- does not return a normal result. In that state even `pass` can fail before user
245
- policy code runs, so continuing to submit more code in the same session is not
246
- useful.
247
-
248
- When `CAPX_RECOVER_ON_RUNTIME_ERROR=reset` is set, the autosolver resets only
249
- for runtime-level `env.step(...)` failures where CaP-X cannot return a normal
250
- step result. It does not reset for ordinary policy-code `stderr`; those are
251
- left to the next agent turn so the model can inspect the error and try a better
252
- policy. Runtime resets use the next CaP-X trial/seed. The default reset budget
253
- is one reset; set `CAPX_MAX_RUNTIME_RESETS` to change that. If
254
- `CAPX_POLICY_EXECUTION_TRIAL` is unset, the first session uses trial `1`, the
255
- first recovery reset uses trial `2`, and so on. `CAPX_STOP_ON_EXIT` is separate
256
- from this recovery behavior: recovery reset happens during the solver loop,
257
- while stop-on-exit runs once the example is done, fails, or exhausts its reset
258
- budget.
259
-
260
- If the reset budget is exhausted, or if you are running without automatic
261
- recovery, do the cleanup first, then retry with a fresh session.
262
-
263
- If you used `CAPX_STOP_ON_EXIT=1`, the example asks the server to stop the
264
- runtime session before exiting and flushes the combined session video. You can
265
- then rerun the example directly.
266
-
267
- If the session is still running, find the runtime session id and stop it:
236
+ The `video_..._turn_00.mp4` file is the per-policy-turn recording.
237
+ `video_session_combined.mp4` is written when the session stops, so
238
+ `CAPX_STOP_ON_EXIT=1` is recommended for video examples. The runtime console
239
+ shows the combined session video first and links the per-turn videos as
240
+ individual artifact files.
241
+
242
+ ## Recovery And Cleanup
243
+
244
+ The autosolver distinguishes ordinary policy-code failures from runtime-level
245
+ CaP-X failures.
246
+
247
+ | Case | What Happens |
248
+ | ------------------------------------------------------------------- | ------------------------------------------------------------------------------ |
249
+ | Python policy returns stderr | The next agent turn can inspect the error and write better code. |
250
+ | Observation or depth pipeline fails before `env.step(code)` returns | The autosolver stops or uses `CAPX_RECOVER_ON_RUNTIME_ERROR=reset` if enabled. |
251
+ | Recovery reset is enabled | The session resets to the next trial/seed. The default reset budget is `1`. |
252
+ | `CAPX_STOP_ON_EXIT=1` is set | The example stops the runtime session at exit and flushes the combined video. |
253
+
254
+ If the reset budget is exhausted, clean up first and retry with a fresh
255
+ session. When a session is still running, find its id and stop it:
268
256
 
269
257
  ```bash
270
258
  curl -sS http://127.0.0.1:8210/sessions
@@ -281,25 +269,13 @@ curl -X POST \
281
269
  http://127.0.0.1:8210/sessions/<session-id>/reset
282
270
  ```
283
271
 
284
- Then run the autosolver again:
285
-
286
- ```bash
287
- CAPX_ALLOW_DESTRUCTIVE=1 \
288
- CAPX_POLICY_EXECUTION_RECORD_VIDEO=1 \
289
- CAPX_MAX_SOLVER_TURNS=6 \
290
- CAPX_RECOVER_ON_RUNTIME_ERROR=reset \
291
- CAPX_MAX_RUNTIME_RESETS=1 \
292
- CAPX_STOP_ON_EXIT=1 \
293
- pnpm exec tsx examples/02-capx-runtime-autosolve.ts
294
- ```
295
-
296
272
  If the depth assertion repeats immediately on a clean session, restart the
297
273
  `capx-agent-runtime serve` process too. That recreates the Python environment
298
274
  and the child API services instead of reusing the same process state.
299
275
 
300
- If you want to isolate the TypeScript adapter and `agent-core` loop from the
301
- vision/depth stack, start `capx-agent-runtime` with a privileged cube-stack
302
- config when available:
276
+ To isolate the TypeScript adapter and `agent-core` loop from the vision/depth
277
+ stack, start `capx-agent-runtime` with a privileged cube-stack config when that
278
+ config is available:
303
279
 
304
280
  ```bash
305
281
  uv run --no-sync --active capx-agent-runtime serve \
@@ -309,79 +285,59 @@ uv run --no-sync --active capx-agent-runtime serve \
309
285
  --port 8210
310
286
  ```
311
287
 
312
- That path avoids some vision-derived object-pose calls and is useful for
313
- checking that HTTP tools, approvals, artifacts, videos, and the external agent
314
- loop are wired correctly before debugging the Robosuite camera/depth pipeline.
315
-
316
- `CAPX_ALLOW_DESTRUCTIVE=1` means "allow side-effecting CaP-X tools" in this
317
- example harness. It is required for `capx_run_policy_code` because that tool
318
- executes model-authored Python inside the live CaP-X runtime. For hardware
319
- configs, policy execution is still blocked unless
320
- `CAPX_ALLOW_HARDWARE_POLICY_EXECUTION=1` is also set.
321
-
322
- The example always uses the live runtime path: `mode: "runtime"`,
323
- `startSession: true`, `enablePolicyCodeExecution: true`,
324
- and `policyExecutionMode: "live-runtime"`. The adapter does not accept
325
- `repoPath` or `configPath`; those belong to the runtime server startup command.
326
- That keeps the example aligned with the production architecture: the Python
327
- runtime service owns the CaP-X repo/config/simulator setup, and `agent-core`
328
- owns the external agent loop.
288
+ That path avoids some vision-derived object-pose calls and is useful when you
289
+ want to validate HTTP tools, approvals, artifacts, videos, and the external
290
+ agent loop before debugging the Robosuite camera/depth pipeline.
291
+
292
+ ## Environment Variables
293
+
294
+ | Variable | Purpose |
295
+ | ---------------------------------------- | --------------------------------------------------------------------------------------------- |
296
+ | `OPENAI_API_KEY` | Configures the `agent-core` model provider. |
297
+ | `OPENAI_MODEL` | Model id. Defaults to `gpt-4o-mini` in `examples/_setup.ts`. |
298
+ | `OPENAI_BASE_URL` | Optional OpenAI-compatible provider endpoint. |
299
+ | `CAPX_RUNTIME_SERVER_URL` | URL for the running `capx-agent-runtime` service. |
300
+ | `CAPX_ALLOW_DESTRUCTIVE=1` | Lets the example approval policy execute `capx_run_policy_code`. |
301
+ | `CAPX_ALLOW_HARDWARE_POLICY_EXECUTION=1` | Extra gate required before policy execution against hardware configs. |
302
+ | `CAPX_MAX_SOLVER_TURNS` | Outer loop limit for `02-capx-runtime-autosolve.ts`. |
303
+ | `CAPX_RECOVER_ON_RUNTIME_ERROR=reset` | Reset the live runtime session after runtime-level observation/depth failures. |
304
+ | `CAPX_MAX_RUNTIME_RESETS` | Recovery reset budget. Defaults to `1` when recovery is enabled. |
305
+ | `CAPX_POLICY_EXECUTION_RECORD_VIDEO` | Optional `1` or `0` override for the selected YAML's video setting. |
306
+ | `CAPX_STOP_ON_EXIT=1` | Stop the runtime session when the example exits and flush combined video artifacts. |
307
+ | `CAPX_SESSION_OUTPUT_DIR` | Privileged per-session output override. Leave unset for normal server-owned paths. |
308
+ | `CAPX_SESSION_SKILL_LIBRARY_PATH` | Privileged per-session skill-library override. Leave unset unless path overrides are enabled. |
309
+ | `CAPX_TOOL_RESULT_MAX_CHARS` | Increase printed tool-result previews while debugging. |
310
+
311
+ By default, each example run uses the runtime server's configured output
312
+ directory and skill-library path. Set `CAPX_SESSION_OUTPUT_DIR` or
313
+ `CAPX_SESSION_SKILL_LIBRARY_PATH` only when the runtime server was started with
314
+ `--allow-client-path-overrides` and allowed roots for those paths.
315
+
316
+ ## Runtime Contract
317
+
318
+ The examples always use the live runtime path: `mode: "runtime"`,
319
+ `startSession: true`, `enablePolicyCodeExecution: true`, and
320
+ `policyExecutionMode: "live-runtime"`.
321
+
322
+ The adapter does not accept `repoPath` or `configPath`, and it omits
323
+ `outputDir` and `skillLibraryPath` by default. Those path choices belong to the
324
+ runtime server startup command. That keeps the architecture clean: the Python
325
+ runtime service owns the CaP-X repo/config/output/simulator setup, and
326
+ `agent-core` owns the external agent loop.
329
327
 
330
328
  The adapter defaults to `toolExecutionMode: "plan"`. In `agent-core`, "plan"
331
- means framework-owned tool dispatch, not "only produce a written plan." The
332
- model still reasons normally and emits tool calls; `agent-core` executes those
333
- tool calls after applying approval and scheduling policy, then records the tool
334
- results before the next model step. The console output lines such as
335
- `capx_status({ ... })` and `capx_run_policy_code({ ... })` are the visible
336
- planned tool calls.
337
-
338
- Both examples use `agent-core`'s `createEventPrinter` to render progress:
339
- steps, tool calls, tool results, approval events, text output, and completion.
340
- For CaP-X, those logs are the easiest way to see the external agent loop:
341
- status, observe, optional policy-code execution, observe again, then final
342
- summary. Set `CAPX_TOOL_RESULT_MAX_CHARS` if you want the terminal to print
343
- longer tool-result previews while debugging.
344
-
345
- ## Environment Model
346
-
347
- `OPENAI_API_KEY` configures the `agent-core` model provider. `OPENAI_MODEL`
348
- defaults to `gpt-4o-mini` through `examples/_setup.ts` if you omit it.
349
- `OPENAI_BASE_URL` is only needed for non-default OpenAI-compatible endpoints.
350
-
351
- `CAPX_RUNTIME_SERVER_URL` points to the `capx-agent-runtime` service. When
352
- the example creates a session, it lets the server's startup arguments define
353
- the CaP-X repo, YAML config, output directory, and simulator context. This
354
- matches the workstation setup command above.
355
-
356
- `CAPX_ALLOW_DESTRUCTIVE=1` lets the example approval policy allow
357
- `capx_run_policy_code`. Without it, the agent can observe and propose code but
358
- will not execute policy code.
359
-
360
- `CAPX_MAX_SOLVER_TURNS` controls the outer loop in
361
- `02-capx-runtime-autosolve.ts`. The same `agent-core` session id is reused for
362
- each turn so the agent keeps conversation and tool history.
363
-
364
- `CAPX_RECOVER_ON_RUNTIME_ERROR=reset` lets
365
- `02-capx-runtime-autosolve.ts` reset the live CaP-X runtime session to the next
366
- trial/seed and continue when CaP-X reports an observation/depth failure. This
367
- is session-level recovery for failures where `env.step(code)` cannot return a
368
- normal multi-turn result. `CAPX_MAX_RUNTIME_RESETS` controls the reset budget
369
- and defaults to `1` when recovery is enabled.
370
-
371
- `CAPX_POLICY_EXECUTION_RECORD_VIDEO` is optional. Leave it unset to use the
372
- selected CaP-X YAML's `record_video` setting. Set it to `1` or `0` only when
373
- you want the TypeScript example to override the runtime server/YAML value.
374
-
375
- ## Prompt Context
329
+ means framework-owned tool dispatch, not "only write a textual plan." The model
330
+ can still emit tool calls; `agent-core` applies approval and scheduling policy,
331
+ executes approved tools, then records tool results before the next model step.
376
332
 
377
- This package does not copy CaP-X prompt templates into TypeScript. In runtime
378
- mode, `capx-agent-runtime` loads the selected CaP-X YAML config and trial. Then
379
- `capx_observe` returns the task prompt, full prompt, observations, API
380
- descriptions, rendered frame when available, and last-step result to the
381
- `agent-core` agent.
333
+ Both examples use `agent-core`'s `createEventPrinter` to render steps, tool
334
+ calls, tool results, approval events, text output, and completion. For CaP-X,
335
+ those logs are the easiest way to see the external agent loop: status, observe,
336
+ optional policy-code execution, observe again, then final summary.
382
337
 
383
- The external agent reads that CaP-X-provided context and acts by calling
338
+ This package does not copy CaP-X prompt templates into TypeScript. In runtime
339
+ mode, `capx-agent-runtime` loads the selected CaP-X YAML config and trial.
340
+ `capx_observe` returns the CaP-X task prompt, full prompt, observations, API
341
+ descriptions, rendered frame when available, and last-step result. The external
342
+ agent reads that CaP-X-provided context and acts by calling
384
343
  `capx_run_policy_code`.
385
-
386
- This is the clean bring-your-own-agent reference: start the runtime service
387
- first, then connect an external `agent-core` agent to it.