@cuylabs/agent-physical-capx 5.0.2 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +124 -85
- package/dist/agent.d.ts +1 -1
- package/dist/agent.js +2 -2
- package/dist/{chunk-MYO63CWO.js → chunk-EBBHF633.js} +4 -8
- package/dist/chunk-EBBHF633.js.map +1 -0
- package/dist/{chunk-C53NNB7T.js → chunk-F3JNO443.js} +2 -2
- package/dist/index.d.ts +3 -3
- package/dist/index.js +2 -2
- package/dist/{session-BxaROlXW.d.ts → session-BwE260Mn.d.ts} +11 -3
- package/dist/session.d.ts +1 -1
- package/dist/session.js +1 -1
- package/docs/agent-core-integration.md +3 -1
- package/docs/how-it-works.md +3 -0
- package/examples/.env.example +6 -5
- package/examples/01-capx-runtime-solver.ts +13 -10
- package/examples/02-capx-runtime-autosolve.ts +30 -23
- package/examples/README.md +153 -197
- package/package.json +3 -3
- package/dist/chunk-MYO63CWO.js.map +0 -1
- package/dist/{chunk-C53NNB7T.js.map → chunk-F3JNO443.js.map} +0 -0
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
* task context, write policy code, execute one step, observe again, and report.
|
|
12
12
|
*
|
|
13
13
|
* Run:
|
|
14
|
-
* pnpm --filter @cuylabs/agent-physical-capx build
|
|
15
14
|
* npx tsx examples/01-capx-runtime-solver.ts
|
|
16
15
|
*/
|
|
17
16
|
|
|
@@ -57,9 +56,10 @@ const runId = Date.now();
|
|
|
57
56
|
const sessionId =
|
|
58
57
|
optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
|
|
59
58
|
`capx-runtime-solver-${runId}`;
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
|
|
60
|
+
const sessionSkillLibraryPath = optionalString(
|
|
61
|
+
process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
|
|
62
|
+
);
|
|
63
63
|
const recordVideo =
|
|
64
64
|
optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
|
|
65
65
|
"runtime-default";
|
|
@@ -92,10 +92,11 @@ console.log(
|
|
|
92
92
|
`approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
|
|
93
93
|
`recordVideo=${recordVideo}`,
|
|
94
94
|
`agentSessionId=${sessionId}`,
|
|
95
|
-
`
|
|
95
|
+
`sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
|
|
96
|
+
`sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
|
|
97
|
+
`pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
|
|
96
98
|
`runtimeServerUrl=${runtimeServerUrl}`,
|
|
97
|
-
]
|
|
98
|
-
.join(" "),
|
|
99
|
+
].join(" "),
|
|
99
100
|
);
|
|
100
101
|
|
|
101
102
|
const { agent, session } = await createCapxAgent({
|
|
@@ -125,13 +126,15 @@ const { agent, session } = await createCapxAgent({
|
|
|
125
126
|
policyExecutionTimeoutMs: optionalNumber(
|
|
126
127
|
process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
|
|
127
128
|
),
|
|
128
|
-
policyExecutionTrial: optionalNumber(
|
|
129
|
+
policyExecutionTrial: optionalNumber(
|
|
130
|
+
process.env.CAPX_POLICY_EXECUTION_TRIAL,
|
|
131
|
+
),
|
|
129
132
|
policyExecutionRecordVideo: optionalBoolean(
|
|
130
133
|
process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
|
|
131
134
|
),
|
|
132
135
|
|
|
133
|
-
outputDir,
|
|
134
|
-
skillLibraryPath:
|
|
136
|
+
outputDir: sessionOutputDir,
|
|
137
|
+
skillLibraryPath: sessionSkillLibraryPath,
|
|
135
138
|
},
|
|
136
139
|
approval: {
|
|
137
140
|
defaultAction: "ask",
|
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
* reports task completion or when CAPX_MAX_SOLVER_TURNS is reached.
|
|
8
8
|
*
|
|
9
9
|
* Run:
|
|
10
|
-
* pnpm --filter @cuylabs/agent-physical-capx build
|
|
11
10
|
* npx tsx examples/02-capx-runtime-autosolve.ts
|
|
12
11
|
*/
|
|
13
12
|
|
|
@@ -76,7 +75,9 @@ function lastStep(observation: PhysicalObservation): LastRuntimeStep | null {
|
|
|
76
75
|
}
|
|
77
76
|
}
|
|
78
77
|
|
|
79
|
-
function isUnrecoverableObservationFailure(
|
|
78
|
+
function isUnrecoverableObservationFailure(
|
|
79
|
+
step: LastRuntimeStep | null,
|
|
80
|
+
): boolean {
|
|
80
81
|
if (!step) {
|
|
81
82
|
return false;
|
|
82
83
|
}
|
|
@@ -94,9 +95,9 @@ function isUnrecoverableObservationFailure(step: LastRuntimeStep | null): boolea
|
|
|
94
95
|
if (diagnostics) {
|
|
95
96
|
return Boolean(
|
|
96
97
|
step.truncated &&
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
98
|
+
(diagnostics.observationPipeline ||
|
|
99
|
+
diagnostics.depthAssertion ||
|
|
100
|
+
error.includes("AssertionError")),
|
|
100
101
|
);
|
|
101
102
|
}
|
|
102
103
|
const mentionsObservationPath =
|
|
@@ -108,8 +109,10 @@ function isUnrecoverableObservationFailure(step: LastRuntimeStep | null): boolea
|
|
|
108
109
|
(stderr.includes("AssertionError") && stderr.includes("depth"));
|
|
109
110
|
return Boolean(
|
|
110
111
|
sandboxFailed &&
|
|
111
|
-
|
|
112
|
-
|
|
112
|
+
step.truncated &&
|
|
113
|
+
(mentionsObservationPath ||
|
|
114
|
+
mentionsDepthAssertion ||
|
|
115
|
+
error.includes("AssertionError")),
|
|
113
116
|
);
|
|
114
117
|
}
|
|
115
118
|
|
|
@@ -148,9 +151,10 @@ const runId = Date.now();
|
|
|
148
151
|
const sessionId =
|
|
149
152
|
optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
|
|
150
153
|
`capx-runtime-autosolve-${runId}`;
|
|
151
|
-
const
|
|
152
|
-
|
|
153
|
-
|
|
154
|
+
const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
|
|
155
|
+
const sessionSkillLibraryPath = optionalString(
|
|
156
|
+
process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
|
|
157
|
+
);
|
|
154
158
|
const toolExecutionMode = "plan" as const;
|
|
155
159
|
const recordVideo =
|
|
156
160
|
optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
|
|
@@ -178,7 +182,9 @@ console.error(
|
|
|
178
182
|
`approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
|
|
179
183
|
`recordVideo=${recordVideo}`,
|
|
180
184
|
`agentSessionId=${sessionId}`,
|
|
181
|
-
`
|
|
185
|
+
`sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
|
|
186
|
+
`sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
|
|
187
|
+
`pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
|
|
182
188
|
`trial=${initialPolicyExecutionTrial}`,
|
|
183
189
|
`recoverOnRuntimeError=${recoverOnRuntimeError ? "reset" : "off"}`,
|
|
184
190
|
`maxRuntimeResets=${maxRuntimeResets}`,
|
|
@@ -212,8 +218,8 @@ const { agent, session } = await createCapxAgent({
|
|
|
212
218
|
policyExecutionRecordVideo: optionalBoolean(
|
|
213
219
|
process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
|
|
214
220
|
),
|
|
215
|
-
outputDir,
|
|
216
|
-
skillLibraryPath:
|
|
221
|
+
outputDir: sessionOutputDir,
|
|
222
|
+
skillLibraryPath: sessionSkillLibraryPath,
|
|
217
223
|
},
|
|
218
224
|
approval: {
|
|
219
225
|
defaultAction: "ask",
|
|
@@ -232,15 +238,14 @@ try {
|
|
|
232
238
|
|
|
233
239
|
for (let turn = 1; turn <= maxTurns; turn += 1) {
|
|
234
240
|
console.error(`\n--- solver turn ${turn}/${maxTurns} ---`);
|
|
235
|
-
const prompt =
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
: turn === 1
|
|
241
|
+
const prompt = resetBeforeTurn
|
|
242
|
+
? [
|
|
243
|
+
"The previous CaP-X runtime session hit an observation/depth failure, so the example reset the runtime before this turn.",
|
|
244
|
+
"Treat the current runtime state as fresh. Ignore any broken previous physical state.",
|
|
245
|
+
"Call capx_status and capx_observe with includeImages=true, then solve from the current task prompt and observations.",
|
|
246
|
+
"Execute one useful Python Code-as-Policy step if approval allows it, then observe again.",
|
|
247
|
+
].join(" ")
|
|
248
|
+
: turn === 1
|
|
244
249
|
? [
|
|
245
250
|
"Solve the active CaP-X task.",
|
|
246
251
|
"First call capx_status, then capx_observe with includeImages=true.",
|
|
@@ -294,7 +299,9 @@ try {
|
|
|
294
299
|
break;
|
|
295
300
|
}
|
|
296
301
|
if (step?.taskCompleted || step?.terminated) {
|
|
297
|
-
console.error(
|
|
302
|
+
console.error(
|
|
303
|
+
`CaP-X reported completion state: ${completionSummary(step)}`,
|
|
304
|
+
);
|
|
298
305
|
break;
|
|
299
306
|
}
|
|
300
307
|
resetBeforeTurn = false;
|
package/examples/README.md
CHANGED
|
@@ -8,28 +8,35 @@ runtime by calling its HTTP API directly.
|
|
|
8
8
|
|
|
9
9
|
## Main Examples
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
3. write one Python Code-as-Policy step,
|
|
20
|
-
4. execute it through `capx-agent-runtime`,
|
|
21
|
-
5. observe again and summarize reward, stdout/stderr, and task completion.
|
|
11
|
+
There are two solver examples. Both connect an `agent-core` agent to an
|
|
12
|
+
already-running `capx-agent-runtime` service and expose the CaP-X session
|
|
13
|
+
through the `capx_*` tools.
|
|
14
|
+
|
|
15
|
+
`01-capx-runtime-solver.ts` is the default single-turn example. It creates one
|
|
16
|
+
agent, starts one runtime session, and gives the model one user turn. Inside
|
|
17
|
+
that turn, `agent-core` may still run multiple model/tool steps, but the prompt
|
|
18
|
+
asks for one useful Code-as-Policy action and a short result summary.
|
|
22
19
|
|
|
23
|
-
|
|
24
|
-
user turns. After each turn, the script observes the runtime and stops when
|
|
25
|
-
CaP-X reports task completion or when `CAPX_MAX_SOLVER_TURNS` is reached. Use
|
|
26
|
-
this when you want the harness to continue attempting the task instead of
|
|
27
|
-
exiting after one scripted turn.
|
|
20
|
+
That flow is:
|
|
28
21
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
22
|
+
1. observe the CaP-X task, simulator state, and rendered frame,
|
|
23
|
+
2. inspect runtime turn history and available policy-code context,
|
|
24
|
+
3. write one Python Code-as-Policy step,
|
|
25
|
+
4. execute that Python through `capx-agent-runtime`,
|
|
26
|
+
5. observe again and summarize reward, stdout/stderr, artifacts, and task
|
|
27
|
+
completion.
|
|
28
|
+
|
|
29
|
+
`02-capx-runtime-autosolve.ts` is the multi-turn example. It keeps the same
|
|
30
|
+
agent and runtime session open across several user turns. After each turn, the
|
|
31
|
+
script observes the runtime result and stops when CaP-X reports task completion
|
|
32
|
+
or when `CAPX_MAX_SOLVER_TURNS` is reached. Use it when the harness should keep
|
|
33
|
+
trying the task instead of exiting after one solver turn.
|
|
34
|
+
|
|
35
|
+
Both examples enable the packaged `capx-code-as-policy` agent-core skill by
|
|
36
|
+
default. That skill teaches the model how to use the `capx_*` tools and how to
|
|
37
|
+
write policy code for the runtime. It is separate from CaP-X's runtime-side
|
|
38
|
+
Python skill library, which appears dynamically in observation `codeContext`
|
|
39
|
+
and deliberate runtime APIs.
|
|
33
40
|
|
|
34
41
|
## Service-First Setup
|
|
35
42
|
|
|
@@ -37,9 +44,12 @@ The normal path is to start the runtime service first, usually on a Linux GPU
|
|
|
37
44
|
workstation, then run the TypeScript agent from your local machine or another
|
|
38
45
|
client.
|
|
39
46
|
|
|
40
|
-
Follow the runtime
|
|
47
|
+
Follow the runtime project docs first:
|
|
41
48
|
|
|
42
|
-
|
|
49
|
+
1. Prepare the GPU workstation with
|
|
50
|
+
[Workstation Setup](https://github.com/cuylabs-ai/capx-agent-runtime/blob/main/docs/workstation-setup.md).
|
|
51
|
+
2. Start and validate the runtime server with
|
|
52
|
+
[Runtime Server](https://github.com/cuylabs-ai/capx-agent-runtime/blob/main/docs/runtime-server.md).
|
|
43
53
|
|
|
44
54
|
The runtime server is typically started from the CaP-X checkout like this:
|
|
45
55
|
|
|
@@ -79,19 +89,25 @@ the service:
|
|
|
79
89
|
ssh -L 8210:127.0.0.1:8210 <user>@<gpu-host>
|
|
80
90
|
```
|
|
81
91
|
|
|
82
|
-
##
|
|
92
|
+
## Client Setup
|
|
83
93
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
workspace-linked here, so source changes can be tested before publishing.
|
|
94
|
+
In an application that consumes the released packages, install the TypeScript
|
|
95
|
+
client packages and the example runner dependencies:
|
|
87
96
|
|
|
88
|
-
|
|
89
|
-
|
|
97
|
+
```bash
|
|
98
|
+
npm install @cuylabs/agent-core @cuylabs/agent-physical @cuylabs/agent-physical-capx
|
|
99
|
+
npm install --save-dev @ai-sdk/openai dotenv tsx
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The released package already includes its built `dist/` files, so there is no
|
|
103
|
+
workspace build step in the normal install path.
|
|
104
|
+
|
|
105
|
+
If you are running the examples from a local `agents-ts` monorepo checkout
|
|
106
|
+
while changing package source, install workspace dependencies first:
|
|
90
107
|
|
|
91
108
|
```bash
|
|
92
109
|
cd /path/to/agents-ts
|
|
93
110
|
pnpm install
|
|
94
|
-
pnpm --filter @cuylabs/agent-physical-capx... build
|
|
95
111
|
```
|
|
96
112
|
|
|
97
113
|
Use the `pnpm` already available on your machine. If `pnpm` is missing and your
|
|
@@ -99,11 +115,8 @@ Node install includes Corepack, you can enable it with `corepack enable`; if
|
|
|
99
115
|
`corepack` is not available, install `pnpm` directly with your normal Node
|
|
100
116
|
package-manager setup.
|
|
101
117
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
`@cuylabs/agent-physical-capx` are built together.
|
|
105
|
-
|
|
106
|
-
Then configure the example environment:
|
|
118
|
+
For the checked-in examples, configure the local example environment from this
|
|
119
|
+
package directory:
|
|
107
120
|
|
|
108
121
|
```bash
|
|
109
122
|
cd packages/agent-physical-capx
|
|
@@ -129,60 +142,63 @@ gateway or hosted inference endpoint.
|
|
|
129
142
|
|
|
130
143
|
## Run Modes
|
|
131
144
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
`capx_run_policy_code
|
|
145
|
+
The examples default to observe-only mode. In that mode, the agent can inspect
|
|
146
|
+
the task, frame, runtime state, and policy-code context, but it cannot call
|
|
147
|
+
`capx_run_policy_code`.
|
|
148
|
+
|
|
149
|
+
### Observe Only
|
|
135
150
|
|
|
136
151
|
```bash
|
|
137
|
-
|
|
152
|
+
npx tsx examples/01-capx-runtime-solver.ts
|
|
138
153
|
```
|
|
139
154
|
|
|
140
|
-
|
|
141
|
-
|
|
155
|
+
Use this first to confirm that the runtime URL, model provider, session
|
|
156
|
+
creation, observation, and tool wiring are working.
|
|
157
|
+
|
|
158
|
+
### Single Policy Step
|
|
159
|
+
|
|
160
|
+
Allow the single-turn example to execute one Python Code-as-Policy action in
|
|
161
|
+
simulation.
|
|
142
162
|
|
|
143
163
|
```bash
|
|
144
164
|
CAPX_ALLOW_DESTRUCTIVE=1 \
|
|
145
|
-
|
|
165
|
+
npx tsx examples/01-capx-runtime-solver.ts
|
|
146
166
|
```
|
|
147
167
|
|
|
148
|
-
The startup line should
|
|
149
|
-
|
|
150
|
-
process.
|
|
168
|
+
The startup line should show `approval=policy-code-enabled`. If it still shows
|
|
169
|
+
`approval=observe-only`, the environment variable did not reach the Node
|
|
170
|
+
process. Use a single-line command to verify:
|
|
151
171
|
|
|
152
172
|
```bash
|
|
153
|
-
env CAPX_ALLOW_DESTRUCTIVE=1
|
|
173
|
+
env CAPX_ALLOW_DESTRUCTIVE=1 npx tsx examples/01-capx-runtime-solver.ts
|
|
154
174
|
```
|
|
155
175
|
|
|
156
|
-
|
|
157
|
-
`capx_run_policy_code` followed by an approval resolution. If the startup line
|
|
158
|
-
shows `approval=policy-code-enabled` but the tool result still says
|
|
159
|
-
`Approval denied for capx_run_policy_code`, rebuild and rerun the local
|
|
160
|
-
workspace; older example code used a hard default deny policy before the
|
|
161
|
-
example callback could approve the tool.
|
|
162
|
-
|
|
163
|
-
Allow execution and force video recording for that policy-code turn:
|
|
176
|
+
### Single Policy Step With Video
|
|
164
177
|
|
|
165
178
|
```bash
|
|
166
179
|
CAPX_ALLOW_DESTRUCTIVE=1 \
|
|
167
180
|
CAPX_POLICY_EXECUTION_RECORD_VIDEO=1 \
|
|
168
|
-
|
|
181
|
+
npx tsx examples/01-capx-runtime-solver.ts
|
|
169
182
|
```
|
|
170
183
|
|
|
171
|
-
|
|
184
|
+
### Multi-Turn Autosolve
|
|
185
|
+
|
|
186
|
+
Run the autosolver in observe-only mode.
|
|
172
187
|
|
|
173
188
|
```bash
|
|
174
|
-
CAPX_MAX_SOLVER_TURNS=6
|
|
189
|
+
CAPX_MAX_SOLVER_TURNS=6 npx tsx examples/02-capx-runtime-autosolve.ts
|
|
175
190
|
```
|
|
176
191
|
|
|
177
|
-
Allow
|
|
192
|
+
Allow policy-code execution across the autosolver loop.
|
|
178
193
|
|
|
179
194
|
```bash
|
|
180
195
|
CAPX_ALLOW_DESTRUCTIVE=1 \
|
|
181
196
|
CAPX_MAX_SOLVER_TURNS=6 \
|
|
182
|
-
|
|
197
|
+
npx tsx examples/02-capx-runtime-autosolve.ts
|
|
183
198
|
```
|
|
184
199
|
|
|
185
|
-
|
|
200
|
+
For the most complete demo, enable execution, video recording, one runtime
|
|
201
|
+
recovery reset, and stop-on-exit so the combined video artifact is flushed.
|
|
186
202
|
|
|
187
203
|
```bash
|
|
188
204
|
CAPX_ALLOW_DESTRUCTIVE=1 \
|
|
@@ -191,21 +207,14 @@ CAPX_MAX_SOLVER_TURNS=6 \
|
|
|
191
207
|
CAPX_RECOVER_ON_RUNTIME_ERROR=reset \
|
|
192
208
|
CAPX_MAX_RUNTIME_RESETS=1 \
|
|
193
209
|
CAPX_STOP_ON_EXIT=1 \
|
|
194
|
-
|
|
210
|
+
npx tsx examples/02-capx-runtime-autosolve.ts
|
|
195
211
|
```
|
|
196
212
|
|
|
197
|
-
|
|
198
|
-
`approval=policy-code-enabled` and `recordVideo=1`.
|
|
199
|
-
`CAPX_STOP_ON_EXIT=1` stops the runtime session at the end of the example so
|
|
200
|
-
`capx-agent-runtime` can flush the combined session video artifact. Stopped
|
|
201
|
-
sessions still keep their artifacts available through the console and HTTP API;
|
|
202
|
-
the session may remain listed there, but the live simulator environment has
|
|
203
|
-
been stopped. This does not shut down the top-level `capx-agent-runtime serve`
|
|
204
|
-
process.
|
|
213
|
+
## Expected Output
|
|
205
214
|
|
|
206
215
|
For the default Franka cube-stack config, a healthy run usually finishes after
|
|
207
|
-
one useful policy-code turn.
|
|
208
|
-
|
|
216
|
+
one useful policy-code turn. Exact sampled poses and artifact paths vary, but
|
|
217
|
+
the important terminal lines look like this:
|
|
209
218
|
|
|
210
219
|
```text
|
|
211
220
|
executionOk=true, taskCompleted=true, reward=1
|
|
@@ -224,47 +233,26 @@ Saved interaction video to .../video_session_combined.mp4
|
|
|
224
233
|
POST /sessions/<id>/stop ... 200 OK
|
|
225
234
|
```
|
|
226
235
|
|
|
227
|
-
The `video_..._turn_00.mp4` file is the per-policy-turn recording.
|
|
228
|
-
`video_session_combined.mp4`
|
|
229
|
-
|
|
230
|
-
the combined session video
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
When `CAPX_RECOVER_ON_RUNTIME_ERROR=reset` is set, the autosolver resets only
|
|
249
|
-
for runtime-level `env.step(...)` failures where CaP-X cannot return a normal
|
|
250
|
-
step result. It does not reset for ordinary policy-code `stderr`; those are
|
|
251
|
-
left to the next agent turn so the model can inspect the error and try a better
|
|
252
|
-
policy. Runtime resets use the next CaP-X trial/seed. The default reset budget
|
|
253
|
-
is one reset; set `CAPX_MAX_RUNTIME_RESETS` to change that. If
|
|
254
|
-
`CAPX_POLICY_EXECUTION_TRIAL` is unset, the first session uses trial `1`, the
|
|
255
|
-
first recovery reset uses trial `2`, and so on. `CAPX_STOP_ON_EXIT` is separate
|
|
256
|
-
from this recovery behavior: recovery reset happens during the solver loop,
|
|
257
|
-
while stop-on-exit runs once the example is done, fails, or exhausts its reset
|
|
258
|
-
budget.
|
|
259
|
-
|
|
260
|
-
If the reset budget is exhausted, or if you are running without automatic
|
|
261
|
-
recovery, do the cleanup first, then retry with a fresh session.
|
|
262
|
-
|
|
263
|
-
If you used `CAPX_STOP_ON_EXIT=1`, the example asks the server to stop the
|
|
264
|
-
runtime session before exiting and flushes the combined session video. You can
|
|
265
|
-
then rerun the example directly.
|
|
266
|
-
|
|
267
|
-
If the session is still running, find the runtime session id and stop it:
|
|
236
|
+
The `video_..._turn_00.mp4` file is the per-policy-turn recording.
|
|
237
|
+
`video_session_combined.mp4` is written when the session stops, so
|
|
238
|
+
`CAPX_STOP_ON_EXIT=1` is recommended for video examples. The runtime console
|
|
239
|
+
shows the combined session video first and links the per-turn videos as
|
|
240
|
+
individual artifact files.
|
|
241
|
+
|
|
242
|
+
## Recovery And Cleanup
|
|
243
|
+
|
|
244
|
+
The autosolver distinguishes ordinary policy-code failures from runtime-level
|
|
245
|
+
CaP-X failures.
|
|
246
|
+
|
|
247
|
+
| Case | What Happens |
|
|
248
|
+
| ------------------------------------------------------------------- | ------------------------------------------------------------------------------ |
|
|
249
|
+
| Python policy returns stderr | The next agent turn can inspect the error and write better code. |
|
|
250
|
+
| Observation or depth pipeline fails before `env.step(code)` returns | The autosolver stops, or resets the session when `CAPX_RECOVER_ON_RUNTIME_ERROR=reset` is set. |
|
|
251
|
+
| Recovery reset is enabled | The session resets to the next trial/seed. The default reset budget is `1`. |
|
|
252
|
+
| `CAPX_STOP_ON_EXIT=1` is set | The example stops the runtime session at exit and flushes the combined video. |
|
|
253
|
+
|
|
254
|
+
If the reset budget is exhausted, clean up first and retry with a fresh
|
|
255
|
+
session. When a session is still running, find its id and stop it:
|
|
268
256
|
|
|
269
257
|
```bash
|
|
270
258
|
curl -sS http://127.0.0.1:8210/sessions
|
|
@@ -281,25 +269,13 @@ curl -X POST \
|
|
|
281
269
|
http://127.0.0.1:8210/sessions/<session-id>/reset
|
|
282
270
|
```
|
|
283
271
|
|
|
284
|
-
Then run the autosolver again:
|
|
285
|
-
|
|
286
|
-
```bash
|
|
287
|
-
CAPX_ALLOW_DESTRUCTIVE=1 \
|
|
288
|
-
CAPX_POLICY_EXECUTION_RECORD_VIDEO=1 \
|
|
289
|
-
CAPX_MAX_SOLVER_TURNS=6 \
|
|
290
|
-
CAPX_RECOVER_ON_RUNTIME_ERROR=reset \
|
|
291
|
-
CAPX_MAX_RUNTIME_RESETS=1 \
|
|
292
|
-
CAPX_STOP_ON_EXIT=1 \
|
|
293
|
-
pnpm exec tsx examples/02-capx-runtime-autosolve.ts
|
|
294
|
-
```
|
|
295
|
-
|
|
296
272
|
If the depth assertion repeats immediately on a clean session, restart the
|
|
297
273
|
`capx-agent-runtime serve` process too. That recreates the Python environment
|
|
298
274
|
and the child API services instead of reusing the same process state.
|
|
299
275
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
config
|
|
276
|
+
To isolate the TypeScript adapter and `agent-core` loop from the vision/depth
|
|
277
|
+
stack, start `capx-agent-runtime` with a privileged cube-stack config when that
|
|
278
|
+
config is available:
|
|
303
279
|
|
|
304
280
|
```bash
|
|
305
281
|
uv run --no-sync --active capx-agent-runtime serve \
|
|
@@ -309,79 +285,59 @@ uv run --no-sync --active capx-agent-runtime serve \
|
|
|
309
285
|
--port 8210
|
|
310
286
|
```
|
|
311
287
|
|
|
312
|
-
That path avoids some vision-derived object-pose calls and is useful
|
|
313
|
-
|
|
314
|
-
loop
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
`
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
`
|
|
324
|
-
|
|
325
|
-
`
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
288
|
+
That path avoids some vision-derived object-pose calls and is useful when you
|
|
289
|
+
want to validate HTTP tools, approvals, artifacts, videos, and the external
|
|
290
|
+
agent loop before debugging the Robosuite camera/depth pipeline.
|
|
291
|
+
|
|
292
|
+
## Environment Variables
|
|
293
|
+
|
|
294
|
+
| Variable | Purpose |
|
|
295
|
+
| ---------------------------------------- | --------------------------------------------------------------------------------------------- |
|
|
296
|
+
| `OPENAI_API_KEY` | Configures the `agent-core` model provider. |
|
|
297
|
+
| `OPENAI_MODEL` | Model id. Defaults to `gpt-4o-mini` in `examples/_setup.ts`. |
|
|
298
|
+
| `OPENAI_BASE_URL` | Optional OpenAI-compatible provider endpoint. |
|
|
299
|
+
| `CAPX_RUNTIME_SERVER_URL` | URL for the running `capx-agent-runtime` service. |
|
|
300
|
+
| `CAPX_ALLOW_DESTRUCTIVE=1` | Lets the example approval policy execute `capx_run_policy_code`. |
|
|
301
|
+
| `CAPX_ALLOW_HARDWARE_POLICY_EXECUTION=1` | Extra gate required before policy execution against hardware configs. |
|
|
302
|
+
| `CAPX_MAX_SOLVER_TURNS` | Outer loop limit for `02-capx-runtime-autosolve.ts`. |
|
|
303
|
+
| `CAPX_RECOVER_ON_RUNTIME_ERROR=reset` | Reset the live runtime session after runtime-level observation/depth failures. |
|
|
304
|
+
| `CAPX_MAX_RUNTIME_RESETS` | Recovery reset budget. Defaults to `1` when recovery is enabled. |
|
|
305
|
+
| `CAPX_POLICY_EXECUTION_RECORD_VIDEO` | Optional `1` or `0` override for the selected YAML's video setting. |
|
|
306
|
+
| `CAPX_STOP_ON_EXIT=1` | Stop the runtime session when the example exits and flush combined video artifacts. |
|
|
307
|
+
| `CAPX_SESSION_OUTPUT_DIR` | Privileged per-session output override. Leave unset for normal server-owned paths. |
|
|
308
|
+
| `CAPX_SESSION_SKILL_LIBRARY_PATH` | Privileged per-session skill-library override. Leave unset unless path overrides are enabled. |
|
|
309
|
+
| `CAPX_TOOL_RESULT_MAX_CHARS` | Increase printed tool-result previews while debugging. |
|
|
310
|
+
|
|
311
|
+
By default, each example run uses the runtime server's configured output
|
|
312
|
+
directory and skill-library path. Set `CAPX_SESSION_OUTPUT_DIR` or
|
|
313
|
+
`CAPX_SESSION_SKILL_LIBRARY_PATH` only when the runtime server was started with
|
|
314
|
+
`--allow-client-path-overrides` and allowed roots for those paths.
|
|
315
|
+
|
|
316
|
+
## Runtime Contract
|
|
317
|
+
|
|
318
|
+
The examples always use the live runtime path: `mode: "runtime"`,
|
|
319
|
+
`startSession: true`, `enablePolicyCodeExecution: true`, and
|
|
320
|
+
`policyExecutionMode: "live-runtime"`.
|
|
321
|
+
|
|
322
|
+
The adapter does not accept `repoPath` or `configPath`, and it omits
|
|
323
|
+
`outputDir` and `skillLibraryPath` by default. Those path choices belong to the
|
|
324
|
+
runtime server startup command. That keeps the architecture clean: the Python
|
|
325
|
+
runtime service owns the CaP-X repo/config/output/simulator setup, and
|
|
326
|
+
`agent-core` owns the external agent loop.
|
|
329
327
|
|
|
330
328
|
The adapter defaults to `toolExecutionMode: "plan"`. In `agent-core`, "plan"
|
|
331
|
-
means framework-owned tool dispatch, not "only
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
results before the next model step. The console output lines such as
|
|
335
|
-
`capx_status({ ... })` and `capx_run_policy_code({ ... })` are the visible
|
|
336
|
-
planned tool calls.
|
|
337
|
-
|
|
338
|
-
Both examples use `agent-core`'s `createEventPrinter` to render progress:
|
|
339
|
-
steps, tool calls, tool results, approval events, text output, and completion.
|
|
340
|
-
For CaP-X, those logs are the easiest way to see the external agent loop:
|
|
341
|
-
status, observe, optional policy-code execution, observe again, then final
|
|
342
|
-
summary. Set `CAPX_TOOL_RESULT_MAX_CHARS` if you want the terminal to print
|
|
343
|
-
longer tool-result previews while debugging.
|
|
344
|
-
|
|
345
|
-
## Environment Model
|
|
346
|
-
|
|
347
|
-
`OPENAI_API_KEY` configures the `agent-core` model provider. `OPENAI_MODEL`
|
|
348
|
-
defaults to `gpt-4o-mini` through `examples/_setup.ts` if you omit it.
|
|
349
|
-
`OPENAI_BASE_URL` is only needed for non-default OpenAI-compatible endpoints.
|
|
350
|
-
|
|
351
|
-
`CAPX_RUNTIME_SERVER_URL` points to the `capx-agent-runtime` service. When
|
|
352
|
-
the example creates a session, it lets the server's startup arguments define
|
|
353
|
-
the CaP-X repo, YAML config, output directory, and simulator context. This
|
|
354
|
-
matches the workstation setup command above.
|
|
355
|
-
|
|
356
|
-
`CAPX_ALLOW_DESTRUCTIVE=1` lets the example approval policy allow
|
|
357
|
-
`capx_run_policy_code`. Without it, the agent can observe and propose code but
|
|
358
|
-
will not execute policy code.
|
|
359
|
-
|
|
360
|
-
`CAPX_MAX_SOLVER_TURNS` controls the outer loop in
|
|
361
|
-
`02-capx-runtime-autosolve.ts`. The same `agent-core` session id is reused for
|
|
362
|
-
each turn so the agent keeps conversation and tool history.
|
|
363
|
-
|
|
364
|
-
`CAPX_RECOVER_ON_RUNTIME_ERROR=reset` lets
|
|
365
|
-
`02-capx-runtime-autosolve.ts` reset the live CaP-X runtime session to the next
|
|
366
|
-
trial/seed and continue when CaP-X reports an observation/depth failure. This
|
|
367
|
-
is session-level recovery for failures where `env.step(code)` cannot return a
|
|
368
|
-
normal multi-turn result. `CAPX_MAX_RUNTIME_RESETS` controls the reset budget
|
|
369
|
-
and defaults to `1` when recovery is enabled.
|
|
370
|
-
|
|
371
|
-
`CAPX_POLICY_EXECUTION_RECORD_VIDEO` is optional. Leave it unset to use the
|
|
372
|
-
selected CaP-X YAML's `record_video` setting. Set it to `1` or `0` only when
|
|
373
|
-
you want the TypeScript example to override the runtime server/YAML value.
|
|
374
|
-
|
|
375
|
-
## Prompt Context
|
|
329
|
+
means framework-owned tool dispatch, not "only write a textual plan." The model
|
|
330
|
+
can still emit tool calls; `agent-core` applies approval and scheduling policy,
|
|
331
|
+
executes approved tools, then records tool results before the next model step.
|
|
376
332
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
`agent-core` agent.
|
|
333
|
+
Both examples use `agent-core`'s `createEventPrinter` to render steps, tool
|
|
334
|
+
calls, tool results, approval events, text output, and completion. For CaP-X,
|
|
335
|
+
those logs are the easiest way to see the external agent loop: status, observe,
|
|
336
|
+
optional policy-code execution, observe again, then final summary.
|
|
382
337
|
|
|
383
|
-
|
|
338
|
+
This package does not copy CaP-X prompt templates into TypeScript. In runtime
|
|
339
|
+
mode, `capx-agent-runtime` loads the selected CaP-X YAML config and trial.
|
|
340
|
+
`capx_observe` returns the CaP-X task prompt, full prompt, observations, API
|
|
341
|
+
descriptions, rendered frame when available, and last-step result. The external
|
|
342
|
+
agent reads that CaP-X-provided context and acts by calling
|
|
384
343
|
`capx_run_policy_code`.
|
|
385
|
-
|
|
386
|
-
This is the clean bring-your-own-agent reference: start the runtime service
|
|
387
|
-
first, then connect an external `agent-core` agent to it.
|