@cuylabs/agent-physical-capx 5.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +161 -0
- package/dist/agent.d.ts +57 -0
- package/dist/agent.js +14 -0
- package/dist/agent.js.map +1 -0
- package/dist/chunk-C53NNB7T.js +107 -0
- package/dist/chunk-C53NNB7T.js.map +1 -0
- package/dist/chunk-MYO63CWO.js +869 -0
- package/dist/chunk-MYO63CWO.js.map +1 -0
- package/dist/index.d.ts +35 -0
- package/dist/index.js +92 -0
- package/dist/index.js.map +1 -0
- package/dist/session-BxaROlXW.d.ts +202 -0
- package/dist/session.d.ts +3 -0
- package/dist/session.js +11 -0
- package/dist/session.js.map +1 -0
- package/docs/README.md +16 -0
- package/docs/agent-core-integration.md +73 -0
- package/docs/how-it-works.md +151 -0
- package/docs/limitations.md +25 -0
- package/examples/.env.example +36 -0
- package/examples/01-capx-runtime-solver.ts +162 -0
- package/examples/02-capx-runtime-autosolve.ts +307 -0
- package/examples/README.md +387 -0
- package/examples/_setup.ts +61 -0
- package/package.json +76 -0
- package/skills/capx-code-as-policy/SKILL.md +22 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# How The CaP-X Adapter Works
|
|
2
|
+
|
|
3
|
+
`CapxSession` implements the `PhysicalSession` contract from
|
|
4
|
+
`@cuylabs/agent-physical`. CaP-X itself remains the Python robotics
|
|
5
|
+
Code-as-Policy framework.
|
|
6
|
+
|
|
7
|
+
The long-term runtime path is service-first:
|
|
8
|
+
|
|
9
|
+
```text
|
|
10
|
+
GPU workstation
|
|
11
|
+
capx-agent-runtime serve --repo-path ... --config-path ...
|
|
12
|
+
-> loads CaP-X YAML config
|
|
13
|
+
-> starts configured CaP-X API servers
|
|
14
|
+
-> creates one CaP-X environment
|
|
15
|
+
-> exposes HTTP endpoints
|
|
16
|
+
|
|
17
|
+
TypeScript client
|
|
18
|
+
CapxSession.start()
|
|
19
|
+
-> GET /health
|
|
20
|
+
-> POST /sessions
|
|
21
|
+
-> agent-core drives observe/execute-code against that same session
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
The TypeScript package does not launch `capx-agent-runtime` and does not know
|
|
25
|
+
the CaP-X checkout or YAML path. Those values belong to the Python service
|
|
26
|
+
command because the Python environment is where simulator packages, CUDA
|
|
27
|
+
packages, assets, and CaP-X internals exist.
|
|
28
|
+
|
|
29
|
+
## Tool Mapping
|
|
30
|
+
|
|
31
|
+
`createCapxPhysicalTools(session)` delegates to the generic tools from
|
|
32
|
+
`@cuylabs/agent-physical` with a `capx_` prefix.
|
|
33
|
+
|
|
34
|
+
```text
|
|
35
|
+
capx_status -> session.getState()
|
|
36
|
+
capx_observe -> session.observe()
|
|
37
|
+
capx_artifacts -> session.listArtifacts()
|
|
38
|
+
capx_stop -> session.stop()
|
|
39
|
+
capx_run_policy_code -> POST /sessions/{id}/execute-code
|
|
40
|
+
capx_turn_history -> GET /sessions/{id}/turns
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
`createCapxAgent(options)` is the higher-level convenience wrapper. It creates
|
|
44
|
+
or accepts a `CapxSession`, optionally starts it, generates the `capx_*` tools,
|
|
45
|
+
and passes those tools to `createAgent()` from `@cuylabs/agent-core`.
|
|
46
|
+
|
|
47
|
+
A `CapxSession` is deliberately stateful. Once it starts, every `capx_observe`,
|
|
48
|
+
`capx_run_policy_code`, `capx_turn_history`, and `capx_artifacts` call targets
|
|
49
|
+
the same runtime session until the host resets or stops it. The model-facing
|
|
50
|
+
tool surface does not include a separate "start a new trial" command, because
|
|
51
|
+
session creation and reset policy belong to the host application and
|
|
52
|
+
`capx-agent-runtime`.
|
|
53
|
+
|
|
54
|
+
## Prompt And Skill Flow
|
|
55
|
+
|
|
56
|
+
The adapter does not vendor CaP-X prompts into TypeScript. On reset and observe,
|
|
57
|
+
`capx-agent-runtime` returns the CaP-X task prompt, full prompt, observations,
|
|
58
|
+
policy-code context, and last-step result. The adapter exposes those through
|
|
59
|
+
`capx_observe` so the `agent-core` model can write the next policy-code step.
|
|
60
|
+
|
|
61
|
+
When `capx_observe` is called with `includeImages=true`, the adapter also asks
|
|
62
|
+
the runtime for a rendered frame. The tool result keeps a text summary for CLI
|
|
63
|
+
logs and session history, and it provides a richer model-facing payload through
|
|
64
|
+
`agent-core` metadata: text observations plus image file data. That lets
|
|
65
|
+
image-capable models inspect the frame on the following reasoning step while
|
|
66
|
+
text-only logs remain readable. If the selected model or provider does not
|
|
67
|
+
support image inputs, use the textual prompt/observation data or the MCP path
|
|
68
|
+
described below.
|
|
69
|
+
|
|
70
|
+
CaP-X Python skills stay in the runtime. Runtime observations can include
|
|
71
|
+
skill-library summaries and typed policy-code affordances, while programmatic
|
|
72
|
+
clients can still call the runtime skill-library endpoints deliberately when
|
|
73
|
+
they need to inspect source, extract reusable helpers, or inject promoted
|
|
74
|
+
helpers with host approval.
|
|
75
|
+
|
|
76
|
+
## CaP-X Configs And Multi-Turn
|
|
77
|
+
|
|
78
|
+
The CaP-X YAML chosen by `capx-agent-runtime serve --config-path ...` selects
|
|
79
|
+
the simulator task, helper API surface, perception/API servers, output
|
|
80
|
+
directory, recording behavior, and prompt text. The TypeScript adapter does not
|
|
81
|
+
choose or reload that YAML; it connects to the already-started runtime service.
|
|
82
|
+
|
|
83
|
+
The external `agent-core` loop can run multiple turns against any compatible
|
|
84
|
+
CaP-X config because each `capx_run_policy_code` call executes one Python
|
|
85
|
+
program in the same live environment. The config does not need to be named
|
|
86
|
+
`multiturn` for the external loop to continue observing and executing.
|
|
87
|
+
|
|
88
|
+
CaP-X's `*_multiturn*.yaml` files are still useful because they often add
|
|
89
|
+
task-specific continuation guidance to the prompt and define
|
|
90
|
+
`multi_turn_prompt`. `capx-agent-runtime` exposes that prompt through
|
|
91
|
+
`capx_observe` so the external agent can use it as guidance.
|
|
92
|
+
|
|
93
|
+
CaP-X's `*_vf.yaml` and `*_vdm.yaml` configs express visual-feedback and
|
|
94
|
+
visual-differencing behavior. In the bring-your-own-agent path, those flags are
|
|
95
|
+
surfaced as runtime config metadata. The external agent should call
|
|
96
|
+
`capx_observe` with `includeImages=true` and perform the visual comparison in
|
|
97
|
+
the host harness/model.
|
|
98
|
+
|
|
99
|
+
Reduced-API and skill-library configs also work when the selected CaP-X
|
|
100
|
+
environment imports successfully. They change which Python helper functions
|
|
101
|
+
CaP-X binds into the policy-code namespace. The agent should rely on the
|
|
102
|
+
observed CaP-X full prompt and `codeContext`, not hard-coded TypeScript copies
|
|
103
|
+
of those helpers.
|
|
104
|
+
|
|
105
|
+
## Artifact Mapping
|
|
106
|
+
|
|
107
|
+
CaP-X writes generated code, logs, prompts, images, videos, and summaries under
|
|
108
|
+
its output directory. `capx-agent-runtime` lists those artifacts over HTTP and
|
|
109
|
+
the adapter maps them into `PhysicalArtifact` records.
|
|
110
|
+
|
|
111
|
+
In a healthy recorded cube-stack run, the first successful policy-code turn
|
|
112
|
+
returns `success=true`, `taskCompleted=true`, `reward=1`, `terminated=true`,
|
|
113
|
+
and `sandboxRc=0`. The runtime artifact list includes the submitted `code.py`,
|
|
114
|
+
CaP-X response JSON, summary/log files, rendered images, one per-turn MP4 such
|
|
115
|
+
as `video_1.000_turn_00.mp4`, and, after `capx_stop` or
|
|
116
|
+
`CAPX_STOP_ON_EXIT=1`, a `video_session_combined.mp4` artifact.
|
|
117
|
+
|
|
118
|
+
Short per-turn videos are normal when a policy step fails quickly. The combined
|
|
119
|
+
session video is the best artifact to inspect after a full run because it is
|
|
120
|
+
written once the live runtime session stops.
|
|
121
|
+
|
|
122
|
+
## Execution Diagnostics
|
|
123
|
+
|
|
124
|
+
Policy-code failures and runtime observation failures are different.
|
|
125
|
+
|
|
126
|
+
When CaP-X executes user code and returns a normal failed step, the adapter
|
|
127
|
+
surfaces `stderr`, `sandboxRc`, task-completion state, reward, and compact
|
|
128
|
+
diagnostics such as `failurePhase=policy_execution`,
|
|
129
|
+
`perceptionApiFailure=true`, or `emptyPointCloud=true`. The agent loop can
|
|
130
|
+
inspect that result and try another policy step.
|
|
131
|
+
|
|
132
|
+
When `env.step(code)` itself raises while collecting the post-policy
|
|
133
|
+
observation, the runtime marks the step as truncated and reports diagnostics
|
|
134
|
+
such as `failurePhase=post_policy_observation`, `observationPipeline=true`, and
|
|
135
|
+
`depthAssertion=true`. The autosolve example can reset the runtime to the next
|
|
136
|
+
trial/seed for that class of failure when
|
|
137
|
+
`CAPX_RECOVER_ON_RUNTIME_ERROR=reset` is set; `CAPX_MAX_RUNTIME_RESETS` (default 1 when recovery is on) bounds how many resets it attempts.
|
|
138
|
+
|
|
139
|
+
## Direct Adapter Versus MCP Images
|
|
140
|
+
|
|
141
|
+
The TypeScript adapter uses `capx_observe(includeImages=true)` for the common
|
|
142
|
+
agent loop. It converts the rendered frame into `agent-core` multimodal
|
|
143
|
+
tool-result metadata so the next model step can receive both text and image
|
|
144
|
+
content.
|
|
145
|
+
|
|
146
|
+
The MCP wrapper uses MCP-native content semantics instead. `capx_render_frame`
|
|
147
|
+
returns text plus MCP `ImageContent`, and `capx_read_artifact` returns
|
|
148
|
+
`ImageContent` for image artifacts. `agent-core`'s MCP client maps those MCP
|
|
149
|
+
image parts to model file-data parts. In other words, direct adapter and MCP
|
|
150
|
+
integrations both expose images, but they use the natural content contract for
|
|
151
|
+
their transport.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Current Limitations
|
|
2
|
+
|
|
3
|
+
This package is an `agents-ts` adapter for `capx-agent-runtime`, not a full
|
|
4
|
+
CaP-X control plane.
|
|
5
|
+
|
|
6
|
+
## Runtime Service Required
|
|
7
|
+
|
|
8
|
+
The TypeScript adapter requires an already-running `capx-agent-runtime` service.
|
|
9
|
+
It does not launch CaP-X, does not launch the runtime service, and does not know
|
|
10
|
+
the CaP-X checkout or YAML config. Start the Python service in the CaP-X
|
|
11
|
+
environment first, then pass `runtimeServerUrl`.
|
|
12
|
+
|
|
13
|
+
## Runtime-Service Boundary
|
|
14
|
+
|
|
15
|
+
The runtime path creates and controls sessions through `capx-agent-runtime`.
|
|
16
|
+
The TypeScript package does not attach to arbitrary existing CaP-X processes or
|
|
17
|
+
browser UI state. Keep all CaP-X process integration behind the Python runtime
|
|
18
|
+
service so the TypeScript tool surface stays stable.
|
|
19
|
+
|
|
20
|
+
## Hardware Safety Remains External
|
|
21
|
+
|
|
22
|
+
For real robot configs, CaP-X and the robot-side hardware stack still own
|
|
23
|
+
calibration, low-level realtime control, and hardware safety systems. This
|
|
24
|
+
package only provides an `agents-ts` integration boundary and approval-gated
|
|
25
|
+
policy-code tools.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Required agent model config. `_setup.ts` loads this file automatically when
|
|
2
|
+
# either example runs.
|
|
3
|
+
OPENAI_API_KEY=
|
|
4
|
+
OPENAI_MODEL=gpt-4o-mini
|
|
5
|
+
# Optional: set only for OpenAI-compatible providers that are not the default
|
|
6
|
+
# OpenAI endpoint.
|
|
7
|
+
OPENAI_BASE_URL=
|
|
8
|
+
|
|
9
|
+
# Required runtime service URL. Start capx-agent-runtime first, then point this
|
|
10
|
+
# example at the service or SSH tunnel URL. The TypeScript examples do not
|
|
11
|
+
# launch CaP-X or CaP-X Agent0.
|
|
12
|
+
CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210
|
|
13
|
+
|
|
14
|
+
# Safety switches for the example host. Set CAPX_ALLOW_DESTRUCTIVE=1 to allow
|
|
15
|
+
# capx_run_policy_code; otherwise the examples can observe and propose code but
|
|
16
|
+
# execution is denied.
|
|
17
|
+
CAPX_PHYSICAL_MODE=simulation
|
|
18
|
+
CAPX_ALLOW_DESTRUCTIVE=0
|
|
19
|
+
CAPX_ALLOW_HARDWARE_POLICY_EXECUTION=0
|
|
20
|
+
|
|
21
|
+
# Optional runtime/client tuning. Values ending in _MS are in milliseconds.
|
|
22
|
+
CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS=120000
|
|
23
|
+
CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS=1000000
|
|
24
|
+
CAPX_POLICY_EXECUTION_TIMEOUT_MS=1000000
|
|
25
|
+
CAPX_POLICY_EXECUTION_TRIAL=
|
|
26
|
+
# Leave blank to use the CaP-X YAML record_video setting. Set 1 or 0 only to
|
|
27
|
+
# override the server/YAML value for this example run.
|
|
28
|
+
CAPX_POLICY_EXECUTION_RECORD_VIDEO=
|
|
29
|
+
# Set to 1 to stop the runtime session when the example exits.
CAPX_STOP_ON_EXIT=0
|
|
30
|
+
CAPX_MAX_SOLVER_TURNS=6
|
|
31
|
+
|
|
32
|
+
# Optional runtime session overrides. Normally the runtime server startup
|
|
33
|
+
# command owns output paths and skill library paths.
|
|
34
|
+
CAPX_OUTPUT_DIR=
|
|
35
|
+
CAPX_SKILL_LIBRARY_PATH=
|
|
36
|
+
CAPX_AGENT_PROMPT=
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 01 - CaP-X runtime solver
|
|
3
|
+
*
|
|
4
|
+
* This is the default bring-your-own-agent flow:
|
|
5
|
+
* 1. Start capx-agent-runtime on the GPU workstation.
|
|
6
|
+
* 2. Point CAPX_RUNTIME_SERVER_URL at that service or SSH tunnel.
|
|
7
|
+
* 3. Run this TypeScript agent locally.
|
|
8
|
+
*
|
|
9
|
+
* The runtime service owns the CaP-X checkout, YAML config, simulator, and API
|
|
10
|
+
* servers. This agent-core example owns the reasoning loop: observe, inspect
|
|
11
|
+
* task context, write policy code, execute one step, observe again, and report.
|
|
12
|
+
*
|
|
13
|
+
* Run:
|
|
14
|
+
* pnpm --filter @cuylabs/agent-physical-capx build
|
|
15
|
+
* npx tsx examples/01-capx-runtime-solver.ts
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { createCapxAgent } from "@cuylabs/agent-physical-capx";
|
|
19
|
+
import { createEventPrinter } from "@cuylabs/agent-core";
|
|
20
|
+
import { exampleOpenAIModel } from "./_setup.js";
|
|
21
|
+
|
|
22
|
+
/**
 * Normalizes an optional, environment-style string.
 *
 * @param value - Raw value, possibly `undefined`.
 * @returns The value with surrounding whitespace removed, or `undefined` when
 *   the input is missing, empty, or whitespace-only.
 */
function optionalString(value: string | undefined): string | undefined {
  if (value == null) {
    return undefined;
  }
  const text = value.trim();
  return text.length > 0 ? text : undefined;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Parses an optional boolean flag from an environment-style string.
 *
 * Surrounding whitespace is ignored for both the emptiness check and the
 * comparison, so values such as `" 1 "` or `"true\n"` are recognized.
 *
 * @param value - Raw value, possibly `undefined`.
 * @returns `undefined` when the value is missing or blank; `true` for `"1"`
 *   or a case-insensitive `"true"`; `false` for any other non-blank value.
 */
function optionalBoolean(value: string | undefined): boolean | undefined {
  const normalized = value?.trim().toLowerCase();
  if (!normalized) {
    return undefined;
  }
  return normalized === "1" || normalized === "true";
}
|
|
33
|
+
|
|
34
|
+
/**
 * Parses an optional finite number from an environment-style string.
 *
 * The value is trimmed before the emptiness check because `Number("   ")`
 * evaluates to `0`; without trimming, a blank-but-padded variable would be
 * reported as `0` instead of "not set".
 *
 * @param value - Raw value, possibly `undefined`.
 * @returns The parsed number, or `undefined` when the value is missing, blank,
 *   or does not convert to a finite number.
 */
function optionalNumber(value: string | undefined): number | undefined {
  const trimmed = value?.trim();
  if (!trimmed) {
    return undefined;
  }
  const parsed = Number(trimmed);
  return Number.isFinite(parsed) ? parsed : undefined;
}
|
|
41
|
+
|
|
42
|
+
const runtimeServerUrl = optionalString(process.env.CAPX_RUNTIME_SERVER_URL);
|
|
43
|
+
|
|
44
|
+
if (!runtimeServerUrl) {
|
|
45
|
+
throw new Error(
|
|
46
|
+
[
|
|
47
|
+
"CAPX_RUNTIME_SERVER_URL is required.",
|
|
48
|
+
"Start capx-agent-runtime on the CaP-X workstation, tunnel the port if needed,",
|
|
49
|
+
"then set CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210.",
|
|
50
|
+
].join(" "),
|
|
51
|
+
);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const allowDestructive = process.env.CAPX_ALLOW_DESTRUCTIVE === "1";
|
|
55
|
+
const toolExecutionMode = "plan" as const;
|
|
56
|
+
const runId = Date.now();
|
|
57
|
+
const sessionId =
|
|
58
|
+
optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
|
|
59
|
+
`capx-runtime-solver-${runId}`;
|
|
60
|
+
const outputDir =
|
|
61
|
+
optionalString(process.env.CAPX_OUTPUT_DIR) ??
|
|
62
|
+
`outputs/capx-agent-runtime/${sessionId}`;
|
|
63
|
+
const recordVideo =
|
|
64
|
+
optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
|
|
65
|
+
"runtime-default";
|
|
66
|
+
|
|
67
|
+
/**
 * Approval policy used by this example host.
 *
 * The `skill` tool is always allowed; every other tool is allowed only when
 * the module-level `allowDestructive` flag is set.
 *
 * @param tool - Name of the tool requesting approval.
 * @returns `"allow"` or `"deny"`.
 */
function approveExampleTool(tool: string): "allow" | "deny" {
  const approved = tool === "skill" || allowDestructive;
  return approved ? "allow" : "deny";
}
|
|
73
|
+
|
|
74
|
+
const userPrompt =
|
|
75
|
+
optionalString(process.env.CAPX_AGENT_PROMPT) ??
|
|
76
|
+
[
|
|
77
|
+
"You are the external agent solving one CaP-X runtime simulation.",
|
|
78
|
+
"Check capx_status first, then call capx_observe with includeImages=true.",
|
|
79
|
+
"Use the CaP-X task prompt, full prompt, observations, API context, skill library, and turn history as the source of truth.",
|
|
80
|
+
"Propose one concise Python Code-as-Policy action toward the task.",
|
|
81
|
+
"If capx_run_policy_code is available and approval allows it, execute the action, observe again, inspect reward/stdout/stderr/task completion, and summarize the result.",
|
|
82
|
+
"If skill extraction or injection tools are available, only use them after useful successful code and with approval.",
|
|
83
|
+
"If execution is denied, explain the exact policy code you would run and why.",
|
|
84
|
+
].join(" ");
|
|
85
|
+
|
|
86
|
+
console.log(
|
|
87
|
+
[
|
|
88
|
+
"CaP-X agent mode=runtime",
|
|
89
|
+
"startSession=true",
|
|
90
|
+
"policyExecution=live-runtime",
|
|
91
|
+
`toolDispatch=${toolExecutionMode}`,
|
|
92
|
+
`approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
|
|
93
|
+
`recordVideo=${recordVideo}`,
|
|
94
|
+
`agentSessionId=${sessionId}`,
|
|
95
|
+
`outputDir=${outputDir}`,
|
|
96
|
+
`runtimeServerUrl=${runtimeServerUrl}`,
|
|
97
|
+
]
|
|
98
|
+
.join(" "),
|
|
99
|
+
);
|
|
100
|
+
|
|
101
|
+
const { agent, session } = await createCapxAgent({
|
|
102
|
+
model: exampleOpenAIModel(),
|
|
103
|
+
startSession: true,
|
|
104
|
+
toolExecutionMode,
|
|
105
|
+
sessionOptions: {
|
|
106
|
+
mode: "runtime",
|
|
107
|
+
physicalMode:
|
|
108
|
+
process.env.CAPX_PHYSICAL_MODE === "hardware" ? "hardware" : "simulation",
|
|
109
|
+
|
|
110
|
+
// Normal service-first path: connect to an already-running runtime server
|
|
111
|
+
// and let that server's --config-path/--repo-path defaults define the
|
|
112
|
+
// simulation.
|
|
113
|
+
runtimeServerUrl,
|
|
114
|
+
runtimeServerStartupTimeoutMs: optionalNumber(
|
|
115
|
+
process.env.CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS,
|
|
116
|
+
),
|
|
117
|
+
runtimeServerRequestTimeoutMs: optionalNumber(
|
|
118
|
+
process.env.CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS,
|
|
119
|
+
),
|
|
120
|
+
|
|
121
|
+
enablePolicyCodeExecution: true,
|
|
122
|
+
policyExecutionMode: "live-runtime",
|
|
123
|
+
allowHardwarePolicyExecution:
|
|
124
|
+
process.env.CAPX_ALLOW_HARDWARE_POLICY_EXECUTION === "1",
|
|
125
|
+
policyExecutionTimeoutMs: optionalNumber(
|
|
126
|
+
process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
|
|
127
|
+
),
|
|
128
|
+
policyExecutionTrial: optionalNumber(process.env.CAPX_POLICY_EXECUTION_TRIAL),
|
|
129
|
+
policyExecutionRecordVideo: optionalBoolean(
|
|
130
|
+
process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
|
|
131
|
+
),
|
|
132
|
+
|
|
133
|
+
outputDir,
|
|
134
|
+
skillLibraryPath: optionalString(process.env.CAPX_SKILL_LIBRARY_PATH),
|
|
135
|
+
},
|
|
136
|
+
approval: {
|
|
137
|
+
defaultAction: "ask",
|
|
138
|
+
onRequest: async (request) => {
|
|
139
|
+
console.log(
|
|
140
|
+
`approval requested: ${request.tool} risk=${request.risk} description=${request.description}`,
|
|
141
|
+
);
|
|
142
|
+
return approveExampleTool(request.tool);
|
|
143
|
+
},
|
|
144
|
+
},
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
try {
|
|
148
|
+
const printEvent = createEventPrinter({
|
|
149
|
+
steps: true,
|
|
150
|
+
completion: true,
|
|
151
|
+
toolResultMaxChars:
|
|
152
|
+
optionalNumber(process.env.CAPX_TOOL_RESULT_MAX_CHARS) ?? 2_000,
|
|
153
|
+
});
|
|
154
|
+
for await (const event of agent.chat(sessionId, userPrompt)) {
|
|
155
|
+
printEvent(event);
|
|
156
|
+
}
|
|
157
|
+
} finally {
|
|
158
|
+
await agent.close();
|
|
159
|
+
if (process.env.CAPX_STOP_ON_EXIT === "1") {
|
|
160
|
+
await session.stop("example exit");
|
|
161
|
+
}
|
|
162
|
+
}
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 02 - CaP-X runtime autosolve loop
|
|
3
|
+
*
|
|
4
|
+
* This example keeps the same agent-core session open across multiple user
|
|
5
|
+
* turns. Each turn lets the agent observe, write policy code, execute it when
|
|
6
|
+
* approval allows, and inspect the new result. The outer loop stops when CaP-X
|
|
7
|
+
 * reports task completion or termination, when an observation/depth failure cannot be recovered, or when CAPX_MAX_SOLVER_TURNS is reached.
|
|
8
|
+
*
|
|
9
|
+
* Run:
|
|
10
|
+
* pnpm --filter @cuylabs/agent-physical-capx build
|
|
11
|
+
* npx tsx examples/02-capx-runtime-autosolve.ts
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { createEventPrinter } from "@cuylabs/agent-core";
|
|
15
|
+
import { createCapxAgent } from "@cuylabs/agent-physical-capx";
|
|
16
|
+
import type { PhysicalObservation } from "@cuylabs/agent-physical";
|
|
17
|
+
import { exampleOpenAIModel } from "./_setup.js";
|
|
18
|
+
|
|
19
|
+
/**
 * Returns the trimmed form of an optional string, treating blank input as
 * absent.
 *
 * @param value - Raw value, possibly `undefined`.
 * @returns The trimmed string, or `undefined` when the input is missing,
 *   empty, or whitespace-only.
 */
function optionalString(value: string | undefined): string | undefined {
  const text = (value ?? "").trim();
  if (text === "") {
    return undefined;
  }
  return text;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Parses an optional boolean flag from an environment-style string.
 *
 * The value is trimmed once and the trimmed form is used for both the
 * blank check and the comparison, so padded values such as `" 1 "` are
 * recognized instead of being reported as `false`.
 *
 * @param value - Raw value, possibly `undefined`.
 * @returns `undefined` when the value is missing or blank; `true` for `"1"`
 *   or a case-insensitive `"true"`; `false` for any other non-blank value.
 */
function optionalBoolean(value: string | undefined): boolean | undefined {
  const flag = value?.trim().toLowerCase();
  if (flag === undefined || flag === "") {
    return undefined;
  }
  return flag === "1" || flag === "true";
}
|
|
30
|
+
|
|
31
|
+
/**
 * Parses an optional finite number from an environment-style string.
 *
 * Trimming happens before the emptiness check because `Number("   ")` is `0`;
 * a padded-but-blank variable must read as "not set" so that callers'
 * `?? default` fallbacks (for example the solver-turn limit) still apply.
 *
 * @param value - Raw value, possibly `undefined`.
 * @returns The parsed number, or `undefined` when the value is missing, blank,
 *   or does not convert to a finite number.
 */
function optionalNumber(value: string | undefined): number | undefined {
  const text = value?.trim();
  if (!text) {
    return undefined;
  }
  const parsed = Number(text);
  if (!Number.isFinite(parsed)) {
    return undefined;
  }
  return parsed;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Shape this example expects after JSON-parsing the text of the
 * `capx:runtime:last-step` observation item.
 *
 * Every field is optional. Task completion and the sandbox return code are
 * accepted in both camelCase and snake_case; `normalizeStep` folds the
 * snake_case spelling into the camelCase one.
 */
interface LastRuntimeStep {
  // Outcome flags.
  success?: boolean;
  terminated?: boolean;
  truncated?: boolean;
  reward?: number | null;

  // Task completion, in either spelling.
  taskCompleted?: boolean | null;
  task_completed?: boolean | null;

  // Sandbox return code, in either spelling; this example treats a defined,
  // non-zero value as a failed sandbox run.
  sandboxRc?: number;
  sandbox_rc?: number;

  // Failure details consulted when classifying observation failures.
  stderr?: string;
  error?: string | null;
  diagnostics?: {
    // This example special-cases the value "policy_execution".
    failurePhase?: string;
    observationPipeline?: boolean;
    depthAssertion?: boolean;
  } | null;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Returns a copy of `step` whose camelCase `taskCompleted` and `sandboxRc`
 * fields fall back to their snake_case counterparts when nullish.
 *
 * @param step - Parsed last-step payload.
 * @returns A new object; the input is not modified.
 */
function normalizeStep(step: LastRuntimeStep): LastRuntimeStep {
  const taskCompleted = step.taskCompleted ?? step.task_completed;
  const sandboxRc = step.sandboxRc ?? step.sandbox_rc;
  return { ...step, taskCompleted, sandboxRc };
}
|
|
64
|
+
|
|
65
|
+
/**
 * Extracts the most recent runtime step result from an observation.
 *
 * Items are scanned from newest to oldest for the first one whose `source` is
 * `capx:runtime:last-step`; its text is parsed as JSON and normalized.
 *
 * @param observation - Observation returned by the session.
 * @returns The normalized step, or `null` when no such item exists, the item
 *   is not a text item, or its text is not valid JSON.
 */
function lastStep(observation: PhysicalObservation): LastRuntimeStep | null {
  const items = [...observation.items];
  for (let index = items.length - 1; index >= 0; index -= 1) {
    const entry = items[index];
    if (entry.source !== "capx:runtime:last-step") {
      continue;
    }
    // Only the newest matching item is considered, even if it is unusable.
    if (entry.kind !== "text") {
      return null;
    }
    try {
      return normalizeStep(JSON.parse(entry.text) as LastRuntimeStep);
    } catch {
      return null;
    }
  }
  return null;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Decides whether the last step looks like a runtime observation/depth
 * failure rather than an ordinary policy-code failure.
 *
 * The step qualifies only when all of the following hold:
 * - `diagnostics.failurePhase` is not `"policy_execution"`;
 * - `sandboxRc` is defined and non-zero;
 * - the runtime reported an `error` or attached `diagnostics`;
 * - the step is `truncated`;
 * - there is evidence of an observation problem: with diagnostics, the
 *   `observationPipeline` or `depthAssertion` flag; without diagnostics,
 *   observation/depth markers in `stderr`; in both cases an `AssertionError`
 *   mentioned in `error` also counts.
 *
 * @param step - Normalized last step, or `null` when none is available.
 * @returns `true` when the step matches the pattern above.
 */
function isUnrecoverableObservationFailure(step: LastRuntimeStep | null): boolean {
  if (!step) {
    return false;
  }

  const diagnostics = step.diagnostics ?? null;
  if (diagnostics?.failurePhase === "policy_execution") {
    return false;
  }

  const errorText = step.error ?? "";
  const sandboxFailed = step.sandboxRc !== undefined && step.sandboxRc !== 0;
  const runtimeRaised = Boolean(errorText) || Boolean(diagnostics);
  if (!sandboxFailed || !runtimeRaised || !step.truncated) {
    return false;
  }

  const assertionRaised = errorText.includes("AssertionError");
  if (diagnostics) {
    // Structured diagnostics take precedence over stderr heuristics.
    return Boolean(
      diagnostics.observationPipeline || diagnostics.depthAssertion || assertionRaised,
    );
  }

  // No diagnostics: fall back to matching traceback text in stderr.
  // "get_observation" also matches "_get_observation".
  const stderrText = step.stderr ?? "";
  const observationMarkers = ["get_observation", "get_real_depth_map"];
  const stderrPointsAtObservation =
    observationMarkers.some((marker) => stderrText.includes(marker)) ||
    (stderrText.includes("AssertionError") && stderrText.includes("depth"));
  return stderrPointsAtObservation || assertionRaised;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Formats the completion-related fields of a step as one log-friendly line of
 * space-separated `key=value` pairs; nullish fields are shown as `n/a`.
 *
 * @param step - Normalized last step.
 * @returns A single-line summary string.
 */
function completionSummary(step: LastRuntimeStep): string {
  const show = (value: unknown): string => `${value ?? "n/a"}`;
  const taskCompleted = show(step.taskCompleted);
  const terminated = show(step.terminated);
  const truncated = show(step.truncated);
  const sandboxRc = show(step.sandboxRc);
  const reward = show(step.reward);
  return `taskCompleted=${taskCompleted} terminated=${terminated} truncated=${truncated} sandboxRc=${sandboxRc} reward=${reward}`;
}
|
|
125
|
+
|
|
126
|
+
const runtimeServerUrl = optionalString(process.env.CAPX_RUNTIME_SERVER_URL);
|
|
127
|
+
|
|
128
|
+
if (!runtimeServerUrl) {
|
|
129
|
+
throw new Error(
|
|
130
|
+
[
|
|
131
|
+
"CAPX_RUNTIME_SERVER_URL is required.",
|
|
132
|
+
"Start capx-agent-runtime on the CaP-X workstation, tunnel the port if needed,",
|
|
133
|
+
"then set CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210.",
|
|
134
|
+
].join(" "),
|
|
135
|
+
);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const allowDestructive = process.env.CAPX_ALLOW_DESTRUCTIVE === "1";
|
|
139
|
+
const maxTurns = optionalNumber(process.env.CAPX_MAX_SOLVER_TURNS) ?? 6;
|
|
140
|
+
const recoverOnRuntimeError =
|
|
141
|
+
optionalString(process.env.CAPX_RECOVER_ON_RUNTIME_ERROR) === "reset";
|
|
142
|
+
const maxRuntimeResets =
|
|
143
|
+
optionalNumber(process.env.CAPX_MAX_RUNTIME_RESETS) ??
|
|
144
|
+
(recoverOnRuntimeError ? 1 : 0);
|
|
145
|
+
const initialPolicyExecutionTrial =
|
|
146
|
+
optionalNumber(process.env.CAPX_POLICY_EXECUTION_TRIAL) ?? 1;
|
|
147
|
+
const runId = Date.now();
|
|
148
|
+
const sessionId =
|
|
149
|
+
optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
|
|
150
|
+
`capx-runtime-autosolve-${runId}`;
|
|
151
|
+
const outputDir =
|
|
152
|
+
optionalString(process.env.CAPX_OUTPUT_DIR) ??
|
|
153
|
+
`outputs/capx-agent-runtime/${sessionId}`;
|
|
154
|
+
const toolExecutionMode = "plan" as const;
|
|
155
|
+
const recordVideo =
|
|
156
|
+
optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
|
|
157
|
+
"runtime-default";
|
|
158
|
+
const printEvent = createEventPrinter({
|
|
159
|
+
steps: true,
|
|
160
|
+
completion: true,
|
|
161
|
+
toolResultMaxChars:
|
|
162
|
+
optionalNumber(process.env.CAPX_TOOL_RESULT_MAX_CHARS) ?? 2_000,
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
/**
 * Approval policy for the autosolve example.
 *
 * Requests from the `skill` tool are always approved. Any other tool is
 * approved only when the module-level `allowDestructive` flag is set.
 *
 * @param tool - Name of the tool requesting approval.
 * @returns `"allow"` or `"deny"`.
 */
function approveExampleTool(tool: string): "allow" | "deny" {
  if (tool !== "skill" && !allowDestructive) {
    return "deny";
  }
  return "allow";
}
|
|
171
|
+
|
|
172
|
+
console.error(
|
|
173
|
+
[
|
|
174
|
+
"CaP-X agent mode=runtime",
|
|
175
|
+
`maxTurns=${maxTurns}`,
|
|
176
|
+
"policyExecution=live-runtime",
|
|
177
|
+
`toolDispatch=${toolExecutionMode}`,
|
|
178
|
+
`approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
|
|
179
|
+
`recordVideo=${recordVideo}`,
|
|
180
|
+
`agentSessionId=${sessionId}`,
|
|
181
|
+
`outputDir=${outputDir}`,
|
|
182
|
+
`trial=${initialPolicyExecutionTrial}`,
|
|
183
|
+
`recoverOnRuntimeError=${recoverOnRuntimeError ? "reset" : "off"}`,
|
|
184
|
+
`maxRuntimeResets=${maxRuntimeResets}`,
|
|
185
|
+
`runtimeServerUrl=${runtimeServerUrl}`,
|
|
186
|
+
].join(" "),
|
|
187
|
+
);
|
|
188
|
+
|
|
189
|
+
const { agent, session } = await createCapxAgent({
|
|
190
|
+
model: exampleOpenAIModel(),
|
|
191
|
+
startSession: true,
|
|
192
|
+
toolExecutionMode,
|
|
193
|
+
sessionOptions: {
|
|
194
|
+
mode: "runtime",
|
|
195
|
+
runtimeServerUrl,
|
|
196
|
+
physicalMode:
|
|
197
|
+
process.env.CAPX_PHYSICAL_MODE === "hardware" ? "hardware" : "simulation",
|
|
198
|
+
runtimeServerStartupTimeoutMs: optionalNumber(
|
|
199
|
+
process.env.CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS,
|
|
200
|
+
),
|
|
201
|
+
runtimeServerRequestTimeoutMs: optionalNumber(
|
|
202
|
+
process.env.CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS,
|
|
203
|
+
),
|
|
204
|
+
enablePolicyCodeExecution: true,
|
|
205
|
+
policyExecutionMode: "live-runtime",
|
|
206
|
+
allowHardwarePolicyExecution:
|
|
207
|
+
process.env.CAPX_ALLOW_HARDWARE_POLICY_EXECUTION === "1",
|
|
208
|
+
policyExecutionTimeoutMs: optionalNumber(
|
|
209
|
+
process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
|
|
210
|
+
),
|
|
211
|
+
policyExecutionTrial: initialPolicyExecutionTrial,
|
|
212
|
+
policyExecutionRecordVideo: optionalBoolean(
|
|
213
|
+
process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
|
|
214
|
+
),
|
|
215
|
+
outputDir,
|
|
216
|
+
skillLibraryPath: optionalString(process.env.CAPX_SKILL_LIBRARY_PATH),
|
|
217
|
+
},
|
|
218
|
+
approval: {
|
|
219
|
+
defaultAction: "ask",
|
|
220
|
+
onRequest: async (request) => {
|
|
221
|
+
console.error(
|
|
222
|
+
`approval requested: ${request.tool} risk=${request.risk} description=${request.description}`,
|
|
223
|
+
);
|
|
224
|
+
return approveExampleTool(request.tool);
|
|
225
|
+
},
|
|
226
|
+
},
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
try {
|
|
230
|
+
let runtimeResetCount = 0;
|
|
231
|
+
let resetBeforeTurn = false;
|
|
232
|
+
|
|
233
|
+
for (let turn = 1; turn <= maxTurns; turn += 1) {
|
|
234
|
+
console.error(`\n--- solver turn ${turn}/${maxTurns} ---`);
|
|
235
|
+
const prompt =
|
|
236
|
+
resetBeforeTurn
|
|
237
|
+
? [
|
|
238
|
+
"The previous CaP-X runtime session hit an observation/depth failure, so the example reset the runtime before this turn.",
|
|
239
|
+
"Treat the current runtime state as fresh. Ignore any broken previous physical state.",
|
|
240
|
+
"Call capx_status and capx_observe with includeImages=true, then solve from the current task prompt and observations.",
|
|
241
|
+
"Execute one useful Python Code-as-Policy step if approval allows it, then observe again.",
|
|
242
|
+
].join(" ")
|
|
243
|
+
: turn === 1
|
|
244
|
+
? [
|
|
245
|
+
"Solve the active CaP-X task.",
|
|
246
|
+
"First call capx_status, then capx_observe with includeImages=true.",
|
|
247
|
+
"Use CaP-X's task prompt, API context, observations, skill library, and turn history as source of truth.",
|
|
248
|
+
"Execute one useful Python Code-as-Policy step if approval allows it.",
|
|
249
|
+
"After execution, observe again and report whether the task appears complete.",
|
|
250
|
+
].join(" ")
|
|
251
|
+
: [
|
|
252
|
+
"Continue solving the same CaP-X task from the current runtime state.",
|
|
253
|
+
"Inspect capx_turn_history and capx_observe before choosing the next action.",
|
|
254
|
+
"If the last step completed the task, say TASK_COMPLETE and do not execute more code.",
|
|
255
|
+
"Otherwise execute one more useful Python Code-as-Policy step if approval allows it, then observe again.",
|
|
256
|
+
].join(" ");
|
|
257
|
+
|
|
258
|
+
for await (const event of agent.chat(sessionId, prompt)) {
|
|
259
|
+
printEvent(event);
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
const observation = await session.observe({ includeArtifacts: true });
|
|
263
|
+
const step = lastStep(observation);
|
|
264
|
+
if (isUnrecoverableObservationFailure(step)) {
|
|
265
|
+
if (recoverOnRuntimeError && runtimeResetCount < maxRuntimeResets) {
|
|
266
|
+
runtimeResetCount += 1;
|
|
267
|
+
const nextTrial = initialPolicyExecutionTrial + runtimeResetCount;
|
|
268
|
+
session.options.policyExecutionTrial = nextTrial;
|
|
269
|
+
console.error(
|
|
270
|
+
[
|
|
271
|
+
"CaP-X reported an observation/depth failure.",
|
|
272
|
+
`Resetting runtime session to trial ${nextTrial} before the next solver turn (${runtimeResetCount}/${maxRuntimeResets}).`,
|
|
273
|
+
step ? `Last step: ${completionSummary(step)}` : "",
|
|
274
|
+
]
|
|
275
|
+
.filter(Boolean)
|
|
276
|
+
.join(" "),
|
|
277
|
+
);
|
|
278
|
+
await session.reset?.();
|
|
279
|
+
resetBeforeTurn = true;
|
|
280
|
+
continue;
|
|
281
|
+
}
|
|
282
|
+
console.error(
|
|
283
|
+
[
|
|
284
|
+
"Stopping autosolve: CaP-X reported an unrecoverable observation/depth failure.",
|
|
285
|
+
"The current runtime session cannot execute further policy code because env.step() fails while collecting observations.",
|
|
286
|
+
recoverOnRuntimeError
|
|
287
|
+
? "Runtime reset budget is exhausted. Restart capx-agent-runtime serve or retry with a fresh session."
|
|
288
|
+
: "Set CAPX_RECOVER_ON_RUNTIME_ERROR=reset to let this example reset once and continue.",
|
|
289
|
+
step ? `Last step: ${completionSummary(step)}` : "",
|
|
290
|
+
]
|
|
291
|
+
.filter(Boolean)
|
|
292
|
+
.join(" "),
|
|
293
|
+
);
|
|
294
|
+
break;
|
|
295
|
+
}
|
|
296
|
+
if (step?.taskCompleted || step?.terminated) {
|
|
297
|
+
console.error(`CaP-X reported completion state: ${completionSummary(step)}`);
|
|
298
|
+
break;
|
|
299
|
+
}
|
|
300
|
+
resetBeforeTurn = false;
|
|
301
|
+
}
|
|
302
|
+
} finally {
|
|
303
|
+
await agent.close();
|
|
304
|
+
if (process.env.CAPX_STOP_ON_EXIT === "1") {
|
|
305
|
+
await session.stop("example exit");
|
|
306
|
+
}
|
|
307
|
+
}
|