npm - @cuylabs/physical-capx-agent-core - Versions diffs - 0.1.1 - Mend

@cuylabs/physical-capx-agent-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/LICENSE +201 -0
package/README.md +132 -0
package/dist/agent.d.ts +59 -0
package/dist/agent.js +14 -0
package/dist/agent.js.map +1 -0
package/dist/chunk-57TF3E2Q.js +61 -0
package/dist/chunk-57TF3E2Q.js.map +1 -0
package/dist/chunk-U6REPGPH.js +109 -0
package/dist/chunk-U6REPGPH.js.map +1 -0
package/dist/index.d.ts +7 -0
package/dist/index.js +29 -0
package/dist/index.js.map +1 -0
package/dist/session.d.ts +5 -0
package/dist/session.js +12 -0
package/dist/session.js.map +1 -0
package/dist/tools.d.ts +8 -0
package/dist/tools.js +7 -0
package/dist/tools.js.map +1 -0
package/docs/README.md +23 -0
package/docs/agent-core-integration.md +76 -0
package/examples/.env.example +37 -0
package/examples/01-capx-runtime-solver.ts +165 -0
package/examples/02-capx-runtime-autosolve.ts +314 -0
package/examples/README.md +344 -0
package/examples/_setup.ts +61 -0
package/package.json +86 -0
package/skills/capx-code-as-policy/SKILL.md +22 -0

package/dist/session.js ADDED Viewed

@@ -0,0 +1,12 @@
+import {
+  createCapxPhysicalTools
+} from "./chunk-57TF3E2Q.js";
+// src/session.ts
+import { CapxSession, createCapxSession } from "@cuylabs/physical-capx";
+export {
+  CapxSession,
+  createCapxPhysicalTools,
+  createCapxSession
+};
+//# sourceMappingURL=session.js.map

package/dist/session.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"sources":["../src/session.ts"],"sourcesContent":["export { CapxSession, createCapxSession } from \"@cuylabs/physical-capx\";\nexport type { CapxSessionOptions } from \"@cuylabs/physical-capx\";\nexport { createCapxPhysicalTools } from \"./tools.js\";\n"],"mappings":";;;;;AAAA,SAAS,aAAa,yBAAyB;","names":[]}

package/dist/tools.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+import { CapxSession } from '@cuylabs/physical-capx';
+import { Tool } from '@cuylabs/agent-core/tool';
+import { PhysicalToolOptions } from '@cuylabs/physical-agent-core';
+import { PhysicalSession } from '@cuylabs/physical-core';
+/**
+ * Type declaration for the factory that produces agent-core tool definitions
+ * for a session.
+ *
+ * @param session - Either a generic `PhysicalSession` or a `CapxSession`.
+ * @param options - Optional `PhysicalToolOptions`; their meaning is defined by
+ *   `@cuylabs/physical-agent-core` and is not visible in this declaration.
+ * @returns An array of `Tool.AnyInfo` entries.
+ */
+declare function createCapxPhysicalTools(session: PhysicalSession | CapxSession, options?: PhysicalToolOptions): Tool.AnyInfo[];
+export { createCapxPhysicalTools };

package/dist/tools.js ADDED Viewed

@@ -0,0 +1,7 @@
+import {
+  createCapxPhysicalTools
+} from "./chunk-57TF3E2Q.js";
+export {
+  createCapxPhysicalTools
+};
+//# sourceMappingURL=tools.js.map

package/dist/tools.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}

package/docs/README.md ADDED Viewed

@@ -0,0 +1,23 @@
+# @cuylabs/physical-capx-agent-core Docs
+`@cuylabs/physical-capx-agent-core` binds CaP-X physical sessions to
+`@cuylabs/agent-core`.
+Use it for CaP-X tool creation, packaged CaP-X model guidance, and
+`createCapxAgent(...)`.
+Start here:
+- [Agent-Core Integration](./agent-core-integration.md) explains how
+  `agent-core` supervises a CaP-X session and owns the live observe/execute
+  loop.
+- [`../../physical-capx/docs/how-it-works.md`](../../physical-capx/docs/how-it-works.md)
+  explains the runtime/session adapter.
+- [`../../physical-capx/docs/limitations.md`](../../physical-capx/docs/limitations.md)
+  documents runtime-service and hardware-safety boundaries.
+- [../examples/README.md](../examples/README.md) is the operational runbook for
+  running the single-turn and autosolve examples.
+This adapter exists to integrate CaP-X, a Python robotics Code-as-Policies
+framework, with `@cuylabs/agent-core`. It is not a fork or TypeScript port of
+CaP-X.

package/docs/agent-core-integration.md ADDED Viewed

@@ -0,0 +1,76 @@
+# Agent-Core Integration
+`@cuylabs/physical-capx-agent-core` uses `@cuylabs/agent-core` as the
+application-level harness for a running `capx-agent-runtime` service.
+## Ownership
+`agent-core` owns conversation turns, tool choice, approvals, tracing, policy
+code authoring, and the live observe/execute/reset loop.
+`capx-agent-runtime` owns the CaP-X checkout, YAML config, simulator/API server
+lifecycle, Python execution namespace, rewards, task completion, and artifacts.
+The TypeScript adapter does not launch CaP-X and does not pass `repoPath` or
+`configPath`. It also omits `outputDir` and `skillLibraryPath` by default.
+Those path choices are runtime-service startup concerns unless a trusted server
+operator explicitly enables client path overrides.
+## Recommended Wiring
+```typescript
+import { createCapxAgent } from "@cuylabs/physical-capx-agent-core";
+const { agent, session } = await createCapxAgent({
+  model,
+  startSession: true,
+  toolExecutionMode: "plan",
+  sessionOptions: {
+    mode: "runtime",
+    runtimeServerUrl: "http://127.0.0.1:8210",
+    physicalMode: "simulation",
+    enablePolicyCodeExecution: true,
+  },
+});
+```
+The helper defaults to `toolExecutionMode: "plan"` so `agent-core` executes
+tool batches through its capability-aware runner. This is not a prompt-level
+plan or a plan-only agent. The model still reasons and chooses tools, but the
+tool call is deferred to `agent-core`, which then applies approval policy,
+capability metadata, and safe dispatch before committing the tool result back
+into the conversation.
+`toolExecutionMode: "auto"` is the simpler AI SDK path: executable tools are
+given directly to the SDK and execute inline while the model stream is running.
+That is fine for many text tools. For physical sessions, `plan` is the safer
+default because tools like `capx_run_policy_code` and `capx_stop` are
+side-effecting and should stay under the host harness's approval and scheduling
+policy.
+The helper also installs a default approval policy with `defaultAction: "deny"`.
+Read-only tools can pass through host policy; mutating tools such as policy
+execution and stop should remain approval-gated. Runtime skill-library mutation
+is available through programmatic APIs for deliberate workflows, but it is not
+part of the default agent tool surface.
+## Runtime Tools
+`capx_run_policy_code` sends Python code to the active runtime session and
+returns stdout, stderr, reward, task-completion metadata, diagnostics, and
+artifacts.
+`capx_observe` returns prompt, observation, and policy-code context. That
+context can include reusable Python helper summaries and typed affordances from
+the runtime skill library.
+The active runtime session is durable across tool calls. The agent should use
+`capx_observe`, then submit one selected Python policy step with
+`capx_run_policy_code`, then observe again in the same session. Starting,
+resetting, and replacing sessions are host/runtime orchestration decisions, not
+default model-facing actions.
+That gives an external agent the core Code-as-Policies boundary: prompt/API
+context comes from CaP-X, generated code runs in CaP-X's Python namespace, and
+reusable helper functions stay inside that runtime instead of being copied into
+TypeScript.

package/examples/.env.example ADDED Viewed

@@ -0,0 +1,37 @@
+# Required agent model config. `_setup.ts` loads this file automatically when
+# either example runs.
+OPENAI_API_KEY=
+OPENAI_MODEL=gpt-4o-mini
+# Optional: set only for OpenAI-compatible providers that are not the default
+# OpenAI endpoint.
+OPENAI_BASE_URL=
+# Required runtime service URL. Start capx-agent-runtime first, then point this
+# example at the service or SSH tunnel URL. The TypeScript examples do not
+# start CaP-X or select a CaP-X config.
+CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210
+# Safety switches for the example host. Set CAPX_ALLOW_DESTRUCTIVE=1 to allow
+# capx_run_policy_code; otherwise the examples can observe and propose code but
+# execution is denied.
+CAPX_PHYSICAL_MODE=simulation
+CAPX_ALLOW_DESTRUCTIVE=0
+CAPX_ALLOW_HARDWARE_POLICY_EXECUTION=0
+# Optional runtime/client tuning.
+CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS=120000
+CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS=1000000
+CAPX_POLICY_EXECUTION_TIMEOUT_MS=1000000
+CAPX_POLICY_EXECUTION_TRIAL=
+# Leave blank to use the CaP-X YAML record_video setting. Set 1 or 0 only to
+# override the server/YAML value for this example run.
+CAPX_POLICY_EXECUTION_RECORD_VIDEO=
+CAPX_STOP_ON_EXIT=0
+CAPX_MAX_SOLVER_TURNS=6
+# Privileged runtime session path overrides. Leave blank for normal use.
+# capx-agent-runtime rejects these unless the server was started with
+# --allow-client-path-overrides and matching allowed roots.
+CAPX_SESSION_OUTPUT_DIR=
+CAPX_SESSION_SKILL_LIBRARY_PATH=
+CAPX_AGENT_PROMPT=

package/examples/01-capx-runtime-solver.ts ADDED Viewed

@@ -0,0 +1,165 @@
+/**
+ * 01 - CaP-X runtime solver
+ *
+ * This is the default bring-your-own-agent flow:
+ *   1. Start capx-agent-runtime on the GPU workstation.
+ *   2. Point CAPX_RUNTIME_SERVER_URL at that service or SSH tunnel.
+ *   3. Run this TypeScript agent locally.
+ *
+ * The runtime service owns the CaP-X checkout, YAML config, simulator, and API
+ * servers. This agent-core example owns the reasoning loop: observe, inspect
+ * task context, write policy code, execute one step, observe again, and report.
+ *
+ * Run:
+ *   npx tsx examples/01-capx-runtime-solver.ts
+ */
+import { createCapxAgent } from "@cuylabs/physical-capx-agent-core";
+import { createEventPrinter } from "@cuylabs/agent-core";
+import { exampleOpenAIModel } from "./_setup.js";
+/**
+ * Normalizes an optional environment-style string.
+ *
+ * @param value - Raw text, possibly `undefined`.
+ * @returns The text with surrounding whitespace removed, or `undefined` when
+ *   the input is missing or contains nothing but whitespace.
+ */
+function optionalString(value: string | undefined): string | undefined {
+  const cleaned = value?.trim();
+  if (!cleaned) {
+    return undefined;
+  }
+  return cleaned;
+}
+/**
+ * Parses an optional boolean flag from an environment-style string.
+ *
+ * Surrounding whitespace is ignored for the comparison as well as for the
+ * blank check, so values such as `" 1 "` or `"TRUE\n"` count as true instead
+ * of silently parsing as `false`.
+ *
+ * @param value - Raw text, typically read from `process.env`.
+ * @returns `undefined` when the value is missing or blank, `true` for `"1"`
+ *   or a case-insensitive `"true"`, and `false` for any other non-blank text.
+ */
+function optionalBoolean(value: string | undefined): boolean | undefined {
+  const normalized = value?.trim().toLowerCase();
+  if (!normalized) {
+    return undefined;
+  }
+  return normalized === "1" || normalized === "true";
+}
+/**
+ * Parses an optional numeric setting from an environment-style string.
+ *
+ * The value is trimmed before the blank check because `Number("   ")`
+ * evaluates to `0`; without trimming, a whitespace-only setting would be
+ * reported as `0` instead of "not set".
+ *
+ * @param value - Raw text, typically read from `process.env`.
+ * @returns The parsed finite number, or `undefined` when the value is missing,
+ *   blank, or does not convert to a finite number.
+ */
+function optionalNumber(value: string | undefined): number | undefined {
+  const trimmed = value?.trim();
+  if (!trimmed) {
+    return undefined;
+  }
+  const parsed = Number(trimmed);
+  return Number.isFinite(parsed) ? parsed : undefined;
+}
+// Runtime service URL is mandatory: fail fast with setup guidance when it is
+// missing or blank.
+const runtimeServerUrl = optionalString(process.env.CAPX_RUNTIME_SERVER_URL);
+if (!runtimeServerUrl) {
+  throw new Error(
+    [
+      "CAPX_RUNTIME_SERVER_URL is required.",
+      "Start capx-agent-runtime on the CaP-X workstation, tunnel the port if needed,",
+      "then set CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210.",
+    ].join(" "),
+  );
+}
+// Only the exact string "1" enables approval of non-"skill" tools
+// (see approveExampleTool).
+const allowDestructive = process.env.CAPX_ALLOW_DESTRUCTIVE === "1";
+// Tool dispatch mode passed to createCapxAgent; fixed to "plan" in this example.
+const toolExecutionMode = "plan" as const;
+// Millisecond timestamp used to build a default session id for this run.
+const runId = Date.now();
+// Conversation/session id handed to agent.chat(); overridable via env.
+const sessionId =
+  optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
+  `capx-runtime-solver-${runId}`;
+// Optional path overrides forwarded as sessionOptions.outputDir and
+// sessionOptions.skillLibraryPath; undefined when the env vars are blank.
+const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
+const sessionSkillLibraryPath = optionalString(
+  process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
+);
+// Display-only label for the startup banner; the value actually sent to the
+// session is parsed separately with optionalBoolean.
+const recordVideo =
+  optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
+  "runtime-default";
+/**
+ * Approval decision used by this example's `onRequest` hook.
+ *
+ * @param tool - Name of the tool requesting approval.
+ * @returns `"allow"` for the `"skill"` tool or whenever `allowDestructive` is
+ *   set; `"deny"` otherwise.
+ */
+function approveExampleTool(tool: string): "allow" | "deny" {
+  const alwaysAllowed = tool === "skill";
+  if (alwaysAllowed || allowDestructive) {
+    return "allow";
+  }
+  return "deny";
+}
+// Single user message sent to the agent. CAPX_AGENT_PROMPT replaces the
+// built-in instructions entirely when it is set to non-blank text.
+const userPrompt =
+  optionalString(process.env.CAPX_AGENT_PROMPT) ??
+  [
+    "You are the external agent solving one CaP-X runtime simulation.",
+    "Check capx_status first, then call capx_observe with includeImages=true.",
+    "Use the CaP-X task prompt, full prompt, observations, API context, skill library, and turn history as the source of truth.",
+    "Propose one concise Python Code-as-Policy action toward the task.",
+    "If capx_run_policy_code is available and approval allows it, execute the action, observe again, inspect reward/stdout/stderr/task completion, and summarize the result.",
+    "If skill extraction or injection tools are available, only use them after useful successful code and with approval.",
+    "If execution is denied, explain the exact policy code you would run and why.",
+  ].join(" ");
+// One-line startup banner (stdout) summarizing the effective configuration as
+// space-separated key=value pairs.
+console.log(
+  [
+    "CaP-X agent mode=runtime",
+    "startSession=true",
+    "policyExecution=live-runtime",
+    `toolDispatch=${toolExecutionMode}`,
+    `approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
+    `recordVideo=${recordVideo}`,
+    `agentSessionId=${sessionId}`,
+    `sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
+    `sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
+    `pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
+    `runtimeServerUrl=${runtimeServerUrl}`,
+  ].join(" "),
+);
+// Build the agent and its runtime-backed session. Optional numeric/boolean
+// settings are passed as undefined when the matching env var is unset.
+const { agent, session } = await createCapxAgent({
+  model: exampleOpenAIModel(),
+  startSession: true,
+  toolExecutionMode,
+  sessionOptions: {
+    mode: "runtime",
+    // Anything other than the exact string "hardware" selects simulation.
+    physicalMode:
+      process.env.CAPX_PHYSICAL_MODE === "hardware" ? "hardware" : "simulation",
+    // Normal service-first path: connect to an already-running runtime server
+    // and let that server's --config-path/--repo-path defaults define the
+    // simulation.
+    runtimeServerUrl,
+    runtimeServerStartupTimeoutMs: optionalNumber(
+      process.env.CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS,
+    ),
+    runtimeServerRequestTimeoutMs: optionalNumber(
+      process.env.CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS,
+    ),
+    enablePolicyCodeExecution: true,
+    policyExecutionMode: "live-runtime",
+    // Hardware policy execution requires an explicit "1" opt-in.
+    allowHardwarePolicyExecution:
+      process.env.CAPX_ALLOW_HARDWARE_POLICY_EXECUTION === "1",
+    policyExecutionTimeoutMs: optionalNumber(
+      process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
+    ),
+    policyExecutionTrial: optionalNumber(
+      process.env.CAPX_POLICY_EXECUTION_TRIAL,
+    ),
+    policyExecutionRecordVideo: optionalBoolean(
+      process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
+    ),
+    outputDir: sessionOutputDir,
+    skillLibraryPath: sessionSkillLibraryPath,
+  },
+  approval: {
+    defaultAction: "ask",
+    // Log every approval request, then decide via approveExampleTool.
+    onRequest: async (request) => {
+      console.log(
+        `approval requested: ${request.tool} risk=${request.risk} description=${request.description}`,
+      );
+      return approveExampleTool(request.tool);
+    },
+  },
+});
+// Run one chat turn, streaming every agent event to the console, then clean up.
+try {
+  const printEvent = createEventPrinter({
+    steps: true,
+    completion: true,
+    toolResultMaxChars:
+      optionalNumber(process.env.CAPX_TOOL_RESULT_MAX_CHARS) ?? 2_000,
+  });
+  for await (const event of agent.chat(sessionId, userPrompt)) {
+    printEvent(event);
+  }
+} finally {
+  // Nested try/finally: the optional runtime stop must still run when
+  // agent.close() rejects, otherwise a requested CAPX_STOP_ON_EXIT is skipped.
+  try {
+    await agent.close();
+  } finally {
+    if (process.env.CAPX_STOP_ON_EXIT === "1") {
+      await session.stop("example exit");
+    }
+  }
+}

package/examples/02-capx-runtime-autosolve.ts ADDED Viewed

@@ -0,0 +1,314 @@
+/**
+ * 02 - CaP-X runtime autosolve loop
+ *
+ * This example keeps the same agent-core session open across multiple user
+ * turns. Each turn lets the agent observe, write policy code, execute it when
+ * approval allows, and inspect the new result. The outer loop stops when CaP-X
+ * reports task completion or when CAPX_MAX_SOLVER_TURNS is reached.
+ *
+ * Run:
+ *   npx tsx examples/02-capx-runtime-autosolve.ts
+ */
+import { createEventPrinter } from "@cuylabs/agent-core";
+import { createCapxAgent } from "@cuylabs/physical-capx-agent-core";
+import type { PhysicalObservation } from "@cuylabs/physical-core";
+import { exampleOpenAIModel } from "./_setup.js";
+/**
+ * Trims an optional environment-style string and treats blank input as unset.
+ *
+ * @param value - Raw text, possibly `undefined`.
+ * @returns The trimmed text, or `undefined` when nothing but whitespace (or
+ *   nothing at all) was supplied.
+ */
+function optionalString(value: string | undefined): string | undefined {
+  const text = (value ?? "").trim();
+  return text.length > 0 ? text : undefined;
+}
+/**
+ * Parses an optional boolean flag from an environment-style string.
+ *
+ * The comparison runs on the trimmed, lower-cased text so that padded values
+ * such as `" 1 "` or `"True "` are recognized as true rather than falling
+ * through to `false`.
+ *
+ * @param value - Raw text, typically read from `process.env`.
+ * @returns `undefined` when the value is missing or blank, `true` for `"1"`
+ *   or a case-insensitive `"true"`, and `false` for any other non-blank text.
+ */
+function optionalBoolean(value: string | undefined): boolean | undefined {
+  const normalized = value?.trim().toLowerCase();
+  if (!normalized) {
+    return undefined;
+  }
+  return normalized === "1" || normalized === "true";
+}
+/**
+ * Parses an optional numeric setting from an environment-style string.
+ *
+ * Whitespace-only input is treated as unset. `Number("   ")` is `0`, so
+ * without the trim a blank CAPX_MAX_SOLVER_TURNS would yield zero solver
+ * turns instead of falling back to the default.
+ *
+ * @param value - Raw text, typically read from `process.env`.
+ * @returns The parsed finite number, or `undefined` when the value is missing,
+ *   blank, or does not convert to a finite number.
+ */
+function optionalNumber(value: string | undefined): number | undefined {
+  const trimmed = value?.trim();
+  if (!trimmed) {
+    return undefined;
+  }
+  const parsed = Number(trimmed);
+  return Number.isFinite(parsed) ? parsed : undefined;
+}
+/**
+ * Shape this example expects in the JSON text of the observation item whose
+ * source is "capx:runtime:last-step". Every field is optional; camelCase and
+ * snake_case spellings of the same value are both accepted and merged by
+ * normalizeStep.
+ */
+interface LastRuntimeStep {
+  success?: boolean;
+  /** Task-completion flag; a true value ends the autosolve loop. */
+  taskCompleted?: boolean | null;
+  /** snake_case spelling of `taskCompleted`, used as a fallback. */
+  task_completed?: boolean | null;
+  /** A true value ends the autosolve loop. */
+  terminated?: boolean;
+  /** Must be truthy for a failure to be classified as unrecoverable. */
+  truncated?: boolean;
+  reward?: number | null;
+  /** Sandbox return code; any defined value other than 0 counts as failed. */
+  sandboxRc?: number;
+  /** snake_case spelling of `sandboxRc`, used as a fallback. */
+  sandbox_rc?: number;
+  /** Scanned for observation/depth function names when diagnostics are absent. */
+  stderr?: string;
+  error?: string | null;
+  /** Optional structured failure details; preferred over stderr scanning. */
+  diagnostics?: {
+    /** "policy_execution" marks the failure as not an observation failure. */
+    failurePhase?: string;
+    observationPipeline?: boolean;
+    depthAssertion?: boolean;
+  } | null;
+}
+/**
+ * Returns a copy of `step` whose camelCase `taskCompleted` and `sandboxRc`
+ * fields fall back to their snake_case counterparts when the camelCase value
+ * is `null` or `undefined`. All other fields are copied unchanged.
+ */
+function normalizeStep(step: LastRuntimeStep): LastRuntimeStep {
+  const normalized: LastRuntimeStep = { ...step };
+  normalized.taskCompleted = step.taskCompleted ?? step.task_completed;
+  normalized.sandboxRc = step.sandboxRc ?? step.sandbox_rc;
+  return normalized;
+}
+/**
+ * Extracts the most recent runtime step record from an observation.
+ *
+ * Picks the last item whose `source` is "capx:runtime:last-step", requires it
+ * to be a text item, and parses its text as JSON.
+ *
+ * @returns The normalized step, or `null` when no such item exists, the item
+ *   is not text, or parsing/normalizing throws.
+ */
+function lastStep(observation: PhysicalObservation): LastRuntimeStep | null {
+  const candidates = [...observation.items].filter(
+    (entry) => entry.source === "capx:runtime:last-step",
+  );
+  // The newest matching entry is the last one in document order.
+  const item = candidates.pop();
+  if (!item || item.kind !== "text") {
+    return null;
+  }
+  let parsed: LastRuntimeStep | null;
+  try {
+    parsed = normalizeStep(JSON.parse(item.text) as LastRuntimeStep);
+  } catch {
+    parsed = null;
+  }
+  return parsed;
+}
+/**
+ * Classifies the last runtime step as an observation/depth failure that the
+ * current runtime session cannot recover from.
+ *
+ * A step qualifies only when the sandbox return code is defined and non-zero,
+ * the runtime reported an error or diagnostics, the step is truncated, and the
+ * failure points at the observation path (via diagnostics flags, stderr
+ * markers, or an "AssertionError" in the error text). Diagnostics whose
+ * `failurePhase` is "policy_execution" never qualify.
+ *
+ * @param step - Normalized last step, or `null` when none is available.
+ * @returns `true` when the failure matches the rules above, otherwise `false`.
+ */
+function isUnrecoverableObservationFailure(
+  step: LastRuntimeStep | null,
+): boolean {
+  if (!step) {
+    return false;
+  }
+  const diagnostics = step.diagnostics ?? null;
+  if (diagnostics?.failurePhase === "policy_execution") {
+    return false;
+  }
+  const errorText = step.error ?? "";
+  const sandboxFailed = step.sandboxRc !== undefined && step.sandboxRc !== 0;
+  // Equivalent to requiring both a failed sandbox and a runtime-raised signal.
+  if (!sandboxFailed || (!errorText && !diagnostics)) {
+    return false;
+  }
+  const errorHasAssertion = (): boolean => errorText.includes("AssertionError");
+  if (diagnostics) {
+    // Structured diagnostics take precedence over stderr scanning.
+    if (!step.truncated) {
+      return false;
+    }
+    return Boolean(
+      diagnostics.observationPipeline ||
+        diagnostics.depthAssertion ||
+        errorHasAssertion(),
+    );
+  }
+  const stderrText = step.stderr ?? "";
+  const observationMarkers = [
+    "_get_observation",
+    "get_observation",
+    "get_real_depth_map",
+  ];
+  const mentionsObservationPath = observationMarkers.some((marker) =>
+    stderrText.includes(marker),
+  );
+  const mentionsDepthAssertion =
+    stderrText.includes("get_real_depth_map") ||
+    (stderrText.includes("AssertionError") && stderrText.includes("depth"));
+  if (!step.truncated) {
+    return false;
+  }
+  return (
+    mentionsObservationPath || mentionsDepthAssertion || errorHasAssertion()
+  );
+}
+/**
+ * Formats the completion-related fields of a step as one line of
+ * space-separated `key=value` pairs, printing "n/a" for any field that is
+ * `null` or `undefined`.
+ */
+function completionSummary(step: LastRuntimeStep): string {
+  const fields: Array<[string, unknown]> = [
+    ["taskCompleted", step.taskCompleted],
+    ["terminated", step.terminated],
+    ["truncated", step.truncated],
+    ["sandboxRc", step.sandboxRc],
+    ["reward", step.reward],
+  ];
+  return fields
+    .map(([label, fieldValue]) => `${label}=${fieldValue ?? "n/a"}`)
+    .join(" ");
+}
+// Runtime service URL is mandatory: fail fast with setup guidance when it is
+// missing or blank.
+const runtimeServerUrl = optionalString(process.env.CAPX_RUNTIME_SERVER_URL);
+if (!runtimeServerUrl) {
+  throw new Error(
+    [
+      "CAPX_RUNTIME_SERVER_URL is required.",
+      "Start capx-agent-runtime on the CaP-X workstation, tunnel the port if needed,",
+      "then set CAPX_RUNTIME_SERVER_URL=http://127.0.0.1:8210.",
+    ].join(" "),
+  );
+}
+// Only the exact string "1" enables approval of non-"skill" tools
+// (see approveExampleTool).
+const allowDestructive = process.env.CAPX_ALLOW_DESTRUCTIVE === "1";
+// Upper bound on outer solver turns; defaults to 6.
+const maxTurns = optionalNumber(process.env.CAPX_MAX_SOLVER_TURNS) ?? 6;
+// Recovery is opt-in: only the value "reset" enables it.
+const recoverOnRuntimeError =
+  optionalString(process.env.CAPX_RECOVER_ON_RUNTIME_ERROR) === "reset";
+// Reset budget: explicit env value, else 1 when recovery is enabled, else 0.
+const maxRuntimeResets =
+  optionalNumber(process.env.CAPX_MAX_RUNTIME_RESETS) ??
+  (recoverOnRuntimeError ? 1 : 0);
+// Starting trial number; each runtime reset adds one to it (see solver loop).
+const initialPolicyExecutionTrial =
+  optionalNumber(process.env.CAPX_POLICY_EXECUTION_TRIAL) ?? 1;
+// Millisecond timestamp used to build a default session id for this run.
+const runId = Date.now();
+// Conversation/session id handed to agent.chat(); overridable via env.
+const sessionId =
+  optionalString(process.env.CAPX_AGENT_SESSION_ID) ??
+  `capx-runtime-autosolve-${runId}`;
+// Optional path overrides forwarded as sessionOptions.outputDir and
+// sessionOptions.skillLibraryPath; undefined when the env vars are blank.
+const sessionOutputDir = optionalString(process.env.CAPX_SESSION_OUTPUT_DIR);
+const sessionSkillLibraryPath = optionalString(
+  process.env.CAPX_SESSION_SKILL_LIBRARY_PATH,
+);
+// Tool dispatch mode passed to createCapxAgent; fixed to "plan" in this example.
+const toolExecutionMode = "plan" as const;
+// Display-only label for the startup banner; the value actually sent to the
+// session is parsed separately with optionalBoolean.
+const recordVideo =
+  optionalString(process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO) ??
+  "runtime-default";
+// Shared event printer reused for every solver turn.
+const printEvent = createEventPrinter({
+  steps: true,
+  completion: true,
+  toolResultMaxChars:
+    optionalNumber(process.env.CAPX_TOOL_RESULT_MAX_CHARS) ?? 2_000,
+});
+/**
+ * Approval decision used by this example's `onRequest` hook: the `"skill"`
+ * tool is always allowed, every other tool is allowed only when
+ * `allowDestructive` is set.
+ *
+ * @param tool - Name of the tool requesting approval.
+ * @returns `"allow"` or `"deny"`.
+ */
+function approveExampleTool(tool: string): "allow" | "deny" {
+  const permitted = tool === "skill" || allowDestructive;
+  return permitted ? "allow" : "deny";
+}
+// One-line startup banner summarizing the effective configuration as
+// space-separated key=value pairs. Written to stderr, like the other
+// diagnostics in this example.
+console.error(
+  [
+    "CaP-X agent mode=runtime",
+    `maxTurns=${maxTurns}`,
+    "policyExecution=live-runtime",
+    `toolDispatch=${toolExecutionMode}`,
+    `approval=${allowDestructive ? "policy-code-enabled" : "observe-only"}`,
+    `recordVideo=${recordVideo}`,
+    `agentSessionId=${sessionId}`,
+    `sessionOutputDir=${sessionOutputDir ?? "server-owned"}`,
+    `sessionSkillLibraryPath=${sessionSkillLibraryPath ?? "server-owned"}`,
+    `pathOverrides=${sessionOutputDir || sessionSkillLibraryPath ? "requested" : "none"}`,
+    `trial=${initialPolicyExecutionTrial}`,
+    `recoverOnRuntimeError=${recoverOnRuntimeError ? "reset" : "off"}`,
+    `maxRuntimeResets=${maxRuntimeResets}`,
+    `runtimeServerUrl=${runtimeServerUrl}`,
+  ].join(" "),
+);
+// Build the agent and its runtime-backed session. Optional numeric/boolean
+// settings are passed as undefined when the matching env var is unset.
+const { agent, session } = await createCapxAgent({
+  model: exampleOpenAIModel(),
+  startSession: true,
+  toolExecutionMode,
+  sessionOptions: {
+    mode: "runtime",
+    runtimeServerUrl,
+    // Anything other than the exact string "hardware" selects simulation.
+    physicalMode:
+      process.env.CAPX_PHYSICAL_MODE === "hardware" ? "hardware" : "simulation",
+    runtimeServerStartupTimeoutMs: optionalNumber(
+      process.env.CAPX_RUNTIME_SERVER_STARTUP_TIMEOUT_MS,
+    ),
+    runtimeServerRequestTimeoutMs: optionalNumber(
+      process.env.CAPX_RUNTIME_SERVER_REQUEST_TIMEOUT_MS,
+    ),
+    enablePolicyCodeExecution: true,
+    policyExecutionMode: "live-runtime",
+    // Hardware policy execution requires an explicit "1" opt-in.
+    allowHardwarePolicyExecution:
+      process.env.CAPX_ALLOW_HARDWARE_POLICY_EXECUTION === "1",
+    policyExecutionTimeoutMs: optionalNumber(
+      process.env.CAPX_POLICY_EXECUTION_TIMEOUT_MS,
+    ),
+    // Always a number here (defaults to 1); bumped later on runtime resets.
+    policyExecutionTrial: initialPolicyExecutionTrial,
+    policyExecutionRecordVideo: optionalBoolean(
+      process.env.CAPX_POLICY_EXECUTION_RECORD_VIDEO,
+    ),
+    outputDir: sessionOutputDir,
+    skillLibraryPath: sessionSkillLibraryPath,
+  },
+  approval: {
+    defaultAction: "ask",
+    // Log every approval request to stderr, then decide via approveExampleTool.
+    onRequest: async (request) => {
+      console.error(
+        `approval requested: ${request.tool} risk=${request.risk} description=${request.description}`,
+      );
+      return approveExampleTool(request.tool);
+    },
+  },
+});
+// Outer autosolve loop: one agent chat turn per iteration, followed by a direct
+// session observation that decides whether to stop, reset, or continue.
+try {
+  let runtimeResetCount = 0;
+  // True only for the turn that immediately follows a runtime reset.
+  let resetBeforeTurn = false;
+  for (let turn = 1; turn <= maxTurns; turn += 1) {
+    console.error(`\n--- solver turn ${turn}/${maxTurns} ---`);
+    // Prompt selection: post-reset instructions win, then first-turn
+    // instructions, then the "continue" instructions.
+    const prompt = resetBeforeTurn
+      ? [
+          "The previous CaP-X runtime session hit an observation/depth failure, so the example reset the runtime before this turn.",
+          "Treat the current runtime state as fresh. Ignore any broken previous physical state.",
+          "Call capx_status and capx_observe with includeImages=true, then solve from the current task prompt and observations.",
+          "Execute one useful Python Code-as-Policy step if approval allows it, then observe again.",
+        ].join(" ")
+      : turn === 1
+        ? [
+            "Solve the active CaP-X task.",
+            "First call capx_status, then capx_observe with includeImages=true.",
+            "Use CaP-X's task prompt, API context, observations, skill library, and turn history as source of truth.",
+            "Execute one useful Python Code-as-Policy step if approval allows it.",
+            "After execution, observe again and report whether the task appears complete.",
+          ].join(" ")
+        : [
+            "Continue solving the same CaP-X task from the current runtime state.",
+            "Inspect capx_turn_history and capx_observe before choosing the next action.",
+            "If the last step completed the task, say TASK_COMPLETE and do not execute more code.",
+            "Otherwise execute one more useful Python Code-as-Policy step if approval allows it, then observe again.",
+          ].join(" ");
+    for await (const event of agent.chat(sessionId, prompt)) {
+      printEvent(event);
+    }
+    // Inspect the runtime directly instead of trusting the agent's own report.
+    const observation = await session.observe({ includeArtifacts: true });
+    const step = lastStep(observation);
+    if (isUnrecoverableObservationFailure(step)) {
+      if (recoverOnRuntimeError && runtimeResetCount < maxRuntimeResets) {
+        runtimeResetCount += 1;
+        // Each reset moves on to the next trial number.
+        const nextTrial = initialPolicyExecutionTrial + runtimeResetCount;
+        session.options.policyExecutionTrial = nextTrial;
+        console.error(
+          [
+            "CaP-X reported an observation/depth failure.",
+            `Resetting runtime session to trial ${nextTrial} before the next solver turn (${runtimeResetCount}/${maxRuntimeResets}).`,
+            step ? `Last step: ${completionSummary(step)}` : "",
+          ]
+            .filter(Boolean)
+            .join(" "),
+        );
+        // NOTE(review): reset is optional on the session; when it is absent
+        // this call is a no-op and the next turn still claims a reset happened.
+        await session.reset?.();
+        resetBeforeTurn = true;
+        continue;
+      }
+      console.error(
+        [
+          "Stopping autosolve: CaP-X reported an unrecoverable observation/depth failure.",
+          "The current runtime session cannot execute further policy code because env.step() fails while collecting observations.",
+          recoverOnRuntimeError
+            ? "Runtime reset budget is exhausted. Restart capx-agent-runtime serve or retry with a fresh session."
+            : "Set CAPX_RECOVER_ON_RUNTIME_ERROR=reset to let this example reset once and continue.",
+          step ? `Last step: ${completionSummary(step)}` : "",
+        ]
+          .filter(Boolean)
+          .join(" "),
+      );
+      break;
+    }
+    if (step?.taskCompleted || step?.terminated) {
+      console.error(
+        `CaP-X reported completion state: ${completionSummary(step)}`,
+      );
+      break;
+    }
+    resetBeforeTurn = false;
+  }
+} finally {
+  // Nested try/finally: the optional runtime stop must still run when
+  // agent.close() rejects, otherwise a requested CAPX_STOP_ON_EXIT is skipped.
+  try {
+    await agent.close();
+  } finally {
+    if (process.env.CAPX_STOP_ON_EXIT === "1") {
+      await session.stop("example exit");
+    }
+  }
+}