@cuylabs/agent-physical-capx 5.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +161 -0
- package/dist/agent.d.ts +57 -0
- package/dist/agent.js +14 -0
- package/dist/agent.js.map +1 -0
- package/dist/chunk-C53NNB7T.js +107 -0
- package/dist/chunk-C53NNB7T.js.map +1 -0
- package/dist/chunk-MYO63CWO.js +869 -0
- package/dist/chunk-MYO63CWO.js.map +1 -0
- package/dist/index.d.ts +35 -0
- package/dist/index.js +92 -0
- package/dist/index.js.map +1 -0
- package/dist/session-BxaROlXW.d.ts +202 -0
- package/dist/session.d.ts +3 -0
- package/dist/session.js +11 -0
- package/dist/session.js.map +1 -0
- package/docs/README.md +16 -0
- package/docs/agent-core-integration.md +73 -0
- package/docs/how-it-works.md +151 -0
- package/docs/limitations.md +25 -0
- package/examples/.env.example +36 -0
- package/examples/01-capx-runtime-solver.ts +162 -0
- package/examples/02-capx-runtime-autosolve.ts +307 -0
- package/examples/README.md +387 -0
- package/examples/_setup.ts +61 -0
- package/package.json +76 -0
- package/skills/capx-code-as-policy/SKILL.md +22 -0
|
@@ -0,0 +1,869 @@
|
|
|
1
|
+
// src/session.ts
|
|
2
|
+
import { randomUUID as randomUUID3 } from "crypto";
|
|
3
|
+
import { Tool } from "@cuylabs/agent-core/tool";
|
|
4
|
+
import {
|
|
5
|
+
createPhysicalSessionTools
|
|
6
|
+
} from "@cuylabs/agent-physical";
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
|
|
9
|
+
// src/live-runtime/index.ts
|
|
10
|
+
import { randomUUID as randomUUID2 } from "crypto";
|
|
11
|
+
|
|
12
|
+
// src/live-runtime/http-client.ts
|
|
13
|
+
import { setTimeout as sleep } from "timers/promises";
|
|
14
|
+
|
|
15
|
+
// src/live-runtime/utils.ts
|
|
16
|
+
import { randomUUID } from "crypto";
|
|
17
|
+
// Fallback per-request timeout in milliseconds (1e6 ms, roughly 16.7 minutes).
// CapxRuntimeHttpClient.request uses it only when neither
// runtimeServerRequestTimeoutMs nor policyExecutionTimeoutMs is configured.
var DEFAULT_RUNTIME_REQUEST_TIMEOUT_MS = 1e6;
|
|
18
|
+
/**
 * Strip every trailing "/" from a runtime server URL so that routes which
 * begin with "/" can be appended without producing "//".
 *
 * @param {string} value - Base URL as supplied in the client options.
 * @returns {string} The URL without trailing slashes (an empty string stays empty).
 * @throws {TypeError} When `value` is not a string, e.g. an unset
 *   `runtimeServerUrl`, so the misconfiguration is reported directly instead
 *   of surfacing as "Cannot read properties of undefined".
 */
function normalizeBaseUrl(value) {
  if (typeof value !== "string") {
    const received = value === null ? "null" : typeof value;
    throw new TypeError(
      `capx-agent-runtime base URL must be a string, got ${received}.`
    );
  }
  return value.replace(/\/+$/, "");
}
|
|
21
|
+
/**
 * Pick the output directory to report for a runtime.
 *
 * @param {{ outputDir?: string }} options - Runtime options.
 * @param {string} fallback - Value used when `options.outputDir` is null or undefined.
 * @returns {string} `options.outputDir` when it is set, otherwise `fallback`.
 */
function resolveOutputDir(options, fallback) {
  const configured = options.outputDir;
  if (configured === null || configured === void 0) {
    return fallback;
  }
  return configured;
}
|
|
24
|
+
/**
 * Shallow-copy an object, dropping every own enumerable property whose value
 * is exactly `undefined` (null, 0, "" and false are kept).
 *
 * @param {object} value - Source object.
 * @returns {object} A new plain object without the undefined-valued entries.
 */
function withoutUndefined(value) {
  const kept = [];
  for (const entry of Object.entries(value)) {
    if (entry[1] !== void 0) {
      kept.push(entry);
    }
  }
  // Object.fromEntries defines own properties, so a "__proto__" key stays data.
  return Object.fromEntries(kept);
}
|
|
29
|
+
/**
 * Build an execution trace for a request that was rejected before running.
 * Start and completion share one timestamp and the reason is reported both
 * as `stderr` and as `outcome.reason`.
 *
 * @param {object} request - The execution request being rejected.
 * @param {string} reason - Human-readable failure reason.
 * @param {object} [metadata={}] - Extra metadata attached to the trace.
 * @returns {object} A trace with `outcome.success === false`.
 */
function failedTrace(request, reason, metadata = {}) {
  const timestamp = Date.now();
  const outcome = { success: false, reason };
  return {
    id: `capx-live-${randomUUID()}`,
    startedAt: timestamp,
    completedAt: timestamp,
    request,
    stderr: reason,
    outcome,
    metadata
  };
}
|
|
44
|
+
|
|
45
|
+
// src/live-runtime/http-client.ts
|
|
46
|
+
/**
 * Small JSON-over-HTTP client for a capx-agent-runtime server.
 *
 * The client does not spawn a process: "starting" means normalizing
 * `options.runtimeServerUrl` and polling `GET /health` until it answers.
 * `stdoutTail`/`stderrTail` are therefore always empty strings.
 */
var CapxRuntimeHttpClient = class {
  /**
   * @param {object} options - Reads `runtimeServerUrl`,
   *   `runtimeServerStartupTimeoutMs`, `runtimeServerRequestTimeoutMs` and
   *   `policyExecutionTimeoutMs`.
   */
  constructor(options) {
    this.options = options;
  }
  // Normalized base URL; undefined until ensureStarted() has succeeded.
  #baseUrl;
  /** @returns {string | undefined} The normalized base URL, if connected. */
  get baseUrl() {
    return this.#baseUrl;
  }
  /** @returns {string} Always "" (no child process output is captured). */
  get stdoutTail() {
    return "";
  }
  /** @returns {string} Always "" (no child process output is captured). */
  get stderrTail() {
    return "";
  }
  /** @returns {boolean} True once a base URL has been established. */
  get ready() {
    return this.#baseUrl !== void 0;
  }
  /**
   * Resolve the base URL and wait for the server's health endpoint.
   * Does nothing when a base URL is already established.
   *
   * @returns {Promise<void>}
   * @throws {Error} When the server does not become healthy within
   *   `runtimeServerStartupTimeoutMs` (default 5000 ms).
   */
  async ensureStarted() {
    if (this.#baseUrl) {
      return;
    }
    // The health probe goes through request(), which needs the base URL set.
    this.#baseUrl = normalizeBaseUrl(this.options.runtimeServerUrl);
    try {
      await this.#waitForHealth(
        this.options.runtimeServerStartupTimeoutMs ?? 5e3
      );
    } catch (error) {
      // Roll back so `ready` stays false and the next call probes again
      // instead of treating an unreachable server as started.
      this.#baseUrl = void 0;
      throw error;
    }
  }
  /**
   * Send one request and parse the JSON response body.
   *
   * @param {string} method - HTTP method.
   * @param {string} route - Path appended to the base URL (starts with "/").
   * @param {object} [body] - JSON payload; omitted when falsy.
   * @param {number} [timeoutMs] - Abort deadline in ms. Defaults to
   *   `runtimeServerRequestTimeoutMs`, then `policyExecutionTimeoutMs`, then
   *   DEFAULT_RUNTIME_REQUEST_TIMEOUT_MS.
   * @returns {Promise<any>} The parsed JSON response.
   * @throws {Error} When no base URL is configured, the request is aborted,
   *   or the response status is not 2xx.
   */
  async request(method, route, body, timeoutMs = this.options.runtimeServerRequestTimeoutMs ?? this.options.policyExecutionTimeoutMs ?? DEFAULT_RUNTIME_REQUEST_TIMEOUT_MS) {
    const baseUrl = this.requireBaseUrl();
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    // Do not let a pending request timeout keep the process alive.
    timer.unref();
    try {
      const response = await fetch(`${baseUrl}${route}`, {
        method,
        headers: body ? {
          "content-type": "application/json"
        } : void 0,
        body: body ? JSON.stringify(body) : void 0,
        signal: controller.signal
      });
      if (!response.ok) {
        const text = await response.text();
        throw new Error(
          `capx-agent-runtime ${method} ${route} failed with ${response.status}: ${text}`
        );
      }
      return await response.json();
    } finally {
      clearTimeout(timer);
    }
  }
  /**
   * Best-effort request to stop a runtime session. Does nothing when no
   * session id or base URL is available.
   *
   * @param {string | undefined} sessionId - Session to stop.
   * @returns {Promise<void>}
   */
  async stop(sessionId) {
    if (!sessionId || !this.#baseUrl) {
      return;
    }
    try {
      await this.request("POST", `/sessions/${sessionId}/stop`, {}, 5e3);
    } catch {
      // Deliberately ignored: shutdown must not fail because the server is gone.
    }
  }
  /**
   * @returns {string} The established base URL.
   * @throws {Error} When no base URL has been established yet.
   */
  requireBaseUrl() {
    if (!this.#baseUrl) {
      throw new Error("capx-agent-runtime base URL is not configured.");
    }
    return this.#baseUrl;
  }
  /**
   * Poll `GET /health` until it succeeds or the budget is used up. Each
   * probe and each pause is clamped to the time remaining, so the total
   * wait does not overrun `timeoutMs`.
   *
   * @param {number} timeoutMs - Total time budget in milliseconds.
   * @returns {Promise<void>}
   * @throws {Error} When the budget elapses; the last probe error is
   *   appended to the message and attached as `cause`.
   */
  async #waitForHealth(timeoutMs) {
    const deadline = Date.now() + timeoutMs;
    let lastError;
    for (let remaining = timeoutMs; remaining > 0; remaining = deadline - Date.now()) {
      try {
        await this.request("GET", "/health", void 0, Math.min(1e3, remaining));
        return;
      } catch (error) {
        lastError = error;
        const pause = Math.min(250, deadline - Date.now());
        if (pause > 0) {
          await sleep(pause);
        }
      }
    }
    throw new Error(
      `Timed out waiting for capx-agent-runtime health after ${timeoutMs}ms. ${String(
        lastError
      )}`,
      { cause: lastError }
    );
  }
};
|
|
131
|
+
|
|
132
|
+
// src/live-runtime/mappers.ts
|
|
133
|
+
/**
 * Convert a runtime execution result into camelCase form, accepting either
 * camelCase or snake_case keys on input (camelCase wins when both are set).
 *
 * @param {object | null | undefined} result - Raw result from the runtime.
 * @returns {object | null} The mapped result, or null when `result` is falsy.
 *   `success` defaults to false and a null `reward` becomes undefined.
 */
function mapExecutionResult(result) {
  if (!result) {
    return null;
  }
  const {
    success,
    reward,
    terminated,
    truncated,
    sandboxRc,
    sandbox_rc: sandboxRcSnake,
    stdout,
    stderr,
    taskCompleted,
    task_completed: taskCompletedSnake,
    codePath,
    code_path: codePathSnake,
    elapsedMs,
    elapsed_ms: elapsedMsSnake,
    error,
    diagnostics
  } = result;
  return {
    success: success ?? false,
    reward: reward ?? void 0,
    terminated,
    truncated,
    sandboxRc: sandboxRc ?? sandboxRcSnake,
    stdout,
    stderr,
    taskCompleted: taskCompleted ?? taskCompletedSnake,
    codePath: codePath ?? codePathSnake,
    elapsedMs: elapsedMs ?? elapsedMsSnake,
    error,
    diagnostics
  };
}
|
|
152
|
+
/**
 * Convert a runtime observation into camelCase form, accepting camelCase or
 * snake_case keys on input. The nested last step and code context are
 * mapped through mapExecutionResult and mapCodeContext.
 *
 * @param {object | null | undefined} observation - Raw observation.
 * @returns {object} The mapped observation, or `{}` when the input is falsy.
 */
function mapObservation(observation) {
  if (!observation) {
    return {};
  }
  const rawLastStep = observation.lastStep ?? observation.last_step;
  const rawCodeContext = observation.codeContext ?? observation.code_context;
  return {
    trial: observation.trial,
    // `?? void 0` normalizes an explicit null to undefined.
    taskPrompt: observation.taskPrompt ?? observation.task_prompt ?? void 0,
    multiTurnPrompt: observation.multiTurnPrompt ?? observation.multi_turn_prompt ?? void 0,
    fullPrompt: observation.fullPrompt ?? observation.full_prompt,
    resetInfo: observation.resetInfo ?? observation.reset_info,
    lastStep: mapExecutionResult(rawLastStep),
    runtimeConfig: observation.runtimeConfig ?? observation.runtime_config ?? void 0,
    codeContext: mapCodeContext(rawCodeContext)
  };
}
|
|
169
|
+
/**
 * Convert a runtime code-context payload into camelCase form, accepting
 * camelCase or snake_case keys. Each affordance is mapped individually.
 *
 * @param {object | null | undefined} context - Raw code context.
 * @returns {object | undefined} The mapped context, or undefined when falsy.
 */
function mapCodeContext(context) {
  if (!context) {
    return void 0;
  }
  const affordances = context.affordances?.map((entry) => mapCodeAffordance(entry));
  return {
    policyLanguage: context.policyLanguage ?? context.policy_language,
    executionModel: context.executionModel ?? context.execution_model,
    submitVia: context.submitVia ?? context.submit_via,
    affordances,
    skillLibrarySummary: context.skillLibrarySummary ?? context.skill_library_summary,
    skillLibraryDocs: context.skillLibraryDocs ?? context.skill_library_docs
  };
}
|
|
182
|
+
/**
 * Convert one code affordance into camelCase form; `inputSchema` may arrive
 * as `inputSchema` or `input_schema`.
 *
 * @param {object} affordance - Raw affordance description.
 * @returns {object} The mapped affordance.
 */
function mapCodeAffordance(affordance) {
  const {
    name,
    description,
    inputSchema,
    input_schema: inputSchemaSnake,
    source,
    symbol,
    tags,
    metadata
  } = affordance;
  return {
    name,
    description,
    inputSchema: inputSchema ?? inputSchemaSnake,
    source,
    symbol,
    tags,
    metadata
  };
}
|
|
193
|
+
/**
 * Convert a rendered-frame payload into camelCase form, accepting camelCase
 * or snake_case keys. Null `width`/`height` become undefined.
 *
 * @param {object | null | undefined} frame - Raw frame payload.
 * @returns {object} The mapped frame, or `{}` when the input is falsy.
 */
function mapFrame(frame) {
  if (!frame) {
    return {};
  }
  const mimeType = frame.mimeType ?? frame.mime_type;
  const dataUrl = frame.dataUrl ?? frame.data_url;
  return {
    source: frame.source,
    mimeType,
    encoding: frame.encoding,
    data: frame.data,
    dataUrl,
    width: frame.width ?? void 0,
    height: frame.height ?? void 0,
    timestamp: frame.timestamp
  };
}
|
|
208
|
+
/**
 * Convert a runtime artifact listing entry into an artifact descriptor.
 *
 * The artifact URL is kept as-is only when it is an absolute http(s) URL.
 * Anything else is treated as a path on the runtime server and joined to
 * `baseUrl` with exactly one "/" separator.
 *
 * @param {object} artifact - Raw entry; reads `path`, `url`, `kind` and the
 *   camelCase or snake_case size / modified-time fields.
 * @param {string} baseUrl - Runtime server base URL without a trailing slash.
 * @returns {object} Descriptor with `id`, `kind`, `uri`, `label`, `metadata.capx`.
 */
function mapRuntimeArtifact(artifact, baseUrl) {
  const relativePath = artifact.path ?? "artifact";
  const relativeUrl = artifact.url ?? "";
  let uri;
  if (/^https?:\/\//i.test(relativeUrl)) {
    // Require the full scheme so a relative path like "httpd/x.log" is not
    // mistaken for an absolute URL.
    uri = relativeUrl;
  } else if (relativeUrl === "" || relativeUrl.startsWith("/")) {
    uri = `${baseUrl}${relativeUrl}`;
  } else {
    // Relative path without a leading slash: add the missing separator.
    uri = `${baseUrl}/${relativeUrl}`;
  }
  return {
    id: `capx-runtime:${relativePath}`,
    kind: mapArtifactKind(artifact.kind),
    uri,
    label: relativePath,
    metadata: {
      capx: {
        path: relativePath,
        sizeBytes: artifact.sizeBytes ?? artifact.size_bytes,
        modifiedTime: artifact.modifiedTime ?? artifact.modified_time,
        kind: artifact.kind
      }
    }
  };
}
|
|
226
|
+
/**
 * Convert a skill-extraction response into camelCase form.
 *
 * @param {object} response - Raw response; accepts `taskName`/`task_name`
 *   and `newSkills`/`new_skills`.
 * @returns {object} `{ path, taskName, newSkills, total }` where `newSkills`
 *   defaults to an empty array.
 */
function mapSkillExtraction(response) {
  const taskName = response.taskName ?? response.task_name;
  const newSkills = response.newSkills ?? response.new_skills ?? [];
  return { path: response.path, taskName, newSkills, total: response.total };
}
|
|
234
|
+
/**
 * Convert a skill-injection response into camelCase form.
 *
 * @param {object} response - Raw response; accepts
 *   `promotedSkills`/`promoted_skills`.
 * @returns {object} `{ path, injected, promotedSkills }` where `injected`
 *   defaults to 0 and `promotedSkills` to an empty array.
 */
function mapSkillInjection(response) {
  const injected = response.injected ?? 0;
  const promotedSkills = response.promotedSkills ?? response.promoted_skills ?? [];
  return { path: response.path, injected, promotedSkills };
}
|
|
241
|
+
/**
 * Translate a runtime artifact kind into the descriptor's kind value.
 *
 * @param {unknown} kind - Kind reported by the runtime.
 * @returns {string} "image", "video" or "json" unchanged; "text" becomes
 *   "log"; anything else becomes "other".
 */
function mapArtifactKind(kind) {
  switch (kind) {
    case "image":
    case "video":
    case "json":
      return kind;
    case "text":
      return "log";
    default:
      return "other";
  }
}
|
|
250
|
+
|
|
251
|
+
// src/live-runtime/index.ts
|
|
252
|
+
// Output directory that CapxLiveRuntime reports as `outputDir` when
// options.outputDir is not set.
var DEFAULT_OUTPUT_DIR = "outputs/agent-physical-capx-live";
|
|
253
|
+
/**
 * One live session on a capx-agent-runtime server.
 *
 * Every public operation calls start() first, so the session is created
 * lazily on first use. The most recent observation is cached and returned
 * by start() while the session is alive.
 */
var CapxLiveRuntime = class {
  id;
  outputDir;
  #client;
  #sessionId;
  #lastObservation;
  /**
   * @param {object} options - Runtime options, also handed to the HTTP client.
   * @param {string} [id] - Runtime identifier; defaults to a generated
   *   `capx-live-<uuid>`.
   */
  constructor(options, id = `capx-live-${randomUUID2()}`) {
    this.options = options;
    this.id = id;
    this.outputDir = resolveOutputDir(options, DEFAULT_OUTPUT_DIR);
    this.#client = new CapxRuntimeHttpClient(options);
  }
  /** @returns {string} Tail of the client's stdout. */
  get stdoutTail() {
    return this.#client.stdoutTail;
  }
  /** @returns {string} Tail of the client's stderr. */
  get stderrTail() {
    return this.#client.stderrTail;
  }
  /** @returns {boolean} True when a session id is held and the client is ready. */
  get running() {
    return this.#sessionId !== void 0 && this.#client.ready;
  }
  /**
   * Create the runtime session unless one already exists.
   *
   * @returns {Promise<object>} The cached or freshly mapped observation.
   * @throws {Error} When the server is unreachable or returns no session id;
   *   stop() runs before the error is rethrown.
   */
  async start() {
    const alreadyStarted = this.#sessionId && this.#lastObservation;
    if (alreadyStarted) {
      return this.#lastObservation;
    }
    try {
      await this.#client.ensureStarted();
      const created = await this.#client.request(
        "POST",
        "/sessions",
        this.#sessionOptionsPayload()
      );
      const newSessionId = created.sessionId ?? created.session_id;
      if (!newSessionId) {
        throw new Error("capx-agent-runtime did not return a session id.");
      }
      this.#sessionId = newSessionId;
      this.#lastObservation = mapObservation(created.observation);
      return this.#lastObservation;
    } catch (error) {
      await this.stop();
      throw error;
    }
  }
  /**
   * Fetch the current observation and cache it.
   * @returns {Promise<object>} The mapped observation.
   */
  async observe() {
    await this.start();
    const { observation: raw } = await this.#sessionCall("GET", "/observe");
    const mapped = mapObservation(raw);
    this.#lastObservation = mapped;
    return mapped;
  }
  /**
   * Render one camera frame.
   * @param {string} [camera="main"] - Camera name, URL-encoded into the query.
   * @returns {Promise<object>} The mapped frame.
   */
  async renderFrame(camera = "main") {
    await this.start();
    const query = `camera=${encodeURIComponent(camera)}`;
    const rawFrame = await this.#sessionCall("GET", `/render?${query}`);
    return mapFrame(rawFrame);
  }
  /**
   * @returns {Promise<Array>} The turns reported by the runtime, or `[]`
   *   when the response has none.
   */
  async turnHistory() {
    await this.start();
    const { turns } = await this.#sessionCall("GET", "/turns");
    return turns ?? [];
  }
  /** @returns {Promise<object>} The unmapped skill-library response. */
  async skillLibrary() {
    await this.start();
    return await this.#sessionCall("GET", "/skill-library");
  }
  /**
   * Ask the runtime to extract skills.
   * @param {{ code?: string, taskName?: string }} [options={}] - Only
   *   defined fields are sent.
   * @returns {Promise<object>} The mapped extraction result.
   */
  async extractSkills(options = {}) {
    await this.start();
    const payload = withoutUndefined({
      code: options.code,
      task_name: options.taskName
    });
    const raw = await this.#sessionCall("POST", "/skill-library/extract", payload);
    return mapSkillExtraction(raw);
  }
  /**
   * Ask the runtime to inject its skill library.
   * @param {number} [minOccurrences=2] - Sent as `min_occurrences`.
   * @returns {Promise<object>} The mapped injection result.
   */
  async injectSkillLibrary(minOccurrences = 2) {
    await this.start();
    const payload = { min_occurrences: minOccurrences };
    const raw = await this.#sessionCall("POST", "/skill-library/inject", payload);
    return mapSkillInjection(raw);
  }
  /**
   * List the session's artifacts.
   * @param {number} [limit=50] - Maximum number of entries returned; the
   *   cut is applied client-side to the server's list.
   * @returns {Promise<Array<object>>} Mapped artifact descriptors.
   */
  async listArtifacts(limit = 50) {
    await this.start();
    const listing = await this.#sessionCall("GET", "/artifacts");
    const entries = listing.artifacts ?? [];
    const serverUrl = this.#client.requireBaseUrl();
    return entries.slice(0, limit).map((entry) => mapRuntimeArtifact(entry, serverUrl));
  }
  /**
   * Reset the session and cache the resulting observation.
   * @param {unknown} [trial] - Sent as `trial` when defined.
   * @returns {Promise<object>} The mapped observation.
   */
  async reset(trial) {
    await this.start();
    const { observation: raw } = await this.#sessionCall(
      "POST",
      "/reset",
      withoutUndefined({ trial })
    );
    const mapped = mapObservation(raw);
    this.#lastObservation = mapped;
    return mapped;
  }
  /**
   * Execute policy code in the live session.
   *
   * Requests rejected by validation return a failed trace and dry runs
   * return a dry-run trace; neither contacts the server.
   *
   * @param {object} request - Reads `code`, `language`, `dryRun`, `timeoutMs`.
   * @returns {Promise<object>} The execution trace.
   */
  async execute(request) {
    const rejected = this.#validateExecutionRequest(request);
    if (rejected) {
      return rejected;
    }
    if (request.dryRun) {
      return dryRunTrace(request);
    }
    await this.start();
    const startedAt = Date.now();
    const traceId = `capx-live-${randomUUID2()}`;
    const timeoutMs = request.timeoutMs ?? this.options.policyExecutionTimeoutMs ?? this.options.runtimeServerRequestTimeoutMs;
    const response = await this.#sessionCall(
      "POST",
      "/execute-code",
      { code: request.code },
      timeoutMs
    );
    const result = mapExecutionResult(response.result) ?? {
      success: false,
      stderr: "capx-agent-runtime returned no execution result."
    };
    this.#lastObservation = {
      ...this.#lastObservation,
      lastStep: result
    };
    // completedAt is taken before the artifact listing round-trip.
    const completedAt = Date.now();
    const artifacts = await this.#listExecutionArtifacts();
    const outcome = {
      success: result.success,
      reward: result.reward,
      taskCompleted: result.taskCompleted ?? void 0,
      terminated: result.terminated,
      truncated: result.truncated,
      metadata: {
        sandboxRc: result.sandboxRc,
        elapsedMs: result.elapsedMs,
        codePath: result.codePath,
        diagnostics: result.diagnostics,
        runtimeServer: "capx-agent-runtime"
      }
    };
    return {
      id: traceId,
      startedAt,
      completedAt,
      request,
      stdout: result.stdout ?? "",
      stderr: result.stderr ?? "",
      artifacts,
      outcome,
      metadata: {
        capx: {
          executionMode: "live-runtime",
          runtimeServer: "capx-agent-runtime",
          runtimeServerUrl: this.#client.baseUrl,
          outputDir: this.outputDir
        }
      }
    };
  }
  /**
   * Forget the session locally, then ask the server to stop it.
   * @returns {Promise<void>}
   */
  async stop() {
    const endingSessionId = this.#sessionId;
    this.#sessionId = void 0;
    this.#lastObservation = void 0;
    await this.#client.stop(endingSessionId);
  }
  // Sends a request scoped to the current session. `suffix` is appended to
  // `/sessions/<id>`; the timeout defaults to runtimeServerRequestTimeoutMs.
  #sessionCall(method, suffix, body, timeoutMs = this.options.runtimeServerRequestTimeoutMs) {
    const route = `/sessions/${this.#requireSessionId()}${suffix}`;
    return this.#client.request(method, route, body, timeoutMs);
  }
  // Body for POST /sessions; unset options are omitted.
  #sessionOptionsPayload() {
    const { options } = this;
    return withoutUndefined({
      trial: options.policyExecutionTrial,
      output_dir: options.outputDir,
      record_video: options.policyExecutionRecordVideo,
      start_api_servers: options.policyExecutionStartApiServers,
      skill_library_path: options.skillLibraryPath
    });
  }
  // Returns a failed trace for non-Python code or for hardware mode without
  // the explicit opt-in; returns undefined when the request may run.
  #validateExecutionRequest(request) {
    const { language } = request;
    if (language && language !== "python") {
      return failedTrace(
        request,
        `CaP-X live runtime only supports Python, got ${request.language}.`
      );
    }
    const { physicalMode, allowHardwarePolicyExecution } = this.options;
    if (physicalMode === "hardware" && !allowHardwarePolicyExecution) {
      return failedTrace(
        request,
        "Hardware policy execution is blocked. Set allowHardwarePolicyExecution to opt in.",
        { physicalMode: this.options.physicalMode }
      );
    }
    return void 0;
  }
  // Artifact listing for a trace; any listing failure yields an empty list.
  async #listExecutionArtifacts() {
    try {
      return await this.listArtifacts(50);
    } catch {
      return [];
    }
  }
  // Returns the session id or throws when no session has been started.
  #requireSessionId() {
    if (!this.#sessionId) {
      throw new Error("capx-agent-runtime session is not started.");
    }
    return this.#sessionId;
  }
};
|
|
489
|
+
/**
 * Build the trace returned for a dry-run request: reported as successful,
 * with identical start/completion timestamps, and flagged as a dry run.
 *
 * @param {object} request - The execution request that asked for a dry run.
 * @returns {object} A trace with `outcome.metadata.dryRun === true`.
 */
function dryRunTrace(request) {
  const timestamp = Date.now();
  const outcome = {
    success: true,
    metadata: { dryRun: true }
  };
  const capx = {
    executionMode: "live-runtime",
    runtimeServer: "capx-agent-runtime"
  };
  return {
    id: `capx-live-${randomUUID2()}`,
    startedAt: timestamp,
    completedAt: timestamp,
    request,
    stdout: "Dry run accepted; code was not executed.",
    outcome,
    metadata: { capx }
  };
}
|
|
511
|
+
|
|
512
|
+
// src/session.ts
|
|
513
|
+
// Task id reported by CapxSession.getState() when options.task is not set.
var RUNTIME_DEFAULT_TASK_ID = "capx-agent-runtime-defaults";
|
|
514
|
+
// Task description reported by CapxSession.getState() when options.task is not set.
var RUNTIME_DEFAULT_TASK_DESCRIPTION = "CaP-X config managed by capx-agent-runtime";
|
|
515
|
+
/**
 * Physical-session facade over a CapxLiveRuntime.
 *
 * Tracks a status ("idle", "starting", "running", "ready", "stopping",
 * "stopped"), exposes observations as a flat item list, and, when
 * `enablePolicyCodeExecution` is set, exposes `executeCode` plus the
 * matching capability descriptor.
 */
var CapxSession = class {
  id;
  options;
  capabilities;
  executeCode;
  #status = "idle";
  #startedAt;
  #updatedAt = Date.now();
  #message;
  #liveRuntime;
  /**
   * @param {object} options - Session options; `mode`, `physicalMode` and
   *   `policyExecutionMode` receive defaults when absent.
   * @param {string} [id] - Session id; defaults to a generated `capx-<uuid>`.
   */
  constructor(options, id = `capx-${randomUUID3()}`) {
    this.options = {
      mode: "runtime",
      physicalMode: "unknown",
      policyExecutionMode: "live-runtime",
      ...options
    };
    this.id = id;
    if (this.options.enablePolicyCodeExecution) {
      const onHardware = this.options.physicalMode === "hardware";
      this.capabilities = [
        {
          name: "capx_live_runtime_policy_code",
          description: "Execute supplied Python policy code through capx-agent-runtime in a live CaP-X environment.",
          safety: {
            riskLevel: "dangerous",
            requiresApproval: true,
            requiresHumanPresence: onHardware,
            allowedModes: onHardware ? ["hardware"] : ["simulation", "hybrid", "unknown"]
          },
          metadata: {
            capx: {
              executionMode: "live-runtime",
              activeProcessInjection: false,
              activeRuntimeSession: true,
              runtimeServer: "capx-agent-runtime",
              loopOwner: "agent-core"
            }
          }
        }
      ];
      this.executeCode = (request) => this.#executePolicyCode(request);
    }
  }
  // Lazily creates the runtime object without contacting the server.
  #runtime() {
    if (!this.#liveRuntime) {
      this.#liveRuntime = new CapxLiveRuntime(
        this.options,
        `${this.id}-runtime`
      );
    }
    return this.#liveRuntime;
  }
  // Returns the runtime after making sure its session has been started.
  async #ensureLiveRuntime() {
    const runtime = this.#runtime();
    await runtime.start();
    return runtime;
  }
  // Records a failed start/reset: leaves the transitional status, keeps the
  // error text in the state message and refreshes the update timestamp.
  #failTransition(previousStatus, action, error) {
    const reason = error instanceof Error ? error.message : String(error);
    const wasTransitional = previousStatus === "starting" || previousStatus === "stopping";
    this.#status = wasTransitional ? "idle" : previousStatus;
    this.#message = `Failed to ${action} capx-agent-runtime live session: ${reason}`;
    this.#updatedAt = Date.now();
  }
  // Runs policy code with status "running" for the duration of the call.
  async #executePolicyCode(request) {
    const previousStatus = this.#status;
    this.#status = "running";
    this.#updatedAt = Date.now();
    try {
      return await this.#runtime().execute(request);
    } finally {
      // Leave the status alone if something else (e.g. stop()) changed it.
      if (this.#status === "running") {
        // Dry runs and rejected requests never connect, so only report
        // "ready" when the runtime really holds a session; otherwise a
        // later start() would return early without connecting.
        this.#status = this.#liveRuntime?.running ? "ready" : previousStatus;
      }
      this.#updatedAt = Date.now();
    }
  }
  /**
   * @returns {object} Snapshot of the session state: ids, status, task
   *   (a default descriptor when none is configured), timestamps, message
   *   and CaP-X metadata.
   */
  getState() {
    const policyEnabled = this.options.enablePolicyCodeExecution;
    return {
      sessionId: this.id,
      status: this.#status,
      task: this.options.task ?? {
        id: RUNTIME_DEFAULT_TASK_ID,
        description: RUNTIME_DEFAULT_TASK_DESCRIPTION,
        environment: "capx",
        mode: this.options.physicalMode
      },
      mode: this.options.physicalMode,
      startedAt: this.#startedAt,
      updatedAt: this.#updatedAt,
      message: this.#message,
      metadata: {
        capx: {
          mode: this.options.mode,
          runtimeServerUrl: this.options.runtimeServerUrl,
          policyExecution: {
            enabled: policyEnabled ?? false,
            mode: policyEnabled ? "live-runtime" : "none",
            activeProcessInjection: false,
            activeRuntimeSession: true,
            runtimeServer: "capx-agent-runtime",
            loopOwner: "agent-core"
          }
        }
      }
    };
  }
  /**
   * Connect to the runtime. Returns the current state unchanged when the
   * session is already "running" or "ready".
   *
   * @param {{ task?: object }} [options] - Optional task to record.
   * @returns {Promise<object>} The session state after connecting.
   * @throws {Error} When the runtime cannot be started; the status is
   *   restored and the failure is recorded in the state message.
   */
  async start(options) {
    if (this.#status === "running" || this.#status === "ready") {
      return this.getState();
    }
    const previousStatus = this.#status;
    this.#startedAt = Date.now();
    this.#updatedAt = this.#startedAt;
    this.#message = void 0;
    if (options?.task) {
      this.options.task = options.task;
    }
    this.#status = "starting";
    try {
      await this.#ensureLiveRuntime();
    } catch (error) {
      this.#failTransition(previousStatus, "start", error);
      throw error;
    }
    this.#status = "ready";
    this.#message = "Connected to capx-agent-runtime live session; agent-core owns the trial loop.";
    this.#updatedAt = Date.now();
    return this.getState();
  }
  /**
   * Collect observation items: prompts, runtime config, code context and
   * last step as text, optionally the main camera frame and artifacts.
   *
   * @param {{ includeImages?: boolean, includeArtifacts?: boolean, maxItems?: number }} [options={}]
   * @returns {Promise<object>} Observation envelope; `items` is cut to
   *   `maxItems` when that option is set.
   */
  async observe(options = {}) {
    const items = [];
    const state = this.getState();
    const pushText = (source, text, timestamp = Date.now()) => {
      items.push({ kind: "text", source, text, timestamp });
    };
    const pushJson = (source, value) => {
      pushText(source, JSON.stringify(value, null, 2));
    };
    const runtime = this.#liveRuntime;
    if (runtime) {
      const observation = await runtime.observe();
      if (observation.taskPrompt) {
        pushText("capx:runtime:task-prompt", observation.taskPrompt);
      }
      if (observation.multiTurnPrompt) {
        pushText("capx:runtime:multi-turn-prompt", observation.multiTurnPrompt);
      }
      if (observation.fullPrompt) {
        pushJson("capx:runtime:full-prompt", observation.fullPrompt);
      }
      if (observation.runtimeConfig) {
        pushJson("capx:runtime:config", observation.runtimeConfig);
      }
      if (observation.codeContext) {
        pushJson("capx:runtime:code-context", observation.codeContext);
      }
      if (observation.lastStep) {
        pushJson("capx:runtime:last-step", observation.lastStep);
      }
      if (options.includeImages) {
        try {
          const frame = await runtime.renderFrame("main");
          if (frame.dataUrl) {
            items.push({
              kind: "rgb",
              source: "capx:runtime:main-camera",
              dataUrl: frame.dataUrl,
              width: frame.width,
              height: frame.height,
              mimeType: frame.mimeType,
              frame: "camera",
              // The frame timestamp is scaled by 1000 before use.
              timestamp: frame.timestamp ? frame.timestamp * 1e3 : Date.now()
            });
          }
        } catch (error) {
          // A render failure is reported as an item, not as a rejection.
          pushText(
            "capx:runtime:render-error",
            error instanceof Error ? error.message : String(error)
          );
        }
      }
    } else {
      pushText(
        "capx:runtime:session",
        "No live CaP-X runtime session has started. The host application must call session.start() or createCapxAgent({ startSession: true }) before the agent can observe or solve the current CaP-X trial."
      );
    }
    if (runtime?.stdoutTail) {
      pushText("capx:runtime:stdout", runtime.stdoutTail, this.#updatedAt);
    }
    if (runtime?.stderrTail) {
      pushText("capx:runtime:stderr", runtime.stderrTail, this.#updatedAt);
    }
    if (options.includeArtifacts && this.listArtifacts) {
      const artifacts = await this.listArtifacts({ limit: options.maxItems });
      for (const artifact of artifacts) {
        items.push({
          kind: "artifact",
          source: "capx:outputs",
          data: artifact,
          timestamp: this.#updatedAt
        });
      }
    }
    const maxItems = options.maxItems ?? items.length;
    const selected = items.slice(0, maxItems);
    return {
      sessionId: this.id,
      status: state.status,
      taskId: state.task?.id,
      summary: selected.length === 0 ? "No CaP-X output has been captured yet." : `${selected.length} CaP-X observation item(s) captured.`,
      items: selected,
      metadata: state.metadata,
      timestamp: Date.now()
    };
  }
  /**
   * Reset the runtime session, connecting first if necessary.
   *
   * @param {{ task?: object }} [options] - Optional task to record.
   * @returns {Promise<object>} The session state after the reset.
   * @throws {Error} When connecting or resetting fails; the status is
   *   restored and the failure is recorded in the state message.
   */
  async reset(options) {
    if (options?.task) {
      this.options.task = options.task;
    }
    const previousStatus = this.#status;
    this.#status = "starting";
    try {
      const runtime = await this.#ensureLiveRuntime();
      await runtime.reset(this.options.policyExecutionTrial);
    } catch (error) {
      this.#failTransition(previousStatus, "reset", error);
      throw error;
    }
    this.#status = "ready";
    this.#message = "Reset capx-agent-runtime live session.";
    this.#updatedAt = Date.now();
    return this.getState();
  }
  /**
   * List runtime artifacts, connecting first if necessary.
   *
   * @param {{ limit?: number, kind?: string }} [options={}] - `limit`
   *   defaults to 50; `kind` filters the mapped artifacts.
   * @returns {Promise<Array<object>>} Artifact descriptors.
   */
  async listArtifacts(options = {}) {
    const runtime = await this.#ensureLiveRuntime();
    const artifacts = await runtime.listArtifacts(options.limit ?? 50);
    return options.kind ? artifacts.filter((artifact) => artifact.kind === options.kind) : artifacts;
  }
  /**
   * @returns {Promise<Array>} Runtime turn history, or `[]` when no runtime
   *   object exists yet.
   */
  async getRuntimeTurnHistory() {
    if (!this.#liveRuntime) {
      return [];
    }
    return this.#liveRuntime.turnHistory();
  }
  /**
   * @returns {Promise<object>} The runtime's skill library, or an empty
   *   placeholder when no runtime object exists yet.
   */
  async getRuntimeSkillLibrary() {
    if (!this.#liveRuntime) {
      return {
        summary: "No live CaP-X runtime session has started.",
        total: 0,
        promoted: 0,
        skills: []
      };
    }
    return this.#liveRuntime.skillLibrary();
  }
  /**
   * Extract skills through the runtime, connecting first if necessary.
   * @param {object} [options={}] - Passed to the runtime's extractSkills.
   * @returns {Promise<object>} The mapped extraction result.
   */
  async extractRuntimeSkills(options = {}) {
    const runtime = await this.#ensureLiveRuntime();
    return runtime.extractSkills(options);
  }
  /**
   * Inject the skill library through the runtime, connecting first if necessary.
   * @param {number} [minOccurrences=2] - Passed to the runtime.
   * @returns {Promise<object>} The mapped injection result.
   */
  async injectRuntimeSkillLibrary(minOccurrences = 2) {
    const runtime = await this.#ensureLiveRuntime();
    return runtime.injectSkillLibrary(minOccurrences);
  }
  /**
   * Stop the runtime session (if any) and mark the session "stopped".
   *
   * @param {string} [reason] - Stored as the state message; defaults to
   *   "Stop requested.".
   * @returns {Promise<object>} The session state after stopping.
   */
  async stop(reason) {
    this.#message = reason ?? "Stop requested.";
    this.#updatedAt = Date.now();
    if (this.#liveRuntime) {
      this.#status = "stopping";
      await this.#liveRuntime.stop();
      this.#liveRuntime = void 0;
      this.#status = "stopped";
      this.#updatedAt = Date.now();
      return this.getState();
    }
    this.#status = "stopped";
    return this.getState();
  }
};
|
|
810
|
+
/**
 * Factory for a CapxSession with a generated session id.
 *
 * @param {object} options - Session options forwarded to the constructor.
 * @returns {CapxSession} The new session.
 */
function createCapxSession(options) {
  const session = new CapxSession(options);
  return session;
}
|
|
813
|
+
/**
 * Build the tool list for a CaP-X session: the generic physical-session
 * tools plus, for a CapxSession in "live-runtime" policy execution mode,
 * the turn-history tool.
 *
 * @param {object} session - Session the tools operate on.
 * @param {object} [options={}] - Tool options; `prefix` defaults to "capx".
 * @returns {Array} The tools.
 */
function createCapxPhysicalTools(session, options = {}) {
  // Resolve the prefix once so that an explicit `prefix: undefined` cannot
  // erase the default for the generic tools while the turn-history tool
  // still falls back to "capx".
  const toolOptions = {
    ...options,
    prefix: options?.prefix ?? "capx"
  };
  const tools = createPhysicalSessionTools(session, toolOptions);
  if (session instanceof CapxSession && session.options.policyExecutionMode === "live-runtime") {
    tools.push(createCapxTurnHistoryTool(session, toolOptions));
  }
  return tools;
}
|
|
823
|
+
/**
 * Compose a tool name as `<prefix>_<name>`.
 *
 * @param {string} name - Tool name without prefix.
 * @param {{ prefix?: string } | null | undefined} options - Tool options;
 *   the prefix falls back to "capx" when absent, null or undefined.
 * @returns {string} The prefixed tool name.
 */
function capxToolName(name, options) {
  const prefix = options?.prefix ?? "capx";
  return `${prefix}_${name}`;
}
|
|
826
|
+
/**
 * Define the tool that reports the policy-code turns executed in the live
 * runtime session. The tool takes no parameters.
 *
 * @param {object} session - Provides `id` and `getRuntimeTurnHistory()`.
 * @param {{ prefix?: string }} [options] - Used only for the tool name.
 * @returns {object} The tool produced by `Tool.define`.
 */
function createCapxTurnHistoryTool(session, options) {
  // Each use gets its own object, as in a literal written out twice.
  const safeReadOnly = () => ({
    parallelSafe: true,
    readOnly: true,
    riskLevel: "safe"
  });
  const describeTurns = (count) => {
    if (count === 0) {
      return "No CaP-X runtime turns have executed yet.";
    }
    return `${count} CaP-X runtime turn(s).`;
  };
  const definition = {
    description: "List policy-code turns executed through the live CaP-X runtime session.",
    parameters: z.object({}),
    capabilities: safeReadOnly(),
    execute: async () => {
      const turns = await session.getRuntimeTurnHistory();
      return {
        title: "CaP-X Turn History",
        output: describeTurns(turns.length),
        metadata: {
          physical: { sessionId: session.id },
          capx: { turns }
        }
      };
    }
  };
  return Tool.define(
    capxToolName("turn_history", options),
    definition,
    { capabilitiesHint: safeReadOnly() }
  );
}
|
|
862
|
+
|
|
863
|
+
export {
|
|
864
|
+
CapxLiveRuntime,
|
|
865
|
+
CapxSession,
|
|
866
|
+
createCapxSession,
|
|
867
|
+
createCapxPhysicalTools
|
|
868
|
+
};
|
|
869
|
+
//# sourceMappingURL=chunk-MYO63CWO.js.map
|