@aexhq/sdk 0.20.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_contracts/index.d.ts +1 -0
- package/dist/_contracts/index.js +1 -0
- package/dist/_contracts/run-config.d.ts +23 -0
- package/dist/_contracts/run-config.js +44 -2
- package/dist/_contracts/run-custody.js +94 -3
- package/dist/_contracts/run-trace.d.ts +105 -0
- package/dist/_contracts/run-trace.js +174 -0
- package/dist/_contracts/runtime-manifest.d.ts +24 -0
- package/dist/_contracts/runtime-manifest.js +16 -1
- package/dist/_contracts/runtime-types.d.ts +17 -0
- package/dist/_contracts/submission.js +2 -3
- package/dist/cli.mjs +99 -5
- package/dist/cli.mjs.sha256 +1 -1
- package/dist/client.js +6 -12
- package/dist/client.js.map +1 -1
- package/dist/file.d.ts +25 -10
- package/dist/file.js +87 -26
- package/dist/file.js.map +1 -1
- package/dist/index.d.ts +2 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/docs/quickstart.md +18 -1
- package/docs/vision-skills.md +159 -0
- package/package.json +2 -2
|
@@ -10,6 +10,7 @@ export * from "./event-envelope.js";
|
|
|
10
10
|
export * from "./connection-ticket.js";
|
|
11
11
|
export * from "./event-stream-client.js";
|
|
12
12
|
export * from "./run-unit.js";
|
|
13
|
+
export * from "./run-trace.js";
|
|
13
14
|
export * from "./runtime-manifest.js";
|
|
14
15
|
export * from "./runtime-security-profile.js";
|
|
15
16
|
export * from "./run-record.js";
|
package/dist/_contracts/index.js
CHANGED
|
@@ -10,6 +10,7 @@ export * from "./event-envelope.js";
|
|
|
10
10
|
export * from "./connection-ticket.js";
|
|
11
11
|
export * from "./event-stream-client.js";
|
|
12
12
|
export * from "./run-unit.js";
|
|
13
|
+
export * from "./run-trace.js";
|
|
13
14
|
export * from "./runtime-manifest.js";
|
|
14
15
|
export * from "./runtime-security-profile.js";
|
|
15
16
|
export * from "./run-record.js";
|
|
@@ -106,6 +106,29 @@ export type AgentsMdRef = AssetRef;
|
|
|
106
106
|
export declare function isAgentsMdAssetRef(ref: AgentsMdRef): ref is AssetRef;
|
|
107
107
|
export type FileRef = AssetRef;
|
|
108
108
|
export declare function isFileAssetRef(ref: FileRef): ref is AssetRef;
|
|
109
|
+
/**
|
|
110
|
+
* The default mount DIRECTORY a `File` unzips into when the caller does not set
|
|
111
|
+
* `mountPath`. `/workspace` is also the agent's default working directory, so a
|
|
112
|
+
* file handed with no `mountPath` lands directly in the agent's cwd (e.g.
|
|
113
|
+
* `/workspace/source-video-subtitles.srt`).
|
|
114
|
+
*/
|
|
115
|
+
export declare const DEFAULT_FILE_MOUNT_PATH = "/workspace";
|
|
116
|
+
/**
|
|
117
|
+
* A `mountPath` is an ABSOLUTE container directory under the workspace. It must
|
|
118
|
+
* start with `/`, contain no `..`/`.` traversal or NUL/backslash, and stay
|
|
119
|
+
* within {@link MOUNT_PATH_MAX_LENGTH}. The managed runtime rebases it under the
|
|
120
|
+
* workspace root, so a path outside `/workspace` is clamped there — the pattern
|
|
121
|
+
* just rejects obviously-malformed input at the SDK/BFF boundary. A trailing
|
|
122
|
+
* slash is allowed (it is a directory) but not required.
|
|
123
|
+
*/
|
|
124
|
+
export declare const MOUNT_PATH_PATTERN: RegExp;
|
|
125
|
+
export declare const MOUNT_PATH_MAX_LENGTH = 512;
|
|
126
|
+
/**
|
|
127
|
+
* Validate a `File.mountPath` (an absolute container directory). Shared by the
|
|
128
|
+
* SDK `File` builders and the BFF asset-ref parser so both reject the same
|
|
129
|
+
* malformed input. Throws with `field` context on failure.
|
|
130
|
+
*/
|
|
131
|
+
export declare function assertValidMountPath(value: string, field: string): void;
|
|
109
132
|
/**
|
|
110
133
|
* Parse a `SkillRef` from untrusted input. Used by the BFF run parser
|
|
111
134
|
* and by the operations module when deserialising API responses. Only
|
|
@@ -99,6 +99,45 @@ export function isAgentsMdAssetRef(ref) {
|
|
|
99
99
|
export function isFileAssetRef(ref) {
|
|
100
100
|
return ref.kind === "asset";
|
|
101
101
|
}
|
|
102
|
+
/**
|
|
103
|
+
* The default mount DIRECTORY a `File` unzips into when the caller does not set
|
|
104
|
+
* `mountPath`. `/workspace` is also the agent's default working directory, so a
|
|
105
|
+
* file handed with no `mountPath` lands directly in the agent's cwd (e.g.
|
|
106
|
+
* `/workspace/source-video-subtitles.srt`).
|
|
107
|
+
*/
|
|
108
|
+
export const DEFAULT_FILE_MOUNT_PATH = "/workspace";
|
|
109
|
+
/**
|
|
110
|
+
* A `mountPath` is an ABSOLUTE container directory under the workspace. It must
|
|
111
|
+
* start with `/`, contain no `..`/`.` traversal or NUL/backslash, and stay
|
|
112
|
+
* within {@link MOUNT_PATH_MAX_LENGTH}. The managed runtime rebases it under the
|
|
113
|
+
* workspace root, so a path outside `/workspace` is clamped there — the pattern
|
|
114
|
+
* just rejects obviously-malformed input at the SDK/BFF boundary. A trailing
|
|
115
|
+
* slash is allowed (it is a directory) but not required.
|
|
116
|
+
*/
|
|
117
|
+
// Shape: "/" followed by zero or more NON-EMPTY segments joined by single "/",
// with an optional trailing "/". Each "/" is consumed in exactly one place, so a
// rejected input (e.g. one containing "//") fails in linear time. The previous
// `(?:segment+\/?)*` form let a long segment be split across iterations in
// exponentially many ways before failing.
export const MOUNT_PATH_PATTERN = /^\/(?:[^/\0\\]+(?:\/[^/\0\\]+)*\/?)?$/;
export const MOUNT_PATH_MAX_LENGTH = 512;
/**
 * Validate a `File.mountPath` (an absolute container directory).
 *
 * Checks run in this order, each with its own error message:
 *   1. length is 1..{@link MOUNT_PATH_MAX_LENGTH};
 *   2. the value starts with `/`;
 *   3. the value contains no NUL and no backslash;
 *   4. no path segment is `..`;
 *   5. no path segment is `.`;
 *   6. the value matches {@link MOUNT_PATH_PATTERN} (rejects empty segments
 *      such as `//`).
 *
 * @param value The candidate mount path.
 * @param field Name of the field being validated; prefixed to every error message.
 * @throws {Error} When any check above fails.
 */
export function assertValidMountPath(value, field) {
    if (value.length === 0 || value.length > MOUNT_PATH_MAX_LENGTH) {
        throw new Error(`${field} must be 1..${MOUNT_PATH_MAX_LENGTH} chars`);
    }
    if (!value.startsWith("/")) {
        throw new Error(`${field} must be an absolute path starting with '/'`);
    }
    if (value.includes("\0") || value.includes("\\")) {
        throw new Error(`${field} must not contain NUL or backslash`);
    }
    const segments = value.split("/");
    if (segments.some((seg) => seg === "..")) {
        throw new Error(`${field} must not contain '..' traversal segments`);
    }
    // The documented contract forbids `.` segments as well as `..`.
    if (segments.some((seg) => seg === ".")) {
        throw new Error(`${field} must not contain '.' current-directory segments`);
    }
    if (!MOUNT_PATH_PATTERN.test(value)) {
        throw new Error(`${field} must match ${MOUNT_PATH_PATTERN.source}`);
    }
}
|
|
102
141
|
/**
|
|
103
142
|
* Parse a `SkillRef` from untrusted input. Used by the BFF run parser
|
|
104
143
|
* and by the operations module when deserialising API responses. Only
|
|
@@ -162,8 +201,11 @@ export function parseAssetRefFields(record, path) {
|
|
|
162
201
|
throw new Error(`${path}.name must be a non-empty string (<= 128 chars)`);
|
|
163
202
|
}
|
|
164
203
|
const mountPath = record.mountPath;
|
|
165
|
-
if (mountPath !== undefined
|
|
166
|
-
|
|
204
|
+
if (mountPath !== undefined) {
|
|
205
|
+
if (typeof mountPath !== "string") {
|
|
206
|
+
throw new Error(`${path}.mountPath, when provided, must be a string`);
|
|
207
|
+
}
|
|
208
|
+
assertValidMountPath(mountPath, `${path}.mountPath`);
|
|
167
209
|
}
|
|
168
210
|
return {
|
|
169
211
|
kind: "asset",
|
|
@@ -397,7 +397,7 @@ function visitCustodyValue(input, path, findings) {
|
|
|
397
397
|
}
|
|
398
398
|
function scanStringValue(value, path, findings) {
|
|
399
399
|
for (const pattern of forbiddenStringPatterns) {
|
|
400
|
-
if (pattern
|
|
400
|
+
if (matchesForbiddenPattern(pattern, value)) {
|
|
401
401
|
findings.push(Object.freeze({
|
|
402
402
|
path,
|
|
403
403
|
reason: pattern.reason,
|
|
@@ -406,11 +406,39 @@ function scanStringValue(value, path, findings) {
|
|
|
406
406
|
}
|
|
407
407
|
}
|
|
408
408
|
}
|
|
409
|
+
/**
|
|
410
|
+
* A pattern fires on a value if its `regex` matches AND — when the pattern
|
|
411
|
+
* carries an `accept` predicate — at least one matched run is accepted by it.
|
|
412
|
+
* The predicate lets a shape-matched run be VETOED per-match (the entropy
|
|
413
|
+
* catch-all uses it to skip content-addressed hashes and low-entropy slugs that
|
|
414
|
+
* its coarse regex would otherwise flag); shape-only patterns have no predicate.
|
|
415
|
+
*/
|
|
416
|
+
function matchesForbiddenPattern(pattern, value) {
|
|
417
|
+
if (!pattern.accept) {
|
|
418
|
+
return pattern.regex.test(value);
|
|
419
|
+
}
|
|
420
|
+
const scan = pattern.regex.global ? pattern.regex : new RegExp(pattern.regex.source, `${pattern.regex.flags}g`);
|
|
421
|
+
scan.lastIndex = 0;
|
|
422
|
+
let match;
|
|
423
|
+
while ((match = scan.exec(value)) !== null) {
|
|
424
|
+
if (pattern.accept(match[0])) {
|
|
425
|
+
return true;
|
|
426
|
+
}
|
|
427
|
+
if (match.index === scan.lastIndex) {
|
|
428
|
+
scan.lastIndex++;
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
return false;
|
|
432
|
+
}
|
|
409
433
|
const forbiddenStringPatterns = Object.freeze([
|
|
410
434
|
{ reason: "bearer_token", regex: /\bBearer\s+[A-Za-z0-9._~+/=-]{8,}/i },
|
|
411
435
|
{
|
|
412
436
|
reason: "provider_key",
|
|
413
|
-
|
|
437
|
+
// Prefixed provider keys (`sk-…`, Slack `xox*-…`, Google `AIza…`). The bare
|
|
438
|
+
// `sk-` body is intentionally generic so an unrecognised vendor's `sk-` key
|
|
439
|
+
// (e.g. DeepSeek `sk-<hex>`, OpenRouter `sk-or-…`) is still caught by shape,
|
|
440
|
+
// not left to the narrowed entropy catch-all below.
|
|
441
|
+
regex: /\b(?:sk-[A-Za-z0-9_-]{16,}|xox[baprs]-[A-Za-z0-9-]{8,}|AIza[A-Za-z0-9_-]{8,})/i
|
|
414
442
|
},
|
|
415
443
|
{ reason: "signed_url", regex: /[?&](?:X-Amz-Signature|X-Amz-Credential|X-Amz-Algorithm|AWSAccessKeyId)=/i },
|
|
416
444
|
{ reason: "object_store_key", regex: /(^|[\s"'`])(?:runs|assets)\/[^?<#\s"'`]+/i },
|
|
@@ -419,8 +447,71 @@ const forbiddenStringPatterns = Object.freeze([
|
|
|
419
447
|
reason: "private_resource_handle",
|
|
420
448
|
regex: /\b(?:machine|session|agent|file|skill|env|resource|handle|token_hash|bearer_hash)[_:-][A-Za-z0-9][A-Za-z0-9_-]{7,}\b/i
|
|
421
449
|
},
|
|
422
|
-
{
|
|
450
|
+
{
|
|
451
|
+
reason: "high_entropy_token",
|
|
452
|
+
// Catch-all for an unrecognised opaque secret blob. The candidate run
|
|
453
|
+
// EXCLUDES `_` (so `SCREAMING_SNAKE` env names and `slug_with_words` URL
|
|
454
|
+
// segments split instead of fusing into a phantom 40-char run — the live
|
|
455
|
+
// false positive was `ted_season_2_peacock_official_discussion_thread` in
|
|
456
|
+
// web-search result text and a fetched URL), and the `accept` predicate
|
|
457
|
+
// vetoes content-addressed hashes (md5/sha1/sha256 digests — the platform's
|
|
458
|
+
// OWN asset filenames) and low-entropy / single-class runs so only genuine
|
|
459
|
+
// opaque secrets remain. Slash-bearing secrets (signed URLs, connection
|
|
460
|
+
// strings, `Bearer …`) are covered by the named patterns above.
|
|
461
|
+
regex: /\b[A-Za-z0-9-]{40,}\b/,
|
|
462
|
+
accept: isHighEntropySecretRun
|
|
463
|
+
}
|
|
423
464
|
]);
|
|
465
|
+
/** A content-addressed hash (md5/sha1/sha256 hex digest) — the platform's own
|
|
466
|
+
* asset filenames and content references. Exempt from the entropy catch-all so
|
|
467
|
+
* a captured output named after its sha256 (or a hash echoed in tool-result
|
|
468
|
+
* text) is not misclassified as a leaked secret. */
|
|
469
|
+
const CONTENT_HASH_RUN = /^(?:[0-9a-f]{32}|[0-9a-f]{40}|[0-9a-f]{64})$/i;
|
|
470
|
+
/**
|
|
471
|
+
* Decide whether a coarse `[A-Za-z0-9-]{40,}` run is a genuine opaque secret.
|
|
472
|
+
* Rejects content-addressed hashes, then requires both character-class
|
|
473
|
+
* diversity (≥2 of lower/upper/digit) and high Shannon entropy — the property
|
|
474
|
+
* that separates an opaque key blob from a long dictionary-ish identifier. A
|
|
475
|
+
* real prefixless secret (base64url/alnum-mixed) clears both gates; a hash, a
|
|
476
|
+
* hyphenated slug, or a single-class run does not.
|
|
477
|
+
*/
|
|
478
|
+
function isHighEntropySecretRun(run) {
    // All gates must pass, evaluated left to right:
    //   - not a bare md5/sha1/sha256-length hex digest (CONTENT_HASH_RUN),
    //   - contains at least one letter and at least one digit,
    //   - spans at least two of the lower/upper/digit classes,
    //   - per-character Shannon entropy of at least 3.0 bits.
    return (!CONTENT_HASH_RUN.test(run) &&
        /[A-Za-z]/.test(run) &&
        /\d/.test(run) &&
        highEntropyCharClassCount(run) >= 2 &&
        highEntropyShannonBits(run) >= 3.0);
}
|
|
490
|
+
/**
 * Count how many of the three ASCII character classes (lowercase letter,
 * uppercase letter, digit) occur at least once in `value`. Returns 0..3.
 */
function highEntropyCharClassCount(value) {
    const classes = [/[a-z]/, /[A-Z]/, /[0-9]/];
    return classes.filter((cls) => cls.test(value)).length;
}
|
|
500
|
+
/**
 * Shannon entropy of `value` in bits per character, computed from the
 * frequency of each character in the string. An empty string yields 0.
 */
function highEntropyShannonBits(value) {
    const total = value.length;
    if (total === 0) {
        return 0;
    }
    // Tally occurrences of each character.
    const frequency = new Map();
    for (const symbol of value) {
        frequency.set(symbol, (frequency.get(symbol) ?? 0) + 1);
    }
    // H = -sum(p * log2(p)) over the observed characters.
    let entropy = 0;
    frequency.forEach((occurrences) => {
        const share = occurrences / total;
        entropy -= share * Math.log2(share);
    });
    return entropy;
}
|
|
424
515
|
/**
 * True when `key` is, ignoring ASCII case, exactly one of the field names that
 * must not appear in custody data.
 */
function isForbiddenCustodyFieldName(key) {
    const forbiddenNames = [
        "apiKey",
        "secretValue",
        "bearerHash",
        "signedUrl",
        "objectStoreKey",
        "objectKey",
        "vaultId",
        "providerResponseBody",
        "responseBody",
        "privateResourceHandle",
        "resourceHandle",
        "rawBody"
    ];
    // Anchored alternation: the whole key must equal one of the names.
    const matcher = new RegExp(`^(${forbiddenNames.join("|")})$`, "i");
    return matcher.test(key);
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed decoders for a `listEvents(runId)` stream.
|
|
3
|
+
*
|
|
4
|
+
* `listEvents` returns the loose {@link RunEvent} wire shape: `type` at the top
|
|
5
|
+
* level, but the tool name at `data.name`, the call args at `data.arguments`,
|
|
6
|
+
* assistant text at `data.text`, and — the awkward part — a `TOOL_CALL_RESULT`
|
|
7
|
+
* carries no name, so a consumer must correlate each result back to its
|
|
8
|
+
* `TOOL_CALL_START` by `data.id` (the tool-call id). Every consumer ended up
|
|
9
|
+
* re-implementing that correlation; these helpers do it once, typed and pure.
|
|
10
|
+
*
|
|
11
|
+
* They are total and side-effect-free: pass the array `listEvents` returns and
|
|
12
|
+
* get back structured traces. Unknown event types are ignored (forward-compat),
|
|
13
|
+
* a result with no matching start surfaces as an orphan (never dropped silently),
|
|
14
|
+
* and timing falls back gracefully when a `recordedAt` is absent.
|
|
15
|
+
*
|
|
16
|
+
* Pairs with {@link Run.usage}: while a run executes `Run.usage` is empty — the
|
|
17
|
+
* per-turn token counts live ONLY in the `aex.usage` CUSTOM events — so
|
|
18
|
+
* {@link summarizeRunUsage} reconstructs the running total from the same stream.
|
|
19
|
+
*/
|
|
20
|
+
import type { UsageSummary } from "./runtime-types.js";
|
|
21
|
+
/**
|
|
22
|
+
* One decoded tool call: the `TOOL_CALL_START` and its correlated
|
|
23
|
+
* `TOOL_CALL_RESULT` (when present), with timing. `result` is undefined while a
|
|
24
|
+
* call is still in flight (the start has arrived but not the result), so a
|
|
25
|
+
* mid-run decode shows in-progress calls honestly rather than dropping them.
|
|
26
|
+
*/
|
|
27
|
+
export interface ToolCallTrace {
|
|
28
|
+
/** The tool-call id (`data.id`) that pairs the start with its result. */
|
|
29
|
+
readonly id: string;
|
|
30
|
+
/** The tool name (from the `TOOL_CALL_START`). */
|
|
31
|
+
readonly name: string;
|
|
32
|
+
/** The call arguments (`data.arguments`), or `{}` when absent. */
|
|
33
|
+
readonly args: Readonly<Record<string, unknown>>;
|
|
34
|
+
/** The id of the assistant message that issued the call, when present. */
|
|
35
|
+
readonly messageId?: string;
|
|
36
|
+
/** Event sequence of the `TOOL_CALL_START` (ordering within the run). */
|
|
37
|
+
readonly startSeq?: number;
|
|
38
|
+
/** ISO-8601 time of the `TOOL_CALL_START`, when the event carried one. */
|
|
39
|
+
readonly startedAt?: string;
|
|
40
|
+
/** The correlated result; undefined while the call is still in flight. */
|
|
41
|
+
readonly result?: ToolCallResult;
|
|
42
|
+
/** Result wall-clock minus start wall-clock (ms); undefined if either time is missing. */
|
|
43
|
+
readonly durationMs?: number;
|
|
44
|
+
}
|
|
45
|
+
/** The result half of a {@link ToolCallTrace}, decoded from `TOOL_CALL_RESULT`. */
|
|
46
|
+
export interface ToolCallResult {
|
|
47
|
+
/** True when the tool reported an error (`data.isError`). */
|
|
48
|
+
readonly isError: boolean;
|
|
49
|
+
/** The tool's result content, passed through verbatim (`data.content`). */
|
|
50
|
+
readonly content: unknown;
|
|
51
|
+
/** Event sequence of the `TOOL_CALL_RESULT`. */
|
|
52
|
+
readonly seq?: number;
|
|
53
|
+
/** ISO-8601 time of the `TOOL_CALL_RESULT`, when the event carried one. */
|
|
54
|
+
readonly recordedAt?: string;
|
|
55
|
+
}
|
|
56
|
+
/** One assistant text block, decoded from a `TEXT_MESSAGE_CONTENT` event. */
|
|
57
|
+
export interface AssistantTextEntry {
|
|
58
|
+
readonly text: string;
|
|
59
|
+
readonly messageId?: string;
|
|
60
|
+
readonly seq?: number;
|
|
61
|
+
readonly recordedAt?: string;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* A decoded view of a run's event stream: the correlated tool calls, the
|
|
65
|
+
* aggregate token usage, and the assistant text — everything a consumer
|
|
66
|
+
* previously hand-decoded from `listEvents`.
|
|
67
|
+
*/
|
|
68
|
+
export interface RunTrace {
|
|
69
|
+
readonly toolCalls: readonly ToolCallTrace[];
|
|
70
|
+
readonly usage: UsageSummary;
|
|
71
|
+
readonly text: readonly AssistantTextEntry[];
|
|
72
|
+
}
|
|
73
|
+
/** The loose event shape these decoders read — the {@link RunEvent} subset they touch. */
|
|
74
|
+
interface TraceEvent {
|
|
75
|
+
readonly type: string;
|
|
76
|
+
readonly seq?: number;
|
|
77
|
+
readonly recordedAt?: string;
|
|
78
|
+
readonly data?: unknown;
|
|
79
|
+
readonly [key: string]: unknown;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Decode a `listEvents` stream into correlated tool-call traces, in start order.
|
|
83
|
+
*
|
|
84
|
+
* Each `TOOL_CALL_START` opens a trace keyed by `data.id`; the matching
|
|
85
|
+
* `TOOL_CALL_RESULT` (same `data.id`) fills in `result` + `durationMs`. A result
|
|
86
|
+
* whose id never had a start is surfaced as an orphan trace (empty `name`, empty
|
|
87
|
+
* `args`) rather than dropped, so a partial/mis-ordered stream never hides a
|
|
88
|
+
* result. Pure — no I/O, input is not mutated.
|
|
89
|
+
*/
|
|
90
|
+
export declare function decodeToolCalls(events: readonly TraceEvent[]): readonly ToolCallTrace[];
|
|
91
|
+
/**
|
|
92
|
+
* Sum the per-turn `aex.usage` CUSTOM events into one {@link UsageSummary} —
|
|
93
|
+
* the running token total a customer otherwise hand-sums while watching a run.
|
|
94
|
+
* `totalTokens` is the sum of input + output tokens. Pure.
|
|
95
|
+
*/
|
|
96
|
+
export declare function summarizeRunUsage(events: readonly TraceEvent[]): UsageSummary;
|
|
97
|
+
/** Decode the assistant text blocks (`TEXT_MESSAGE_CONTENT`) in stream order. Pure. */
|
|
98
|
+
export declare function decodeAssistantText(events: readonly TraceEvent[]): readonly AssistantTextEntry[];
|
|
99
|
+
/**
|
|
100
|
+
* Decode a whole `listEvents` stream in one call: correlated tool calls,
|
|
101
|
+
* aggregate {@link UsageSummary}, and assistant text. Convenience over the three
|
|
102
|
+
* focused decoders; pure.
|
|
103
|
+
*/
|
|
104
|
+
export declare function summarizeRunTrace(events: readonly TraceEvent[]): RunTrace;
|
|
105
|
+
export {};
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed decoders for a `listEvents(runId)` stream.
|
|
3
|
+
*
|
|
4
|
+
* `listEvents` returns the loose {@link RunEvent} wire shape: `type` at the top
|
|
5
|
+
* level, but the tool name at `data.name`, the call args at `data.arguments`,
|
|
6
|
+
* assistant text at `data.text`, and — the awkward part — a `TOOL_CALL_RESULT`
|
|
7
|
+
* carries no name, so a consumer must correlate each result back to its
|
|
8
|
+
* `TOOL_CALL_START` by `data.id` (the tool-call id). Every consumer ended up
|
|
9
|
+
* re-implementing that correlation; these helpers do it once, typed and pure.
|
|
10
|
+
*
|
|
11
|
+
* They are total and side-effect-free: pass the array `listEvents` returns and
|
|
12
|
+
* get back structured traces. Unknown event types are ignored (forward-compat),
|
|
13
|
+
* a result with no matching start surfaces as an orphan (never dropped silently),
|
|
14
|
+
* and timing falls back gracefully when a `recordedAt` is absent.
|
|
15
|
+
*
|
|
16
|
+
* Pairs with {@link Run.usage}: while a run executes `Run.usage` is empty — the
|
|
17
|
+
* per-turn token counts live ONLY in the `aex.usage` CUSTOM events — so
|
|
18
|
+
* {@link summarizeRunUsage} reconstructs the running total from the same stream.
|
|
19
|
+
*/
|
|
20
|
+
const CUSTOM_USAGE_NAME = "aex.usage";
|
|
21
|
+
/** snake_case `aex.usage` field → the camelCase {@link UsageSummary} field. */
|
|
22
|
+
const USAGE_FIELD_MAP = {
|
|
23
|
+
input_tokens: "inputTokens",
|
|
24
|
+
output_tokens: "outputTokens",
|
|
25
|
+
cache_read_input_tokens: "cacheReadInputTokens",
|
|
26
|
+
cache_creation_input_tokens: "cacheCreationInputTokens"
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Decode a `listEvents` stream into correlated tool-call traces, in start order.
|
|
30
|
+
*
|
|
31
|
+
* Each `TOOL_CALL_START` opens a trace keyed by `data.id`; the matching
|
|
32
|
+
* `TOOL_CALL_RESULT` (same `data.id`) fills in `result` + `durationMs`. A result
|
|
33
|
+
* whose id never had a start is surfaced as an orphan trace (empty `name`, empty
|
|
34
|
+
* `args`) rather than dropped, so a partial/mis-ordered stream never hides a
|
|
35
|
+
* result. Pure — no I/O, input is not mutated.
|
|
36
|
+
*/
|
|
37
|
+
export function decodeToolCalls(events) {
    // Ids in first-seen order; the returned array follows this order.
    const order = [];
    const byId = new Map();
    // Ids whose trace was opened by a result that arrived before any start.
    const orphanIds = new Set();
    for (const event of events) {
        const isStart = event.type === "TOOL_CALL_START";
        if (!isStart && event.type !== "TOOL_CALL_RESULT") {
            continue; // unknown/unrelated event types are ignored
        }
        const data = asRecord(event.data);
        const id = asString(data.id);
        if (id === undefined) {
            continue; // cannot correlate without a tool-call id
        }
        if (isStart) {
            const messageId = asString(data.messageId);
            const startedAt = typeof event.recordedAt === "string" ? event.recordedAt : undefined;
            // If this id so far only had an orphan result (result seen before its
            // start), carry that result over instead of discarding it. A repeated
            // start for an id that already had a start still replaces the trace.
            const early = orphanIds.delete(id) ? byId.get(id).result : undefined;
            const duration = early === undefined ? undefined : durationMs(startedAt, early.recordedAt);
            const trace = {
                id,
                name: asString(data.name) ?? "",
                args: asRecord(data.arguments),
                ...(messageId !== undefined ? { messageId } : {}),
                ...(typeof event.seq === "number" ? { startSeq: event.seq } : {}),
                ...(startedAt !== undefined ? { startedAt } : {}),
                ...(early !== undefined ? { result: early } : {}),
                ...(duration !== undefined ? { durationMs: duration } : {})
            };
            if (!byId.has(id)) {
                order.push(id);
            }
            byId.set(id, trace);
            continue;
        }
        // TOOL_CALL_RESULT
        const result = {
            isError: data.isError === true,
            content: data.content ?? null,
            ...(typeof event.seq === "number" ? { seq: event.seq } : {}),
            ...(typeof event.recordedAt === "string" ? { recordedAt: event.recordedAt } : {})
        };
        let trace = byId.get(id);
        if (trace === undefined) {
            // Orphan result (no matching start yet) — surface it, never drop it.
            trace = { id, name: "", args: {} };
            order.push(id);
            byId.set(id, trace);
            orphanIds.add(id);
        }
        trace.result = result;
        const duration = durationMs(trace.startedAt, result.recordedAt);
        if (duration !== undefined) {
            trace.durationMs = duration;
        }
        else {
            // Do not leave a duration computed from an earlier, replaced result.
            delete trace.durationMs;
        }
    }
    return order.map((id) => byId.get(id));
}
|
|
91
|
+
/**
|
|
92
|
+
* Sum the per-turn `aex.usage` CUSTOM events into one {@link UsageSummary} —
|
|
93
|
+
* the running token total a customer otherwise hand-sums while watching a run.
|
|
94
|
+
* `totalTokens` is the sum of input + output tokens. Pure.
|
|
95
|
+
*/
|
|
96
|
+
export function summarizeRunUsage(events) {
    const running = { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, cacheCreationInputTokens: 0 };
    // Stays false until at least one finite numeric usage field has been added.
    let sawUsage = false;
    for (const event of events) {
        if (event.type !== "CUSTOM") {
            continue;
        }
        const payload = asRecord(event.data);
        if (asString(payload.name) !== CUSTOM_USAGE_NAME) {
            continue;
        }
        const reported = asRecord(payload.value);
        for (const [wireField, summaryField] of Object.entries(USAGE_FIELD_MAP)) {
            const amount = reported[wireField];
            // Skip absent, non-numeric, NaN and infinite values.
            if (typeof amount !== "number" || !Number.isFinite(amount)) {
                continue;
            }
            running[summaryField] += amount;
            sawUsage = true;
        }
    }
    if (!sawUsage) {
        return {};
    }
    const { inputTokens, outputTokens, cacheReadInputTokens, cacheCreationInputTokens } = running;
    return {
        inputTokens,
        outputTokens,
        cacheReadInputTokens,
        cacheCreationInputTokens,
        totalTokens: inputTokens + outputTokens
    };
}
|
|
124
|
+
/** Decode the assistant text blocks (`TEXT_MESSAGE_CONTENT`) in stream order. Pure. */
|
|
125
|
+
export function decodeAssistantText(events) {
    const entries = [];
    for (const event of events) {
        if (event.type !== "TEXT_MESSAGE_CONTENT") {
            continue;
        }
        const payload = asRecord(event.data);
        const text = asString(payload.text);
        if (text === undefined) {
            continue; // an event without string text yields no entry
        }
        const messageId = asString(payload.messageId);
        // Optional fields are included only when the event carried a value of
        // the expected type.
        entries.push({
            text,
            ...(messageId !== undefined ? { messageId } : {}),
            ...(typeof event.seq === "number" ? { seq: event.seq } : {}),
            ...(typeof event.recordedAt === "string" ? { recordedAt: event.recordedAt } : {})
        });
    }
    return entries;
}
|
|
146
|
+
/**
|
|
147
|
+
* Decode a whole `listEvents` stream in one call: correlated tool calls,
|
|
148
|
+
* aggregate {@link UsageSummary}, and assistant text. Convenience over the three
|
|
149
|
+
* focused decoders; pure.
|
|
150
|
+
*/
|
|
151
|
+
export function summarizeRunTrace(events) {
    // Run the three focused decoders over the same events and bundle the results.
    const toolCalls = decodeToolCalls(events);
    const usage = summarizeRunUsage(events);
    const text = decodeAssistantText(events);
    return { toolCalls, usage, text };
}
|
|
158
|
+
/**
 * Return `value` itself when it is a non-null, non-array object; otherwise
 * return a fresh empty object so property reads on the result are always safe.
 */
function asRecord(value) {
    if (value === null || typeof value !== "object" || Array.isArray(value)) {
        return {};
    }
    return value;
}
|
|
161
|
+
/** Return `value` when it is a string (including the empty string), otherwise `undefined`. */
function asString(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    return value;
}
|
|
164
|
+
/**
 * Milliseconds from `start` to `end`, both parsed with `Date.parse`.
 * Returns `undefined` when either argument is `undefined`, when either fails
 * to parse to a finite time, or when `end` is earlier than `start`.
 */
function durationMs(start, end) {
    if (start === undefined || end === undefined) {
        return undefined;
    }
    const [startMs, endMs] = [start, end].map((stamp) => Date.parse(stamp));
    if ([startMs, endMs].some((ms) => !Number.isFinite(ms))) {
        return undefined;
    }
    const elapsed = endMs - startMs;
    return elapsed < 0 ? undefined : elapsed;
}
|
|
174
|
+
//# sourceMappingURL=run-trace.js.map
|
|
@@ -62,6 +62,21 @@ export interface RuntimeManifest {
|
|
|
62
62
|
* resolves against this exact map.
|
|
63
63
|
*/
|
|
64
64
|
readonly envVars: Readonly<Record<string, string>>;
|
|
65
|
+
/**
|
|
66
|
+
* The resolved in-container mount DIRECTORY of each submitted `File`, so the
|
|
67
|
+
* caller can learn where a handed file landed. `mountPath` is the validated
|
|
68
|
+
* directory the archive unzipped into (the SDK default is `/workspace`); a
|
|
69
|
+
* single file lands at `<mountPath>/<realFilename>`, a folder lands its entries
|
|
70
|
+
* under `<mountPath>/`. Empty when the run carried no files.
|
|
71
|
+
*/
|
|
72
|
+
readonly mountedFiles: readonly MountedFileManifest[];
|
|
73
|
+
}
|
|
74
|
+
/** One submitted `File`'s resolved mount directory (surfaced on the Run record). */
|
|
75
|
+
export interface MountedFileManifest {
|
|
76
|
+
/** The file's storage slug (`FileRef.name`). */
|
|
77
|
+
readonly name: string;
|
|
78
|
+
/** Absolute container directory the file unzipped into (defaults to `/workspace`). */
|
|
79
|
+
readonly mountPath: string;
|
|
65
80
|
}
|
|
66
81
|
/**
|
|
67
82
|
* Managed-runner container paths. Kept here so the BFF, worker, and
|
|
@@ -96,6 +111,15 @@ export interface BuildRuntimeManifestInput {
|
|
|
96
111
|
* (or a future bypass) can't poison the manifest.
|
|
97
112
|
*/
|
|
98
113
|
readonly customerEnvVars?: Readonly<Record<string, string>> | undefined;
|
|
114
|
+
/**
|
|
115
|
+
* The validated submission's `files` refs. Each resolves to one
|
|
116
|
+
* {@link MountedFileManifest} entry surfacing the resolved mount directory
|
|
117
|
+
* (the SDK default is `/workspace`). Absent / non-array ⇒ no mounted files.
|
|
118
|
+
*/
|
|
119
|
+
readonly files?: readonly {
|
|
120
|
+
readonly name?: unknown;
|
|
121
|
+
readonly mountPath?: unknown;
|
|
122
|
+
}[] | undefined;
|
|
99
123
|
}
|
|
100
124
|
/**
|
|
101
125
|
* Build the runtime manifest for a single submission. Pure function:
|
|
@@ -53,6 +53,13 @@ export function runtimePathsFor(provider) {
|
|
|
53
53
|
* need the submission parser.
|
|
54
54
|
*/
|
|
55
55
|
const AEX_PREFIX = "AEX_";
|
|
56
|
+
/**
|
|
57
|
+
* Default mount DIRECTORY for a `File` with no explicit `mountPath`. Mirrors
|
|
58
|
+
* `DEFAULT_FILE_MOUNT_PATH` in `run-config.ts`; duplicated here so this module
|
|
59
|
+
* stays self-contained (tree-shakeable) — the same reason {@link AEX_PREFIX}
|
|
60
|
+
* is inlined rather than imported from the submission parser.
|
|
61
|
+
*/
|
|
62
|
+
const DEFAULT_FILE_MOUNT_PATH = "/workspace";
|
|
56
63
|
/**
|
|
57
64
|
* Build the runtime manifest for a single submission. Pure function:
|
|
58
65
|
* same input → same output → safe to call from the BFF response path
|
|
@@ -82,6 +89,13 @@ export function buildRuntimeManifest(input) {
|
|
|
82
89
|
customerEnvVars[key] = value;
|
|
83
90
|
}
|
|
84
91
|
const envVars = Object.freeze({ ...aexEnvVars, ...customerEnvVars });
|
|
92
|
+
const mountedFiles = [];
|
|
93
|
+
for (const f of Array.isArray(input.files) ? input.files : []) {
|
|
94
|
+
if (typeof f.name !== "string" || f.name.length === 0)
|
|
95
|
+
continue;
|
|
96
|
+
const mountPath = typeof f.mountPath === "string" && f.mountPath.length > 0 ? f.mountPath : DEFAULT_FILE_MOUNT_PATH;
|
|
97
|
+
mountedFiles.push(Object.freeze({ name: f.name, mountPath }));
|
|
98
|
+
}
|
|
85
99
|
return Object.freeze({
|
|
86
100
|
provider: input.provider,
|
|
87
101
|
skillsRoot: paths.skillsRoot,
|
|
@@ -92,7 +106,8 @@ export function buildRuntimeManifest(input) {
|
|
|
92
106
|
readme: paths.readme,
|
|
93
107
|
runtimeJson: paths.runtimeJson,
|
|
94
108
|
runtimeEnv: paths.runtimeEnv,
|
|
95
|
-
envVars
|
|
109
|
+
envVars,
|
|
110
|
+
mountedFiles: Object.freeze(mountedFiles)
|
|
96
111
|
});
|
|
97
112
|
}
|
|
98
113
|
//# sourceMappingURL=runtime-manifest.js.map
|
|
@@ -16,7 +16,24 @@ export interface Run {
|
|
|
16
16
|
readonly createdAt?: string;
|
|
17
17
|
readonly updatedAt?: string;
|
|
18
18
|
readonly terminalAt?: string | null;
|
|
19
|
+
/**
|
|
20
|
+
* The run's EXECUTION start (ISO-8601) — when the agent actually began
|
|
21
|
+
* running, distinct from {@link createdAt} (submission/accept time). Present
|
|
22
|
+
* from the moment the run starts executing and throughout its live duration;
|
|
23
|
+
* absent before it starts and after the run's live object is torn down (a
|
|
24
|
+
* terminal run also carries {@link terminalAt} and {@link costTelemetry}
|
|
25
|
+
* durations).
|
|
26
|
+
*/
|
|
27
|
+
readonly startedAt?: string;
|
|
19
28
|
readonly errorMessage?: string | null;
|
|
29
|
+
/**
|
|
30
|
+
* Aggregate token usage. NOTE: mid-run this is NOT populated — detailed
|
|
31
|
+
* token counts live ONLY in the per-turn `aex.usage` CUSTOM events on the
|
|
32
|
+
* event stream until the run settles. To follow token/cost progress while a
|
|
33
|
+
* run executes, decode the event stream with `summarizeRunTrace(events)`
|
|
34
|
+
* (its `usage` totals the `aex.usage` events). Settled cost/usage rides
|
|
35
|
+
* {@link costTelemetry}.
|
|
36
|
+
*/
|
|
20
37
|
readonly usage?: UsageSummary;
|
|
21
38
|
readonly costTelemetry?: import("./run-cost.js").RunCostTelemetry;
|
|
22
39
|
readonly runtimeManifest?: import("./runtime-manifest.js").RuntimeManifest;
|
|
@@ -1526,9 +1526,8 @@ function parseFiles(input) {
|
|
|
1526
1526
|
throw new Error(`submission.files duplicate assetId: ${fields.assetId}`);
|
|
1527
1527
|
}
|
|
1528
1528
|
seenAssetId.add(fields.assetId);
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
}
|
|
1529
|
+
// mountPath is validated as an absolute container directory by
|
|
1530
|
+
// parseAssetRefFields → assertValidMountPath (above), so no extra check here.
|
|
1532
1531
|
return fields.mountPath !== undefined
|
|
1533
1532
|
? { kind: "asset", assetId: fields.assetId, name: fields.name, mountPath: fields.mountPath }
|
|
1534
1533
|
: { kind: "asset", assetId: fields.assetId, name: fields.name };
|