@tangle-network/agent-runtime 0.35.0 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-5QVVET72.js → chunk-3HMHSN22.js} +17 -1
- package/dist/chunk-3HMHSN22.js.map +1 -0
- package/dist/{chunk-TT3IHIQT.js → chunk-M65QJD35.js} +5 -161
- package/dist/chunk-M65QJD35.js.map +1 -0
- package/dist/{chunk-7ZECSZ3C.js → chunk-T3GJBKHA.js} +2 -2
- package/dist/{chunk-HSX6PFZR.js → chunk-V6GURW4W.js} +209 -1
- package/dist/chunk-V6GURW4W.js.map +1 -0
- package/dist/index.d.ts +86 -3
- package/dist/index.js +50 -2
- package/dist/index.js.map +1 -1
- package/dist/loops.d.ts +2 -3
- package/dist/mcp/bin.js +5 -4
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +81 -328
- package/dist/mcp/index.js +64 -7
- package/dist/mcp/index.js.map +1 -1
- package/dist/otel-export-DgFMwsVy.d.ts +552 -0
- package/dist/profiles.d.ts +1 -18
- package/dist/profiles.js +1 -1
- package/dist/{types-DrXVR2Fu.d.ts → types-CmTjKLyB.d.ts} +137 -3
- package/package.json +1 -1
- package/dist/chunk-5QVVET72.js.map +0 -1
- package/dist/chunk-HSX6PFZR.js.map +0 -1
- package/dist/chunk-TT3IHIQT.js.map +0 -1
- package/dist/otel-export-xgf4J6bo.d.ts +0 -191
- package/dist/runtime-run-B8VIiOhI.d.ts +0 -137
- /package/dist/{chunk-7ZECSZ3C.js.map → chunk-T3GJBKHA.js.map} +0 -0
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
import { O as OpenAIChatTool } from './types-CsCCryln.js';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* @experimental
|
|
5
|
-
*
|
|
6
|
-
* OpenAI Chat Completions `tools[]` projection of the 5 agent-runtime MCP
|
|
7
|
-
* delegation tools.
|
|
8
|
-
*
|
|
9
|
-
* Use when configuring `createOpenAICompatibleBackend({ tools: ... })` so the
|
|
10
|
-
* model can call `delegate_code`, `delegate_research`, `delegate_feedback`,
|
|
11
|
-
* `delegation_status`, and `delegation_history` through the OpenAI-compat
|
|
12
|
-
* transport (tcloud, OpenRouter, OpenAI direct, cli-bridge). The runtime
|
|
13
|
-
* surfaces tool calls as `tool_call` stream events — execution is the
|
|
14
|
-
* caller's responsibility (typically the parent sandbox runtime's MCP
|
|
15
|
-
* mount).
|
|
16
|
-
*
|
|
17
|
-
* Sandbox-SDK callers do NOT need this helper: the sandbox runtime mounts
|
|
18
|
-
* MCP servers natively and the in-sandbox harness discovers tools via the
|
|
19
|
-
* runtime, not via an OpenAI tools array.
|
|
20
|
-
*
|
|
21
|
-
* Tool name + description + JSON-schema are pulled from the canonical
|
|
22
|
-
* `DELEGATE_*` constants exported by `./tools/*` so the projection cannot
|
|
23
|
-
* drift from the server's own validators.
|
|
24
|
-
*/
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* @experimental
|
|
28
|
-
*
|
|
29
|
-
* Returns the 5 delegation tools projected into OpenAI Chat Completions
|
|
30
|
-
* `tools[]` shape. The order is stable: `delegate_code`,
|
|
31
|
-
* `delegate_research`, `delegate_feedback`, `delegation_status`,
|
|
32
|
-
* `delegation_history`.
|
|
33
|
-
*/
|
|
34
|
-
declare function mcpToolsForRuntimeMcp(): OpenAIChatTool[];
|
|
35
|
-
/**
|
|
36
|
-
* @experimental
|
|
37
|
-
*
|
|
38
|
-
* Subset filter — return only the projected tools whose `function.name`
|
|
39
|
-
* appears in `names`. Useful for curated mounts (e.g. only the queue-bound
|
|
40
|
-
* delegation tools, omitting `delegate_feedback`). Unknown names are
|
|
41
|
-
* silently ignored; pass an empty array to get an empty result.
|
|
42
|
-
*/
|
|
43
|
-
declare function mcpToolsForRuntimeMcpSubset(names: ReadonlyArray<string>): OpenAIChatTool[];
|
|
44
|
-
|
|
45
|
-
/**
|
|
46
|
-
* OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.
|
|
47
|
-
*
|
|
48
|
-
* Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env
|
|
49
|
-
* when no explicit config is given. Keeps the runtime dep-free from
|
|
50
|
-
* @opentelemetry/sdk-trace-base — minimal OTLP/JSON serializer.
|
|
51
|
-
*
|
|
52
|
-
* The exporter accepts both raw OtelSpan objects and LoopTraceEvents
|
|
53
|
-
* (which get converted to OTLP spans automatically).
|
|
54
|
-
*/
|
|
55
|
-
interface OtelExportConfig {
|
|
56
|
-
/** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */
|
|
57
|
-
endpoint?: string;
|
|
58
|
-
/** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */
|
|
59
|
-
headers?: Record<string, string>;
|
|
60
|
-
/** Batch size before flush. Default 64. */
|
|
61
|
-
batchSize?: number;
|
|
62
|
-
/** Flush interval ms. Default 5000. */
|
|
63
|
-
flushIntervalMs?: number;
|
|
64
|
-
/** Resource attributes stamped on every export. */
|
|
65
|
-
resourceAttributes?: Record<string, string | number | boolean>;
|
|
66
|
-
/** Service name. Default 'agent-runtime'. */
|
|
67
|
-
serviceName?: string;
|
|
68
|
-
}
|
|
69
|
-
interface OtelExporter {
|
|
70
|
-
/** Export a span. */
|
|
71
|
-
exportSpan(span: OtelSpan): void;
|
|
72
|
-
/** Force flush pending spans. */
|
|
73
|
-
flush(): Promise<void>;
|
|
74
|
-
/** Shutdown cleanly. */
|
|
75
|
-
shutdown(): Promise<void>;
|
|
76
|
-
}
|
|
77
|
-
interface OtelSpan {
|
|
78
|
-
traceId: string;
|
|
79
|
-
spanId: string;
|
|
80
|
-
parentSpanId?: string;
|
|
81
|
-
name: string;
|
|
82
|
-
kind?: number;
|
|
83
|
-
startTimeUnixNano: string;
|
|
84
|
-
endTimeUnixNano: string;
|
|
85
|
-
attributes?: OtelAttribute[];
|
|
86
|
-
status?: {
|
|
87
|
-
code: number;
|
|
88
|
-
message?: string;
|
|
89
|
-
};
|
|
90
|
-
}
|
|
91
|
-
interface OtelAttribute {
|
|
92
|
-
key: string;
|
|
93
|
-
value: {
|
|
94
|
-
stringValue?: string;
|
|
95
|
-
intValue?: string;
|
|
96
|
-
doubleValue?: number;
|
|
97
|
-
boolValue?: boolean;
|
|
98
|
-
};
|
|
99
|
-
}
|
|
100
|
-
/**
|
|
101
|
-
* Create an OTEL exporter. Returns undefined when no endpoint is configured.
|
|
102
|
-
*/
|
|
103
|
-
declare function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined;
|
|
104
|
-
/**
|
|
105
|
-
* Convert a LoopTraceEvent into an OtelSpan for export.
|
|
106
|
-
*/
|
|
107
|
-
declare function loopEventToOtelSpan(event: {
|
|
108
|
-
kind: string;
|
|
109
|
-
runId: string;
|
|
110
|
-
timestamp: number;
|
|
111
|
-
payload: object;
|
|
112
|
-
}, traceId: string, parentSpanId?: string): OtelSpan;
|
|
113
|
-
/**
|
|
114
|
-
* Build a nested, real-duration OTLP span tree for ONE loop run from its full
|
|
115
|
-
* ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
|
|
116
|
-
* zero-duration span per event), this reconstructs the topology hierarchy a
|
|
117
|
-
* GenAI trace viewer renders natively:
|
|
118
|
-
*
|
|
119
|
-
* loop (invoke_workflow)
|
|
120
|
-
* └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
|
|
121
|
-
* ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
|
|
122
|
-
* └─ …
|
|
123
|
-
*
|
|
124
|
-
* Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
|
|
125
|
-
* a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
|
|
126
|
-
* verdict / placement / cost (not yet standardized). Pure: feed it a buffered
|
|
127
|
-
* per-runId event array (e.g. flushed on `loop.ended`) and export the result.
|
|
128
|
-
*/
|
|
129
|
-
declare function buildLoopOtelSpans(events: ReadonlyArray<{
|
|
130
|
-
kind: string;
|
|
131
|
-
runId: string;
|
|
132
|
-
timestamp: number;
|
|
133
|
-
payload: object;
|
|
134
|
-
}>, traceId: string, rootParentSpanId?: string): OtelSpan[];
|
|
135
|
-
/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
|
|
136
|
-
declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
|
|
137
|
-
interface EvalRunGeneration {
|
|
138
|
-
/** 0-based ordinal of this generation within the run (required by ingest). */
|
|
139
|
-
index: number;
|
|
140
|
-
/** Identity of the proposed surface change (content-addressed hash). */
|
|
141
|
-
surfaceHash: string;
|
|
142
|
-
/** Arbitrary provenance for this generation (rationale, evidence, source). */
|
|
143
|
-
surface?: unknown;
|
|
144
|
-
/** Per-scenario results; empty until the generation is measured. */
|
|
145
|
-
cells?: unknown[];
|
|
146
|
-
/** Mean composite score (0 when unmeasured — pair with labels.measured). */
|
|
147
|
-
compositeMean: number;
|
|
148
|
-
costUsd: number;
|
|
149
|
-
durationMs: number;
|
|
150
|
-
}
|
|
151
|
-
interface EvalRunEvent {
|
|
152
|
-
runId: string;
|
|
153
|
-
runDir: string;
|
|
154
|
-
/** ISO timestamp. */
|
|
155
|
-
timestamp: string;
|
|
156
|
-
status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
|
|
157
|
-
labels?: Record<string, string>;
|
|
158
|
-
baseline?: EvalRunGeneration;
|
|
159
|
-
generations?: EvalRunGeneration[];
|
|
160
|
-
gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
|
|
161
|
-
holdoutLift?: number;
|
|
162
|
-
totalCostUsd: number;
|
|
163
|
-
totalDurationMs: number;
|
|
164
|
-
errorMessage?: string;
|
|
165
|
-
}
|
|
166
|
-
interface EvalRunsExportConfig {
|
|
167
|
-
/** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
|
|
168
|
-
apiKey?: string;
|
|
169
|
-
/** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
|
|
170
|
-
base?: string;
|
|
171
|
-
/** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
|
|
172
|
-
idempotencyKey?: string;
|
|
173
|
-
}
|
|
174
|
-
interface EvalRunsExportResult {
|
|
175
|
-
ok: boolean;
|
|
176
|
-
status: number;
|
|
177
|
-
accepted: number;
|
|
178
|
-
rejected: Array<{
|
|
179
|
-
index: number;
|
|
180
|
-
reason: string;
|
|
181
|
-
}>;
|
|
182
|
-
}
|
|
183
|
-
/**
|
|
184
|
-
* Ship self-improvement eval-run events to Tangle Intelligence. Unlike the
|
|
185
|
-
* best-effort span exporter, this RESOLVES with the ingest verdict (accepted /
|
|
186
|
-
* rejected per event) so a consumer's loop can assert its provenance landed.
|
|
187
|
-
* Throws only on a missing key or network failure.
|
|
188
|
-
*/
|
|
189
|
-
declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
|
|
190
|
-
|
|
191
|
-
export { type EvalRunEvent as E, INTELLIGENCE_WIRE_VERSION as I, type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type EvalRunGeneration as b, type EvalRunsExportConfig as c, type EvalRunsExportResult as d, type OtelAttribute as e, type OtelExportConfig as f, type OtelSpan as g, buildLoopOtelSpans as h, createOtelExporter as i, exportEvalRuns as j, loopEventToOtelSpan as l, mcpToolsForRuntimeMcp as m };
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
import { A as AgentTaskSpec, R as RuntimeStreamEvent } from './types-CsCCryln.js';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* @stable
|
|
5
|
-
*
|
|
6
|
-
* Production-run lifecycle: record what the agent did on behalf of a customer,
|
|
7
|
-
* what it cost, and how it ended.
|
|
8
|
-
*
|
|
9
|
-
* Three concerns live in this module:
|
|
10
|
-
*
|
|
11
|
-
* 1. **Lifecycle state machine** — `running` -> `completed | failed | cancelled`,
|
|
12
|
-
* enforced by `RuntimeRunStateError`. Completion is idempotent for the same
|
|
13
|
-
* status (a second `complete()` call is a no-op so retries / cleanup paths
|
|
14
|
-
* don't double-fire side effects). A different terminal status is a state
|
|
15
|
-
* error.
|
|
16
|
-
*
|
|
17
|
-
* 2. **Cost ledger** — every `llm_call` event the handle observes contributes
|
|
18
|
-
* `tokensIn`, `tokensOut`, `costUsd`, and bumps `llmCalls`. Wall time is
|
|
19
|
-
* measured from `startRuntimeRun()` to `complete()`. Surface via
|
|
20
|
-
* `handle.cost()` for cost-per-task dashboards.
|
|
21
|
-
*
|
|
22
|
-
* 3. **Persistence adapter** — `RuntimeRunPersistenceAdapter` is the seam
|
|
23
|
-
* consumers plug in to write a `RuntimeRunRow` to their D1 / postgres /
|
|
24
|
-
* KV store. The adapter receives a sanitized row shape; no telemetry
|
|
25
|
-
* payload bytes flow through it unless the consumer opts in via
|
|
26
|
-
* `RuntimeRunOptions.telemetryEvents`.
|
|
27
|
-
*/
|
|
28
|
-
|
|
29
|
-
/** @stable */
|
|
30
|
-
type RuntimeRunStatus = 'running' | 'completed' | 'failed' | 'cancelled';
|
|
31
|
-
/** @stable */
|
|
32
|
-
interface RuntimeRunCost {
|
|
33
|
-
/** Cumulative input tokens across every observed `llm_call` event. */
|
|
34
|
-
tokensIn: number;
|
|
35
|
-
/** Cumulative output tokens across every observed `llm_call` event. */
|
|
36
|
-
tokensOut: number;
|
|
37
|
-
/** Sum of `costUsd` from every observed `llm_call` event. */
|
|
38
|
-
costUsd: number;
|
|
39
|
-
/** Wall time from `startRuntimeRun()` to `complete()` (or `now()` if not yet completed). */
|
|
40
|
-
wallMs: number;
|
|
41
|
-
/** Count of `llm_call` events observed during the run. */
|
|
42
|
-
llmCalls: number;
|
|
43
|
-
}
|
|
44
|
-
/** @stable */
|
|
45
|
-
interface RuntimeRunCompleteInput {
|
|
46
|
-
status: Exclude<RuntimeRunStatus, 'running'>;
|
|
47
|
-
resultSummary?: string;
|
|
48
|
-
/** Optional explicit cost override; if omitted, the accumulated ledger is used. */
|
|
49
|
-
cost?: Partial<RuntimeRunCost>;
|
|
50
|
-
/** Stable error message when `status === 'failed'`. */
|
|
51
|
-
error?: string;
|
|
52
|
-
/** Additional adapter-specific fields merged into the persisted row. */
|
|
53
|
-
metadata?: Record<string, unknown>;
|
|
54
|
-
}
|
|
55
|
-
/** @stable */
|
|
56
|
-
interface RuntimeRunRow {
|
|
57
|
-
/** Stable runtime-side identifier. Adapters may translate to their own primary key. */
|
|
58
|
-
id: string;
|
|
59
|
-
workspaceId: string;
|
|
60
|
-
sessionId?: string;
|
|
61
|
-
agentId?: string;
|
|
62
|
-
domain?: string;
|
|
63
|
-
taskId: string;
|
|
64
|
-
scenarioId?: string;
|
|
65
|
-
status: RuntimeRunStatus;
|
|
66
|
-
resultSummary?: string;
|
|
67
|
-
error?: string;
|
|
68
|
-
cost: RuntimeRunCost;
|
|
69
|
-
startedAt: string;
|
|
70
|
-
completedAt?: string;
|
|
71
|
-
metadata?: Record<string, unknown>;
|
|
72
|
-
}
|
|
73
|
-
/** @stable */
|
|
74
|
-
interface RuntimeRunPersistenceAdapter {
|
|
75
|
-
/**
|
|
76
|
-
* Called once when `handle.persist()` runs. Implementations write `row` to
|
|
77
|
-
* their durable store (D1, postgres, KV) and return whatever the consumer
|
|
78
|
-
* wants the caller to see (often the storage-side row id). Errors thrown
|
|
79
|
-
* here propagate out of `persist()` so the caller can decide whether to
|
|
80
|
-
* retry or log-and-continue.
|
|
81
|
-
*/
|
|
82
|
-
upsert(row: RuntimeRunRow): Promise<void> | void;
|
|
83
|
-
}
|
|
84
|
-
/** @stable */
|
|
85
|
-
interface RuntimeRunOptions {
|
|
86
|
-
workspaceId: string;
|
|
87
|
-
sessionId?: string;
|
|
88
|
-
agentId?: string;
|
|
89
|
-
taskSpec: AgentTaskSpec;
|
|
90
|
-
scenarioId?: string;
|
|
91
|
-
/** Optional persistence adapter; if omitted, `persist()` is a no-op. */
|
|
92
|
-
adapter?: RuntimeRunPersistenceAdapter;
|
|
93
|
-
/** Override the row id; default = `${taskSpec.id}:${random suffix}`. */
|
|
94
|
-
id?: string;
|
|
95
|
-
/** Override the clock; default = `Date.now()`. Useful for deterministic tests. */
|
|
96
|
-
now?: () => number;
|
|
97
|
-
}
|
|
98
|
-
/** @stable */
|
|
99
|
-
interface RuntimeRunHandle {
|
|
100
|
-
/** Stable id assigned at start. */
|
|
101
|
-
readonly id: string;
|
|
102
|
-
readonly workspaceId: string;
|
|
103
|
-
readonly sessionId: string | undefined;
|
|
104
|
-
readonly taskSpec: AgentTaskSpec;
|
|
105
|
-
readonly status: RuntimeRunStatus;
|
|
106
|
-
/**
|
|
107
|
-
* Observe a single `RuntimeStreamEvent`. The handle ignores non-cost events
|
|
108
|
-
* (text deltas, tool calls) silently so consumers can pipe the whole stream
|
|
109
|
-
* through `handle.observe`. `llm_call` events update the ledger.
|
|
110
|
-
*/
|
|
111
|
-
observe(event: RuntimeStreamEvent): void;
|
|
112
|
-
/** Snapshot of the current cost ledger. Safe to call at any time. */
|
|
113
|
-
cost(): RuntimeRunCost;
|
|
114
|
-
/**
|
|
115
|
-
* Transition to a terminal state. Idempotent for the same status; throws
|
|
116
|
-
* `RuntimeRunStateError` for a different terminal status (state machines
|
|
117
|
-
* don't time-travel).
|
|
118
|
-
*/
|
|
119
|
-
complete(input: RuntimeRunCompleteInput): void;
|
|
120
|
-
/** Build the current row without writing it. Useful for tests + dry runs. */
|
|
121
|
-
toRow(metadata?: Record<string, unknown>): RuntimeRunRow;
|
|
122
|
-
/**
|
|
123
|
-
* Persist the current row via the configured adapter. Must be called after
|
|
124
|
-
* `complete()`. Idempotent for the same terminal state (the adapter sees
|
|
125
|
-
* the same row on retry).
|
|
126
|
-
*/
|
|
127
|
-
persist(metadata?: Record<string, unknown>): Promise<void>;
|
|
128
|
-
}
|
|
129
|
-
/**
|
|
130
|
-
* @stable
|
|
131
|
-
*
|
|
132
|
-
* Construct a runtime-run handle. The returned handle is mutable across its
|
|
133
|
-
* lifetime; consumers should not share it across requests.
|
|
134
|
-
*/
|
|
135
|
-
declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
|
|
136
|
-
|
|
137
|
-
export { type RuntimeRunHandle as R, type RuntimeRunPersistenceAdapter as a, type RuntimeRunRow as b, startRuntimeRun as s };
|
|
File without changes
|