@tangle-network/agent-runtime 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,191 +0,0 @@
1
- import { O as OpenAIChatTool } from './types-CsCCryln.js';
2
-
3
- /**
4
- * @experimental
5
- *
6
- * OpenAI Chat Completions `tools[]` projection of the 5 agent-runtime MCP
7
- * delegation tools.
8
- *
9
- * Use when configuring `createOpenAICompatibleBackend({ tools: ... })` so the
10
- * model can call `delegate_code`, `delegate_research`, `delegate_feedback`,
11
- * `delegation_status`, and `delegation_history` through the OpenAI-compat
12
- * transport (tcloud, OpenRouter, OpenAI direct, cli-bridge). The runtime
13
- * surfaces tool calls as `tool_call` stream events — execution is the
14
- * caller's responsibility (typically the parent sandbox runtime's MCP
15
- * mount).
16
- *
17
- * Sandbox-SDK callers do NOT need this helper: the sandbox runtime mounts
18
- * MCP servers natively and the in-sandbox harness discovers tools via the
19
- * runtime, not via an OpenAI tools array.
20
- *
21
- * Tool name + description + JSON-schema are pulled from the canonical
22
- * `DELEGATE_*` constants exported by `./tools/*` so the projection cannot
23
- * drift from the server's own validators.
24
- */
25
-
26
- /**
27
- * @experimental
28
- *
29
- * Returns the 5 delegation tools projected into OpenAI Chat Completions
30
- * `tools[]` shape. The order is stable: `delegate_code`,
31
- * `delegate_research`, `delegate_feedback`, `delegation_status`,
32
- * `delegation_history`.
33
- */
34
- declare function mcpToolsForRuntimeMcp(): OpenAIChatTool[];
35
- /**
36
- * @experimental
37
- *
38
- * Subset filter — return only the projected tools whose `function.name`
39
- * appears in `names`. Useful for curated mounts (e.g. only the queue-bound
40
- * delegation tools, omitting `delegate_feedback`). Unknown names are
41
- * silently ignored; pass an empty array to get an empty result.
42
- */
43
- declare function mcpToolsForRuntimeMcpSubset(names: ReadonlyArray<string>): OpenAIChatTool[];
44
-
45
- /**
46
- * OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.
47
- *
48
- * Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env
49
- * when no explicit config is given. Keeps the runtime dep-free from
50
- * @opentelemetry/sdk-trace-base — minimal OTLP/JSON serializer.
51
- *
52
- * `OtelExporter.exportSpan` is typed to take OtelSpan objects; LoopTraceEvents
53
- * are converted to OtelSpans via `loopEventToOtelSpan` / `buildLoopOtelSpans`.
54
- */
55
- interface OtelExportConfig {
56
- /** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */
57
- endpoint?: string;
58
- /** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */
59
- headers?: Record<string, string>;
60
- /** Batch size before flush. Default 64. */
61
- batchSize?: number;
62
- /** Flush interval ms. Default 5000. */
63
- flushIntervalMs?: number;
64
- /** Resource attributes stamped on every export. */
65
- resourceAttributes?: Record<string, string | number | boolean>;
66
- /** Service name. Default 'agent-runtime'. */
67
- serviceName?: string;
68
- }
69
- interface OtelExporter {
70
- /** Export a span. */
71
- exportSpan(span: OtelSpan): void;
72
- /** Force flush pending spans. */
73
- flush(): Promise<void>;
74
- /** Shutdown cleanly. */
75
- shutdown(): Promise<void>;
76
- }
77
- interface OtelSpan {
78
- traceId: string;
79
- spanId: string;
80
- parentSpanId?: string;
81
- name: string;
82
- kind?: number;
83
- startTimeUnixNano: string;
84
- endTimeUnixNano: string;
85
- attributes?: OtelAttribute[];
86
- status?: {
87
- code: number;
88
- message?: string;
89
- };
90
- }
91
- interface OtelAttribute {
92
- key: string;
93
- value: {
94
- stringValue?: string;
95
- intValue?: string;
96
- doubleValue?: number;
97
- boolValue?: boolean;
98
- };
99
- }
100
- /**
101
- * Create an OTEL exporter. Returns undefined when no endpoint is configured.
102
- */
103
- declare function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined;
104
- /**
105
- * Convert a LoopTraceEvent into an OtelSpan for export.
106
- */
107
- declare function loopEventToOtelSpan(event: {
108
- kind: string;
109
- runId: string;
110
- timestamp: number;
111
- payload: object;
112
- }, traceId: string, parentSpanId?: string): OtelSpan;
113
- /**
114
- * Build a nested, real-duration OTLP span tree for ONE loop run from its full
115
- * ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
116
- * zero-duration span per event), this reconstructs the topology hierarchy a
117
- * GenAI trace viewer renders natively:
118
- *
119
- * loop (invoke_workflow)
120
- * └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
121
- * ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
122
- * └─ …
123
- *
124
- * Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
125
- * a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
126
- * verdict / placement / cost (not yet standardized). Pure: feed it a buffered
127
- * per-runId event array (e.g. flushed on `loop.ended`) and export the result.
128
- */
129
- declare function buildLoopOtelSpans(events: ReadonlyArray<{
130
- kind: string;
131
- runId: string;
132
- timestamp: number;
133
- payload: object;
134
- }>, traceId: string, rootParentSpanId?: string): OtelSpan[];
135
- /** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
136
- declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
137
- interface EvalRunGeneration {
138
- /** 0-based ordinal of this generation within the run (required by ingest). */
139
- index: number;
140
- /** Identity of the proposed surface change (content-addressed hash). */
141
- surfaceHash: string;
142
- /** Arbitrary provenance for this generation (rationale, evidence, source). */
143
- surface?: unknown;
144
- /** Per-scenario results; empty until the generation is measured. */
145
- cells?: unknown[];
146
- /** Mean composite score (0 when unmeasured — pair with labels.measured). */
147
- compositeMean: number;
148
- costUsd: number;
149
- durationMs: number;
150
- }
151
- interface EvalRunEvent {
152
- runId: string;
153
- runDir: string;
154
- /** ISO timestamp. */
155
- timestamp: string;
156
- status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
157
- labels?: Record<string, string>;
158
- baseline?: EvalRunGeneration;
159
- generations?: EvalRunGeneration[];
160
- gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
161
- holdoutLift?: number;
162
- totalCostUsd: number;
163
- totalDurationMs: number;
164
- errorMessage?: string;
165
- }
166
- interface EvalRunsExportConfig {
167
- /** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
168
- apiKey?: string;
169
- /** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
170
- base?: string;
171
- /** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
172
- idempotencyKey?: string;
173
- }
174
- interface EvalRunsExportResult {
175
- ok: boolean;
176
- status: number;
177
- accepted: number;
178
- rejected: Array<{
179
- index: number;
180
- reason: string;
181
- }>;
182
- }
183
- /**
184
- * Ship self-improvement eval-run events to Tangle Intelligence. Unlike the
185
- * best-effort span exporter, this RESOLVES with the ingest verdict (accepted /
186
- * rejected per event) so a consumer's loop can assert its provenance landed.
187
- * Throws only on a missing key or network failure.
188
- */
189
- declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
190
-
191
- export { type EvalRunEvent as E, INTELLIGENCE_WIRE_VERSION as I, type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type EvalRunGeneration as b, type EvalRunsExportConfig as c, type EvalRunsExportResult as d, type OtelAttribute as e, type OtelExportConfig as f, type OtelSpan as g, buildLoopOtelSpans as h, createOtelExporter as i, exportEvalRuns as j, loopEventToOtelSpan as l, mcpToolsForRuntimeMcp as m };
@@ -1,137 +0,0 @@
1
- import { A as AgentTaskSpec, R as RuntimeStreamEvent } from './types-CsCCryln.js';
2
-
3
- /**
4
- * @stable
5
- *
6
- * Production-run lifecycle: record what the agent did on behalf of a customer,
7
- * what it cost, and how it ended.
8
- *
9
- * Three concerns live in this module:
10
- *
11
- * 1. **Lifecycle state machine** — `running` -> `completed | failed | cancelled`,
12
- * enforced by `RuntimeRunStateError`. Completion is idempotent for the same
13
- * status (a second `complete()` call is a no-op so retries / cleanup paths
14
- * don't double-fire side effects). A different terminal status is a state
15
- * error.
16
- *
17
- * 2. **Cost ledger** — every `llm_call` event the handle observes contributes
18
- * `tokensIn`, `tokensOut`, `costUsd`, and bumps `llmCalls`. Wall time is
19
- * measured from `startRuntimeRun()` to `complete()`. Surface via
20
- * `handle.cost()` for cost-per-task dashboards.
21
- *
22
- * 3. **Persistence adapter** — `RuntimeRunPersistenceAdapter` is the seam
23
- * consumers plug in to write a `RuntimeRunRow` to their D1 / postgres /
24
- * KV store. The adapter receives a sanitized row shape; no telemetry
25
- * payload bytes flow through it unless the consumer opts in via
26
- * `RuntimeRunOptions.telemetryEvents`.
27
- */
28
-
29
- /** @stable */
30
- type RuntimeRunStatus = 'running' | 'completed' | 'failed' | 'cancelled';
31
- /** @stable */
32
- interface RuntimeRunCost {
33
- /** Cumulative input tokens across every observed `llm_call` event. */
34
- tokensIn: number;
35
- /** Cumulative output tokens across every observed `llm_call` event. */
36
- tokensOut: number;
37
- /** Sum of `costUsd` from every observed `llm_call` event. */
38
- costUsd: number;
39
- /** Wall time from `startRuntimeRun()` to `complete()` (or `now()` if not yet completed). */
40
- wallMs: number;
41
- /** Count of `llm_call` events observed during the run. */
42
- llmCalls: number;
43
- }
44
- /** @stable */
45
- interface RuntimeRunCompleteInput {
46
- status: Exclude<RuntimeRunStatus, 'running'>;
47
- resultSummary?: string;
48
- /** Optional explicit cost override; if omitted, the accumulated ledger is used. */
49
- cost?: Partial<RuntimeRunCost>;
50
- /** Stable error message when `status === 'failed'`. */
51
- error?: string;
52
- /** Additional adapter-specific fields merged into the persisted row. */
53
- metadata?: Record<string, unknown>;
54
- }
55
- /** @stable */
56
- interface RuntimeRunRow {
57
- /** Stable runtime-side identifier. Adapters may translate to their own primary key. */
58
- id: string;
59
- workspaceId: string;
60
- sessionId?: string;
61
- agentId?: string;
62
- domain?: string;
63
- taskId: string;
64
- scenarioId?: string;
65
- status: RuntimeRunStatus;
66
- resultSummary?: string;
67
- error?: string;
68
- cost: RuntimeRunCost;
69
- startedAt: string;
70
- completedAt?: string;
71
- metadata?: Record<string, unknown>;
72
- }
73
- /** @stable */
74
- interface RuntimeRunPersistenceAdapter {
75
- /**
76
- * Called once when `handle.persist()` runs. Implementations write `row` to
77
- * their durable store (D1, postgres, KV) and return whatever the consumer
78
- * wants the caller to see (often the storage-side row id). Errors thrown
79
- * here propagate out of `persist()` so the caller can decide whether to
80
- * retry or log-and-continue.
81
- */
82
- upsert(row: RuntimeRunRow): Promise<void> | void;
83
- }
84
- /** @stable */
85
- interface RuntimeRunOptions {
86
- workspaceId: string;
87
- sessionId?: string;
88
- agentId?: string;
89
- taskSpec: AgentTaskSpec;
90
- scenarioId?: string;
91
- /** Optional persistence adapter; if omitted, `persist()` is a no-op. */
92
- adapter?: RuntimeRunPersistenceAdapter;
93
- /** Override the row id; default = `${taskSpec.id}:${random suffix}`. */
94
- id?: string;
95
- /** Override the clock; default = `Date.now()`. Useful for deterministic tests. */
96
- now?: () => number;
97
- }
98
- /** @stable */
99
- interface RuntimeRunHandle {
100
- /** Stable id assigned at start. */
101
- readonly id: string;
102
- readonly workspaceId: string;
103
- readonly sessionId: string | undefined;
104
- readonly taskSpec: AgentTaskSpec;
105
- readonly status: RuntimeRunStatus;
106
- /**
107
- * Observe a single `RuntimeStreamEvent`. The handle ignores non-cost events
108
- * (text deltas, tool calls) silently so consumers can pipe the whole stream
109
- * through `handle.observe`. `llm_call` events update the ledger.
110
- */
111
- observe(event: RuntimeStreamEvent): void;
112
- /** Snapshot of the current cost ledger. Safe to call at any time. */
113
- cost(): RuntimeRunCost;
114
- /**
115
- * Transition to a terminal state. Idempotent for the same status; throws
116
- * `RuntimeRunStateError` for a different terminal status (state machines
117
- * don't time-travel).
118
- */
119
- complete(input: RuntimeRunCompleteInput): void;
120
- /** Build the current row without writing it. Useful for tests + dry runs. */
121
- toRow(metadata?: Record<string, unknown>): RuntimeRunRow;
122
- /**
123
- * Persist the current row via the configured adapter. Must be called after
124
- * `complete()`. Idempotent for the same terminal state (the adapter sees
125
- * the same row on retry).
126
- */
127
- persist(metadata?: Record<string, unknown>): Promise<void>;
128
- }
129
- /**
130
- * @stable
131
- *
132
- * Construct a runtime-run handle. The returned handle is mutable across its
133
- * lifetime; consumers should not share it across requests.
134
- */
135
- declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
136
-
137
- export { type RuntimeRunHandle as R, type RuntimeRunPersistenceAdapter as a, type RuntimeRunRow as b, startRuntimeRun as s };