@tangle-network/agent-runtime 0.36.0 → 0.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/agent.d.ts +3 -3
  2. package/dist/analyst-loop.d.ts +2 -2
  3. package/dist/analyst-loop.js +3 -257
  4. package/dist/analyst-loop.js.map +1 -1
  5. package/dist/{chunk-NYGEI3NV.js → chunk-M65QJD35.js} +5 -211
  6. package/dist/chunk-M65QJD35.js.map +1 -0
  7. package/dist/{chunk-HSX6PFZR.js → chunk-V6GURW4W.js} +209 -1
  8. package/dist/chunk-V6GURW4W.js.map +1 -0
  9. package/dist/chunk-VOX6Z3II.js +90 -0
  10. package/dist/chunk-VOX6Z3II.js.map +1 -0
  11. package/dist/chunk-XBUG326M.js +261 -0
  12. package/dist/chunk-XBUG326M.js.map +1 -0
  13. package/dist/{chunk-7ZECSZ3C.js → chunk-Z523NPJK.js} +59 -2
  14. package/dist/chunk-Z523NPJK.js.map +1 -0
  15. package/dist/dynamic-DeOPeeAw.d.ts +106 -0
  16. package/dist/{improvement-adapter-CaZxFxTd.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
  17. package/dist/improvement.d.ts +6 -130
  18. package/dist/improvement.js +4 -85
  19. package/dist/improvement.js.map +1 -1
  20. package/dist/index.d.ts +148 -3
  21. package/dist/index.js +109 -2
  22. package/dist/index.js.map +1 -1
  23. package/dist/loops.d.ts +5 -107
  24. package/dist/mcp/bin.js +4 -3
  25. package/dist/mcp/bin.js.map +1 -1
  26. package/dist/mcp/index.d.ts +6 -440
  27. package/dist/mcp/index.js +7 -62
  28. package/dist/mcp/index.js.map +1 -1
  29. package/dist/optimize-prompt-cmH9wZdH.d.ts +129 -0
  30. package/dist/otel-export-CNmeg_7B.d.ts +627 -0
  31. package/dist/profiles.d.ts +1 -2
  32. package/dist/{types-DrXVR2Fu.d.ts → types-CmkQl8qE.d.ts} +137 -3
  33. package/dist/{types-D_MXrmJP.d.ts → types-p8dWBIXL.d.ts} +1 -1
  34. package/package.json +1 -1
  35. package/dist/chunk-7ZECSZ3C.js.map +0 -1
  36. package/dist/chunk-HSX6PFZR.js.map +0 -1
  37. package/dist/chunk-NYGEI3NV.js.map +0 -1
  38. package/dist/otel-export-xgf4J6bo.d.ts +0 -191
  39. package/dist/runtime-run-B8VIiOhI.d.ts +0 -137
@@ -0,0 +1,627 @@
1
+ import { CoderOutput, CoderTask } from './profiles.js';
2
+ import { L as LoopSandboxClient } from './types-CmkQl8qE.js';
3
+ import { SandboxInstance } from '@tangle-network/sandbox';
4
+ import { O as OpenAIChatTool } from './types-CsCCryln.js';
5
+
6
+ /**
7
+ * @experimental
8
+ *
9
+ * Delegation executors — the layer between MCP delegates and the sandbox
10
+ * substrate. Each executor exposes a {@link LoopSandboxClient} the kernel
11
+ * consumes plus a placement tag so the trace pipeline can correlate workers
12
+ * with their physical placement.
13
+ *
14
+ * Two implementations ship in-box:
15
+ *
16
+ * - {@link createSiblingSandboxExecutor} — every delegation spawns a fresh
17
+ * sandbox sibling to the caller. Default when the MCP server runs as a
18
+ * standalone CLI mounted outside a fleet.
19
+ *
20
+ * - {@link createFleetWorkspaceExecutor} — delegations dispatch onto machines
21
+ * in the caller's existing fleet so worker diffs land directly on the
22
+ * caller's filesystem (the fleet's shared workspace). Selected when the
23
+ * parent sandbox passes `TANGLE_FLEET_ID` into the MCP server's env.
24
+ */
25
+
26
+ /** @experimental */
27
+ interface DelegationExecutor {
28
+ /** Sandbox client the kernel calls. Returned with `describePlacement` set. */
29
+ readonly client: LoopSandboxClient;
30
+ /** Best-effort one-liner used in stderr boot logs and diagnostics. */
31
+ describe(): string;
32
+ }
33
+ /** @experimental */
34
+ interface SiblingSandboxExecutorOptions {
35
+ client: LoopSandboxClient;
36
+ }
37
+ /**
38
+ * Wrap a raw sandbox SDK client so the kernel emits
39
+ * `loop.iteration.dispatch` events with `{ placement: 'sibling', sandboxId }`.
40
+ *
41
+ * The returned client `.create()` delegates to the underlying client; the
42
+ * only added behavior is a `describePlacement` tag the kernel reads.
43
+ *
44
+ * @experimental
45
+ */
46
+ declare function createSiblingSandboxExecutor(options: SiblingSandboxExecutorOptions): DelegationExecutor;
47
+ /**
48
+ * Minimal `SandboxFleet` surface the fleet executor calls. Declared
49
+ * structurally so tests can pass an in-memory stub without instantiating the
50
+ * sandbox SDK.
51
+ *
52
+ * @experimental
53
+ */
54
+ interface FleetHandle {
55
+ readonly fleetId: string;
56
+ /** Machine ids in dispatch-eligible order. The executor round-robins. */
57
+ readonly ids: ReadonlyArray<string>;
58
+ /** Resolve a machine id to its `SandboxInstance` — that machine is mounted
59
+ * on the fleet's shared workspace, so any diff the worker writes lands on
60
+ * every other fleet machine's filesystem too. */
61
+ sandbox(machineId: string): Promise<SandboxInstance>;
62
+ }
63
+ /** @experimental */
64
+ interface FleetWorkspaceExecutorOptions {
65
+ fleet: FleetHandle;
66
+ /**
67
+ * Override the machine-selection policy. Default = round-robin across
68
+ * `fleet.ids`, skipping the optional `excludeMachineIds` set (typically the
69
+ * coordinator machine the MCP server is running on).
70
+ */
71
+ selectMachine?: (call: {
72
+ callIndex: number;
73
+ ids: ReadonlyArray<string>;
74
+ }) => string;
75
+ /**
76
+ * Machine ids to skip during default round-robin. Set to the caller's own
77
+ * machineId so workers don't compete with the orchestrator on the same VM.
78
+ */
79
+ excludeMachineIds?: ReadonlyArray<string>;
80
+ }
81
+ /**
82
+ * Build an executor that resolves each delegated iteration to an existing
83
+ * machine in `fleet`. The fleet's shared-workspace policy means the worker
84
+ * machine sees the caller's filesystem — diffs land in-place with no
85
+ * cross-sandbox copy step.
86
+ *
87
+ * @experimental
88
+ */
89
+ declare function createFleetWorkspaceExecutor(options: FleetWorkspaceExecutorOptions): DelegationExecutor;
90
+
91
+ /**
92
+ * @experimental
93
+ *
94
+ * MCP delegation tool surface — the typed inputs/outputs the product agent
95
+ * sees over the wire. These types are the contract; the JSON schemas under
96
+ * `tools/*` mirror them for the MCP `tools/list` advertisement.
97
+ *
98
+ * Async semantics: `delegate_code` + `delegate_research` return a `taskId`
99
+ * immediately. The product agent polls `delegation_status` until the task
100
+ * transitions to `completed` | `failed` | `cancelled`. `delegate_feedback`
101
+ * + `delegation_history` are synchronous reads / writes against the local
102
+ * task queue + feedback store.
103
+ */
104
+
105
+ /** @experimental */
106
+ type DelegationProfile = 'coder' | 'researcher';
107
+ /** @experimental */
108
+ type DelegationStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
109
+ /**
110
+ * Minimal `CoderTask` overrides exposed over the MCP wire. The full
111
+ * `CoderTask` carries fields the kernel synthesizes from `goal` +
112
+ * `repoRoot` — the agent only edits the few that materially gate
113
+ * validator behavior.
114
+ *
115
+ * @experimental
116
+ */
117
+ interface DelegateCodeConfig {
118
+ testCmd?: string;
119
+ typecheckCmd?: string;
120
+ forbiddenPaths?: string[];
121
+ maxDiffLines?: number;
122
+ }
123
+ /** @experimental */
124
+ interface DelegateCodeArgs {
125
+ /** Natural-language description of what the coder must accomplish. */
126
+ goal: string;
127
+ /** Absolute path inside the sandbox where the repo lives. */
128
+ repoRoot: string;
129
+ /** Optional free-form context the agent surfaces in the prompt prelude. */
130
+ contextHint?: string;
131
+ /**
132
+ * When > 1, dispatches `multiHarnessCoderFanout` across N harnesses
133
+ * (claude-code, codex, opencode-glm) and picks the highest-scoring
134
+ * passing patch. Default 1.
135
+ */
136
+ variants?: number;
137
+ /** Validator overrides the agent knows for this repo. */
138
+ config?: DelegateCodeConfig;
139
+ /** Multi-tenant scope (customer-id, workspace-id). */
140
+ namespace?: string;
141
+ }
142
+ /** @experimental */
143
+ interface DelegateCodeResult {
144
+ taskId: string;
145
+ /** Best-effort hint — coder loops can take minutes-to-hours. */
146
+ estimatedDurationMs?: number;
147
+ }
148
+ /** @experimental */
149
+ type ResearchSource = 'web' | 'corpus' | 'twitter' | 'github' | 'docs';
150
+ /** @experimental */
151
+ interface DelegateResearchConfig {
152
+ recencyWindow?: {
153
+ since?: string;
154
+ until?: string;
155
+ };
156
+ maxItems?: number;
157
+ minConfidence?: number;
158
+ }
159
+ /** @experimental */
160
+ interface DelegateResearchArgs {
161
+ question: string;
162
+ namespace: string;
163
+ scope?: string;
164
+ sources?: ResearchSource[];
165
+ variants?: number;
166
+ config?: DelegateResearchConfig;
167
+ }
168
+ /** @experimental */
169
+ interface DelegateResearchResult {
170
+ taskId: string;
171
+ estimatedDurationMs?: number;
172
+ }
173
+ /** @experimental */
174
+ interface FeedbackRefersTo {
175
+ kind: 'delegation' | 'artifact' | 'outcome';
176
+ /** For `'delegation'`, this is the taskId. */
177
+ ref: string;
178
+ }
179
+ /** @experimental */
180
+ interface FeedbackRating {
181
+ /** Score in the inclusive range [0, 1]. */
182
+ score: number;
183
+ label?: 'good' | 'bad' | 'neutral' | 'mixed';
184
+ notes: string;
185
+ }
186
+ /** @experimental */
187
+ interface DelegateFeedbackArgs {
188
+ refersTo: FeedbackRefersTo;
189
+ rating: FeedbackRating;
190
+ by: 'agent' | 'user' | 'downstream-judge';
191
+ /** ISO timestamp; defaults to server clock when omitted. */
192
+ capturedAt?: string;
193
+ namespace?: string;
194
+ }
195
+ /** @experimental */
196
+ interface DelegateFeedbackResult {
197
+ recorded: true;
198
+ id: string;
199
+ }
200
+ /** @experimental */
201
+ interface DelegationStatusArgs {
202
+ taskId: string;
203
+ }
204
+ /** @experimental */
205
+ interface DelegationProgress {
206
+ iteration: number;
207
+ phase: string;
208
+ }
209
+ /** @experimental */
210
+ interface DelegationError {
211
+ message: string;
212
+ kind: string;
213
+ }
214
+ /**
215
+ * Polymorphic `result` field: `CoderOutput` when the underlying profile
216
+ * is `'coder'`, a structurally-typed research output when `'researcher'`.
217
+ * The MCP wire carries it as JSON either way.
218
+ *
219
+ * @experimental
220
+ */
221
+ type DelegationResultPayload = {
222
+ profile: 'coder';
223
+ output: CoderOutput;
224
+ } | {
225
+ profile: 'researcher';
226
+ output: ResearchOutputShape;
227
+ };
228
+ /**
229
+ * Loose shape of a research output over the wire — the substrate cannot
230
+ * import the `ResearchOutput` type from agent-knowledge without inducing
231
+ * a dependency cycle, so the MCP layer treats it structurally.
232
+ *
233
+ * @experimental
234
+ */
235
+ interface ResearchOutputShape {
236
+ items: unknown[];
237
+ citations: unknown[];
238
+ proposedWrites: unknown[];
239
+ gaps?: string[];
240
+ notes?: string;
241
+ [key: string]: unknown;
242
+ }
243
+ /** @experimental */
244
+ interface DelegationStatusResult {
245
+ taskId: string;
246
+ profile: DelegationProfile;
247
+ status: DelegationStatus;
248
+ progress?: DelegationProgress;
249
+ result?: DelegationResultPayload;
250
+ error?: DelegationError;
251
+ costUsd?: number;
252
+ startedAt: string;
253
+ completedAt?: string;
254
+ }
255
+ /** @experimental */
256
+ interface DelegationHistoryArgs {
257
+ namespace?: string;
258
+ profile?: DelegationProfile;
259
+ /** ISO date — only delegations started at-or-after `since` are returned. */
260
+ since?: string;
261
+ /** Default 50. Hard cap 500. */
262
+ limit?: number;
263
+ }
264
+ /** @experimental */
265
+ interface DelegationFeedbackSnapshot {
266
+ id: string;
267
+ score: number;
268
+ label?: FeedbackRating['label'];
269
+ by: DelegateFeedbackArgs['by'];
270
+ notes: string;
271
+ capturedAt: string;
272
+ }
273
+ /** @experimental */
274
+ interface DelegationHistoryEntry {
275
+ taskId: string;
276
+ profile: DelegationProfile;
277
+ namespace?: string;
278
+ args: DelegateCodeArgs | DelegateResearchArgs;
279
+ status: DelegationStatus;
280
+ feedback?: DelegationFeedbackSnapshot[];
281
+ costUsd?: number;
282
+ startedAt: string;
283
+ completedAt?: string;
284
+ }
285
+ /** @experimental */
286
+ interface DelegationHistoryResult {
287
+ delegations: DelegationHistoryEntry[];
288
+ }
289
+
290
+ /** @experimental */
291
+ interface DelegateRunCtx {
292
+ signal: AbortSignal;
293
+ report(progress: DelegationProgress): void;
294
+ }
295
+ /** @experimental */
296
+ type CoderDelegate = (args: DelegateCodeArgs, ctx: DelegateRunCtx) => Promise<CoderOutput>;
297
+ /** @experimental */
298
+ type ResearcherDelegate = (args: DelegateResearchArgs, ctx: DelegateRunCtx) => Promise<ResearchOutputShape>;
299
+ /** @experimental Structured review verdict over a coder candidate. */
300
+ interface CoderReview {
301
+ /** Gate: only approved candidates are eligible to win. */
302
+ approved: boolean;
303
+ /** Reviewer's recommendation — surfaced in traces. */
304
+ recommendation: 'ship' | 'approve-with-nits' | 'changes-requested' | 'reject';
305
+ /** Readiness 0..1, used by the `highest-readiness` winner-selection strategy. */
306
+ readiness: number;
307
+ notes?: string;
308
+ }
309
+ /**
310
+ * @experimental
311
+ *
312
+ * Optional adversarial reviewer over a coder candidate that already passed
313
+ * mechanical validation (tests/typecheck/forbidden/diff/no-op/secrets). Folded
314
+ * from the ai-trading-blueprint delegation MCP: a candidate is only eligible to
315
+ * win if the reviewer approves it. The reviewer is the consumer's seam — an LLM
316
+ * judge, a `pnpm review` command, anything returning a `CoderReview`.
317
+ */
318
+ type CoderReviewer = (output: CoderOutput, task: CoderTask, ctx: {
319
+ signal: AbortSignal;
320
+ }) => Promise<CoderReview> | CoderReview;
321
+ /**
322
+ * @experimental Winner-selection strategy among validated (+ reviewed)
323
+ * candidates. `highest-readiness` requires a `reviewer`. Default `highest-score`
324
+ * (the kernel's behavior — preserves backward compatibility).
325
+ */
326
+ type CoderWinnerSelection = 'highest-score' | 'smallest-diff' | 'highest-readiness' | 'first-approved';
327
+ /** @experimental */
328
+ interface CreateDefaultCoderDelegateOptions {
329
+ /**
330
+ * Execution placement. Pass a {@link DelegationExecutor} (sibling or fleet)
331
+ * to control where worker iterations land. `sandboxClient` is a
332
+ * convenience shorthand that wraps the client in a sibling executor — pass
333
+ * one or the other, not both.
334
+ */
335
+ executor?: DelegationExecutor;
336
+ /**
337
+ * Convenience shorthand for sibling placement. Equivalent to
338
+ * `executor: createSiblingSandboxExecutor({ client: sandboxClient })`.
339
+ */
340
+ sandboxClient?: LoopSandboxClient;
341
+ /** Default `['claude-code', 'codex', 'opencode/zai-coding-plan/glm-5.1']` when variants > 1. */
342
+ fanoutHarnesses?: string[];
343
+ /** Hard cap on the kernel's per-batch concurrency. Default 4. */
344
+ maxConcurrency?: number;
345
+ /**
346
+ * Optional adversarial reviewer. When set, a candidate must pass mechanical
347
+ * validation AND `reviewer.approved` to be eligible to win — empty/secret/
348
+ * test-failing patches are already gone; this catches the "compiles + passes
349
+ * but wrong/unsafe" class the deterministic validator can't see.
350
+ */
351
+ reviewer?: CoderReviewer;
352
+ /** Winner-selection strategy among eligible candidates. Default `highest-score`. */
353
+ winnerSelection?: CoderWinnerSelection;
354
+ }
355
+ /**
356
+ * Build a coder delegate that drives `runLoop` against the project's
357
+ * sandbox client + coder profile. When `args.variants > 1` it switches
358
+ * to the multi-harness fanout topology.
359
+ *
360
+ * @experimental
361
+ */
362
+ declare function createDefaultCoderDelegate(options: CreateDefaultCoderDelegateOptions): CoderDelegate;
363
+
364
+ /**
365
+ * @experimental
366
+ *
367
+ * `createKbGate` — the valid-only knowledge-base growth gate, distilled from
368
+ * physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
369
+ * writer) runs candidate facts through this before persisting, so the KB grows
370
+ * with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
371
+ * vetoed at the gate.
372
+ *
373
+ * Fail-closed by construction: every judge must `accept`; the FIRST veto wins
374
+ * and the fact is rejected. The non-negotiable floor (always on, can't be
375
+ * disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
376
+ * literally appear in its `sourceText`. That single check kills the dominant
377
+ * failure mode (a confident claim decoupled from any real source).
378
+ *
379
+ * Pure + dependency-free: it operates on fact candidates, not on a store, so it
380
+ * composes with `@tangle-network/agent-knowledge` or any persistence layer
381
+ * without importing it. The remediation policy (correct-on-veto vs
382
+ * escalate-as-unverified) is the caller's — this returns the verdict; it never
383
+ * drops a fact silently.
384
+ */
385
+ /** @experimental A fact proposed for the KB, with its grounding. */
386
+ interface FactCandidate {
387
+ /** The atomic claim text. */
388
+ claim: string;
389
+ /** Optional extracted value (number or string) the claim asserts. */
390
+ value?: string | number;
391
+ /** Verbatim span lifted from the source that backs the claim. */
392
+ verbatimPassage: string;
393
+ /** The raw source text the passage must be grounded in. */
394
+ sourceText: string;
395
+ /** Where the fact claims to come from — checked for circular/self citations. */
396
+ citation?: string;
397
+ }
398
+ /** @experimental */
399
+ interface FactJudgeVerdict {
400
+ accept: boolean;
401
+ reason?: string;
402
+ }
403
+ /** @experimental A pluggable fact validator. Throwing is NOT allowed — return a
404
+ * verdict; a judge that throws is a programmer error, not a veto. */
405
+ interface FactJudge {
406
+ name: string;
407
+ judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
408
+ }
409
+ /** @experimental */
410
+ interface KbGateResult {
411
+ accepted: boolean;
412
+ /** Name of the judge that vetoed; undefined when accepted. */
413
+ vetoedBy?: string;
414
+ reason?: string;
415
+ }
416
+ /** @experimental */
417
+ interface CreateKbGateOptions {
418
+ /** Extra judges appended after the built-in floor (e.g. an LLM judge). */
419
+ judges?: FactJudge[];
420
+ /** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
421
+ minPassageChars?: number;
422
+ /**
423
+ * Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
424
+ * `'cad_params'`, `'requirements'`). A citation naming one is circular
425
+ * (laundering) — the fact cites a derived artifact, not a real source.
426
+ * Default `[]` (no circular check unless the consumer declares its kinds).
427
+ */
428
+ selfArtifactKinds?: string[];
429
+ }
430
+ /**
431
+ * @experimental
432
+ *
433
+ * Build a fail-closed KB gate. The returned function runs the built-in floor
434
+ * (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
435
+ * then any consumer judges, returning on the first veto.
436
+ */
437
+ declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
438
+
439
+ /**
440
+ * @experimental
441
+ *
442
+ * OpenAI Chat Completions `tools[]` projection of the 5 agent-runtime MCP
443
+ * delegation tools.
444
+ *
445
+ * Use when configuring `createOpenAICompatibleBackend({ tools: ... })` so the
446
+ * model can call `delegate_code`, `delegate_research`, `delegate_feedback`,
447
+ * `delegation_status`, and `delegation_history` through the OpenAI-compat
448
+ * transport (tcloud, OpenRouter, OpenAI direct, cli-bridge). The runtime
449
+ * surfaces tool calls as `tool_call` stream events — execution is the
450
+ * caller's responsibility (typically the parent sandbox runtime's MCP
451
+ * mount).
452
+ *
453
+ * Sandbox-SDK callers do NOT need this helper: the sandbox runtime mounts
454
+ * MCP servers natively and the in-sandbox harness discovers tools via the
455
+ * runtime, not via an OpenAI tools array.
456
+ *
457
+ * Tool name + description + JSON-schema are pulled from the canonical
458
+ * `DELEGATE_*` constants exported by `./tools/*` so the projection cannot
459
+ * drift from the server's own validators.
460
+ */
461
+
462
+ /**
463
+ * @experimental
464
+ *
465
+ * Returns the 5 delegation tools projected into OpenAI Chat Completions
466
+ * `tools[]` shape. The order is stable: `delegate_code`,
467
+ * `delegate_research`, `delegate_feedback`, `delegation_status`,
468
+ * `delegation_history`.
469
+ */
470
+ declare function mcpToolsForRuntimeMcp(): OpenAIChatTool[];
471
+ /**
472
+ * @experimental
473
+ *
474
+ * Subset filter — return only the projected tools whose `function.name`
475
+ * appears in `names`. Useful for curated mounts (e.g. only the queue-bound
476
+ * delegation tools, omitting `delegate_feedback`). Unknown names are
477
+ * silently ignored; pass an empty array to get an empty result.
478
+ */
479
+ declare function mcpToolsForRuntimeMcpSubset(names: ReadonlyArray<string>): OpenAIChatTool[];
480
+
481
+ /**
482
+ * OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.
483
+ *
484
+ * Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env
485
+ * when no explicit config is given. Avoids a runtime dependency on
486
+ * @opentelemetry/sdk-trace-base by shipping a minimal OTLP/JSON serializer.
487
+ *
488
+ * The exporter accepts both raw OtelSpan objects and LoopTraceEvents
489
+ * (which get converted to OTLP spans automatically).
490
+ */
491
+ interface OtelExportConfig {
492
+ /** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */
493
+ endpoint?: string;
494
+ /** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */
495
+ headers?: Record<string, string>;
496
+ /** Batch size before flush. Default 64. */
497
+ batchSize?: number;
498
+ /** Flush interval ms. Default 5000. */
499
+ flushIntervalMs?: number;
500
+ /** Resource attributes stamped on every export. */
501
+ resourceAttributes?: Record<string, string | number | boolean>;
502
+ /** Service name. Default 'agent-runtime'. */
503
+ serviceName?: string;
504
+ }
505
+ interface OtelExporter {
506
+ /** Export a span. */
507
+ exportSpan(span: OtelSpan): void;
508
+ /** Force flush pending spans. */
509
+ flush(): Promise<void>;
510
+ /** Shutdown cleanly. */
511
+ shutdown(): Promise<void>;
512
+ }
513
+ interface OtelSpan {
514
+ traceId: string;
515
+ spanId: string;
516
+ parentSpanId?: string;
517
+ name: string;
518
+ kind?: number;
519
+ startTimeUnixNano: string;
520
+ endTimeUnixNano: string;
521
+ attributes?: OtelAttribute[];
522
+ status?: {
523
+ code: number;
524
+ message?: string;
525
+ };
526
+ }
527
+ interface OtelAttribute {
528
+ key: string;
529
+ value: {
530
+ stringValue?: string;
531
+ intValue?: string;
532
+ doubleValue?: number;
533
+ boolValue?: boolean;
534
+ };
535
+ }
536
+ /**
537
+ * Create an OTEL exporter. Returns undefined when no endpoint is configured.
538
+ */
539
+ declare function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined;
540
+ /**
541
+ * Convert a LoopTraceEvent into an OtelSpan for export.
542
+ */
543
+ declare function loopEventToOtelSpan(event: {
544
+ kind: string;
545
+ runId: string;
546
+ timestamp: number;
547
+ payload: object;
548
+ }, traceId: string, parentSpanId?: string): OtelSpan;
549
+ /**
550
+ * Build a nested, real-duration OTLP span tree for ONE loop run from its full
551
+ * ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
552
+ * zero-duration span per event), this reconstructs the topology hierarchy a
553
+ * GenAI trace viewer renders natively:
554
+ *
555
+ * loop (invoke_workflow)
556
+ * └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
557
+ * ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
558
+ * └─ …
559
+ *
560
+ * Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
561
+ * a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
562
+ * verdict / placement / cost (not yet standardized). Pure: feed it a buffered
563
+ * per-runId event array (e.g. flushed on `loop.ended`) and export the result.
564
+ */
565
+ declare function buildLoopOtelSpans(events: ReadonlyArray<{
566
+ kind: string;
567
+ runId: string;
568
+ timestamp: number;
569
+ payload: object;
570
+ }>, traceId: string, rootParentSpanId?: string): OtelSpan[];
571
+ /** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
572
+ declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
573
+ interface EvalRunGeneration {
574
+ /** 0-based ordinal of this generation within the run (required by ingest). */
575
+ index: number;
576
+ /** Identity of the proposed surface change (content-addressed hash). */
577
+ surfaceHash: string;
578
+ /** Arbitrary provenance for this generation (rationale, evidence, source). */
579
+ surface?: unknown;
580
+ /** Per-scenario results; empty until the generation is measured. */
581
+ cells?: unknown[];
582
+ /** Mean composite score (0 when unmeasured — pair with labels.measured). */
583
+ compositeMean: number;
584
+ costUsd: number;
585
+ durationMs: number;
586
+ }
587
+ interface EvalRunEvent {
588
+ runId: string;
589
+ runDir: string;
590
+ /** ISO timestamp. */
591
+ timestamp: string;
592
+ status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
593
+ labels?: Record<string, string>;
594
+ baseline?: EvalRunGeneration;
595
+ generations?: EvalRunGeneration[];
596
+ gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
597
+ holdoutLift?: number;
598
+ totalCostUsd: number;
599
+ totalDurationMs: number;
600
+ errorMessage?: string;
601
+ }
602
+ interface EvalRunsExportConfig {
603
+ /** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
604
+ apiKey?: string;
605
+ /** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
606
+ base?: string;
607
+ /** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
608
+ idempotencyKey?: string;
609
+ }
610
+ interface EvalRunsExportResult {
611
+ ok: boolean;
612
+ status: number;
613
+ accepted: number;
614
+ rejected: Array<{
615
+ index: number;
616
+ reason: string;
617
+ }>;
618
+ }
619
+ /**
620
+ * Ship self-improvement eval-run events to Tangle Intelligence. Unlike the
621
+ * best-effort span exporter, this RESOLVES with the ingest verdict (accepted /
622
+ * rejected per event) so a consumer's loop can assert its provenance landed.
623
+ * Throws only on a missing key or network failure.
624
+ */
625
+ declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
626
+
627
+ export { type OtelSpan as $, type FactCandidate as A, type FactJudge as B, type CoderDelegate as C, type DelegationExecutor as D, type FactJudgeVerdict as E, type FleetHandle as F, type FeedbackRating as G, type FeedbackRefersTo as H, type FleetWorkspaceExecutorOptions as I, type ResearchOutputShape as J, type KbGateResult as K, createDefaultCoderDelegate as L, createFleetWorkspaceExecutor as M, createKbGate as N, type OtelExporter as O, createSiblingSandboxExecutor as P, mcpToolsForRuntimeMcp as Q, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, mcpToolsForRuntimeMcpSubset as T, type EvalRunEvent as U, type EvalRunGeneration as V, type EvalRunsExportConfig as W, type EvalRunsExportResult as X, INTELLIGENCE_WIRE_VERSION as Y, type OtelAttribute as Z, type OtelExportConfig as _, type DelegateFeedbackArgs as a, buildLoopOtelSpans as a0, createOtelExporter as a1, exportEvalRuns as a2, loopEventToOtelSpan as a3, type DelegationFeedbackSnapshot as b, type DelegationProfile as c, type DelegateCodeArgs as d, type DelegateResearchArgs as e, type DelegationStatus as f, type DelegationProgress as g, type DelegationResultPayload as h, type DelegationError as i, type DelegationStatusResult as j, type DelegationHistoryArgs as k, type DelegationHistoryEntry as l, type DelegateCodeResult as m, type DelegateFeedbackResult as n, type ResearchSource as o, type DelegateResearchResult as p, type DelegationHistoryResult as q, type DelegationStatusArgs as r, type CoderReview as s, type CoderReviewer as t, type CoderWinnerSelection as u, type CreateDefaultCoderDelegateOptions as v, type CreateKbGateOptions as w, type DelegateCodeConfig as x, type DelegateResearchConfig as y, type DelegateRunCtx as z };
@@ -1,7 +1,6 @@
1
1
  import { AgentProfile } from '@tangle-network/sandbox';
2
- import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-DrXVR2Fu.js';
2
+ import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-CmkQl8qE.js';
3
3
  import '@tangle-network/agent-eval';
4
- import './runtime-run-B8VIiOhI.js';
5
4
  import './types-CsCCryln.js';
6
5
 
7
6
  /**