@tangle-network/agent-runtime 0.33.0 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/loops.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
2
2
  export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
3
- import { I as Iteration, D as Driver, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, a as LoopWinner, b as LoopResult } from './types-CpCX1pfx.js';
4
- export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopSandboxPlacement, i as LoopStartedPayload, j as LoopTokenUsage, k as LoopTraceEmitter, l as LoopTraceEvent, m as ValidationCtx } from './types-CpCX1pfx.js';
3
+ import { I as Iteration, D as Driver, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, a as LoopWinner, b as LoopResult } from './types-DrXVR2Fu.js';
4
+ export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopPlanDescription, i as LoopPlanPayload, j as LoopSandboxPlacement, k as LoopStartedPayload, l as LoopTokenUsage, m as LoopTraceEmitter, n as LoopTraceEvent, o as ValidationCtx } from './types-DrXVR2Fu.js';
5
5
  import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
6
6
  export { DefaultVerdict } from '@tangle-network/agent-eval';
7
7
  import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
@@ -316,41 +316,6 @@ interface RunLoopOptions<Task, Output, Decision> {
316
316
  /** @experimental */
317
317
  declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
318
318
 
319
- /**
320
- * Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
321
- * dispatch.
322
- *
323
- * `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
324
- * the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
325
- * `runLoop` must forward the loop's cost AND token usage, or the guard reads
326
- * the run as a stub and throws. `reportLoopUsage` is that one line:
327
- *
328
- * const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
329
- * const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
330
- * reportLoopUsage(ctx, result)
331
- * return result.winner?.output as A
332
- * }
333
- *
334
- * Typed structurally against the campaign `DispatchContext.cost` so this module
335
- * stays free of an agent-eval import — it works with any cost meter exposing
336
- * `observe` + `observeTokens`.
337
- */
338
-
339
- /** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
340
- interface UsageSink {
341
- observe(amountUsd: number, source: string): void;
342
- observeTokens(usage: {
343
- input: number;
344
- output: number;
345
- }): void;
346
- }
347
- /**
348
- * Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
349
- * meter so the backend-integrity guard sees real LLM activity. `source`
350
- * defaults to `'loop'`.
351
- */
352
- declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
353
-
354
319
  /**
355
320
  * `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
356
321
  *
@@ -414,4 +379,39 @@ declare function loopCampaignDispatch<Task, Output, Decision, TScenario extends
414
379
  toLoopOptions: (scenario: TScenario) => LoopOptionsForDispatch<Task, Output, Decision>;
415
380
  }): DispatchFn<TScenario, TArtifact>;
416
381
 
382
+ /**
383
+ * Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
384
+ * dispatch.
385
+ *
386
+ * `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
387
+ * the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
388
+ * `runLoop` must forward the loop's cost AND token usage, or the guard reads
389
+ * the run as a stub and throws. `reportLoopUsage` is that one line:
390
+ *
391
+ * const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
392
+ * const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
393
+ * reportLoopUsage(ctx.cost, result)
394
+ * return result.winner?.output as A
395
+ * }
396
+ *
397
+ * Typed structurally against the campaign `DispatchContext.cost` so this module
398
+ * stays free of an agent-eval import — it works with any cost meter exposing
399
+ * `observe` + `observeTokens`.
400
+ */
401
+
402
+ /** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
403
+ interface UsageSink {
404
+ observe(amountUsd: number, source: string): void;
405
+ observeTokens(usage: {
406
+ input: number;
407
+ output: number;
408
+ }): void;
409
+ }
410
+ /**
411
+ * Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
412
+ * meter so the backend-integrity guard sees real LLM activity. `source`
413
+ * defaults to `'loop'`.
414
+ */
415
+ declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
416
+
417
417
  export { AgentRunSpec, type CreateDynamicDriverOptions, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, type DynamicDecision, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, type PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMove, type TopologyMoveEnvelope, type TopologyPlanner, type UsageSink, Validator, createDynamicDriver, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, loopCampaignDispatch, loopDispatch, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations, summarizeHistory };
package/dist/loops.js CHANGED
@@ -8,7 +8,7 @@ import {
8
8
  reportLoopUsage,
9
9
  runLoop,
10
10
  summarizeHistory
11
- } from "./chunk-GLTUUKTN.js";
11
+ } from "./chunk-7JBDJQLO.js";
12
12
  import {
13
13
  createFanoutVoteDriver,
14
14
  scoreFanoutVoteIterations
package/dist/mcp/bin.js CHANGED
@@ -3,12 +3,12 @@ import {
3
3
  createDefaultCoderDelegate,
4
4
  createMcpServer,
5
5
  detectExecutor
6
- } from "../chunk-AAJVQRPL.js";
6
+ } from "../chunk-TT3IHIQT.js";
7
7
  import "../chunk-HSX6PFZR.js";
8
8
  import "../chunk-GLR25NG7.js";
9
9
  import {
10
10
  runLoop
11
- } from "../chunk-GLTUUKTN.js";
11
+ } from "../chunk-7JBDJQLO.js";
12
12
  import "../chunk-5QVVET72.js";
13
13
  import "../chunk-PY6NMZYX.js";
14
14
  import "../chunk-SQSCRJ7U.js";
@@ -1,10 +1,10 @@
1
- import { L as LoopSandboxClient, h as LoopSandboxPlacement, k as LoopTraceEmitter } from '../types-CpCX1pfx.js';
1
+ import { L as LoopSandboxClient, j as LoopSandboxPlacement, m as LoopTraceEmitter } from '../types-DrXVR2Fu.js';
2
2
  import { SandboxInstance } from '@tangle-network/sandbox';
3
3
  import { CoderOutput } from '../profiles.js';
4
4
  import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
5
5
  export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
6
- import { O as OtelExporter } from '../otel-export-CsgwKFq8.js';
7
- export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-CsgwKFq8.js';
6
+ import { O as OtelExporter } from '../otel-export-xgf4J6bo.js';
7
+ export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-xgf4J6bo.js';
8
8
  import '@tangle-network/agent-eval';
9
9
  import '../runtime-run-B8VIiOhI.js';
10
10
  import '../types-CsCCryln.js';
package/dist/mcp/index.js CHANGED
@@ -9,13 +9,13 @@ import {
9
9
  createWorktree,
10
10
  detectExecutor,
11
11
  removeWorktree
12
- } from "../chunk-AAJVQRPL.js";
12
+ } from "../chunk-TT3IHIQT.js";
13
13
  import {
14
+ buildLoopOtelSpans,
14
15
  createOtelExporter,
15
- loopEventToOtelSpan,
16
16
  mcpToolsForRuntimeMcp,
17
17
  mcpToolsForRuntimeMcpSubset
18
- } from "../chunk-RO7K6JNF.js";
18
+ } from "../chunk-7ZECSZ3C.js";
19
19
  import {
20
20
  DELEGATE_CODE_DESCRIPTION,
21
21
  DELEGATE_CODE_INPUT_SCHEMA,
@@ -50,7 +50,7 @@ import {
50
50
  import {
51
51
  runLocalHarness
52
52
  } from "../chunk-GLR25NG7.js";
53
- import "../chunk-GLTUUKTN.js";
53
+ import "../chunk-7JBDJQLO.js";
54
54
  import "../chunk-5QVVET72.js";
55
55
  import "../chunk-PY6NMZYX.js";
56
56
  import "../chunk-SQSCRJ7U.js";
@@ -64,11 +64,20 @@ function readTraceContextFromEnv() {
64
64
  }
65
65
  function createPropagatingTraceEmitter(ctx) {
66
66
  const exporter = createOtelExporter();
67
+ const buffers = /* @__PURE__ */ new Map();
67
68
  const emitter = {
68
69
  emit(event) {
69
70
  if (!exporter) return;
70
- const span = loopEventToOtelSpan(event, ctx.traceId, ctx.parentSpanId);
71
- exporter.exportSpan(span);
71
+ const buf = buffers.get(event.runId);
72
+ if (buf) buf.push(event);
73
+ else buffers.set(event.runId, [event]);
74
+ if (event.kind === "loop.ended") {
75
+ const events = buffers.get(event.runId) ?? [event];
76
+ buffers.delete(event.runId);
77
+ for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {
78
+ exporter.exportSpan(span);
79
+ }
80
+ }
72
81
  }
73
82
  };
74
83
  return { emitter, exporter, context: ctx };
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport { createOtelExporter, loopEventToOtelSpan } from '../otel-export'\n\nexport interface TraceContext {\n /** Trace id inherited from the parent process, or a fresh one. */\n traceId: string\n /** Parent span id from the delegation that launched this MCP server. */\n parentSpanId?: string\n}\n\n/**\n * Read trace context from the process environment.\n * Returns a context with inherited ids or a freshly generated root.\n */\nexport function readTraceContextFromEnv(): TraceContext {\n const traceId = process.env.TRACE_ID || generateTraceId()\n const parentSpanId = process.env.PARENT_SPAN_ID || undefined\n return { traceId, parentSpanId }\n}\n\n/**\n * Create a LoopTraceEmitter that:\n * 1. Parents all spans under the inherited PARENT_SPAN_ID.\n * 2. 
Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.\n *\n * Returns both the emitter and the optional exporter handle for shutdown.\n */\nexport function createPropagatingTraceEmitter(ctx: TraceContext): {\n emitter: LoopTraceEmitter\n exporter: OtelExporter | undefined\n context: TraceContext\n} {\n const exporter = createOtelExporter()\n\n const emitter: LoopTraceEmitter = {\n emit(event: LoopTraceEvent) {\n if (!exporter) return\n const span = loopEventToOtelSpan(event, ctx.traceId, ctx.parentSpanId)\n exporter.exportSpan(span)\n },\n }\n\n return { emitter, exporter, context: ctx }\n}\n\n/**\n * Build env vars to pass to a child MCP subprocess so it inherits the\n * current trace context.\n */\nexport function traceContextToEnv(ctx: TraceContext): Record<string, string> {\n const env: Record<string, string> = { TRACE_ID: ctx.traceId }\n if (ctx.parentSpanId) env.PARENT_SPAN_ID = ctx.parentSpanId\n return env\n}\n\nfunction generateTraceId(): string {\n const bytes = new Uint8Array(16)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 16; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n 
.join('')\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkCO,SAAS,0BAAwC;AACtD,QAAM,UAAU,QAAQ,IAAI,YAAY,gBAAgB;AACxD,QAAM,eAAe,QAAQ,IAAI,kBAAkB;AACnD,SAAO,EAAE,SAAS,aAAa;AACjC;AASO,SAAS,8BAA8B,KAI5C;AACA,QAAM,WAAW,mBAAmB;AAEpC,QAAM,UAA4B;AAAA,IAChC,KAAK,OAAuB;AAC1B,UAAI,CAAC,SAAU;AACf,YAAM,OAAO,oBAAoB,OAAO,IAAI,SAAS,IAAI,YAAY;AACrE,eAAS,WAAW,IAAI;AAAA,IAC1B;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,UAAU,SAAS,IAAI;AAC3C;AAMO,SAAS,kBAAkB,KAA2C;AAC3E,QAAM,MAA8B,EAAE,UAAU,IAAI,QAAQ;AAC5D,MAAI,IAAI,aAAc,KAAI,iBAAiB,IAAI;AAC/C,SAAO;AACT;AAEA,SAAS,kBAA0B;AACjC,QAAM,QAAQ,IAAI,WAAW,EAAE;AAC/B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,IAAI,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACxE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;","names":[]}
1
+ {"version":3,"sources":["../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport { buildLoopOtelSpans, createOtelExporter } from '../otel-export'\n\nexport interface TraceContext {\n /** Trace id inherited from the parent process, or a fresh one. */\n traceId: string\n /** Parent span id from the delegation that launched this MCP server. */\n parentSpanId?: string\n}\n\n/**\n * Read trace context from the process environment.\n * Returns a context with inherited ids or a freshly generated root.\n */\nexport function readTraceContextFromEnv(): TraceContext {\n const traceId = process.env.TRACE_ID || generateTraceId()\n const parentSpanId = process.env.PARENT_SPAN_ID || undefined\n return { traceId, parentSpanId }\n}\n\n/**\n * Create a LoopTraceEmitter that:\n * 1. Parents all spans under the inherited PARENT_SPAN_ID.\n * 2. 
Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.\n *\n * Returns both the emitter and the optional exporter handle for shutdown.\n */\nexport function createPropagatingTraceEmitter(ctx: TraceContext): {\n emitter: LoopTraceEmitter\n exporter: OtelExporter | undefined\n context: TraceContext\n} {\n const exporter = createOtelExporter()\n\n // Buffer events per loop run, then emit the full nested span tree on\n // `loop.ended` so the topology hierarchy (loop → round → branch) reaches the\n // OTLP collector — not a flat list of zero-duration point spans. A run that\n // never reaches `loop.ended` (hard abort) drops its buffer; acceptable for\n // the short-lived MCP subprocess.\n const buffers = new Map<string, LoopTraceEvent[]>()\n\n const emitter: LoopTraceEmitter = {\n emit(event: LoopTraceEvent) {\n if (!exporter) return\n const buf = buffers.get(event.runId)\n if (buf) buf.push(event)\n else buffers.set(event.runId, [event])\n if (event.kind === 'loop.ended') {\n const events = buffers.get(event.runId) ?? 
[event]\n buffers.delete(event.runId)\n for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {\n exporter.exportSpan(span)\n }\n }\n },\n }\n\n return { emitter, exporter, context: ctx }\n}\n\n/**\n * Build env vars to pass to a child MCP subprocess so it inherits the\n * current trace context.\n */\nexport function traceContextToEnv(ctx: TraceContext): Record<string, string> {\n const env: Record<string, string> = { TRACE_ID: ctx.traceId }\n if (ctx.parentSpanId) env.PARENT_SPAN_ID = ctx.parentSpanId\n return env\n}\n\nfunction generateTraceId(): string {\n const bytes = new Uint8Array(16)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 16; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkCO,SAAS,0BAAwC;AACtD,QAAM,UAAU,QAAQ,IAAI,YAAY,gBAAgB;AACxD,QAAM,eAAe,QAAQ,IAAI,kBAAkB;AACnD,SAAO,EAAE,SAAS,aAAa;AACjC;AASO,SAAS,8BAA8B,KAI5C;AACA,QAAM,WAAW,mBAAmB;AAOpC,QAAM,UAAU,oBAAI,IAA8B;AAElD,QAAM,UAA4B;AAAA,IAChC,KAAK,OAAuB;AAC1B,UAAI,CAAC,SAAU;AACf,YAAM,MAAM,QAAQ,IAAI,MAAM,KAAK;AACnC,UAAI,IAAK,KAAI,KAAK,KAAK;AAAA,UAClB,SAAQ,IAAI,MAAM,OAAO,CAAC,KAAK,CAAC;AACrC,UAAI,MAAM,SAAS,cAAc;AAC/B,cAAM,SAAS,QAAQ,IAAI,MAAM,KAAK,KAAK,CAAC,KAAK;AACjD,gBAAQ,OAAO,MAAM,KAAK;AAC1B,mBAAW,QAAQ,mBAAmB,QAAQ,IAAI,SAAS,IAAI,YAAY,GAAG;AAC5E,mBAAS,WAAW,IAAI;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,UAAU,SAAS,IAAI;AAC3C;AAMO,SAAS,kBAAkB,KAA2C;AAC3E,QAAM,MAA8B,EAAE,UAAU,IAAI,QAAQ;AAC5D,MAAI,IAAI,aAAc,KAAI,iBAAiB,IAAI;AAC/C,SAAO;AACT;AAEA,SAAS,kBAA0B;AACjC,QAAM,QAAQ,IAAI,WAAW,EAAE;AAC/B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,IAAI,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACxE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,S
AAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;","names":[]}
@@ -110,6 +110,28 @@ declare function loopEventToOtelSpan(event: {
110
110
  timestamp: number;
111
111
  payload: object;
112
112
  }, traceId: string, parentSpanId?: string): OtelSpan;
113
+ /**
114
+ * Build a nested, real-duration OTLP span tree for ONE loop run from its full
115
+ * ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
116
+ * zero-duration span per event), this reconstructs the topology hierarchy a
117
+ * GenAI trace viewer renders natively:
118
+ *
119
+ * loop (invoke_workflow)
120
+ * └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
121
+ * ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
122
+ * └─ …
123
+ *
124
+ * Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
125
+ * a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
126
+ * verdict / placement / cost (not yet standardized). Pure: feed it a buffered
127
+ * per-runId event array (e.g. flushed on `loop.ended`) and export the result.
128
+ */
129
+ declare function buildLoopOtelSpans(events: ReadonlyArray<{
130
+ kind: string;
131
+ runId: string;
132
+ timestamp: number;
133
+ payload: object;
134
+ }>, traceId: string, rootParentSpanId?: string): OtelSpan[];
113
135
  /** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
114
136
  declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
115
137
  interface EvalRunGeneration {
@@ -166,4 +188,4 @@ interface EvalRunsExportResult {
166
188
  */
167
189
  declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
168
190
 
169
- export { type EvalRunEvent as E, INTELLIGENCE_WIRE_VERSION as I, type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type EvalRunGeneration as b, type EvalRunsExportConfig as c, type EvalRunsExportResult as d, type OtelAttribute as e, type OtelExportConfig as f, type OtelSpan as g, createOtelExporter as h, exportEvalRuns as i, loopEventToOtelSpan as l, mcpToolsForRuntimeMcp as m };
191
+ export { type EvalRunEvent as E, INTELLIGENCE_WIRE_VERSION as I, type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type EvalRunGeneration as b, type EvalRunsExportConfig as c, type EvalRunsExportResult as d, type OtelAttribute as e, type OtelExportConfig as f, type OtelSpan as g, buildLoopOtelSpans as h, createOtelExporter as i, exportEvalRuns as j, loopEventToOtelSpan as l, mcpToolsForRuntimeMcp as m };
@@ -1,5 +1,5 @@
1
1
  import { AgentProfile } from '@tangle-network/sandbox';
2
- import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-CpCX1pfx.js';
2
+ import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-DrXVR2Fu.js';
3
3
  import '@tangle-network/agent-eval';
4
4
  import './runtime-run-B8VIiOhI.js';
5
5
  import './types-CsCCryln.js';
@@ -118,6 +118,23 @@ interface Driver<Task, Output, Decision> {
118
118
  * is hit, or when the abort signal fires.
119
119
  */
120
120
  decide(history: ReadonlyArray<Iteration<Task, Output>>): Decision | Promise<Decision>;
121
+ /**
122
+ * Optional: describe the move `plan()` just produced, for trace emission.
123
+ * The kernel calls this immediately after `plan()` and emits the result in
124
+ * the `loop.plan` event so a topology viewer can render the agent's chosen
125
+ * move + rationale (not just the inferred fan-width). Drivers whose topology
126
+ * is a pure function of count (refine/fanout-vote) omit it — the kernel
127
+ * infers `moveKind` from the planned-task count. Agent-authored drivers
128
+ * (`createDynamicDriver`) return their chosen move's kind + rationale.
129
+ */
130
+ describePlan?(): LoopPlanDescription | undefined;
131
+ }
132
+ /** @experimental Driver-supplied description of the just-planned move. */
133
+ interface LoopPlanDescription {
134
+ /** Topology move this round — e.g. `'refine' | 'fanout' | 'verify' | 'stop'`. */
135
+ kind: string;
136
+ /** Why the driver chose this move (the agent's rationale), when available. */
137
+ rationale?: string;
121
138
  }
122
139
  /** @experimental */
123
140
  interface LoopWinner<Task, Output> {
@@ -174,6 +191,11 @@ type LoopTraceEvent = {
174
191
  runId: string;
175
192
  timestamp: number;
176
193
  payload: LoopStartedPayload;
194
+ } | {
195
+ kind: 'loop.plan';
196
+ runId: string;
197
+ timestamp: number;
198
+ payload: LoopPlanPayload;
177
199
  } | {
178
200
  kind: 'loop.iteration.started';
179
201
  runId: string;
@@ -207,11 +229,41 @@ interface LoopStartedPayload {
207
229
  maxIterations: number;
208
230
  maxConcurrency: number;
209
231
  }
232
+ /**
233
+ * Emitted once per `plan()` round, immediately after the driver plans. Carries
234
+ * the topology move so a viewer renders WHAT the agent decided + WHY, not just
235
+ * the inferred fan-width. `moveKind` is the driver's `describePlan().kind` when
236
+ * provided, else inferred from `plannedCount` (0→stop, 1→refine, N→fanout).
237
+ *
238
+ * @experimental
239
+ */
240
+ interface LoopPlanPayload {
241
+ /** 0-based plan round (one per `plan()` call). */
242
+ roundIndex: number;
243
+ /** Tasks the driver issued this round. */
244
+ plannedCount: number;
245
+ /** Topology move — `'refine' | 'fanout' | 'verify' | 'stop'` etc. */
246
+ moveKind: string;
247
+ /** Driver rationale for the move, when available. */
248
+ rationale?: string;
249
+ /**
250
+ * Iteration index this round branched FROM (the edge source). `undefined`
251
+ * for round 0 (root). Kernel-inferred branch point — the best-valid (else
252
+ * latest) iteration so far — unless a driver later declares it explicitly.
253
+ */
254
+ parentIndex?: number;
255
+ /** Iteration indices this round dispatched (the edge targets). */
256
+ childIndices: number[];
257
+ }
210
258
  /** @experimental */
211
259
  interface LoopIterationStartedPayload {
212
260
  iterationIndex: number;
213
261
  agentRunName: string;
214
262
  taskHash: string;
263
+ /** Plan round (== `LoopPlanPayload.roundIndex`) this iteration belongs to. */
264
+ groupId?: number;
265
+ /** Iteration this one was planned from; `undefined` ⇒ root. */
266
+ parentIndex?: number;
215
267
  }
216
268
  /**
217
269
  * Where the iteration's worker was placed. `sibling` = a fresh sandbox the
@@ -231,6 +283,10 @@ interface LoopIterationDispatchPayload {
231
283
  fleetId?: string;
232
284
  /** Set only when `placement === 'fleet'`. */
233
285
  machineId?: string;
286
+ /** Plan round this iteration belongs to. */
287
+ groupId?: number;
288
+ /** Iteration this one was planned from; `undefined` ⇒ root. */
289
+ parentIndex?: number;
234
290
  }
235
291
  /** @experimental */
236
292
  interface LoopIterationEndedPayload {
@@ -241,6 +297,16 @@ interface LoopIterationEndedPayload {
241
297
  error?: string;
242
298
  costUsd: number;
243
299
  durationMs: number;
300
+ /** Summed LLM token usage for this iteration — maps to gen_ai.usage.* on the
301
+ * branch span. Omitted when no `llm_call` events carried token counts. */
302
+ tokenUsage?: LoopTokenUsage;
303
+ /** Plan round this iteration belongs to. */
304
+ groupId?: number;
305
+ /** Iteration this one was planned from; `undefined` ⇒ root. */
306
+ parentIndex?: number;
307
+ /** Truncated string preview of the parsed output — for a viewer's drawer.
308
+ * Bounded to ~280 chars; never the full payload. */
309
+ outputPreview?: string;
244
310
  }
245
311
  /** @experimental */
246
312
  interface LoopDecisionPayload {
@@ -281,4 +347,4 @@ interface ExecCtx {
281
347
  parentSpanId?: string;
282
348
  }
283
349
 
284
- export type { AgentRunSpec as A, Driver as D, ExecCtx as E, Iteration as I, LoopSandboxClient as L, OutputAdapter as O, Validator as V, LoopWinner as a, LoopResult as b, LoopDecisionPayload as c, LoopEndedPayload as d, LoopIterationDispatchPayload as e, LoopIterationEndedPayload as f, LoopIterationStartedPayload as g, LoopSandboxPlacement as h, LoopStartedPayload as i, LoopTokenUsage as j, LoopTraceEmitter as k, LoopTraceEvent as l, ValidationCtx as m };
350
+ export type { AgentRunSpec as A, Driver as D, ExecCtx as E, Iteration as I, LoopSandboxClient as L, OutputAdapter as O, Validator as V, LoopWinner as a, LoopResult as b, LoopDecisionPayload as c, LoopEndedPayload as d, LoopIterationDispatchPayload as e, LoopIterationEndedPayload as f, LoopIterationStartedPayload as g, LoopPlanDescription as h, LoopPlanPayload as i, LoopSandboxPlacement as j, LoopStartedPayload as k, LoopTokenUsage as l, LoopTraceEmitter as m, LoopTraceEvent as n, ValidationCtx as o };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tangle-network/agent-runtime",
3
- "version": "0.33.0",
3
+ "version": "0.35.0",
4
4
  "description": "Reusable runtime lifecycle for domain-specific agents.",
5
5
  "homepage": "https://github.com/tangle-network/agent-runtime#readme",
6
6
  "repository": {
@@ -60,7 +60,8 @@
60
60
  },
61
61
  "files": [
62
62
  "dist",
63
- "README.md"
63
+ "README.md",
64
+ "skills"
64
65
  ],
65
66
  "publishConfig": {
66
67
  "access": "public"
@@ -0,0 +1,170 @@
1
+ ---
2
+ name: agent-runtime-adoption
3
+ description: Adopt @tangle-network/agent-runtime in a product — the driven-loop kernel (runLoop), topology drivers (refine / fanout-vote / dynamic agent-authored), the loopDispatch campaign bridge, MCP delegation, and identity-gated prompt-surface optimization (optimizePrompt). Self-contained; needs only the published package + @tangle-network/agent-eval. Use when wiring runLoop, choosing a topology driver, optimizing a system/planner prompt, or exposing delegation tools.
4
+ ---
5
+
6
+ # agent-runtime adoption — driven loops, topology drivers, prompt optimization
7
+
8
+ `@tangle-network/agent-runtime` is the task-lifecycle skeleton: it owns the loop
9
+ kernel and orchestration seams; it delegates domain behavior (models, tools,
10
+ scoring) to adapters you supply. It depends on `@tangle-network/agent-eval`
11
+ (substrate); never the reverse. This skill is self-contained — you need only the
12
+ two published packages.
13
+
14
+ ## Principle
15
+
16
+ The kernel owns iteration accounting, concurrency, abort, cost/token aggregation,
17
+ and trace emission. It does NOT own *what the agent runs* (sandbox SDK + profile),
18
+ *how output is decoded* (output adapter), *how it's scored* (validator), or
19
+ *topology* (driver). Keep those four as injected seams — do not fork the kernel.
20
+
21
+ **Fail loud.** External-boundary calls return typed outcomes; a `null` sandbox
22
+ client, a `null` output adapter return, or a malformed planner move must throw,
23
+ never silently produce a `{0,0}` cell the integrity guard reads as a stub.
24
+
25
+ ## The Driver seam — `runLoop` + topology
26
+
27
+ `runLoop({ driver, agentRun | agentRuns, output, validator?, task, ctx })` runs
28
+ each iteration: `driver.plan(task, history) → Task[]` → per task spawn a sandbox
29
+ on an `AgentRunSpec.profile` + `streamPrompt` → `output.parse(events)` →
30
+ `validator?.validate(...)` → `driver.decide(history)`. Terminal decisions:
31
+ `'stop' | 'pick-winner' | 'fail' | 'done'`. Returns
32
+ `LoopResult { decision, iterations, winner, costUsd, tokenUsage }`.
33
+
34
+ A `Driver<Task, Output, Decision>` is just `plan(task, history) → Task[]`
35
+ (`[task]`→refine, N copies→fanout, `[]`→stop) + `decide(history) → Decision`.
36
+ Topology is data; the kernel is topology-agnostic.
37
+
38
+ ### Three shipped drivers — `@tangle-network/agent-runtime/loops`
39
+
40
+ - **`createRefineDriver({ maxIterations?, refineTask? })`** — one task/iteration,
41
+ validator-gated; replay or rewrite the task until valid or capped. Use for
42
+ incremental patches, document revision, anything monotonic.
43
+ - **`createFanoutVoteDriver({ n, selector? })`** — N parallel attempts in
44
+ iteration 0, score once, pick the winner (default: highest valid score). Use
45
+ for multi-harness coder fanout, redundant research with disagreement detection.
46
+ - **`createDynamicDriver({ planner, maxIterations?, maxFanout? })`** — **the
47
+ agent authors the topology.** `plan`/`decide` are backed by an injected
48
+ `TopologyPlanner` that emits one `TopologyMove` per round
49
+ (`{kind:'refine',task}` | `{kind:'fanout',tasks}` | `{kind:'stop'}`). The
50
+ planner is invoked once per round in `plan()`; `decide()` reads the cached move
51
+ so an LLM planner is never double-called. Use when the right shape is
52
+ task-dependent (scout-then-fanout, refine-then-branch, decompose).
53
+
54
+ Topology is **orthogonal to harness**: a driver returns `Task[]`; the kernel
55
+ round-robins `agentRuns[]` to decide which harness (claude-code / codex /
56
+ opencode / pi) runs each branch. One driver spans all backends, including
57
+ fanning a single round across several.
58
+
59
+ ### Wiring an LLM planner — `createSandboxPlanner`
60
+
61
+ ```ts
62
+ import { createDynamicDriver, createSandboxPlanner, runLoop } from '@tangle-network/agent-runtime/loops'
63
+
64
+ const planner = createSandboxPlanner<Task, Out>({
65
+ client, profile: plannerProfile, // any harness; cheap model is fine
66
+ decodeTask: (raw) => raw as Task, // envelope task → domain Task
67
+ // buildPrompt? — defaults to a history-summary prompt; override to customize
68
+ })
69
+ const result = await runLoop({
70
+ driver: createDynamicDriver({ planner, maxIterations: 8 }),
71
+ agentRuns: workerSpecs, output, validator, task, ctx: { sandboxClient: client },
72
+ })
73
+ ```
74
+
75
+ The planner emits a JSON envelope (`{ kind, tasks?, n?, rationale }`); a missing,
76
+ unparseable, or unknown-kind envelope throws `PlannerError` — the loop never runs
77
+ a topology nobody chose.
78
+
79
+ ### Driver gotchas
80
+
81
+ - `runLoop` validates `ctx.sandboxClient.create` exists or throws
82
+ `ValidationError`. Never stub a `null` client.
83
+ - The kernel emits `loop.started / iteration.dispatch / iteration.ended /
84
+ decision / ended` via `ctx.traceEmitter`. Wire it to the same OTLP sink as the
85
+ chat path so loop telemetry is queryable.
86
+ - The output adapter MUST return a typed value or throw. A `null`/`undefined`
87
+ return silently drops the iteration from scoring.
88
+ - Dynamic driver: set `runLoop`'s own `maxIterations` to at least the driver's, so
89
+ the driver's cap governs and the loop closes on a clean `'done'`.
90
+
91
+ ## Campaign bridge — `loopDispatch` / `loopCampaignDispatch`
92
+
93
+ To run `runLoop` as an agent-eval campaign cell, do NOT hand-build the ExecCtx,
94
+ forward the trace, and report usage every time (the third fails silently — forgetting it
95
+ yields a `{0,0}` cell that `assertRealBackend` reads as a stub). Use the adapter:
96
+
97
+ ```ts
98
+ import { loopCampaignDispatch } from '@tangle-network/agent-runtime/loops'
99
+ const dispatch = loopCampaignDispatch({
100
+ sandboxClient,
101
+ toLoopOptions: (scenario) => ({ driver, agentRun, output, validator, task: toTask(scenario) }),
102
+ // toArtifact? — defaults to result.winner?.output
103
+ })
104
+ // pass `dispatch` to runCampaign / runEvalCampaign; usage + trace are auto-forwarded
105
+ ```
106
+
107
+ `loopDispatch` is the `runProfileMatrix` variant (profile is an axis).
108
+
109
+ ## Identity-gated prompt optimization — `optimizePrompt`
110
+
111
+ Exported from `@tangle-network/agent-runtime/improvement`, `optimizePrompt` is the text-surface entry point onto
112
+ agent-eval's `runImprovementLoop` — sibling to `improvementDriver` (the
113
+ code/worktree path). Optimizes any prompt surface (system / planner / judge
114
+ rubric) and is **identity-gated by construction**: it runs evals, proposes
115
+ candidates (default driver `gepaDriver`), and the held-out gate compares
116
+ candidate vs baseline. `result.prompt` is the **baseline unless the gate decided
117
+ `'ship'`** — so registering a prompt for optimization can never regress it; it
118
+ only improves when held-out data earns it.
119
+
120
+ ```ts
121
+ import { optimizePrompt } from '@tangle-network/agent-runtime/improvement'
122
+ const { prompt, improved, decision, delta } = await optimizePrompt({
123
+ baselinePrompt: CURRENT_SYSTEM_PROMPT,
124
+ runWithPrompt: (prompt, scenario, ctx) => runYourThing(prompt, scenario), // sandbox / runLoop / direct call
125
+ scenarios, holdoutScenarios, judges, runDir,
126
+ reflection: { llm, model: REFLECTION_MODEL }, // builds the default gepaDriver
127
+ // gate? — defaults to heldOutGate; pass defaultProductionGate for red-team hardening
128
+ })
129
+ // use `prompt` unconditionally: it's the baseline until a candidate genuinely wins
130
+ ```
131
+
132
+ ### optimizePrompt gotchas — read before wiring
133
+
134
+ - **`gepaDriver` mutates TEXT only**, and its only structural guards are `##` H2
135
+ headings (`preserveSections`) and `maxSentenceEdits`. Make load-bearing sections
136
+ of your prompt real `##` headings, and treat the output schema as fixed code —
137
+ GEPA optimizes the prose, never the envelope/contract.
138
+ - **Scenarios must be domain-real.** Derive them from the surface's own traces /
139
+ ground truth, not from unrelated corpora. Cross-domain examples are noise.
140
+ - **Extend, don't fork.** If the product already wires `runImprovementLoop`
141
+ (e.g. for a main-agent prompt), add the new surface as another target in that
142
+ harness rather than bolting on a second optimizer.
143
+ - `runWithPrompt` is the only domain seam — the optimizer never assumes how a
144
+ prompt runs. Report cost via `ctx.cost` inside it so the integrity guard sees
145
+ real activity.
146
+ - A live run needs a real backend (`TANGLE_API_KEY` / router, or local
147
+ cli-bridge) and real spend; it is not free.
148
+
149
+ ## MCP delegation — `@tangle-network/agent-runtime/mcp`
150
+
151
+ `agent-runtime-mcp` (stdio) exposes delegation tools (`delegate_code`,
152
+ `delegate_research`, …) that drive `runLoop` behind the scenes (refine or
153
+ fanout-vote per `variants`). Env: `TANGLE_API_KEY`, `SANDBOX_BASE_URL`,
154
+ `TANGLE_FLEET_ID` (sibling vs fleet placement), `MCP_CODER_FANOUT_HARNESSES`.
155
+ Mount it on a production `AgentProfile.mcp`; do not re-implement delegation.
156
+
157
+ ## Acceptance checklist
158
+
159
+ - [ ] Topology is a `Driver`, not hard-coded control flow. Reuse refine /
160
+ fanout-vote / dynamic; build a custom `Driver` against
161
+ `loops/types.ts:Driver` only when none fit — never fork the kernel.
162
+ - [ ] `runLoop` is bridged to campaigns via `loopDispatch` / `loopCampaignDispatch`
163
+ (usage + trace auto-forwarded), not a hand-rolled ExecCtx.
164
+ - [ ] Every optimizable prompt is registered through `optimizePrompt` (or the
165
+ product's existing `runImprovementLoop`), identity-gated on a held-out set.
166
+ - [ ] Boundaries fail loud: no `null` sandbox client, no silent adapter return,
167
+ no unguarded planner envelope.
168
+
169
+ For the full self-improving pipeline (trace sink → analyst loop → scorecard →
170
+ production loop → CI), see the broader `agent-eval-adoption` skill.