@tangle-network/agent-runtime 0.33.0 → 0.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{chunk-GLTUUKTN.js → chunk-7KS6UEHB.js} +28 -9
- package/dist/chunk-7KS6UEHB.js.map +1 -0
- package/dist/{chunk-RO7K6JNF.js → chunk-Q4ZDSLBD.js} +152 -2
- package/dist/chunk-Q4ZDSLBD.js.map +1 -0
- package/dist/{chunk-AAJVQRPL.js → chunk-VVHX5RKE.js} +2 -2
- package/dist/index.d.ts +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/loops.d.ts +37 -37
- package/dist/loops.js +1 -1
- package/dist/mcp/bin.js +2 -2
- package/dist/mcp/index.d.ts +3 -3
- package/dist/mcp/index.js +15 -6
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-CsgwKFq8.d.ts → otel-export-xgf4J6bo.d.ts} +23 -1
- package/dist/profiles.d.ts +1 -1
- package/dist/{types-CpCX1pfx.d.ts → types-BZw2bqJc.d.ts} +44 -1
- package/package.json +3 -2
- package/skills/agent-runtime-adoption/SKILL.md +170 -0
- package/dist/chunk-GLTUUKTN.js.map +0 -1
- package/dist/chunk-RO7K6JNF.js.map +0 -1
- /package/dist/{chunk-AAJVQRPL.js.map → chunk-VVHX5RKE.js.map} +0 -0
package/dist/loops.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
|
|
2
2
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
-
import { I as Iteration, D as Driver, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, a as LoopWinner, b as LoopResult } from './types-
|
|
4
|
-
export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as
|
|
3
|
+
import { I as Iteration, D as Driver, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, a as LoopWinner, b as LoopResult } from './types-BZw2bqJc.js';
|
|
4
|
+
export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopPlanDescription, i as LoopPlanPayload, j as LoopSandboxPlacement, k as LoopStartedPayload, l as LoopTokenUsage, m as LoopTraceEmitter, n as LoopTraceEvent, o as ValidationCtx } from './types-BZw2bqJc.js';
|
|
5
5
|
import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
|
|
6
6
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
7
7
|
import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
@@ -316,41 +316,6 @@ interface RunLoopOptions<Task, Output, Decision> {
|
|
|
316
316
|
/** @experimental */
|
|
317
317
|
declare function runLoop<Task, Output, Decision>(options: RunLoopOptions<Task, Output, Decision>): Promise<LoopResult<Task, Output, Decision>>;
|
|
318
318
|
|
|
319
|
-
/**
|
|
320
|
-
* Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
|
|
321
|
-
* dispatch.
|
|
322
|
-
*
|
|
323
|
-
* `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
|
|
324
|
-
* the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
|
|
325
|
-
* `runLoop` must forward the loop's cost AND token usage, or the guard reads
|
|
326
|
-
* the run as a stub and throws. `reportLoopUsage` is that one line:
|
|
327
|
-
*
|
|
328
|
-
* const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
|
|
329
|
-
* const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
|
|
330
|
-
* reportLoopUsage(ctx, result)
|
|
331
|
-
* return result.winner?.output as A
|
|
332
|
-
* }
|
|
333
|
-
*
|
|
334
|
-
* Typed structurally against the campaign `DispatchContext.cost` so this module
|
|
335
|
-
* stays free of an agent-eval import — it works with any cost meter exposing
|
|
336
|
-
* `observe` + `observeTokens`.
|
|
337
|
-
*/
|
|
338
|
-
|
|
339
|
-
/** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
|
|
340
|
-
interface UsageSink {
|
|
341
|
-
observe(amountUsd: number, source: string): void;
|
|
342
|
-
observeTokens(usage: {
|
|
343
|
-
input: number;
|
|
344
|
-
output: number;
|
|
345
|
-
}): void;
|
|
346
|
-
}
|
|
347
|
-
/**
|
|
348
|
-
* Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
|
|
349
|
-
* meter so the backend-integrity guard sees real LLM activity. `source`
|
|
350
|
-
* defaults to `'loop'`.
|
|
351
|
-
*/
|
|
352
|
-
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
353
|
-
|
|
354
319
|
/**
|
|
355
320
|
* `loopDispatch` — turn `runLoop` into an agent-eval campaign dispatch.
|
|
356
321
|
*
|
|
@@ -414,4 +379,39 @@ declare function loopCampaignDispatch<Task, Output, Decision, TScenario extends
|
|
|
414
379
|
toLoopOptions: (scenario: TScenario) => LoopOptionsForDispatch<Task, Output, Decision>;
|
|
415
380
|
}): DispatchFn<TScenario, TArtifact>;
|
|
416
381
|
|
|
382
|
+
/**
|
|
383
|
+
* Bridge a finished `runLoop` into an agent-eval campaign / profile-matrix
|
|
384
|
+
* dispatch.
|
|
385
|
+
*
|
|
386
|
+
* `runProfileMatrix` (and `runCampaign`) run the backend-integrity guard over
|
|
387
|
+
* the token usage a dispatch reports through `ctx.cost`. A dispatch that wraps
|
|
388
|
+
* `runLoop` must forward the loop's cost AND token usage, or the guard reads
|
|
389
|
+
* the run as a stub and throws. `reportLoopUsage` is that one line:
|
|
390
|
+
*
|
|
391
|
+
* const dispatch: ProfileDispatchFn<S, A> = async (profile, scenario, ctx) => {
|
|
392
|
+
* const result = await runLoop({ ...optsFor(profile, scenario), ctx: loopCtx })
|
|
393
|
+
* reportLoopUsage(ctx.cost, result)
|
|
394
|
+
* return result.winner?.output as A
|
|
395
|
+
* }
|
|
396
|
+
*
|
|
397
|
+
* Typed structurally against the campaign `DispatchContext.cost` so this module
|
|
398
|
+
* stays free of an agent-eval import — it works with any cost meter exposing
|
|
399
|
+
* `observe` + `observeTokens`.
|
|
400
|
+
*/
|
|
401
|
+
|
|
402
|
+
/** The slice of an agent-eval campaign `DispatchContext.cost` this needs. */
|
|
403
|
+
interface UsageSink {
|
|
404
|
+
observe(amountUsd: number, source: string): void;
|
|
405
|
+
observeTokens(usage: {
|
|
406
|
+
input: number;
|
|
407
|
+
output: number;
|
|
408
|
+
}): void;
|
|
409
|
+
}
|
|
410
|
+
/**
|
|
411
|
+
* Forward a `LoopResult`'s aggregated cost + token usage into a campaign cost
|
|
412
|
+
* meter so the backend-integrity guard sees real LLM activity. `source`
|
|
413
|
+
* defaults to `'loop'`.
|
|
414
|
+
*/
|
|
415
|
+
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
416
|
+
|
|
417
417
|
export { AgentRunSpec, type CreateDynamicDriverOptions, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, type DynamicDecision, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, type PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMove, type TopologyMoveEnvelope, type TopologyPlanner, type UsageSink, Validator, createDynamicDriver, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, loopCampaignDispatch, loopDispatch, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations, summarizeHistory };
|
package/dist/loops.js
CHANGED
package/dist/mcp/bin.js
CHANGED
|
@@ -3,12 +3,12 @@ import {
|
|
|
3
3
|
createDefaultCoderDelegate,
|
|
4
4
|
createMcpServer,
|
|
5
5
|
detectExecutor
|
|
6
|
-
} from "../chunk-
|
|
6
|
+
} from "../chunk-VVHX5RKE.js";
|
|
7
7
|
import "../chunk-HSX6PFZR.js";
|
|
8
8
|
import "../chunk-GLR25NG7.js";
|
|
9
9
|
import {
|
|
10
10
|
runLoop
|
|
11
|
-
} from "../chunk-
|
|
11
|
+
} from "../chunk-7KS6UEHB.js";
|
|
12
12
|
import "../chunk-5QVVET72.js";
|
|
13
13
|
import "../chunk-PY6NMZYX.js";
|
|
14
14
|
import "../chunk-SQSCRJ7U.js";
|
package/dist/mcp/index.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import { L as LoopSandboxClient,
|
|
1
|
+
import { L as LoopSandboxClient, j as LoopSandboxPlacement, m as LoopTraceEmitter } from '../types-BZw2bqJc.js';
|
|
2
2
|
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
3
3
|
import { CoderOutput } from '../profiles.js';
|
|
4
4
|
import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
|
|
5
5
|
export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
|
|
6
|
-
import { O as OtelExporter } from '../otel-export-
|
|
7
|
-
export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-
|
|
6
|
+
import { O as OtelExporter } from '../otel-export-xgf4J6bo.js';
|
|
7
|
+
export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-xgf4J6bo.js';
|
|
8
8
|
import '@tangle-network/agent-eval';
|
|
9
9
|
import '../runtime-run-B8VIiOhI.js';
|
|
10
10
|
import '../types-CsCCryln.js';
|
package/dist/mcp/index.js
CHANGED
|
@@ -9,13 +9,13 @@ import {
|
|
|
9
9
|
createWorktree,
|
|
10
10
|
detectExecutor,
|
|
11
11
|
removeWorktree
|
|
12
|
-
} from "../chunk-
|
|
12
|
+
} from "../chunk-VVHX5RKE.js";
|
|
13
13
|
import {
|
|
14
|
+
buildLoopOtelSpans,
|
|
14
15
|
createOtelExporter,
|
|
15
|
-
loopEventToOtelSpan,
|
|
16
16
|
mcpToolsForRuntimeMcp,
|
|
17
17
|
mcpToolsForRuntimeMcpSubset
|
|
18
|
-
} from "../chunk-
|
|
18
|
+
} from "../chunk-Q4ZDSLBD.js";
|
|
19
19
|
import {
|
|
20
20
|
DELEGATE_CODE_DESCRIPTION,
|
|
21
21
|
DELEGATE_CODE_INPUT_SCHEMA,
|
|
@@ -50,7 +50,7 @@ import {
|
|
|
50
50
|
import {
|
|
51
51
|
runLocalHarness
|
|
52
52
|
} from "../chunk-GLR25NG7.js";
|
|
53
|
-
import "../chunk-
|
|
53
|
+
import "../chunk-7KS6UEHB.js";
|
|
54
54
|
import "../chunk-5QVVET72.js";
|
|
55
55
|
import "../chunk-PY6NMZYX.js";
|
|
56
56
|
import "../chunk-SQSCRJ7U.js";
|
|
@@ -64,11 +64,20 @@ function readTraceContextFromEnv() {
|
|
|
64
64
|
}
|
|
65
65
|
function createPropagatingTraceEmitter(ctx) {
|
|
66
66
|
const exporter = createOtelExporter();
|
|
67
|
+
const buffers = /* @__PURE__ */ new Map();
|
|
67
68
|
const emitter = {
|
|
68
69
|
emit(event) {
|
|
69
70
|
if (!exporter) return;
|
|
70
|
-
const
|
|
71
|
-
|
|
71
|
+
const buf = buffers.get(event.runId);
|
|
72
|
+
if (buf) buf.push(event);
|
|
73
|
+
else buffers.set(event.runId, [event]);
|
|
74
|
+
if (event.kind === "loop.ended") {
|
|
75
|
+
const events = buffers.get(event.runId) ?? [event];
|
|
76
|
+
buffers.delete(event.runId);
|
|
77
|
+
for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {
|
|
78
|
+
exporter.exportSpan(span);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
72
81
|
}
|
|
73
82
|
};
|
|
74
83
|
return { emitter, exporter, context: ctx };
|
package/dist/mcp/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport {
|
|
1
|
+
{"version":3,"sources":["../../src/mcp/trace-propagation.ts"],"sourcesContent":["/**\n * @experimental\n *\n * Trace context propagation for MCP subprocess.\n *\n * When the MCP server is launched as a child process by a sandbox harness,\n * the parent passes trace context via environment variables:\n *\n * TRACE_ID=<current-run-trace-id>\n * PARENT_SPAN_ID=<span-that-dispatched-the-delegation>\n *\n * The MCP server reads these at startup and uses them as the root of its\n * internal trace tree. All spans emitted by `runLoop` invocations inside\n * the MCP are children of the parent's delegation span.\n *\n * When these env vars are absent, the MCP generates a fresh trace root —\n * the server operates standalone without trace joining.\n */\n\nimport type { LoopTraceEmitter, LoopTraceEvent } from '../loops/types'\nimport type { OtelExporter } from '../otel-export'\nimport { buildLoopOtelSpans, createOtelExporter } from '../otel-export'\n\nexport interface TraceContext {\n /** Trace id inherited from the parent process, or a fresh one. */\n traceId: string\n /** Parent span id from the delegation that launched this MCP server. */\n parentSpanId?: string\n}\n\n/**\n * Read trace context from the process environment.\n * Returns a context with inherited ids or a freshly generated root.\n */\nexport function readTraceContextFromEnv(): TraceContext {\n const traceId = process.env.TRACE_ID || generateTraceId()\n const parentSpanId = process.env.PARENT_SPAN_ID || undefined\n return { traceId, parentSpanId }\n}\n\n/**\n * Create a LoopTraceEmitter that:\n * 1. Parents all spans under the inherited PARENT_SPAN_ID.\n * 2. 
Exports spans to OTEL when OTEL_EXPORTER_OTLP_ENDPOINT is set.\n *\n * Returns both the emitter and the optional exporter handle for shutdown.\n */\nexport function createPropagatingTraceEmitter(ctx: TraceContext): {\n emitter: LoopTraceEmitter\n exporter: OtelExporter | undefined\n context: TraceContext\n} {\n const exporter = createOtelExporter()\n\n // Buffer events per loop run, then emit the full nested span tree on\n // `loop.ended` so the topology hierarchy (loop → round → branch) reaches the\n // OTLP collector — not a flat list of zero-duration point spans. A run that\n // never reaches `loop.ended` (hard abort) drops its buffer; acceptable for\n // the short-lived MCP subprocess.\n const buffers = new Map<string, LoopTraceEvent[]>()\n\n const emitter: LoopTraceEmitter = {\n emit(event: LoopTraceEvent) {\n if (!exporter) return\n const buf = buffers.get(event.runId)\n if (buf) buf.push(event)\n else buffers.set(event.runId, [event])\n if (event.kind === 'loop.ended') {\n const events = buffers.get(event.runId) ?? 
[event]\n buffers.delete(event.runId)\n for (const span of buildLoopOtelSpans(events, ctx.traceId, ctx.parentSpanId)) {\n exporter.exportSpan(span)\n }\n }\n },\n }\n\n return { emitter, exporter, context: ctx }\n}\n\n/**\n * Build env vars to pass to a child MCP subprocess so it inherits the\n * current trace context.\n */\nexport function traceContextToEnv(ctx: TraceContext): Record<string, string> {\n const env: Record<string, string> = { TRACE_ID: ctx.traceId }\n if (ctx.parentSpanId) env.PARENT_SPAN_ID = ctx.parentSpanId\n return env\n}\n\nfunction generateTraceId(): string {\n const bytes = new Uint8Array(16)\n if (typeof globalThis.crypto?.getRandomValues === 'function') {\n globalThis.crypto.getRandomValues(bytes)\n } else {\n for (let i = 0; i < 16; i++) bytes[i] = Math.floor(Math.random() * 256)\n }\n return Array.from(bytes)\n .map((b) => b.toString(16).padStart(2, '0'))\n .join('')\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkCO,SAAS,0BAAwC;AACtD,QAAM,UAAU,QAAQ,IAAI,YAAY,gBAAgB;AACxD,QAAM,eAAe,QAAQ,IAAI,kBAAkB;AACnD,SAAO,EAAE,SAAS,aAAa;AACjC;AASO,SAAS,8BAA8B,KAI5C;AACA,QAAM,WAAW,mBAAmB;AAOpC,QAAM,UAAU,oBAAI,IAA8B;AAElD,QAAM,UAA4B;AAAA,IAChC,KAAK,OAAuB;AAC1B,UAAI,CAAC,SAAU;AACf,YAAM,MAAM,QAAQ,IAAI,MAAM,KAAK;AACnC,UAAI,IAAK,KAAI,KAAK,KAAK;AAAA,UAClB,SAAQ,IAAI,MAAM,OAAO,CAAC,KAAK,CAAC;AACrC,UAAI,MAAM,SAAS,cAAc;AAC/B,cAAM,SAAS,QAAQ,IAAI,MAAM,KAAK,KAAK,CAAC,KAAK;AACjD,gBAAQ,OAAO,MAAM,KAAK;AAC1B,mBAAW,QAAQ,mBAAmB,QAAQ,IAAI,SAAS,IAAI,YAAY,GAAG;AAC5E,mBAAS,WAAW,IAAI;AAAA,QAC1B;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,UAAU,SAAS,IAAI;AAC3C;AAMO,SAAS,kBAAkB,KAA2C;AAC3E,QAAM,MAA8B,EAAE,UAAU,IAAI,QAAQ;AAC5D,MAAI,IAAI,aAAc,KAAI,iBAAiB,IAAI;AAC/C,SAAO;AACT;AAEA,SAAS,kBAA0B;AACjC,QAAM,QAAQ,IAAI,WAAW,EAAE;AAC/B,MAAI,OAAO,WAAW,QAAQ,oBAAoB,YAAY;AAC5D,eAAW,OAAO,gBAAgB,KAAK;AAAA,EACzC,OAAO;AACL,aAAS,IAAI,GAAG,IAAI,IAAI,IAAK,OAAM,CAAC,IAAI,KAAK,MAAM,KAAK,OAAO,IAAI,GAAG;AAAA,EACxE;AACA,SAAO,MAAM,KAAK,KAAK,EACpB,IAAI,CAAC,MAAM,EAAE,S
AAS,EAAE,EAAE,SAAS,GAAG,GAAG,CAAC,EAC1C,KAAK,EAAE;AACZ;","names":[]}
|
|
package/dist/{otel-export-CsgwKFq8.d.ts → otel-export-xgf4J6bo.d.ts}
CHANGED
|
@@ -110,6 +110,28 @@ declare function loopEventToOtelSpan(event: {
|
|
|
110
110
|
timestamp: number;
|
|
111
111
|
payload: object;
|
|
112
112
|
}, traceId: string, parentSpanId?: string): OtelSpan;
|
|
113
|
+
/**
|
|
114
|
+
* Build a nested, real-duration OTLP span tree for ONE loop run from its full
|
|
115
|
+
* ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
|
|
116
|
+
* zero-duration span per event), this reconstructs the topology hierarchy a
|
|
117
|
+
* GenAI trace viewer renders natively:
|
|
118
|
+
*
|
|
119
|
+
* loop (invoke_workflow)
|
|
120
|
+
* └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
|
|
121
|
+
* ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
|
|
122
|
+
* └─ …
|
|
123
|
+
*
|
|
124
|
+
* Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
|
|
125
|
+
* a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
|
|
126
|
+
* verdict / placement / cost (not yet standardized). Pure: feed it a buffered
|
|
127
|
+
* per-runId event array (e.g. flushed on `loop.ended`) and export the result.
|
|
128
|
+
*/
|
|
129
|
+
declare function buildLoopOtelSpans(events: ReadonlyArray<{
|
|
130
|
+
kind: string;
|
|
131
|
+
runId: string;
|
|
132
|
+
timestamp: number;
|
|
133
|
+
payload: object;
|
|
134
|
+
}>, traceId: string, rootParentSpanId?: string): OtelSpan[];
|
|
113
135
|
/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
|
|
114
136
|
declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
|
|
115
137
|
interface EvalRunGeneration {
|
|
@@ -166,4 +188,4 @@ interface EvalRunsExportResult {
|
|
|
166
188
|
*/
|
|
167
189
|
declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
|
|
168
190
|
|
|
169
|
-
export { type EvalRunEvent as E, INTELLIGENCE_WIRE_VERSION as I, type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type EvalRunGeneration as b, type EvalRunsExportConfig as c, type EvalRunsExportResult as d, type OtelAttribute as e, type OtelExportConfig as f, type OtelSpan as g,
|
|
191
|
+
export { type EvalRunEvent as E, INTELLIGENCE_WIRE_VERSION as I, type OtelExporter as O, mcpToolsForRuntimeMcpSubset as a, type EvalRunGeneration as b, type EvalRunsExportConfig as c, type EvalRunsExportResult as d, type OtelAttribute as e, type OtelExportConfig as f, type OtelSpan as g, buildLoopOtelSpans as h, createOtelExporter as i, exportEvalRuns as j, loopEventToOtelSpan as l, mcpToolsForRuntimeMcp as m };
|
package/dist/profiles.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { AgentProfile } from '@tangle-network/sandbox';
|
|
2
|
-
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-
|
|
2
|
+
import { O as OutputAdapter, V as Validator, A as AgentRunSpec, D as Driver } from './types-BZw2bqJc.js';
|
|
3
3
|
import '@tangle-network/agent-eval';
|
|
4
4
|
import './runtime-run-B8VIiOhI.js';
|
|
5
5
|
import './types-CsCCryln.js';
|
|
package/dist/{types-CpCX1pfx.d.ts → types-BZw2bqJc.d.ts}
CHANGED
|
@@ -118,6 +118,23 @@ interface Driver<Task, Output, Decision> {
|
|
|
118
118
|
* is hit, or when the abort signal fires.
|
|
119
119
|
*/
|
|
120
120
|
decide(history: ReadonlyArray<Iteration<Task, Output>>): Decision | Promise<Decision>;
|
|
121
|
+
/**
|
|
122
|
+
* Optional: describe the move `plan()` just produced, for trace emission.
|
|
123
|
+
* The kernel calls this immediately after `plan()` and emits the result in
|
|
124
|
+
* the `loop.plan` event so a topology viewer can render the agent's chosen
|
|
125
|
+
* move + rationale (not just the inferred fan-width). Drivers whose topology
|
|
126
|
+
* is a pure function of count (refine/fanout-vote) omit it — the kernel
|
|
127
|
+
* infers `moveKind` from the planned-task count. Agent-authored drivers
|
|
128
|
+
* (`createDynamicDriver`) return their chosen move's kind + rationale.
|
|
129
|
+
*/
|
|
130
|
+
describePlan?(): LoopPlanDescription | undefined;
|
|
131
|
+
}
|
|
132
|
+
/** @experimental Driver-supplied description of the just-planned move. */
|
|
133
|
+
interface LoopPlanDescription {
|
|
134
|
+
/** Topology move this round — e.g. `'refine' | 'fanout' | 'verify' | 'stop'`. */
|
|
135
|
+
kind: string;
|
|
136
|
+
/** Why the driver chose this move (the agent's rationale), when available. */
|
|
137
|
+
rationale?: string;
|
|
121
138
|
}
|
|
122
139
|
/** @experimental */
|
|
123
140
|
interface LoopWinner<Task, Output> {
|
|
@@ -174,6 +191,11 @@ type LoopTraceEvent = {
|
|
|
174
191
|
runId: string;
|
|
175
192
|
timestamp: number;
|
|
176
193
|
payload: LoopStartedPayload;
|
|
194
|
+
} | {
|
|
195
|
+
kind: 'loop.plan';
|
|
196
|
+
runId: string;
|
|
197
|
+
timestamp: number;
|
|
198
|
+
payload: LoopPlanPayload;
|
|
177
199
|
} | {
|
|
178
200
|
kind: 'loop.iteration.started';
|
|
179
201
|
runId: string;
|
|
@@ -207,6 +229,24 @@ interface LoopStartedPayload {
|
|
|
207
229
|
maxIterations: number;
|
|
208
230
|
maxConcurrency: number;
|
|
209
231
|
}
|
|
232
|
+
/**
|
|
233
|
+
* Emitted once per `plan()` round, immediately after the driver plans. Carries
|
|
234
|
+
* the topology move so a viewer renders WHAT the agent decided + WHY, not just
|
|
235
|
+
* the inferred fan-width. `moveKind` is the driver's `describePlan().kind` when
|
|
236
|
+
* provided, else inferred from `plannedCount` (0→stop, 1→refine, N→fanout).
|
|
237
|
+
*
|
|
238
|
+
* @experimental
|
|
239
|
+
*/
|
|
240
|
+
interface LoopPlanPayload {
|
|
241
|
+
/** 0-based plan round (one per `plan()` call). */
|
|
242
|
+
roundIndex: number;
|
|
243
|
+
/** Tasks the driver issued this round. */
|
|
244
|
+
plannedCount: number;
|
|
245
|
+
/** Topology move — `'refine' | 'fanout' | 'verify' | 'stop'` etc. */
|
|
246
|
+
moveKind: string;
|
|
247
|
+
/** Driver rationale for the move, when available. */
|
|
248
|
+
rationale?: string;
|
|
249
|
+
}
|
|
210
250
|
/** @experimental */
|
|
211
251
|
interface LoopIterationStartedPayload {
|
|
212
252
|
iterationIndex: number;
|
|
@@ -241,6 +281,9 @@ interface LoopIterationEndedPayload {
|
|
|
241
281
|
error?: string;
|
|
242
282
|
costUsd: number;
|
|
243
283
|
durationMs: number;
|
|
284
|
+
/** Summed LLM token usage for this iteration — maps to gen_ai.usage.* on the
|
|
285
|
+
* branch span. Omitted when no `llm_call` events carried token counts. */
|
|
286
|
+
tokenUsage?: LoopTokenUsage;
|
|
244
287
|
}
|
|
245
288
|
/** @experimental */
|
|
246
289
|
interface LoopDecisionPayload {
|
|
@@ -281,4 +324,4 @@ interface ExecCtx {
|
|
|
281
324
|
parentSpanId?: string;
|
|
282
325
|
}
|
|
283
326
|
|
|
284
|
-
export type { AgentRunSpec as A, Driver as D, ExecCtx as E, Iteration as I, LoopSandboxClient as L, OutputAdapter as O, Validator as V, LoopWinner as a, LoopResult as b, LoopDecisionPayload as c, LoopEndedPayload as d, LoopIterationDispatchPayload as e, LoopIterationEndedPayload as f, LoopIterationStartedPayload as g,
|
|
327
|
+
export type { AgentRunSpec as A, Driver as D, ExecCtx as E, Iteration as I, LoopSandboxClient as L, OutputAdapter as O, Validator as V, LoopWinner as a, LoopResult as b, LoopDecisionPayload as c, LoopEndedPayload as d, LoopIterationDispatchPayload as e, LoopIterationEndedPayload as f, LoopIterationStartedPayload as g, LoopPlanDescription as h, LoopPlanPayload as i, LoopSandboxPlacement as j, LoopStartedPayload as k, LoopTokenUsage as l, LoopTraceEmitter as m, LoopTraceEvent as n, ValidationCtx as o };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-runtime",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.34.0",
|
|
4
4
|
"description": "Reusable runtime lifecycle for domain-specific agents.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-runtime#readme",
|
|
6
6
|
"repository": {
|
|
@@ -60,7 +60,8 @@
|
|
|
60
60
|
},
|
|
61
61
|
"files": [
|
|
62
62
|
"dist",
|
|
63
|
-
"README.md"
|
|
63
|
+
"README.md",
|
|
64
|
+
"skills"
|
|
64
65
|
],
|
|
65
66
|
"publishConfig": {
|
|
66
67
|
"access": "public"
|
|
package/skills/agent-runtime-adoption/SKILL.md
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: agent-runtime-adoption
|
|
3
|
+
description: Adopt @tangle-network/agent-runtime in a product — the driven-loop kernel (runLoop), topology drivers (refine / fanout-vote / dynamic agent-authored), the loopDispatch campaign bridge, MCP delegation, and identity-gated prompt-surface optimization (optimizePrompt). Self-contained; needs only the published package + @tangle-network/agent-eval. Use when wiring runLoop, choosing a topology driver, optimizing a system/planner prompt, or exposing delegation tools.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# agent-runtime adoption — driven loops, topology drivers, prompt optimization
|
|
7
|
+
|
|
8
|
+
`@tangle-network/agent-runtime` is the task-lifecycle skeleton: it owns the loop
|
|
9
|
+
kernel and orchestration seams; it delegates domain behavior (models, tools,
|
|
10
|
+
scoring) to adapters you supply. It depends on `@tangle-network/agent-eval`
|
|
11
|
+
(substrate); never the reverse. This skill is self-contained — you need only the
|
|
12
|
+
two published packages.
|
|
13
|
+
|
|
14
|
+
## Principle
|
|
15
|
+
|
|
16
|
+
The kernel owns iteration accounting, concurrency, abort, cost/token aggregation,
|
|
17
|
+
and trace emission. It does NOT own *what the agent runs* (sandbox SDK + profile),
|
|
18
|
+
*how output is decoded* (output adapter), *how it's scored* (validator), or
|
|
19
|
+
*topology* (driver). Keep those four as injected seams — do not fork the kernel.
|
|
20
|
+
|
|
21
|
+
**Fail loud.** External-boundary calls return typed outcomes; a `null` sandbox
|
|
22
|
+
client, a `null` output adapter return, or a malformed planner move must throw,
|
|
23
|
+
never silently produce a `{0,0}` cell the integrity guard reads as a stub.
|
|
24
|
+
|
|
25
|
+
## The Driver seam — `runLoop` + topology
|
|
26
|
+
|
|
27
|
+
`runLoop({ driver, agentRun | agentRuns, output, validator?, task, ctx })` runs
|
|
28
|
+
each iteration: `driver.plan(task, history) → Task[]` → per task spawn a sandbox
|
|
29
|
+
on an `AgentRunSpec.profile` + `streamPrompt` → `output.parse(events)` →
|
|
30
|
+
`validator?.validate(...)` → `driver.decide(history)`. Terminal decisions:
|
|
31
|
+
`'stop' | 'pick-winner' | 'fail' | 'done'`. Returns
|
|
32
|
+
`LoopResult { decision, iterations, winner, costUsd, tokenUsage }`.
|
|
33
|
+
|
|
34
|
+
A `Driver<Task, Output, Decision>` is just `plan(task, history) → Task[]`
|
|
35
|
+
(`[task]`→refine, N copies→fanout, `[]`→stop) + `decide(history) → Decision`.
|
|
36
|
+
Topology is data; the kernel is topology-agnostic.
|
|
37
|
+
|
|
38
|
+
### Three shipped drivers — `@tangle-network/agent-runtime/loops`
|
|
39
|
+
|
|
40
|
+
- **`createRefineDriver({ maxIterations?, refineTask? })`** — one task/iteration,
|
|
41
|
+
validator-gated; replay or rewrite the task until valid or capped. Use for
|
|
42
|
+
incremental patches, document revision, anything monotonic.
|
|
43
|
+
- **`createFanoutVoteDriver({ n, selector? })`** — N parallel attempts in
|
|
44
|
+
iteration 0, score once, pick the winner (default: highest valid score). Use
|
|
45
|
+
for multi-harness coder fanout, redundant research with disagreement detection.
|
|
46
|
+
- **`createDynamicDriver({ planner, maxIterations?, maxFanout? })`** — **the
|
|
47
|
+
agent authors the topology.** `plan`/`decide` are backed by an injected
|
|
48
|
+
`TopologyPlanner` that emits one `TopologyMove` per round
|
|
49
|
+
(`{kind:'refine',task}` | `{kind:'fanout',tasks}` | `{kind:'stop'}`). The
|
|
50
|
+
planner is invoked once per round in `plan()`; `decide()` reads the cached move
|
|
51
|
+
so an LLM planner is never double-called. Use when the right shape is
|
|
52
|
+
task-dependent (scout-then-fanout, refine-then-branch, decompose).
|
|
53
|
+
|
|
54
|
+
Topology is **orthogonal to harness**: a driver returns `Task[]`; the kernel
|
|
55
|
+
round-robins `agentRuns[]` to decide which harness (claude-code / codex /
|
|
56
|
+
opencode / pi) runs each branch. One driver spans all backends, including
|
|
57
|
+
fanning a single round across several.
|
|
58
|
+
|
|
59
|
+
### Wiring an LLM planner — `createSandboxPlanner`
|
|
60
|
+
|
|
61
|
+
```ts
|
|
62
|
+
import { createDynamicDriver, createSandboxPlanner, runLoop } from '@tangle-network/agent-runtime/loops'
|
|
63
|
+
|
|
64
|
+
const planner = createSandboxPlanner<Task, Out>({
|
|
65
|
+
client, profile: plannerProfile, // any harness; cheap model is fine
|
|
66
|
+
decodeTask: (raw) => raw as Task, // envelope task → domain Task
|
|
67
|
+
// buildPrompt? — defaults to a history-summary prompt; override to customize
|
|
68
|
+
})
|
|
69
|
+
const result = await runLoop({
|
|
70
|
+
driver: createDynamicDriver({ planner, maxIterations: 8 }),
|
|
71
|
+
agentRuns: workerSpecs, output, validator, task, ctx: { sandboxClient: client },
|
|
72
|
+
})
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The planner emits a JSON envelope (`{ kind, tasks?, n?, rationale }`); a missing,
|
|
76
|
+
unparseable, or unknown-kind envelope throws `PlannerError` — the loop never runs
|
|
77
|
+
a topology nobody chose.
|
|
78
|
+
|
|
79
|
+
### Driver gotchas
|
|
80
|
+
|
|
81
|
+
- `runLoop` validates `ctx.sandboxClient.create` exists or throws
|
|
82
|
+
`ValidationError`. Never stub a `null` client.
|
|
83
|
+
- The kernel emits `loop.started / plan / iteration.dispatch / iteration.ended /
|
|
84
|
+
decision / ended` via `ctx.traceEmitter`. Wire it to the same OTLP sink as the
|
|
85
|
+
chat path so loop telemetry is queryable.
|
|
86
|
+
- The output adapter MUST return a typed value or throw. A `null`/`undefined`
|
|
87
|
+
return silently drops the iteration from scoring.
|
|
88
|
+
- Dynamic driver: set the kernel's `runLoop` `maxIterations >=` the driver's so
|
|
89
|
+
the driver's cap governs and the loop closes on a clean `'done'`.
|
|
90
|
+
|
|
91
|
+
## Campaign bridge — `loopDispatch` / `loopCampaignDispatch`
|
|
92
|
+
|
|
93
|
+
To run `runLoop` as an agent-eval campaign cell, do NOT hand-build the ExecCtx +
|
|
94
|
+
forward trace + report usage every time (the third is silent — forgetting it
|
|
95
|
+
yields a `{0,0}` cell `assertRealBackend` reads as a stub). Use the adapter:
|
|
96
|
+
|
|
97
|
+
```ts
|
|
98
|
+
import { loopCampaignDispatch } from '@tangle-network/agent-runtime/loops'
|
|
99
|
+
const dispatch = loopCampaignDispatch({
|
|
100
|
+
sandboxClient,
|
|
101
|
+
toLoopOptions: (scenario) => ({ driver, agentRun, output, validator, task: toTask(scenario) }),
|
|
102
|
+
// toArtifact? — defaults to result.winner?.output
|
|
103
|
+
})
|
|
104
|
+
// pass `dispatch` to runCampaign / runEvalCampaign; usage + trace are auto-forwarded
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
`loopDispatch` is the `runProfileMatrix` variant (profile is an axis).
|
|
108
|
+
|
|
109
|
+
## Identity-gated prompt optimization — `optimizePrompt`
|
|
110
|
+
|
|
111
|
+
Exported from `@tangle-network/agent-runtime/improvement`: the text-surface entry point onto
|
|
112
|
+
agent-eval's `runImprovementLoop` — sibling to `improvementDriver` (the
|
|
113
|
+
code/worktree path). Optimizes any prompt surface (system / planner / judge
|
|
114
|
+
rubric) and is **identity-gated by construction**: it runs evals, proposes
|
|
115
|
+
candidates (default driver `gepaDriver`), and the held-out gate compares
|
|
116
|
+
candidate vs baseline. `result.prompt` is the **baseline unless the gate decided
|
|
117
|
+
`'ship'`** — so registering a prompt for optimization can never regress it; it
|
|
118
|
+
only improves when held-out data earns it.
|
|
119
|
+
|
|
120
|
+
```ts
|
|
121
|
+
import { optimizePrompt } from '@tangle-network/agent-runtime/improvement'
|
|
122
|
+
const { prompt, improved, decision, delta } = await optimizePrompt({
|
|
123
|
+
baselinePrompt: CURRENT_SYSTEM_PROMPT,
|
|
124
|
+
runWithPrompt: (prompt, scenario, ctx) => runYourThing(prompt, scenario), // sandbox / runLoop / direct call
|
|
125
|
+
scenarios, holdoutScenarios, judges, runDir,
|
|
126
|
+
reflection: { llm, model: REFLECTION_MODEL }, // builds the default gepaDriver
|
|
127
|
+
// gate? — defaults to heldOutGate; pass defaultProductionGate for red-team hardening
|
|
128
|
+
})
|
|
129
|
+
// use `prompt` unconditionally: it's the baseline until a candidate genuinely wins
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### optimizePrompt gotchas — read before wiring
|
|
133
|
+
|
|
134
|
+
- **`gepaDriver` mutates TEXT only**, and its only structural guard is `##` H2
|
|
135
|
+
headings (`preserveSections`) + `maxSentenceEdits`. Make load-bearing sections
|
|
136
|
+
of your prompt real `##` headings, and treat the output schema as fixed code —
|
|
137
|
+
GEPA optimizes the prose, never the envelope/contract.
|
|
138
|
+
- **Scenarios must be domain-real.** Derive them from the surface's own traces /
|
|
139
|
+
ground truth, not from unrelated corpora. Cross-domain examples are noise.
|
|
140
|
+
- **Extend, don't fork.** If the product already wires `runImprovementLoop`
|
|
141
|
+
(e.g. for a main-agent prompt), add the new surface as another target in that
|
|
142
|
+
harness rather than bolting on a second optimizer.
|
|
143
|
+
- `runWithPrompt` is the only domain seam — the optimizer never assumes how a
|
|
144
|
+
prompt runs. Report cost via `ctx.cost` inside it so the integrity guard sees
|
|
145
|
+
real activity.
|
|
146
|
+
- A live run needs a real backend (`TANGLE_API_KEY` / router, or local
|
|
147
|
+
cli-bridge) and real spend; it is not free.
|
|
148
|
+
|
|
149
|
+
## MCP delegation — `@tangle-network/agent-runtime/mcp`
|
|
150
|
+
|
|
151
|
+
`agent-runtime-mcp` (stdio) exposes delegation tools (`delegate_code`,
|
|
152
|
+
`delegate_research`, …) that drive `runLoop` behind the scenes (refine or
|
|
153
|
+
fanout-vote per `variants`). Env: `TANGLE_API_KEY`, `SANDBOX_BASE_URL`,
|
|
154
|
+
`TANGLE_FLEET_ID` (sibling vs fleet placement), `MCP_CODER_FANOUT_HARNESSES`.
|
|
155
|
+
Mount it on a production `AgentProfile.mcp`; do not re-implement delegation.
|
|
156
|
+
|
|
157
|
+
## Acceptance checklist
|
|
158
|
+
|
|
159
|
+
- [ ] Topology is a `Driver`, not hard-coded control flow. Reuse refine /
|
|
160
|
+
fanout-vote / dynamic; build a custom `Driver` against
|
|
161
|
+
`loops/types.ts:Driver` only when none fit — never fork the kernel.
|
|
162
|
+
- [ ] `runLoop` is bridged to campaigns via `loopDispatch` / `loopCampaignDispatch`
|
|
163
|
+
(usage + trace auto-forwarded), not a hand-rolled ExecCtx.
|
|
164
|
+
- [ ] Every optimizable prompt is registered through `optimizePrompt` (or the
|
|
165
|
+
product's existing `runImprovementLoop`), identity-gated on a held-out set.
|
|
166
|
+
- [ ] Boundaries fail loud: no `null` sandbox client, no silent adapter return,
|
|
167
|
+
no unguarded planner envelope.
|
|
168
|
+
|
|
169
|
+
For the full self-improving pipeline (trace sink → analyst loop → scorecard →
|
|
170
|
+
production loop → CI), see the broader `agent-eval-adoption` skill.
|