@tangle-network/agent-runtime 0.46.0 → 0.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/agent.d.ts +1 -1
  2. package/dist/agent.js +1 -1
  3. package/dist/analyst-loop.d.ts +1 -1
  4. package/dist/{chunk-GN75RGM6.js → chunk-656G2XCL.js} +3 -3
  5. package/dist/{chunk-65FQLI4V.js → chunk-IW2LMLK6.js} +1714 -42
  6. package/dist/chunk-IW2LMLK6.js.map +1 -0
  7. package/dist/{chunk-I42NHLKX.js → chunk-LX66I3SC.js} +11 -6
  8. package/dist/chunk-LX66I3SC.js.map +1 -0
  9. package/dist/{chunk-KPN7OQ64.js → chunk-TJS7S3HJ.js} +2 -2
  10. package/dist/{chunk-KPN7OQ64.js.map → chunk-TJS7S3HJ.js.map} +1 -1
  11. package/dist/{coder-DCWFQpmJ.d.ts → coder-CVZNGbyg.d.ts} +1 -1
  12. package/dist/{driver-C-mtBo7h.d.ts → driver-DYU2sgHr.d.ts} +1 -1
  13. package/dist/index.d.ts +7 -7
  14. package/dist/index.js +3 -3
  15. package/dist/{kb-gate-2Gwpz_27.d.ts → kb-gate-51BlLlVM.d.ts} +8 -2
  16. package/dist/{loop-runner-bin-D-K6bRp3.d.ts → loop-runner-bin-DEm4roYF.d.ts} +4 -4
  17. package/dist/loop-runner-bin.d.ts +5 -5
  18. package/dist/loop-runner-bin.js +3 -3
  19. package/dist/loops.d.ts +5 -5
  20. package/dist/loops.js +55 -1
  21. package/dist/mcp/bin.js +3 -3
  22. package/dist/mcp/index.d.ts +71 -70
  23. package/dist/mcp/index.js +199 -27
  24. package/dist/mcp/index.js.map +1 -1
  25. package/dist/{otel-export-nurzFwuJ.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
  26. package/dist/profiles.d.ts +2 -2
  27. package/dist/{run-loop-CU2Y00Si.d.ts → run-loop-DvD4aGiE.d.ts} +1 -1
  28. package/dist/runtime.d.ts +915 -71
  29. package/dist/runtime.js +55 -1
  30. package/dist/{types-BfoeiQRZ.d.ts → types-BpDfCPUp.d.ts} +5 -5
  31. package/dist/{types-DnYoHvvZ.d.ts → types-nBMuollC.d.ts} +17 -0
  32. package/dist/workflow.d.ts +2 -2
  33. package/dist/workflow.js +1 -1
  34. package/package.json +25 -14
  35. package/skills/loop-writer/SKILL.md +163 -0
  36. package/dist/chunk-65FQLI4V.js.map +0 -1
  37. package/dist/chunk-I42NHLKX.js.map +0 -1
  38. package/dist/{chunk-GN75RGM6.js.map → chunk-656G2XCL.js.map} +0 -0
package/dist/runtime.js CHANGED
@@ -6,7 +6,12 @@ import {
6
6
  InMemoryResultBlobStore,
7
7
  InMemorySpawnJournal,
8
8
  acquireSandbox,
9
+ adaptiveRefine,
10
+ assertStrategyContract,
9
11
  assertTraceDerivedFindings,
12
+ auditIntent,
13
+ authorStrategy,
14
+ breadthDriver,
10
15
  buildSteerContext,
11
16
  builtinShapes,
12
17
  completionAuthorizes,
@@ -15,6 +20,7 @@ import {
15
20
  createDriver,
16
21
  createExecutor,
17
22
  createExecutorRegistry,
23
+ createMcpEnvironment,
18
24
  createRootHandle,
19
25
  createSandboxForSpec,
20
26
  createSandboxLineage,
@@ -22,35 +28,56 @@ import {
22
28
  createScopeAnalyst,
23
29
  createShapeRegistry,
24
30
  createSupervisor,
31
+ createVerifierEnvironment,
32
+ defaultAnalystInstruction,
33
+ defaultAuditorInstruction,
25
34
  defaultSelectWinner,
26
35
  definePersona,
36
+ defineStrategy,
37
+ depthDriver,
27
38
  deterministicCompletion,
28
39
  equalKOnCost,
29
40
  fanout,
30
41
  flatWidenGate,
42
+ gitWorkspace,
43
+ harvestCorpus,
31
44
  inlineSandboxClient,
45
+ jjWorkspace,
46
+ localShell,
32
47
  loopDispatch,
33
48
  loopUntil,
34
49
  materializeTreeView,
50
+ observe,
35
51
  openSandboxRun,
36
52
  panel,
37
53
  pipeline,
54
+ printBenchmarkReport,
38
55
  probeSandboxCapabilities,
56
+ promotionGate,
57
+ refine,
39
58
  registerShape,
40
59
  renderAnalyses,
41
60
  renderCorpusToInstructions,
61
+ renderReport,
42
62
  replaySpawnTree,
43
63
  reportLoopUsage,
64
+ runAgentic,
65
+ runBenchmark,
44
66
  runLoop,
45
67
  runPersonified,
68
+ runStrategyEvolution,
69
+ sample,
70
+ sampleThenRefine,
71
+ selectChampion,
46
72
  sentinelCompletion,
47
73
  settledToIteration,
48
74
  spendFromUsageEvents,
49
75
  stopSentinel,
76
+ strategyAuthorContract,
50
77
  trajectoryReport,
51
78
  verify,
52
79
  widen
53
- } from "./chunk-65FQLI4V.js";
80
+ } from "./chunk-IW2LMLK6.js";
54
81
  import {
55
82
  extractLlmCallEvent,
56
83
  mapSandboxEvent
@@ -64,7 +91,12 @@ export {
64
91
  InMemoryResultBlobStore,
65
92
  InMemorySpawnJournal,
66
93
  acquireSandbox,
94
+ adaptiveRefine,
95
+ assertStrategyContract,
67
96
  assertTraceDerivedFindings,
97
+ auditIntent,
98
+ authorStrategy,
99
+ breadthDriver,
68
100
  buildSteerContext,
69
101
  builtinShapes,
70
102
  completionAuthorizes,
@@ -73,6 +105,7 @@ export {
73
105
  createDriver,
74
106
  createExecutor,
75
107
  createExecutorRegistry,
108
+ createMcpEnvironment,
76
109
  createRootHandle,
77
110
  createSandboxForSpec,
78
111
  createSandboxLineage,
@@ -80,33 +113,54 @@ export {
80
113
  createScopeAnalyst,
81
114
  createShapeRegistry,
82
115
  createSupervisor,
116
+ createVerifierEnvironment,
117
+ defaultAnalystInstruction,
118
+ defaultAuditorInstruction,
83
119
  defaultSelectWinner,
84
120
  definePersona,
121
+ defineStrategy,
122
+ depthDriver,
85
123
  deterministicCompletion,
86
124
  equalKOnCost,
87
125
  extractLlmCallEvent,
88
126
  fanout,
89
127
  flatWidenGate,
128
+ gitWorkspace,
129
+ harvestCorpus,
90
130
  inlineSandboxClient,
131
+ jjWorkspace,
132
+ localShell,
91
133
  loopDispatch,
92
134
  loopUntil,
93
135
  mapSandboxEvent,
94
136
  materializeTreeView,
137
+ observe,
95
138
  openSandboxRun,
96
139
  panel,
97
140
  pipeline,
141
+ printBenchmarkReport,
98
142
  probeSandboxCapabilities,
143
+ promotionGate,
144
+ refine,
99
145
  registerShape,
100
146
  renderAnalyses,
101
147
  renderCorpusToInstructions,
148
+ renderReport,
102
149
  replaySpawnTree,
103
150
  reportLoopUsage,
151
+ runAgentic,
152
+ runBenchmark,
104
153
  runLoop,
105
154
  runPersonified,
155
+ runStrategyEvolution,
156
+ sample,
157
+ sampleThenRefine,
158
+ selectChampion,
106
159
  sentinelCompletion,
107
160
  settledToIteration,
108
161
  spendFromUsageEvents,
109
162
  stopSentinel,
163
+ strategyAuthorContract,
110
164
  trajectoryReport,
111
165
  verify,
112
166
  widen
@@ -1,7 +1,7 @@
1
1
  import { DefaultVerdict } from '@tangle-network/agent-eval';
2
2
  import { AgentProfile, BackendType } from '@tangle-network/sandbox';
3
3
  import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
4
- import { c as LoopTokenUsage } from './types-DnYoHvvZ.js';
4
+ import { c as LoopTokenUsage } from './types-nBMuollC.js';
5
5
 
6
6
  /**
7
7
  * @experimental
@@ -245,9 +245,9 @@ type Settled<Out> = {
245
245
  };
246
246
  /**
247
247
  * The budget-conserving reactive scope an `Agent.act` runs inside. `spawn` reserves
248
- * budget atomically from the shared pool and FAILS CLOSED when the pool can't cover it;
249
- * `next()` is a ray.wait cursor (n=1) over THIS scope's IN-MEMORY live set; `view` reads
250
- * the in-memory nursery (NOT the log), O(live).
248
+ * budget atomically from the shared pool and fails closed when the pool cannot cover it.
249
+ * `next()` waits for one settlement from this scope's live set; `view` reads live state,
250
+ * not the replay log.
251
251
  */
252
252
  interface Scope<Out> {
253
253
  /**
@@ -435,4 +435,4 @@ interface WidenGate<Out> {
435
435
  readonly judgeExempt?: boolean;
436
436
  }
437
437
 
438
- export type { Agent as A, Budget as B, ExecutorFactory as E, Handle as H, NodeId as N, ResultBlobStore as R, Scope as S, TreeView as T, UsageEvent as U, WidenGate as W, SpawnJournal as a, SpawnEvent as b, Settled as c, AgentSpec as d, ExecutorRegistry as e, RootHandle as f, SupervisedResult as g, Spend as h, Supervisor as i, Executor as j, ExecutorContext as k, ExecutorResult as l, NodeSnapshot as m, NodeStatus as n, Restart as o, RootSignal as p, Runtime as q, SpawnOpts as r, SupervisorOpts as s };
438
+ export type { Agent as A, Budget as B, ExecutorRegistry as E, Handle as H, NodeId as N, ResultBlobStore as R, Scope as S, TreeView as T, UsageEvent as U, WidenGate as W, SpawnJournal as a, SpawnEvent as b, Settled as c, AgentSpec as d, RootHandle as e, SupervisedResult as f, Spend as g, ExecutorFactory as h, Supervisor as i, Executor as j, ExecutorContext as k, ExecutorResult as l, NodeSnapshot as m, NodeStatus as n, Restart as o, RootSignal as p, Runtime as q, SpawnOpts as r, SupervisorOpts as s };
@@ -639,6 +639,12 @@ declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
639
639
  interface ValidationCtx {
640
640
  /** Iteration index this output came from (0-based). */
641
641
  iteration: number;
642
+ /**
643
+ * Live sandbox for this iteration. Validators that need execution-grounded
644
+ * evidence can inspect files or run commands here instead of forcing callers
645
+ * to bypass the loop kernel with raw Sandbox SDK orchestration.
646
+ */
647
+ box?: SandboxInstance;
642
648
  /** Cooperative cancellation channel. */
643
649
  signal: AbortSignal;
644
650
  /**
@@ -668,6 +674,17 @@ interface AgentRunSpec<Task> {
668
674
  profile: AgentProfile;
669
675
  /** Task → prompt formatter. Pure and deterministic. */
670
676
  taskToPrompt: (task: Task) => string;
677
+ /**
678
+ * Optional pre-prompt sandbox provisioner. Runs after the sandbox is acquired
679
+ * and before the first prompt is streamed into that box. Use this for
680
+ * domain-agnostic setup such as repo snapshots, benchmark fixtures, policy
681
+ * files, or seed datasets. The hook is part of the runtime surface so loop
682
+ * consumers do not hand-roll Sandbox SDK orchestration just to prepare a
683
+ * workspace before the agent sees it.
684
+ */
685
+ prepareBox?: (box: SandboxInstance, ctx: {
686
+ signal: AbortSignal;
687
+ }) => Promise<void> | void;
671
688
  /**
672
689
  * Per-spec stable name. Surfaced in trace events and the default winner
673
690
  * selector tiebreak. Falls back to `profile.name ?? 'agent'`.
@@ -1,6 +1,6 @@
1
1
  import { AgentProfile, CreateSandboxOptions, PromptOptions, TaskOptions, SandboxEvent } from '@tangle-network/sandbox';
2
- import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-DnYoHvvZ.js';
3
- import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
2
+ import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-nBMuollC.js';
3
+ import { R as RunLoopOptions } from './run-loop-DvD4aGiE.js';
4
4
  import '@tangle-network/agent-eval';
5
5
  import './runtime-hooks-C7JwKb9E.js';
6
6
 
package/dist/workflow.js CHANGED
@@ -2,7 +2,7 @@ import {
2
2
  createSandboxForSpec,
3
3
  describeSandboxPlacement,
4
4
  runLoop
5
- } from "./chunk-65FQLI4V.js";
5
+ } from "./chunk-IW2LMLK6.js";
6
6
  import {
7
7
  ValidationError,
8
8
  extractLlmCallEvent
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tangle-network/agent-runtime",
3
- "version": "0.46.0",
3
+ "version": "0.48.0",
4
4
  "description": "Shared task-lifecycle skeleton for agents: a recursive loop kernel for chat turns, one-shot tasks, and multi-attempt loops, with trace capture and eval-gated self-improvement. Domain behavior lives in adapters; scoring and ship-gates in @tangle-network/agent-eval.",
5
5
  "homepage": "https://github.com/tangle-network/agent-runtime#readme",
6
6
  "repository": {
@@ -87,9 +87,21 @@
87
87
  "publishConfig": {
88
88
  "access": "public"
89
89
  },
90
+ "scripts": {
91
+ "build": "tsup",
92
+ "dev": "tsup --watch",
93
+ "prepare": "tsup",
94
+ "test": "vitest run",
95
+ "test:watch": "vitest",
96
+ "lint": "biome check src tests examples",
97
+ "lint:fix": "biome check --write src tests examples",
98
+ "typecheck": "tsc --noEmit",
99
+ "typecheck:examples": "tsc --noEmit -p tsconfig.examples.json",
100
+ "verify:package": "node scripts/verify-package-exports.mjs"
101
+ },
90
102
  "devDependencies": {
91
103
  "@biomejs/biome": "^2.4.0",
92
- "@tangle-network/agent-eval": "^0.83.0",
104
+ "@tangle-network/agent-eval": "^0.89.0",
93
105
  "@tangle-network/sandbox": "^0.4.0",
94
106
  "@types/node": "^25.6.0",
95
107
  "playwright": "^1.40.0",
@@ -97,10 +109,20 @@
97
109
  "typescript": "^5.7.0",
98
110
  "vitest": "^3.0.0"
99
111
  },
112
+ "pnpm": {
113
+ "minimumReleaseAge": 4320,
114
+ "minimumReleaseAgeExclude": [
115
+ "@tangle-network/agent-eval"
116
+ ],
117
+ "onlyBuiltDependencies": [
118
+ "esbuild"
119
+ ]
120
+ },
100
121
  "engines": {
101
122
  "node": ">=20"
102
123
  },
103
124
  "license": "MIT",
125
+ "packageManager": "pnpm@10.28.0",
104
126
  "peerDependencies": {
105
127
  "@tangle-network/agent-eval": ">=0.83.0 <1.0.0",
106
128
  "@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
@@ -117,16 +139,5 @@
117
139
  "playwright": {
118
140
  "optional": true
119
141
  }
120
- },
121
- "scripts": {
122
- "build": "tsup",
123
- "dev": "tsup --watch",
124
- "test": "vitest run",
125
- "test:watch": "vitest",
126
- "lint": "biome check src tests examples",
127
- "lint:fix": "biome check --write src tests examples",
128
- "typecheck": "tsc --noEmit",
129
- "typecheck:examples": "tsc --noEmit -p tsconfig.examples.json",
130
- "verify:package": "node scripts/verify-package-exports.mjs"
131
142
  }
132
- }
143
+ }
package/skills/loop-writer/SKILL.md ADDED
@@ -0,0 +1,163 @@
1
+ ---
2
+ name: loop-writer
3
+ description: Author clean recursive agent loops on @tangle-network/agent-runtime. Use for Scope/supervisor orchestration, runLoop, Pi/sandbox drivers, fanout, trace analysts, verifiers/judges, question escalation, live messages, and self-improving loop recipes.
4
+ ---
5
+
6
+ # loop-writer
7
+
8
+ Design the smallest loop that can honestly solve the objective. The blessed
9
+ surface is the substrate: `fanout`/`pipeline` for fixed shapes, `runLoop` for
10
+ round-synchronous sandbox loops, and `Scope`/Supervisor for recursive
11
+ driver/worker trees. Do not create a second loop grammar.
12
+
13
+ ## Mental Model
14
+
15
+ ```txt
16
+ user -> Pi/root driver -> supervisor -> sandbox driver -> worker -> leaf harness
17
+ ```
18
+
19
+ Each level may spawn below, wait below, analyze below, steer below, and escalate
20
+ questions upward. The substrate owns budget, trace, abort, journal, and replay.
21
+ The driver owns strategy.
22
+
23
+ ## Pick The Primitive
24
+
25
+ | Objective | Use |
26
+ |---|---|
27
+ | Try N attempts, pick best | `fanout` or `createFanoutVoteDriver` |
28
+ | Ordered stages | `pipeline` |
29
+ | Improve until executable check passes | `loopUntil` + verifier |
30
+ | Review from several lenses | `panel` |
31
+ | Simulated user/product eval | `defineConversation` + `runConversation` |
32
+ | Dynamic topology / drivers of drivers | `Scope` or sandbox driver + `createCoordinationTools` |
33
+ | Mutate a shared repo | git branch/clone loop with typed merge outcomes |
34
+
35
+ If a fixed combinator solves it, do not use a dynamic driver.
36
+
37
+ ## Minimal Sandbox Loop
38
+
39
+ ```ts
40
+ const trace: unknown[] = []
41
+ const result = await runLoop({
42
+ driver: createDriver({ planner, maxIterations: 4 }),
43
+ agentRun: agentRunSpec,
44
+ output,
45
+ validator: executableGate,
46
+ task,
47
+ ctx: {
48
+ sandboxClient,
49
+ traceEmitter: { emit: async (event) => trace.push(event) },
50
+ },
51
+ })
52
+
53
+ const observation = await observe(
54
+ {
55
+ task: String(task),
56
+ output: JSON.stringify(result.winner?.output ?? result.decision),
57
+ trace,
58
+ outcome: result.winner ? 'passed' : 'failed',
59
+ runId,
60
+ },
61
+ { chat, model, corpus },
62
+ )
63
+ ```
64
+
65
+ ## Minimal Recursive Driver
66
+
67
+ ```ts
68
+ const driver: Agent<Task, Output> = {
69
+ name: 'secure-build-driver',
70
+ async act(task, scope) {
71
+ const spawned = scope.spawn(workerAgent, task, { budget: perWorker, label: 'worker-a' })
72
+ if (!spawned.ok) throw new Error(spawned.reason)
73
+
74
+ const settled = await scope.next()
75
+ const observation = await observe(
76
+ {
77
+ task: String(task),
78
+ output: JSON.stringify(settled),
79
+ trace: [settled, scope.view],
80
+ outcome: settled?.kind === 'done' ? 'passed' : 'failed',
81
+ runId,
82
+ },
83
+ { chat, model, corpus },
84
+ )
85
+
86
+ const steer = observation.findings[0]?.recommended_action
87
+ if (!steer) return synthesize(settled, observation)
88
+
89
+ const correction = scope.spawn(workerAgent, { task, prior: settled }, {
90
+ budget: perWorker,
91
+ label: 'worker-corrected',
92
+ })
93
+ if (!correction.ok) throw new Error(correction.reason)
94
+ if (!scope.send(correction.handle.id, { steer })) throw new Error('steer delivery failed')
95
+
96
+ const fixed = await scope.next()
97
+ return synthesize(fixed, observation)
98
+ },
99
+ }
100
+
101
+ const result = await createSupervisor<Task, Output>().run(driver, task, supervisorOpts)
102
+ ```
103
+
104
+ When the driver lives in a sandbox, expose the same verbs through
105
+ `createCoordinationTools`: `spawn_worker`, `await_next`, `observe_worker`,
106
+ `steer_worker`, `list_questions`, `answer_question`, `ask_parent`, `stop`, and
107
+ optional analyst tools.
108
+
109
+ ## Role Boundaries
110
+
111
+ - **Verifier**: executable shippability gate; controls accept/reject.
112
+ - **Judge**: held-out score only; never steers the current run.
113
+ - **Analyst**: trace-derived diagnosis over worker, pairwise, subtree, or full
114
+ loop traces; may emit findings, questions, messages, or blockers.
115
+ - **Driver/reviewer**: consumes evidence and chooses continue, steer, spawn,
116
+ answer, escalate, or stop.
117
+
118
+ ## Questions And Steering
119
+
120
+ Questions are blockers, not prose hidden in output. A child asks its parent; the
121
+ parent answers when it has evidence, defers when safe, or escalates to Pi/user
122
+ when answering would invent requirements. `failClosed` loops must not stop clean
123
+ with unresolved `blocks-run` questions.
124
+
125
+ Steer sparingly: only when an analyst finds a concrete mistake, a loop is
126
+ duplicating work, a parent/Pi answers a blocker, or a verifier reveals a specific
127
+ fix a running worker can still use. Delivery is through `Scope.send` or
128
+ `steer_worker`; failed delivery means spawn a fresh corrected attempt.
129
+
130
+ ## Workspace Loops
131
+
132
+ Git is the durable workspace seam:
133
+
134
+ - one branch/clone per worker
135
+ - `gitWorkspace({ ref })` when host and sandbox need the same clone/commit/push contract
136
+ - explicit commit per worker
137
+ - typed merge result: `merged | conflict | stale-base | rejected | verifier-failed`
138
+ - resume derives completion from git state, not only a side journal
139
+ - conflicts become blockers/questions, not silent overwrite
140
+
141
+ Proof command for the local substrate join:
142
+
143
+ ```bash
144
+ pnpm exec tsx bench/src/observe-steer-workspace-loop.mts
145
+ ```
146
+
147
+ It proves `Scope.spawn -> coordination tools -> gitWorkspace -> observe ->
148
+ Scope.send -> corrective worker -> integration pass`. Until the same proof runs
149
+ with `openSandboxRun` and a remote branch, claim local substrate closure and
150
+ serial git accumulation, not full cloud migration safety.
151
+
152
+ ## Final Check
153
+
154
+ - Does every meaningful product land in result blobs, journals, commits,
155
+ conversation journals, or trace events?
156
+ - Are verifier, judge, analyst, and driver roles separated?
157
+ - Can blocking questions move up the chain?
158
+ - Can Pi/parent steer without bypassing verification?
159
+ - Is workspace mutation transactional if workers edit shared code?
160
+ - Can existing trace/journal views isolate agents, pairs, subtrees, and the full
161
+ run?
162
+ - Is the loop small enough that an agent can author it without inventing hidden
163
+ runtime behavior?