npm - @tangle-network/agent-runtime - Versions diffs - 0.46.0 → 0.48.0 - Mend

@tangle-network/agent-runtime 0.46.0 → 0.48.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/dist/agent.d.ts +1 -1
package/dist/agent.js +1 -1
package/dist/analyst-loop.d.ts +1 -1
package/dist/{chunk-GN75RGM6.js → chunk-656G2XCL.js} +3 -3
package/dist/{chunk-65FQLI4V.js → chunk-IW2LMLK6.js} +1714 -42
package/dist/chunk-IW2LMLK6.js.map +1 -0
package/dist/{chunk-I42NHLKX.js → chunk-LX66I3SC.js} +11 -6
package/dist/chunk-LX66I3SC.js.map +1 -0
package/dist/{chunk-KPN7OQ64.js → chunk-TJS7S3HJ.js} +2 -2
package/dist/{chunk-KPN7OQ64.js.map → chunk-TJS7S3HJ.js.map} +1 -1
package/dist/{coder-DCWFQpmJ.d.ts → coder-CVZNGbyg.d.ts} +1 -1
package/dist/{driver-C-mtBo7h.d.ts → driver-DYU2sgHr.d.ts} +1 -1
package/dist/index.d.ts +7 -7
package/dist/index.js +3 -3
package/dist/{kb-gate-2Gwpz_27.d.ts → kb-gate-51BlLlVM.d.ts} +8 -2
package/dist/{loop-runner-bin-D-K6bRp3.d.ts → loop-runner-bin-DEm4roYF.d.ts} +4 -4
package/dist/loop-runner-bin.d.ts +5 -5
package/dist/loop-runner-bin.js +3 -3
package/dist/loops.d.ts +5 -5
package/dist/loops.js +55 -1
package/dist/mcp/bin.js +3 -3
package/dist/mcp/index.d.ts +71 -70
package/dist/mcp/index.js +199 -27
package/dist/mcp/index.js.map +1 -1
package/dist/{otel-export-nurzFwuJ.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
package/dist/profiles.d.ts +2 -2
package/dist/{run-loop-CU2Y00Si.d.ts → run-loop-DvD4aGiE.d.ts} +1 -1
package/dist/runtime.d.ts +915 -71
package/dist/runtime.js +55 -1
package/dist/{types-BfoeiQRZ.d.ts → types-BpDfCPUp.d.ts} +5 -5
package/dist/{types-DnYoHvvZ.d.ts → types-nBMuollC.d.ts} +17 -0
package/dist/workflow.d.ts +2 -2
package/dist/workflow.js +1 -1
package/package.json +25 -14
package/skills/loop-writer/SKILL.md +163 -0
package/dist/chunk-65FQLI4V.js.map +0 -1
package/dist/chunk-I42NHLKX.js.map +0 -1
package/dist/{chunk-GN75RGM6.js.map → chunk-656G2XCL.js.map} +0 -0

package/dist/runtime.js CHANGED Viewed

@@ -6,7 +6,12 @@ import {
   InMemoryResultBlobStore,
   InMemorySpawnJournal,
   acquireSandbox,
+  adaptiveRefine,
+  assertStrategyContract,
   assertTraceDerivedFindings,
+  auditIntent,
+  authorStrategy,
+  breadthDriver,
   buildSteerContext,
   builtinShapes,
   completionAuthorizes,
@@ -15,6 +20,7 @@ import {
   createDriver,
   createExecutor,
   createExecutorRegistry,
+  createMcpEnvironment,
   createRootHandle,
   createSandboxForSpec,
   createSandboxLineage,
@@ -22,35 +28,56 @@ import {
   createScopeAnalyst,
   createShapeRegistry,
   createSupervisor,
+  createVerifierEnvironment,
+  defaultAnalystInstruction,
+  defaultAuditorInstruction,
   defaultSelectWinner,
   definePersona,
+  defineStrategy,
+  depthDriver,
   deterministicCompletion,
   equalKOnCost,
   fanout,
   flatWidenGate,
+  gitWorkspace,
+  harvestCorpus,
   inlineSandboxClient,
+  jjWorkspace,
+  localShell,
   loopDispatch,
   loopUntil,
   materializeTreeView,
+  observe,
   openSandboxRun,
   panel,
   pipeline,
+  printBenchmarkReport,
   probeSandboxCapabilities,
+  promotionGate,
+  refine,
   registerShape,
   renderAnalyses,
   renderCorpusToInstructions,
+  renderReport,
   replaySpawnTree,
   reportLoopUsage,
+  runAgentic,
+  runBenchmark,
   runLoop,
   runPersonified,
+  runStrategyEvolution,
+  sample,
+  sampleThenRefine,
+  selectChampion,
   sentinelCompletion,
   settledToIteration,
   spendFromUsageEvents,
   stopSentinel,
+  strategyAuthorContract,
   trajectoryReport,
   verify,
   widen
-} from "./chunk-65FQLI4V.js";
+} from "./chunk-IW2LMLK6.js";
 import {
   extractLlmCallEvent,
   mapSandboxEvent
@@ -64,7 +91,12 @@ export {
   InMemoryResultBlobStore,
   InMemorySpawnJournal,
   acquireSandbox,
+  adaptiveRefine,
+  assertStrategyContract,
   assertTraceDerivedFindings,
+  auditIntent,
+  authorStrategy,
+  breadthDriver,
   buildSteerContext,
   builtinShapes,
   completionAuthorizes,
@@ -73,6 +105,7 @@ export {
   createDriver,
   createExecutor,
   createExecutorRegistry,
+  createMcpEnvironment,
   createRootHandle,
   createSandboxForSpec,
   createSandboxLineage,
@@ -80,33 +113,54 @@ export {
   createScopeAnalyst,
   createShapeRegistry,
   createSupervisor,
+  createVerifierEnvironment,
+  defaultAnalystInstruction,
+  defaultAuditorInstruction,
   defaultSelectWinner,
   definePersona,
+  defineStrategy,
+  depthDriver,
   deterministicCompletion,
   equalKOnCost,
   extractLlmCallEvent,
   fanout,
   flatWidenGate,
+  gitWorkspace,
+  harvestCorpus,
   inlineSandboxClient,
+  jjWorkspace,
+  localShell,
   loopDispatch,
   loopUntil,
   mapSandboxEvent,
   materializeTreeView,
+  observe,
   openSandboxRun,
   panel,
   pipeline,
+  printBenchmarkReport,
   probeSandboxCapabilities,
+  promotionGate,
+  refine,
   registerShape,
   renderAnalyses,
   renderCorpusToInstructions,
+  renderReport,
   replaySpawnTree,
   reportLoopUsage,
+  runAgentic,
+  runBenchmark,
   runLoop,
   runPersonified,
+  runStrategyEvolution,
+  sample,
+  sampleThenRefine,
+  selectChampion,
   sentinelCompletion,
   settledToIteration,
   spendFromUsageEvents,
   stopSentinel,
+  strategyAuthorContract,
   trajectoryReport,
   verify,
   widen

package/dist/{types-BfoeiQRZ.d.ts → types-BpDfCPUp.d.ts} RENAMED Viewed

@@ -1,7 +1,7 @@
 import { DefaultVerdict } from '@tangle-network/agent-eval';
 import { AgentProfile, BackendType } from '@tangle-network/sandbox';
 import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
-import { c as LoopTokenUsage } from './types-DnYoHvvZ.js';
+import { c as LoopTokenUsage } from './types-nBMuollC.js';
 /**
  * @experimental
@@ -245,9 +245,9 @@ type Settled<Out> = {
 };
 /**
  * The budget-conserving reactive scope an `Agent.act` runs inside. `spawn` reserves
- * budget atomically from the shared pool and FAILS CLOSED when the pool can't cover it;
- * `next()` is a ray.wait cursor (n=1) over THIS scope's IN-MEMORY live set; `view` reads
- * the in-memory nursery (NOT the log), O(live).
+ * budget atomically from the shared pool and fails closed when the pool cannot cover it.
+ * `next()` waits for one settlement from this scope's live set; `view` reads live state,
+ * not the replay log.
  */
 interface Scope<Out> {
     /**
@@ -435,4 +435,4 @@ interface WidenGate<Out> {
     readonly judgeExempt?: boolean;
 }
-export type { Agent as A, Budget as B, ExecutorFactory as E, Handle as H, NodeId as N, ResultBlobStore as R, Scope as S, TreeView as T, UsageEvent as U, WidenGate as W, SpawnJournal as a, SpawnEvent as b, Settled as c, AgentSpec as d, ExecutorRegistry as e, RootHandle as f, SupervisedResult as g, Spend as h, Supervisor as i, Executor as j, ExecutorContext as k, ExecutorResult as l, NodeSnapshot as m, NodeStatus as n, Restart as o, RootSignal as p, Runtime as q, SpawnOpts as r, SupervisorOpts as s };
+export type { Agent as A, Budget as B, ExecutorRegistry as E, Handle as H, NodeId as N, ResultBlobStore as R, Scope as S, TreeView as T, UsageEvent as U, WidenGate as W, SpawnJournal as a, SpawnEvent as b, Settled as c, AgentSpec as d, RootHandle as e, SupervisedResult as f, Spend as g, ExecutorFactory as h, Supervisor as i, Executor as j, ExecutorContext as k, ExecutorResult as l, NodeSnapshot as m, NodeStatus as n, Restart as o, RootSignal as p, Runtime as q, SpawnOpts as r, SupervisorOpts as s };

package/dist/{types-DnYoHvvZ.d.ts → types-nBMuollC.d.ts} RENAMED Viewed

@@ -639,6 +639,12 @@ declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
 interface ValidationCtx {
     /** Iteration index this output came from (0-based). */
     iteration: number;
+    /**
+     * Live sandbox for this iteration. Validators that need execution-grounded
+     * evidence can inspect files or run commands here instead of forcing callers
+     * to bypass the loop kernel with raw Sandbox SDK orchestration.
+     */
+    box?: SandboxInstance;
     /** Cooperative cancellation channel. */
     signal: AbortSignal;
     /**
@@ -668,6 +674,17 @@ interface AgentRunSpec<Task> {
     profile: AgentProfile;
     /** Task → prompt formatter. Pure and deterministic. */
     taskToPrompt: (task: Task) => string;
+    /**
+     * Optional pre-prompt sandbox provisioner. Runs after the sandbox is acquired
+     * and before the first prompt is streamed into that box. Use this for
+     * domain-agnostic setup such as repo snapshots, benchmark fixtures, policy
+     * files, or seed datasets. The hook is part of the runtime surface so loop
+     * consumers do not hand-roll Sandbox SDK orchestration just to prepare a
+     * workspace before the agent sees it.
+     */
+    prepareBox?: (box: SandboxInstance, ctx: {
+        signal: AbortSignal;
+    }) => Promise<void> | void;
     /**
      * Per-spec stable name. Surfaced in trace events and the default winner
      * selector tiebreak. Falls back to `profile.name ?? 'agent'`.

package/dist/workflow.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import { AgentProfile, CreateSandboxOptions, PromptOptions, TaskOptions, SandboxEvent } from '@tangle-network/sandbox';
-import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-DnYoHvvZ.js';
-import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
+import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-nBMuollC.js';
+import { R as RunLoopOptions } from './run-loop-DvD4aGiE.js';
 import '@tangle-network/agent-eval';
 import './runtime-hooks-C7JwKb9E.js';

package/dist/workflow.js CHANGED Viewed

@@ -2,7 +2,7 @@ import {
   createSandboxForSpec,
   describeSandboxPlacement,
   runLoop
-} from "./chunk-65FQLI4V.js";
+} from "./chunk-IW2LMLK6.js";
 import {
   ValidationError,
   extractLlmCallEvent

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tangle-network/agent-runtime",
-  "version": "0.46.0",
+  "version": "0.48.0",
   "description": "Shared task-lifecycle skeleton for agents: a recursive loop kernel for chat turns, one-shot tasks, and multi-attempt loops, with trace capture and eval-gated self-improvement. Domain behavior lives in adapters; scoring and ship-gates in @tangle-network/agent-eval.",
   "homepage": "https://github.com/tangle-network/agent-runtime#readme",
   "repository": {
@@ -87,9 +87,21 @@
   "publishConfig": {
     "access": "public"
   },
+  "scripts": {
+    "build": "tsup",
+    "dev": "tsup --watch",
+    "prepare": "tsup",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "lint": "biome check src tests examples",
+    "lint:fix": "biome check --write src tests examples",
+    "typecheck": "tsc --noEmit",
+    "typecheck:examples": "tsc --noEmit -p tsconfig.examples.json",
+    "verify:package": "node scripts/verify-package-exports.mjs"
+  },
   "devDependencies": {
     "@biomejs/biome": "^2.4.0",
-    "@tangle-network/agent-eval": "^0.83.0",
+    "@tangle-network/agent-eval": "^0.89.0",
     "@tangle-network/sandbox": "^0.4.0",
     "@types/node": "^25.6.0",
     "playwright": "^1.40.0",
@@ -97,10 +109,20 @@
     "typescript": "^5.7.0",
     "vitest": "^3.0.0"
   },
+  "pnpm": {
+    "minimumReleaseAge": 4320,
+    "minimumReleaseAgeExclude": [
+      "@tangle-network/agent-eval"
+    ],
+    "onlyBuiltDependencies": [
+      "esbuild"
+    ]
+  },
   "engines": {
     "node": ">=20"
   },
   "license": "MIT",
+  "packageManager": "pnpm@10.28.0",
   "peerDependencies": {
     "@tangle-network/agent-eval": ">=0.83.0 <1.0.0",
     "@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
@@ -117,16 +139,5 @@
     "playwright": {
       "optional": true
     }
-  },
-  "scripts": {
-    "build": "tsup",
-    "dev": "tsup --watch",
-    "test": "vitest run",
-    "test:watch": "vitest",
-    "lint": "biome check src tests examples",
-    "lint:fix": "biome check --write src tests examples",
-    "typecheck": "tsc --noEmit",
-    "typecheck:examples": "tsc --noEmit -p tsconfig.examples.json",
-    "verify:package": "node scripts/verify-package-exports.mjs"
   }
-}
+}

package/skills/loop-writer/SKILL.md ADDED Viewed

@@ -0,0 +1,163 @@
+---
+name: loop-writer
+description: Author clean recursive agent loops on @tangle-network/agent-runtime. Use for Scope/supervisor orchestration, runLoop, Pi/sandbox drivers, fanout, trace analysts, verifiers/judges, question escalation, live messages, and self-improving loop recipes.
+---
+# loop-writer
+Design the smallest loop that can honestly solve the objective. The blessed
+surface is the substrate: `fanout`/`pipeline` for fixed shapes, `runLoop` for
+round-synchronous sandbox loops, and `Scope`/Supervisor for recursive
+driver/worker trees. Do not create a second loop grammar.
+## Mental Model
+```txt
+user -> Pi/root driver -> supervisor -> sandbox driver -> worker -> leaf harness
+```
+Each level may spawn below, wait below, analyze below, steer below, and escalate
+questions upward. The substrate owns budget, trace, abort, journal, and replay.
+The driver owns strategy.
+## Pick The Primitive
+| Objective | Use |
+|---|---|
+| Try N attempts, pick best | `fanout` or `createFanoutVoteDriver` |
+| Ordered stages | `pipeline` |
+| Improve until executable check passes | `loopUntil` + verifier |
+| Review from several lenses | `panel` |
+| Simulated user/product eval | `defineConversation` + `runConversation` |
+| Dynamic topology / drivers of drivers | `Scope` or sandbox driver + `createCoordinationTools` |
+| Mutate a shared repo | git branch/clone loop with typed merge outcomes |
+If a fixed combinator solves it, do not use a dynamic driver.
+## Minimal Sandbox Loop
+```ts
+const trace: unknown[] = []
+const result = await runLoop({
+  driver: createDriver({ planner, maxIterations: 4 }),
+  agentRun: agentRunSpec,
+  output,
+  validator: executableGate,
+  task,
+  ctx: {
+    sandboxClient,
+    traceEmitter: { emit: async (event) => trace.push(event) },
+  },
+})
+const observation = await observe(
+  {
+    task: String(task),
+    output: JSON.stringify(result.winner?.output ?? result.decision),
+    trace,
+    outcome: result.winner ? 'passed' : 'failed',
+    runId,
+  },
+  { chat, model, corpus },
+)
+```
+## Minimal Recursive Driver
+```ts
+const driver: Agent<Task, Output> = {
+  name: 'secure-build-driver',
+  async act(task, scope) {
+    const spawned = scope.spawn(workerAgent, task, { budget: perWorker, label: 'worker-a' })
+    if (!spawned.ok) throw new Error(spawned.reason)
+    const settled = await scope.next()
+    const observation = await observe(
+      {
+        task: String(task),
+        output: JSON.stringify(settled),
+        trace: [settled, scope.view],
+        outcome: settled?.kind === 'done' ? 'passed' : 'failed',
+        runId,
+      },
+      { chat, model, corpus },
+    )
+    const steer = observation.findings[0]?.recommended_action
+    if (!steer) return synthesize(settled, observation)
+    const correction = scope.spawn(workerAgent, { task, prior: settled }, {
+      budget: perWorker,
+      label: 'worker-corrected',
+    })
+    if (!correction.ok) throw new Error(correction.reason)
+    if (!scope.send(correction.handle.id, { steer })) throw new Error('steer delivery failed')
+    const fixed = await scope.next()
+    return synthesize(fixed, observation)
+  },
+}
+const result = await createSupervisor<Task, Output>().run(driver, task, supervisorOpts)
+```
+When the driver lives in a sandbox, expose the same verbs through
+`createCoordinationTools`: `spawn_worker`, `await_next`, `observe_worker`,
+`steer_worker`, `list_questions`, `answer_question`, `ask_parent`, `stop`, and
+optional analyst tools.
+## Role Boundaries
+- **Verifier**: executable shippability gate; controls accept/reject.
+- **Judge**: held-out score only; never steers the current run.
+- **Analyst**: trace-derived diagnosis over worker, pairwise, subtree, or full
+  loop traces; may emit findings, questions, messages, or blockers.
+- **Driver/reviewer**: consumes evidence and chooses continue, steer, spawn,
+  answer, escalate, or stop.
+## Questions And Steering
+Questions are blockers, not prose hidden in output. A child asks its parent; the
+parent answers when it has evidence, defers when safe, or escalates to Pi/user
+when answering would invent requirements. `failClosed` loops must not stop clean
+with unresolved `blocks-run` questions.
+Steer sparingly: only when an analyst finds a concrete mistake, a loop is
+duplicating work, a parent/Pi answers a blocker, or a verifier reveals a specific
+fix a running worker can still use. Delivery is through `Scope.send` or
+`steer_worker`; failed delivery means spawn a fresh corrected attempt.
+## Workspace Loops
+Git is the durable workspace seam:
+- one branch/clone per worker
+- `gitWorkspace({ ref })` when host and sandbox need the same clone/commit/push contract
+- explicit commit per worker
+- typed merge result: `merged | conflict | stale-base | rejected | verifier-failed`
+- resume derives completion from git state, not only a side journal
+- conflicts become blockers/questions, not silent overwrite
+Proof command for the local substrate join:
+```bash
+pnpm exec tsx bench/src/observe-steer-workspace-loop.mts
+```
+It proves `Scope.spawn -> coordination tools -> gitWorkspace -> observe ->
+Scope.send -> corrective worker -> integration pass`. Until the same proof runs
+with `openSandboxRun` and a remote branch, claim local substrate closure and
+serial git accumulation, not full cloud migration safety.
+## Final Check
+- Does every meaningful product land in result blobs, journals, commits,
+  conversation journals, or trace events?
+- Are verifier, judge, analyst, and driver roles separated?
+- Can blocking questions move up the chain?
+- Can Pi/parent steer without bypassing verification?
+- Is workspace mutation transactional if workers edit shared code?
+- Can existing trace/journal views isolate agents, pairs, subtrees, and the full
+  run?
+- Is the loop small enough that an agent can author it without inventing hidden
+  runtime behavior?