@tangle-network/agent-runtime 0.46.0 → 0.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +1 -1
- package/dist/agent.js +1 -1
- package/dist/analyst-loop.d.ts +1 -1
- package/dist/{chunk-GN75RGM6.js → chunk-656G2XCL.js} +3 -3
- package/dist/{chunk-65FQLI4V.js → chunk-IW2LMLK6.js} +1714 -42
- package/dist/chunk-IW2LMLK6.js.map +1 -0
- package/dist/{chunk-I42NHLKX.js → chunk-LX66I3SC.js} +11 -6
- package/dist/chunk-LX66I3SC.js.map +1 -0
- package/dist/{chunk-KPN7OQ64.js → chunk-TJS7S3HJ.js} +2 -2
- package/dist/{chunk-KPN7OQ64.js.map → chunk-TJS7S3HJ.js.map} +1 -1
- package/dist/{coder-DCWFQpmJ.d.ts → coder-CVZNGbyg.d.ts} +1 -1
- package/dist/{driver-C-mtBo7h.d.ts → driver-DYU2sgHr.d.ts} +1 -1
- package/dist/index.d.ts +7 -7
- package/dist/index.js +3 -3
- package/dist/{kb-gate-2Gwpz_27.d.ts → kb-gate-51BlLlVM.d.ts} +8 -2
- package/dist/{loop-runner-bin-D-K6bRp3.d.ts → loop-runner-bin-DEm4roYF.d.ts} +4 -4
- package/dist/loop-runner-bin.d.ts +5 -5
- package/dist/loop-runner-bin.js +3 -3
- package/dist/loops.d.ts +5 -5
- package/dist/loops.js +55 -1
- package/dist/mcp/bin.js +3 -3
- package/dist/mcp/index.d.ts +71 -70
- package/dist/mcp/index.js +199 -27
- package/dist/mcp/index.js.map +1 -1
- package/dist/{otel-export-nurzFwuJ.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
- package/dist/profiles.d.ts +2 -2
- package/dist/{run-loop-CU2Y00Si.d.ts → run-loop-DvD4aGiE.d.ts} +1 -1
- package/dist/runtime.d.ts +915 -71
- package/dist/runtime.js +55 -1
- package/dist/{types-BfoeiQRZ.d.ts → types-BpDfCPUp.d.ts} +5 -5
- package/dist/{types-DnYoHvvZ.d.ts → types-nBMuollC.d.ts} +17 -0
- package/dist/workflow.d.ts +2 -2
- package/dist/workflow.js +1 -1
- package/package.json +25 -14
- package/skills/loop-writer/SKILL.md +163 -0
- package/dist/chunk-65FQLI4V.js.map +0 -1
- package/dist/chunk-I42NHLKX.js.map +0 -1
- /package/dist/{chunk-GN75RGM6.js.map → chunk-656G2XCL.js.map} +0 -0
package/dist/runtime.js
CHANGED
|
@@ -6,7 +6,12 @@ import {
|
|
|
6
6
|
InMemoryResultBlobStore,
|
|
7
7
|
InMemorySpawnJournal,
|
|
8
8
|
acquireSandbox,
|
|
9
|
+
adaptiveRefine,
|
|
10
|
+
assertStrategyContract,
|
|
9
11
|
assertTraceDerivedFindings,
|
|
12
|
+
auditIntent,
|
|
13
|
+
authorStrategy,
|
|
14
|
+
breadthDriver,
|
|
10
15
|
buildSteerContext,
|
|
11
16
|
builtinShapes,
|
|
12
17
|
completionAuthorizes,
|
|
@@ -15,6 +20,7 @@ import {
|
|
|
15
20
|
createDriver,
|
|
16
21
|
createExecutor,
|
|
17
22
|
createExecutorRegistry,
|
|
23
|
+
createMcpEnvironment,
|
|
18
24
|
createRootHandle,
|
|
19
25
|
createSandboxForSpec,
|
|
20
26
|
createSandboxLineage,
|
|
@@ -22,35 +28,56 @@ import {
|
|
|
22
28
|
createScopeAnalyst,
|
|
23
29
|
createShapeRegistry,
|
|
24
30
|
createSupervisor,
|
|
31
|
+
createVerifierEnvironment,
|
|
32
|
+
defaultAnalystInstruction,
|
|
33
|
+
defaultAuditorInstruction,
|
|
25
34
|
defaultSelectWinner,
|
|
26
35
|
definePersona,
|
|
36
|
+
defineStrategy,
|
|
37
|
+
depthDriver,
|
|
27
38
|
deterministicCompletion,
|
|
28
39
|
equalKOnCost,
|
|
29
40
|
fanout,
|
|
30
41
|
flatWidenGate,
|
|
42
|
+
gitWorkspace,
|
|
43
|
+
harvestCorpus,
|
|
31
44
|
inlineSandboxClient,
|
|
45
|
+
jjWorkspace,
|
|
46
|
+
localShell,
|
|
32
47
|
loopDispatch,
|
|
33
48
|
loopUntil,
|
|
34
49
|
materializeTreeView,
|
|
50
|
+
observe,
|
|
35
51
|
openSandboxRun,
|
|
36
52
|
panel,
|
|
37
53
|
pipeline,
|
|
54
|
+
printBenchmarkReport,
|
|
38
55
|
probeSandboxCapabilities,
|
|
56
|
+
promotionGate,
|
|
57
|
+
refine,
|
|
39
58
|
registerShape,
|
|
40
59
|
renderAnalyses,
|
|
41
60
|
renderCorpusToInstructions,
|
|
61
|
+
renderReport,
|
|
42
62
|
replaySpawnTree,
|
|
43
63
|
reportLoopUsage,
|
|
64
|
+
runAgentic,
|
|
65
|
+
runBenchmark,
|
|
44
66
|
runLoop,
|
|
45
67
|
runPersonified,
|
|
68
|
+
runStrategyEvolution,
|
|
69
|
+
sample,
|
|
70
|
+
sampleThenRefine,
|
|
71
|
+
selectChampion,
|
|
46
72
|
sentinelCompletion,
|
|
47
73
|
settledToIteration,
|
|
48
74
|
spendFromUsageEvents,
|
|
49
75
|
stopSentinel,
|
|
76
|
+
strategyAuthorContract,
|
|
50
77
|
trajectoryReport,
|
|
51
78
|
verify,
|
|
52
79
|
widen
|
|
53
|
-
} from "./chunk-65FQLI4V.js";
|
|
80
|
+
} from "./chunk-IW2LMLK6.js";
|
|
54
81
|
import {
|
|
55
82
|
extractLlmCallEvent,
|
|
56
83
|
mapSandboxEvent
|
|
@@ -64,7 +91,12 @@ export {
|
|
|
64
91
|
InMemoryResultBlobStore,
|
|
65
92
|
InMemorySpawnJournal,
|
|
66
93
|
acquireSandbox,
|
|
94
|
+
adaptiveRefine,
|
|
95
|
+
assertStrategyContract,
|
|
67
96
|
assertTraceDerivedFindings,
|
|
97
|
+
auditIntent,
|
|
98
|
+
authorStrategy,
|
|
99
|
+
breadthDriver,
|
|
68
100
|
buildSteerContext,
|
|
69
101
|
builtinShapes,
|
|
70
102
|
completionAuthorizes,
|
|
@@ -73,6 +105,7 @@ export {
|
|
|
73
105
|
createDriver,
|
|
74
106
|
createExecutor,
|
|
75
107
|
createExecutorRegistry,
|
|
108
|
+
createMcpEnvironment,
|
|
76
109
|
createRootHandle,
|
|
77
110
|
createSandboxForSpec,
|
|
78
111
|
createSandboxLineage,
|
|
@@ -80,33 +113,54 @@ export {
|
|
|
80
113
|
createScopeAnalyst,
|
|
81
114
|
createShapeRegistry,
|
|
82
115
|
createSupervisor,
|
|
116
|
+
createVerifierEnvironment,
|
|
117
|
+
defaultAnalystInstruction,
|
|
118
|
+
defaultAuditorInstruction,
|
|
83
119
|
defaultSelectWinner,
|
|
84
120
|
definePersona,
|
|
121
|
+
defineStrategy,
|
|
122
|
+
depthDriver,
|
|
85
123
|
deterministicCompletion,
|
|
86
124
|
equalKOnCost,
|
|
87
125
|
extractLlmCallEvent,
|
|
88
126
|
fanout,
|
|
89
127
|
flatWidenGate,
|
|
128
|
+
gitWorkspace,
|
|
129
|
+
harvestCorpus,
|
|
90
130
|
inlineSandboxClient,
|
|
131
|
+
jjWorkspace,
|
|
132
|
+
localShell,
|
|
91
133
|
loopDispatch,
|
|
92
134
|
loopUntil,
|
|
93
135
|
mapSandboxEvent,
|
|
94
136
|
materializeTreeView,
|
|
137
|
+
observe,
|
|
95
138
|
openSandboxRun,
|
|
96
139
|
panel,
|
|
97
140
|
pipeline,
|
|
141
|
+
printBenchmarkReport,
|
|
98
142
|
probeSandboxCapabilities,
|
|
143
|
+
promotionGate,
|
|
144
|
+
refine,
|
|
99
145
|
registerShape,
|
|
100
146
|
renderAnalyses,
|
|
101
147
|
renderCorpusToInstructions,
|
|
148
|
+
renderReport,
|
|
102
149
|
replaySpawnTree,
|
|
103
150
|
reportLoopUsage,
|
|
151
|
+
runAgentic,
|
|
152
|
+
runBenchmark,
|
|
104
153
|
runLoop,
|
|
105
154
|
runPersonified,
|
|
155
|
+
runStrategyEvolution,
|
|
156
|
+
sample,
|
|
157
|
+
sampleThenRefine,
|
|
158
|
+
selectChampion,
|
|
106
159
|
sentinelCompletion,
|
|
107
160
|
settledToIteration,
|
|
108
161
|
spendFromUsageEvents,
|
|
109
162
|
stopSentinel,
|
|
163
|
+
strategyAuthorContract,
|
|
110
164
|
trajectoryReport,
|
|
111
165
|
verify,
|
|
112
166
|
widen
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
2
2
|
import { AgentProfile, BackendType } from '@tangle-network/sandbox';
|
|
3
3
|
import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
|
|
4
|
-
import { c as LoopTokenUsage } from './types-DnYoHvvZ.js';
|
|
4
|
+
import { c as LoopTokenUsage } from './types-nBMuollC.js';
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* @experimental
|
|
@@ -245,9 +245,9 @@ type Settled<Out> = {
|
|
|
245
245
|
};
|
|
246
246
|
/**
|
|
247
247
|
* The budget-conserving reactive scope an `Agent.act` runs inside. `spawn` reserves
|
|
248
|
-
* budget atomically from the shared pool and
|
|
249
|
-
* `next()`
|
|
250
|
-
* the
|
|
248
|
+
* budget atomically from the shared pool and fails closed when the pool cannot cover it.
|
|
249
|
+
* `next()` waits for one settlement from this scope's live set; `view` reads live state,
|
|
250
|
+
* not the replay log.
|
|
251
251
|
*/
|
|
252
252
|
interface Scope<Out> {
|
|
253
253
|
/**
|
|
@@ -435,4 +435,4 @@ interface WidenGate<Out> {
|
|
|
435
435
|
readonly judgeExempt?: boolean;
|
|
436
436
|
}
|
|
437
437
|
|
|
438
|
-
export type { Agent as A, Budget as B,
|
|
438
|
+
export type { Agent as A, Budget as B, ExecutorRegistry as E, Handle as H, NodeId as N, ResultBlobStore as R, Scope as S, TreeView as T, UsageEvent as U, WidenGate as W, SpawnJournal as a, SpawnEvent as b, Settled as c, AgentSpec as d, RootHandle as e, SupervisedResult as f, Spend as g, ExecutorFactory as h, Supervisor as i, Executor as j, ExecutorContext as k, ExecutorResult as l, NodeSnapshot as m, NodeStatus as n, Restart as o, RootSignal as p, Runtime as q, SpawnOpts as r, SupervisorOpts as s };
|
|
@@ -639,6 +639,12 @@ declare function startRuntimeRun(options: RuntimeRunOptions): RuntimeRunHandle;
|
|
|
639
639
|
interface ValidationCtx {
|
|
640
640
|
/** Iteration index this output came from (0-based). */
|
|
641
641
|
iteration: number;
|
|
642
|
+
/**
|
|
643
|
+
* Live sandbox for this iteration. Validators that need execution-grounded
|
|
644
|
+
* evidence can inspect files or run commands here instead of forcing callers
|
|
645
|
+
* to bypass the loop kernel with raw Sandbox SDK orchestration.
|
|
646
|
+
*/
|
|
647
|
+
box?: SandboxInstance;
|
|
642
648
|
/** Cooperative cancellation channel. */
|
|
643
649
|
signal: AbortSignal;
|
|
644
650
|
/**
|
|
@@ -668,6 +674,17 @@ interface AgentRunSpec<Task> {
|
|
|
668
674
|
profile: AgentProfile;
|
|
669
675
|
/** Task → prompt formatter. Pure and deterministic. */
|
|
670
676
|
taskToPrompt: (task: Task) => string;
|
|
677
|
+
/**
|
|
678
|
+
* Optional pre-prompt sandbox provisioner. Runs after the sandbox is acquired
|
|
679
|
+
* and before the first prompt is streamed into that box. Use this for
|
|
680
|
+
* domain-agnostic setup such as repo snapshots, benchmark fixtures, policy
|
|
681
|
+
* files, or seed datasets. The hook is part of the runtime surface so loop
|
|
682
|
+
* consumers do not hand-roll Sandbox SDK orchestration just to prepare a
|
|
683
|
+
* workspace before the agent sees it.
|
|
684
|
+
*/
|
|
685
|
+
prepareBox?: (box: SandboxInstance, ctx: {
|
|
686
|
+
signal: AbortSignal;
|
|
687
|
+
}) => Promise<void> | void;
|
|
671
688
|
/**
|
|
672
689
|
* Per-spec stable name. Surfaced in trace events and the default winner
|
|
673
690
|
* selector tiebreak. Falls back to `profile.name ?? 'agent'`.
|
package/dist/workflow.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { AgentProfile, CreateSandboxOptions, PromptOptions, TaskOptions, SandboxEvent } from '@tangle-network/sandbox';
|
|
2
|
-
import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-DnYoHvvZ.js';
|
|
3
|
-
import { R as RunLoopOptions } from './run-loop-CU2Y00Si.js';
|
|
2
|
+
import { S as SandboxClient, O as OutputAdapter, f as LoopSandboxPlacement, b as LoopResult } from './types-nBMuollC.js';
|
|
3
|
+
import { R as RunLoopOptions } from './run-loop-DvD4aGiE.js';
|
|
4
4
|
import '@tangle-network/agent-eval';
|
|
5
5
|
import './runtime-hooks-C7JwKb9E.js';
|
|
6
6
|
|
package/dist/workflow.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-runtime",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.48.0",
|
|
4
4
|
"description": "Shared task-lifecycle skeleton for agents: a recursive loop kernel for chat turns, one-shot tasks, and multi-attempt loops, with trace capture and eval-gated self-improvement. Domain behavior lives in adapters; scoring and ship-gates in @tangle-network/agent-eval.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-runtime#readme",
|
|
6
6
|
"repository": {
|
|
@@ -87,9 +87,21 @@
|
|
|
87
87
|
"publishConfig": {
|
|
88
88
|
"access": "public"
|
|
89
89
|
},
|
|
90
|
+
"scripts": {
|
|
91
|
+
"build": "tsup",
|
|
92
|
+
"dev": "tsup --watch",
|
|
93
|
+
"prepare": "tsup",
|
|
94
|
+
"test": "vitest run",
|
|
95
|
+
"test:watch": "vitest",
|
|
96
|
+
"lint": "biome check src tests examples",
|
|
97
|
+
"lint:fix": "biome check --write src tests examples",
|
|
98
|
+
"typecheck": "tsc --noEmit",
|
|
99
|
+
"typecheck:examples": "tsc --noEmit -p tsconfig.examples.json",
|
|
100
|
+
"verify:package": "node scripts/verify-package-exports.mjs"
|
|
101
|
+
},
|
|
90
102
|
"devDependencies": {
|
|
91
103
|
"@biomejs/biome": "^2.4.0",
|
|
92
|
-
"@tangle-network/agent-eval": "^0.
|
|
104
|
+
"@tangle-network/agent-eval": "^0.89.0",
|
|
93
105
|
"@tangle-network/sandbox": "^0.4.0",
|
|
94
106
|
"@types/node": "^25.6.0",
|
|
95
107
|
"playwright": "^1.40.0",
|
|
@@ -97,10 +109,20 @@
|
|
|
97
109
|
"typescript": "^5.7.0",
|
|
98
110
|
"vitest": "^3.0.0"
|
|
99
111
|
},
|
|
112
|
+
"pnpm": {
|
|
113
|
+
"minimumReleaseAge": 4320,
|
|
114
|
+
"minimumReleaseAgeExclude": [
|
|
115
|
+
"@tangle-network/agent-eval"
|
|
116
|
+
],
|
|
117
|
+
"onlyBuiltDependencies": [
|
|
118
|
+
"esbuild"
|
|
119
|
+
]
|
|
120
|
+
},
|
|
100
121
|
"engines": {
|
|
101
122
|
"node": ">=20"
|
|
102
123
|
},
|
|
103
124
|
"license": "MIT",
|
|
125
|
+
"packageManager": "pnpm@10.28.0",
|
|
104
126
|
"peerDependencies": {
|
|
105
127
|
"@tangle-network/agent-eval": ">=0.83.0 <1.0.0",
|
|
106
128
|
"@tangle-network/agent-knowledge": ">=1.3.0 <2.0.0",
|
|
@@ -117,16 +139,5 @@
|
|
|
117
139
|
"playwright": {
|
|
118
140
|
"optional": true
|
|
119
141
|
}
|
|
120
|
-
},
|
|
121
|
-
"scripts": {
|
|
122
|
-
"build": "tsup",
|
|
123
|
-
"dev": "tsup --watch",
|
|
124
|
-
"test": "vitest run",
|
|
125
|
-
"test:watch": "vitest",
|
|
126
|
-
"lint": "biome check src tests examples",
|
|
127
|
-
"lint:fix": "biome check --write src tests examples",
|
|
128
|
-
"typecheck": "tsc --noEmit",
|
|
129
|
-
"typecheck:examples": "tsc --noEmit -p tsconfig.examples.json",
|
|
130
|
-
"verify:package": "node scripts/verify-package-exports.mjs"
|
|
131
142
|
}
|
|
132
|
-
}
|
|
143
|
+
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: loop-writer
|
|
3
|
+
description: Author clean recursive agent loops on @tangle-network/agent-runtime. Use for Scope/supervisor orchestration, runLoop, Pi/sandbox drivers, fanout, trace analysts, verifiers/judges, question escalation, live messages, and self-improving loop recipes.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# loop-writer
|
|
7
|
+
|
|
8
|
+
Design the smallest loop that can honestly solve the objective. The blessed
|
|
9
|
+
surface is the substrate: `fanout`/`pipeline` for fixed shapes, `runLoop` for
|
|
10
|
+
round-synchronous sandbox loops, and `Scope`/Supervisor for recursive
|
|
11
|
+
driver/worker trees. Do not create a second loop grammar.
|
|
12
|
+
|
|
13
|
+
## Mental Model
|
|
14
|
+
|
|
15
|
+
```txt
|
|
16
|
+
user -> Pi/root driver -> supervisor -> sandbox driver -> worker -> leaf harness
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Each level may spawn below, wait below, analyze below, steer below, and escalate
|
|
20
|
+
questions upward. The substrate owns budget, trace, abort, journal, and replay.
|
|
21
|
+
The driver owns strategy.
|
|
22
|
+
|
|
23
|
+
## Pick The Primitive
|
|
24
|
+
|
|
25
|
+
| Objective | Use |
|
|
26
|
+
|---|---|
|
|
27
|
+
| Try N attempts, pick best | `fanout` or `createFanoutVoteDriver` |
|
|
28
|
+
| Ordered stages | `pipeline` |
|
|
29
|
+
| Improve until executable check passes | `loopUntil` + verifier |
|
|
30
|
+
| Review from several lenses | `panel` |
|
|
31
|
+
| Simulated user/product eval | `defineConversation` + `runConversation` |
|
|
32
|
+
| Dynamic topology / drivers of drivers | `Scope` or sandbox driver + `createCoordinationTools` |
|
|
33
|
+
| Mutate a shared repo | git branch/clone loop with typed merge outcomes |
|
|
34
|
+
|
|
35
|
+
If a fixed combinator solves it, do not use a dynamic driver.
|
|
36
|
+
|
|
37
|
+
## Minimal Sandbox Loop
|
|
38
|
+
|
|
39
|
+
```ts
|
|
40
|
+
const trace: unknown[] = []
|
|
41
|
+
const result = await runLoop({
|
|
42
|
+
driver: createDriver({ planner, maxIterations: 4 }),
|
|
43
|
+
agentRun: agentRunSpec,
|
|
44
|
+
output,
|
|
45
|
+
validator: executableGate,
|
|
46
|
+
task,
|
|
47
|
+
ctx: {
|
|
48
|
+
sandboxClient,
|
|
49
|
+
traceEmitter: { emit: async (event) => trace.push(event) },
|
|
50
|
+
},
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
const observation = await observe(
|
|
54
|
+
{
|
|
55
|
+
task: String(task),
|
|
56
|
+
output: JSON.stringify(result.winner?.output ?? result.decision),
|
|
57
|
+
trace,
|
|
58
|
+
outcome: result.winner ? 'passed' : 'failed',
|
|
59
|
+
runId,
|
|
60
|
+
},
|
|
61
|
+
{ chat, model, corpus },
|
|
62
|
+
)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Minimal Recursive Driver
|
|
66
|
+
|
|
67
|
+
```ts
|
|
68
|
+
const driver: Agent<Task, Output> = {
|
|
69
|
+
name: 'secure-build-driver',
|
|
70
|
+
async act(task, scope) {
|
|
71
|
+
const spawned = scope.spawn(workerAgent, task, { budget: perWorker, label: 'worker-a' })
|
|
72
|
+
if (!spawned.ok) throw new Error(spawned.reason)
|
|
73
|
+
|
|
74
|
+
const settled = await scope.next()
|
|
75
|
+
const observation = await observe(
|
|
76
|
+
{
|
|
77
|
+
task: String(task),
|
|
78
|
+
output: JSON.stringify(settled),
|
|
79
|
+
trace: [settled, scope.view],
|
|
80
|
+
outcome: settled?.kind === 'done' ? 'passed' : 'failed',
|
|
81
|
+
runId,
|
|
82
|
+
},
|
|
83
|
+
{ chat, model, corpus },
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
const steer = observation.findings[0]?.recommended_action
|
|
87
|
+
if (!steer) return synthesize(settled, observation)
|
|
88
|
+
|
|
89
|
+
const correction = scope.spawn(workerAgent, { task, prior: settled }, {
|
|
90
|
+
budget: perWorker,
|
|
91
|
+
label: 'worker-corrected',
|
|
92
|
+
})
|
|
93
|
+
if (!correction.ok) throw new Error(correction.reason)
|
|
94
|
+
if (!scope.send(correction.handle.id, { steer })) throw new Error('steer delivery failed')
|
|
95
|
+
|
|
96
|
+
const fixed = await scope.next()
|
|
97
|
+
return synthesize(fixed, observation)
|
|
98
|
+
},
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
const result = await createSupervisor<Task, Output>().run(driver, task, supervisorOpts)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
When the driver lives in a sandbox, expose the same verbs through
|
|
105
|
+
`createCoordinationTools`: `spawn_worker`, `await_next`, `observe_worker`,
|
|
106
|
+
`steer_worker`, `list_questions`, `answer_question`, `ask_parent`, `stop`, and
|
|
107
|
+
optional analyst tools.
|
|
108
|
+
|
|
109
|
+
## Role Boundaries
|
|
110
|
+
|
|
111
|
+
- **Verifier**: executable shippability gate; controls accept/reject.
|
|
112
|
+
- **Judge**: held-out score only; never steers the current run.
|
|
113
|
+
- **Analyst**: trace-derived diagnosis over worker, pairwise, subtree, or full
|
|
114
|
+
loop traces; may emit findings, questions, messages, or blockers.
|
|
115
|
+
- **Driver/reviewer**: consumes evidence and chooses continue, steer, spawn,
|
|
116
|
+
answer, escalate, or stop.
|
|
117
|
+
|
|
118
|
+
## Questions And Steering
|
|
119
|
+
|
|
120
|
+
Questions are blockers, not prose hidden in output. A child asks its parent; the
|
|
121
|
+
parent answers when it has evidence, defers when safe, or escalates to Pi/user
|
|
122
|
+
when answering would invent requirements. `failClosed` loops must not stop clean
|
|
123
|
+
with unresolved `blocks-run` questions.
|
|
124
|
+
|
|
125
|
+
Steer sparingly: only when an analyst finds a concrete mistake, a loop is
|
|
126
|
+
duplicating work, a parent/Pi answers a blocker, or a verifier reveals a specific
|
|
127
|
+
fix a running worker can still use. Delivery is through `Scope.send` or
|
|
128
|
+
`steer_worker`; failed delivery means spawn a fresh corrected attempt.
|
|
129
|
+
|
|
130
|
+
## Workspace Loops
|
|
131
|
+
|
|
132
|
+
Git is the durable workspace seam:
|
|
133
|
+
|
|
134
|
+
- one branch/clone per worker
|
|
135
|
+
- `gitWorkspace({ ref })` when host and sandbox need the same clone/commit/push contract
|
|
136
|
+
- explicit commit per worker
|
|
137
|
+
- typed merge result: `merged | conflict | stale-base | rejected | verifier-failed`
|
|
138
|
+
- resume derives completion from git state, not only a side journal
|
|
139
|
+
- conflicts become blockers/questions, not silent overwrite
|
|
140
|
+
|
|
141
|
+
Proof command for the local substrate join:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
pnpm exec tsx bench/src/observe-steer-workspace-loop.mts
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
It proves `Scope.spawn -> coordination tools -> gitWorkspace -> observe ->
|
|
148
|
+
Scope.send -> corrective worker -> integration pass`. Until the same proof runs
|
|
149
|
+
with `openSandboxRun` and a remote branch, claim local substrate closure and
|
|
150
|
+
serial git accumulation, not full cloud migration safety.
|
|
151
|
+
|
|
152
|
+
## Final Check
|
|
153
|
+
|
|
154
|
+
- Does every meaningful product land in result blobs, journals, commits,
|
|
155
|
+
conversation journals, or trace events?
|
|
156
|
+
- Are verifier, judge, analyst, and driver roles separated?
|
|
157
|
+
- Can blocking questions move up the chain?
|
|
158
|
+
- Can Pi/parent steer without bypassing verification?
|
|
159
|
+
- Is workspace mutation transactional if workers edit shared code?
|
|
160
|
+
- Can existing trace/journal views isolate agents, pairs, subtrees, and the full
|
|
161
|
+
run?
|
|
162
|
+
- Is the loop small enough that an agent can author it without inventing hidden
|
|
163
|
+
runtime behavior?
|