@tangle-network/agent-runtime 0.51.0 → 0.52.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +1 -1
- package/dist/{chunk-FQH33M5N.js → chunk-2OU7ZQPD.js} +26 -5
- package/dist/chunk-2OU7ZQPD.js.map +1 -0
- package/dist/{chunk-HYG4ISNS.js → chunk-4JI4BCBI.js} +2 -2
- package/dist/{chunk-HAA4KZUD.js → chunk-7SP2OVYZ.js} +3 -3
- package/dist/{chunk-47SWANFA.js → chunk-BERLUBAP.js} +2 -2
- package/dist/{chunk-XEI7AIHU.js → chunk-COAVO6QB.js} +3 -3
- package/dist/{chunk-FKHNHUXP.js → chunk-V2K35HF2.js} +2 -2
- package/dist/index.d.ts +114 -4
- package/dist/index.js +141 -16
- package/dist/index.js.map +1 -1
- package/dist/loop-runner-bin.js +3 -3
- package/dist/loops.d.ts +2 -1
- package/dist/loops.js +3 -1
- package/dist/mcp/bin.js +4 -4
- package/dist/mcp/index.js +5 -5
- package/dist/runtime.d.ts +47 -8
- package/dist/runtime.js +3 -1
- package/dist/workflow.js +1 -1
- package/package.json +1 -1
- package/skills/agent-runtime-adoption/SKILL.md +41 -26
- package/skills/build-with-agent-runtime/SKILL.md +143 -0
- package/skills/loop-writer/SKILL.md +6 -7
- package/dist/chunk-FQH33M5N.js.map +0 -1
- /package/dist/{chunk-HYG4ISNS.js.map → chunk-4JI4BCBI.js.map} +0 -0
- /package/dist/{chunk-HAA4KZUD.js.map → chunk-7SP2OVYZ.js.map} +0 -0
- /package/dist/{chunk-47SWANFA.js.map → chunk-BERLUBAP.js.map} +0 -0
- /package/dist/{chunk-XEI7AIHU.js.map → chunk-COAVO6QB.js.map} +0 -0
- /package/dist/{chunk-FKHNHUXP.js.map → chunk-V2K35HF2.js.map} +0 -0
package/dist/loop-runner-bin.js
CHANGED
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
import {
|
|
3
3
|
parseLoopRunnerArgv,
|
|
4
4
|
runLoopRunnerCli
|
|
5
|
-
} from "./chunk-XEI7AIHU.js";
|
|
5
|
+
} from "./chunk-COAVO6QB.js";
|
|
6
6
|
import "./chunk-FNMGYYSS.js";
|
|
7
|
-
import "./chunk-HYG4ISNS.js";
|
|
7
|
+
import "./chunk-4JI4BCBI.js";
|
|
8
8
|
import "./chunk-KADIJAD4.js";
|
|
9
9
|
import "./chunk-HNUXAZIJ.js";
|
|
10
10
|
import "./chunk-G3RGMA7C.js";
|
|
11
|
-
import "./chunk-FQH33M5N.js";
|
|
11
|
+
import "./chunk-2OU7ZQPD.js";
|
|
12
12
|
import "./chunk-GSUO5QS6.js";
|
|
13
13
|
import "./chunk-DGUM43GV.js";
|
|
14
14
|
export {
|
package/dist/loops.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
2
|
-
export { AgenticOptions, AgenticRunResult, AgenticSurface, AgenticTask, AgenticTool, AnytimeReport, AnytimeStrategySummary, AnytimeTaskCurve, ArtifactHandle, AssertTraceDerivedFindings, AuditIntentInput, AuditIntentOptions, AuthorStrategyOptions, AuthoredStrategy, BenchmarkCell, BenchmarkConfig, BenchmarkLift, BenchmarkReport, BenchmarkStrategySummary, BenchmarkTaskRow, BridgeSeam, BudgetPool, BudgetReadout, ChampionPick, ChampionPolicy, CheckpointCapableBox, CliSeam, CombinatorShape, Corpus, CorpusFilter, CorpusRecord, CreateScopeAnalystOptions, CriuCapableClient, DefinePersona, DefinePersonaInput, Deliverable, Environment, EqualKArm, EqualKOnCost, EqualKOnCostOptions, EqualKVerdict, EvolutionArchiveNode, EvolutionAuthor, EvolutionBandInfo, EvolutionCandidate, EvolutionGeneration, EvolutionReport, ExecutorConfig, Fanout, FanoutOptions, FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, FlatWidenGate, ForkCapableBox, GitWorkspaceOptions, HarvestCorpusOptions, HarvestFailure, HarvestReport, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, IntentAudit, LoopDispatchOptions, LoopOptionsForDispatch, LoopShape, LoopUntil, LoopUntilSpec, LoopUntilState, McpEndpoint, McpEnvironmentOptions, Observation, ObserveInput, ObserveOptions, OpenSandboxRunOptions, Outcome, Panel, PanelJudge, PanelSpec, PanelVerdict, Persona, PersonaContext, PersonaExecutors, Pipeline, PipelineStage, PromotionGateOptions, PromotionVerdict, RenderCorpusToInstructions, RenderCorpusToInstructionsOptions, ReservationTicket, RouterChatResult, RouterChatToolsResult, RouterConfig, RouterSeam, RouterToolCall, RouterToolLoopResult, RouterToolsSeam, RunAgenticOptions, RunPersonified, RunPersonifiedOptions, SandboxCapabilities, SandboxLineage, SandboxLineageHandle, SandboxRun, SandboxSeam, ScopeAnalyst, ScopeAnalyzeInput, ScopeWidenGate, SessionCapableBox, ShapeBudget, ShapeContext, ShapeRegistry, Shell, ShotPersona, ShotSpec, SteerContext, Strategy, StrategyCtx, 
StrategyEvolutionConfig, StrategyResult, SurfaceScore, ToolSpec, TrajectoryNode, TrajectoryReport, TrajectoryReportFn, TrajectoryReportOptions, TurnResult, UsageSink, VerifierEnvironmentOptions, Verify, VerifySpec, WaterfallCollector, WaterfallReport, WaterfallSpan, Widen, WidenDecision, WidenLineage, WidenSpec, Workspace, WorkspaceCommit, acquireSandbox, adaptiveRefine, anytimeReport, assertStrategyContract, assertTraceDerivedFindings, auditIntent, authorStrategy, breadthDriver, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createMcpEnvironment, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, createVerifierEnvironment, createWaterfallCollector, defaultAnalystInstruction, defaultAuditorInstruction, definePersona, defineStrategy, depthDriver, discriminatingMeans, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, harvestCorpus, inlineSandboxClient, jjWorkspace, localShell, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pickChampion, pipeline, printBenchmarkReport, probeSandboxCapabilities, promotionGate, refine, registerShape, renderAnytimeTable, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, routerChatWithTools, routerChatWithUsage, routerToolLoop, runAgentic, runBenchmark, runPersonified, runStrategyEvolution, sample, sampleThenRefine, selectChampion, settledToIteration, spendFromUsageEvents, strategyAuthorContract, trajectoryReport, verify, widen } from './runtime.js';
|
|
2
|
+
export { AgenticOptions, AgenticRunResult, AgenticSurface, AgenticTask, AgenticTool, AnytimeReport, AnytimeStrategySummary, AnytimeTaskCurve, ArtifactHandle, AssertTraceDerivedFindings, AuditIntentInput, AuditIntentOptions, AuthorStrategyOptions, AuthoredStrategy, BenchmarkCell, BenchmarkConfig, BenchmarkLift, BenchmarkReport, BenchmarkStrategySummary, BenchmarkTaskRow, BridgeSeam, BudgetPool, BudgetReadout, ChampionPick, ChampionPolicy, CheckpointCapableBox, CliSeam, CombinatorShape, Corpus, CorpusFilter, CorpusRecord, CreateScopeAnalystOptions, CriuCapableClient, DefinePersona, DefinePersonaInput, Deliverable, Environment, EqualKArm, EqualKOnCost, EqualKOnCostOptions, EqualKVerdict, EvolutionArchiveNode, EvolutionAuthor, EvolutionBandInfo, EvolutionCandidate, EvolutionGeneration, EvolutionReport, ExecutorConfig, Fanout, FanoutOptions, FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, FlatWidenGate, ForkCapableBox, GitWorkspaceOptions, HarvestCorpusOptions, HarvestFailure, HarvestReport, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, IntentAudit, LoopDispatchOptions, LoopOptionsForDispatch, LoopShape, LoopUntil, LoopUntilSpec, LoopUntilState, McpEndpoint, McpEnvironmentOptions, Observation, ObserveInput, ObserveOptions, OpenSandboxRunOptions, Outcome, Panel, PanelJudge, PanelSpec, PanelVerdict, Persona, PersonaContext, PersonaExecutors, Pipeline, PipelineStage, PromotionGateOptions, PromotionVerdict, RegistryAnalyzeProjection, RenderCorpusToInstructions, RenderCorpusToInstructionsOptions, ReservationTicket, RouterChatResult, RouterChatToolsResult, RouterConfig, RouterSeam, RouterToolCall, RouterToolLoopResult, RouterToolsSeam, RunAgenticOptions, RunPersonified, RunPersonifiedOptions, SandboxCapabilities, SandboxLineage, SandboxLineageHandle, SandboxRun, SandboxSeam, ScopeAnalyst, ScopeAnalyzeInput, ScopeWidenGate, SessionCapableBox, ShapeBudget, ShapeContext, ShapeRegistry, Shell, ShotPersona, ShotSpec, SteerContext, Strategy, 
StrategyCtx, StrategyEvolutionConfig, StrategyResult, SurfaceScore, ToolSpec, TrajectoryNode, TrajectoryReport, TrajectoryReportFn, TrajectoryReportOptions, TurnResult, UsageSink, VerifierEnvironmentOptions, Verify, VerifySpec, WaterfallCollector, WaterfallReport, WaterfallSpan, Widen, WidenDecision, WidenLineage, WidenSpec, Workspace, WorkspaceCommit, acquireSandbox, adaptiveRefine, anytimeReport, assertStrategyContract, assertTraceDerivedFindings, auditIntent, authorStrategy, breadthDriver, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createMcpEnvironment, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, createVerifierEnvironment, createWaterfallCollector, defaultAnalystInstruction, defaultAuditorInstruction, definePersona, defineStrategy, depthDriver, discriminatingMeans, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, harvestCorpus, inlineSandboxClient, jjWorkspace, localShell, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pickChampion, pipeline, printBenchmarkReport, probeSandboxCapabilities, promotionGate, refine, registerShape, registryScopeAnalyst, renderAnytimeTable, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, routerChatWithTools, routerChatWithUsage, routerToolLoop, runAgentic, runBenchmark, runPersonified, runStrategyEvolution, sample, sampleThenRefine, selectChampion, settledToIteration, spendFromUsageEvents, strategyAuthorContract, trajectoryReport, verify, widen } from './runtime.js';
|
|
3
3
|
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-DLI1io57.js';
|
|
4
4
|
export { R as RunLoopOptions, c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-BIineL1T.js';
|
|
5
5
|
export { A as Agent, d as AgentSpec, B as Budget, j as Executor, k as ExecutorContext, h as ExecutorFactory, E as ExecutorRegistry, l as ExecutorResult, H as Handle, N as NodeId, m as NodeSnapshot, n as NodeStatus, o as Restart, R as ResultBlobStore, e as RootHandle, p as RootSignal, q as Runtime, S as Scope, c as Settled, b as SpawnEvent, a as SpawnJournal, r as SpawnOpts, g as Spend, f as SupervisedResult, i as Supervisor, s as SupervisorOpts, T as TreeView, U as UsageEvent, W as WidenGate } from './types-5MGt5KTY.js';
|
|
@@ -7,3 +7,4 @@ export { A as AgentRunSpec, D as Driver, E as ExecCtx, I as Iteration, F as Loop
|
|
|
7
7
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
8
8
|
import './runtime-hooks-C7JwKb9E.js';
|
|
9
9
|
import '@tangle-network/agent-eval/campaign';
|
|
10
|
+
import './types-p8dWBIXL.js';
|
package/dist/loops.js
CHANGED
|
@@ -60,6 +60,7 @@ import {
|
|
|
60
60
|
promotionGate,
|
|
61
61
|
refine,
|
|
62
62
|
registerShape,
|
|
63
|
+
registryScopeAnalyst,
|
|
63
64
|
renderAnalyses,
|
|
64
65
|
renderAnytimeTable,
|
|
65
66
|
renderCorpusToInstructions,
|
|
@@ -85,7 +86,7 @@ import {
|
|
|
85
86
|
trajectoryReport,
|
|
86
87
|
verify,
|
|
87
88
|
widen
|
|
88
|
-
} from "./chunk-FQH33M5N.js";
|
|
89
|
+
} from "./chunk-2OU7ZQPD.js";
|
|
89
90
|
import {
|
|
90
91
|
extractLlmCallEvent,
|
|
91
92
|
mapSandboxEvent
|
|
@@ -155,6 +156,7 @@ export {
|
|
|
155
156
|
promotionGate,
|
|
156
157
|
refine,
|
|
157
158
|
registerShape,
|
|
159
|
+
registryScopeAnalyst,
|
|
158
160
|
renderAnalyses,
|
|
159
161
|
renderAnytimeTable,
|
|
160
162
|
renderCorpusToInstructions,
|
package/dist/mcp/bin.js
CHANGED
|
@@ -4,12 +4,12 @@ import {
|
|
|
4
4
|
createPropagatingTraceEmitter,
|
|
5
5
|
detectExecutor,
|
|
6
6
|
readTraceContextFromEnv
|
|
7
|
-
} from "../chunk-HAA4KZUD.js";
|
|
7
|
+
} from "../chunk-7SP2OVYZ.js";
|
|
8
8
|
import "../chunk-WIR4HOOJ.js";
|
|
9
9
|
import {
|
|
10
10
|
DelegationTaskQueue,
|
|
11
11
|
FileDelegationStore
|
|
12
|
-
} from "../chunk-47SWANFA.js";
|
|
12
|
+
} from "../chunk-BERLUBAP.js";
|
|
13
13
|
import {
|
|
14
14
|
coderTaskFromArgs,
|
|
15
15
|
composeLoopTraceEmitters,
|
|
@@ -20,7 +20,7 @@ import {
|
|
|
20
20
|
parseDetachedSessionRef,
|
|
21
21
|
runDetachedTurn,
|
|
22
22
|
settleDetachedCoderTurn
|
|
23
|
-
} from "../chunk-HYG4ISNS.js";
|
|
23
|
+
} from "../chunk-4JI4BCBI.js";
|
|
24
24
|
import {
|
|
25
25
|
coderProfile
|
|
26
26
|
} from "../chunk-KADIJAD4.js";
|
|
@@ -28,7 +28,7 @@ import "../chunk-GLR25NG7.js";
|
|
|
28
28
|
import "../chunk-G3RGMA7C.js";
|
|
29
29
|
import {
|
|
30
30
|
runLoop
|
|
31
|
-
} from "../chunk-FQH33M5N.js";
|
|
31
|
+
} from "../chunk-2OU7ZQPD.js";
|
|
32
32
|
import "../chunk-GSUO5QS6.js";
|
|
33
33
|
import "../chunk-DGUM43GV.js";
|
|
34
34
|
|
package/dist/mcp/index.js
CHANGED
|
@@ -14,12 +14,12 @@ import {
|
|
|
14
14
|
removeWorktree,
|
|
15
15
|
traceContextToEnv,
|
|
16
16
|
validateDelegateUiAuditArgs
|
|
17
|
-
} from "../chunk-HAA4KZUD.js";
|
|
17
|
+
} from "../chunk-7SP2OVYZ.js";
|
|
18
18
|
import "../chunk-WIR4HOOJ.js";
|
|
19
19
|
import {
|
|
20
20
|
mcpToolsForRuntimeMcp,
|
|
21
21
|
mcpToolsForRuntimeMcpSubset
|
|
22
|
-
} from "../chunk-FKHNHUXP.js";
|
|
22
|
+
} from "../chunk-V2K35HF2.js";
|
|
23
23
|
import {
|
|
24
24
|
DELEGATE_CODE_DESCRIPTION,
|
|
25
25
|
DELEGATE_CODE_INPUT_SCHEMA,
|
|
@@ -54,7 +54,7 @@ import {
|
|
|
54
54
|
validateDelegateResearchArgs,
|
|
55
55
|
validateDelegationHistoryArgs,
|
|
56
56
|
validateDelegationStatusArgs
|
|
57
|
-
} from "../chunk-47SWANFA.js";
|
|
57
|
+
} from "../chunk-BERLUBAP.js";
|
|
58
58
|
import {
|
|
59
59
|
createKbGate
|
|
60
60
|
} from "../chunk-FNMGYYSS.js";
|
|
@@ -75,7 +75,7 @@ import {
|
|
|
75
75
|
parseDetachedSessionRef,
|
|
76
76
|
runDetachedTurn,
|
|
77
77
|
settleDetachedCoderTurn
|
|
78
|
-
} from "../chunk-HYG4ISNS.js";
|
|
78
|
+
} from "../chunk-4JI4BCBI.js";
|
|
79
79
|
import "../chunk-KADIJAD4.js";
|
|
80
80
|
import {
|
|
81
81
|
runLocalHarness
|
|
@@ -88,7 +88,7 @@ import {
|
|
|
88
88
|
import "../chunk-G3RGMA7C.js";
|
|
89
89
|
import {
|
|
90
90
|
assertTraceDerivedFindings
|
|
91
|
-
} from "../chunk-FQH33M5N.js";
|
|
91
|
+
} from "../chunk-2OU7ZQPD.js";
|
|
92
92
|
import "../chunk-GSUO5QS6.js";
|
|
93
93
|
import "../chunk-DGUM43GV.js";
|
|
94
94
|
|
package/dist/runtime.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } fro
|
|
|
3
3
|
import { R as ResultBlobStore, a as SpawnJournal, N as NodeId, b as SpawnEvent, T as TreeView, c as Settled, d as AgentSpec, E as ExecutorRegistry, B as Budget, A as Agent, e as RootHandle, f as SupervisedResult, g as Spend, S as Scope, h as ExecutorFactory, U as UsageEvent, i as Supervisor } from './types-5MGt5KTY.js';
|
|
4
4
|
export { j as Executor, k as ExecutorContext, l as ExecutorResult, H as Handle, m as NodeSnapshot, n as NodeStatus, o as Restart, p as RootSignal, q as Runtime, r as SpawnOpts, s as SupervisorOpts, W as WidenGate } from './types-5MGt5KTY.js';
|
|
5
5
|
import { R as RuntimeHooks } from './runtime-hooks-C7JwKb9E.js';
|
|
6
|
-
import { ChatClient, AnalystFinding, DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
|
|
6
|
+
import { ChatClient, AnalystFinding, DefaultVerdict, AgentProfile as AgentProfile$1, AnalystRunInputs } from '@tangle-network/agent-eval';
|
|
7
7
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
8
8
|
export { A as AnalyzeInput, a as CompletionAnalyst, b as CompletionEvidence, c as CompletionPolicy, d as CompletionVerdict, C as CreateDriverOptions, D as DriverDecision, P as PlannerContext, e as TopologyMove, T as TopologyPlanner, f as completionAuthorizes, g as createDriver, h as deterministicCompletion, r as renderAnalyses, s as sentinelCompletion, i as stopSentinel } from './driver-DLI1io57.js';
|
|
9
9
|
import { S as SandboxClient, b as LoopResult, d as LoopTokenUsage, R as RuntimeStreamEvent, A as AgentRunSpec, E as ExecCtx, I as Iteration } from './types-BEQsBhOE.js';
|
|
@@ -11,6 +11,7 @@ export { D as Driver, F as LoopDecisionPayload, G as LoopEndedPayload, H as Loop
|
|
|
11
11
|
import { Scenario, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
12
12
|
import { R as RunLoopOptions } from './run-loop-BIineL1T.js';
|
|
13
13
|
export { c as createSandboxForSpec, d as defaultSelectWinner, r as runLoop } from './run-loop-BIineL1T.js';
|
|
14
|
+
import { b as AnalystRegistryLike } from './types-p8dWBIXL.js';
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
17
|
* @experimental
|
|
@@ -464,6 +465,9 @@ interface ShapeContext<D = unknown> {
|
|
|
464
465
|
/** Derive a child `AgentSpec` from the persona's root spec with an overridden profile —
|
|
465
466
|
* the seam a shape uses to give a worker a narrower role/prompt than the root persona. */
|
|
466
467
|
childSpec(profile: AgentProfile, harness?: BackendType | null): AgentSpec;
|
|
468
|
+
/** The scope analyst (selector≠judge firewall) the combinator steers from. Absent ⇒ the
|
|
469
|
+
* dormant default (empty findings → gates read deliverables/state only). */
|
|
470
|
+
readonly analyst?: ScopeAnalyst<D>;
|
|
467
471
|
}
|
|
468
472
|
/**
|
|
469
473
|
* A reusable act-body factory. Given the persona's content + seams (`ShapeContext`), it
|
|
@@ -521,6 +525,9 @@ interface RunPersonifiedOptions<Task, D> {
|
|
|
521
525
|
readonly handle?: RootHandle<Outcome<D>>;
|
|
522
526
|
readonly now?: () => number;
|
|
523
527
|
readonly signal?: AbortSignal;
|
|
528
|
+
/** Optional scope analyst threaded into the shape's ShapeContext so loopUntil/widen steer
|
|
529
|
+
* on trace-derived findings instead of the dormant empty default. */
|
|
530
|
+
readonly analyst?: ScopeAnalyst<D>;
|
|
524
531
|
}
|
|
525
532
|
/** The composed run signature. */
|
|
526
533
|
type RunPersonified = <Task, D>(options: RunPersonifiedOptions<Task, D>) => Promise<SupervisedResult<Outcome<D>>>;
|
|
@@ -1635,6 +1642,32 @@ interface CreateScopeAnalystOptions<D> {
|
|
|
1635
1642
|
* - any finding cites judge-derived metric evidence → `PlannerError` via the firewall
|
|
1636
1643
|
*/
|
|
1637
1644
|
declare function createScopeAnalyst<D>(scope: Scope<Outcome<D>>, options: CreateScopeAnalystOptions<D>): ScopeAnalyst<D>;
|
|
1645
|
+
/**
|
|
1646
|
+
* Project a `ScopeAnalyzeInput` into the `AnalystRegistry.run` arguments. The registry runs over a
|
|
1647
|
+
* `runId` + `AnalystRunInputs` (a trace store / run record / artifact dir), NOT in-memory scope
|
|
1648
|
+
* settlements — so the CALLER owns the projection from the combinator's drained children to the
|
|
1649
|
+
* registry's inputs (e.g. the trace store the run already wrote). This adapter never invents that
|
|
1650
|
+
* bridge; it only runs the projected inputs and firewalls the merged findings.
|
|
1651
|
+
*/
|
|
1652
|
+
interface RegistryAnalyzeProjection {
|
|
1653
|
+
readonly runId: string;
|
|
1654
|
+
readonly inputs: AnalystRunInputs;
|
|
1655
|
+
/** Optional `run` opts (e.g. `priorFindings`) forwarded verbatim to the registry. */
|
|
1656
|
+
readonly opts?: Parameters<AnalystRegistryLike['run']>[2];
|
|
1657
|
+
}
|
|
1658
|
+
/**
|
|
1659
|
+
* A `ScopeAnalyst` backed by an `AnalystRegistry` — the panel-of-analysts seam. The registry merges
|
|
1660
|
+
* N analyst KINDS into one `AnalystRunResult.findings`; `analyze` runs it over the caller-projected
|
|
1661
|
+
* `{ runId, inputs }` and pipes the merged findings through the SAME `assertTraceDerivedFindings`
|
|
1662
|
+
* firewall `createScopeAnalyst` uses (single-sourced selector≠judge). Distinct from `panel()`
|
|
1663
|
+
* (judges-vs-one-artifact) — this is analysts-over-a-trace, the diagnosis side of the wire.
|
|
1664
|
+
*
|
|
1665
|
+
* Fail loud: a registry that throws propagates; a judge-derived finding aborts via the firewall.
|
|
1666
|
+
* The projection is the caller's (`buildInputs`) — if the scope settlements do not cleanly map to
|
|
1667
|
+
* the registry's `AnalystRunInputs`, that is a caller-side contract gap, surfaced there, not papered
|
|
1668
|
+
* over with a fabricated input here.
|
|
1669
|
+
*/
|
|
1670
|
+
declare function registryScopeAnalyst<D>(registry: AnalystRegistryLike, buildInputs: (input: ScopeAnalyzeInput<D>) => RegistryAnalyzeProjection): ScopeAnalyst<D>;
|
|
1638
1671
|
/**
|
|
1639
1672
|
* Build the `SteerContext` a combinator reads to steer (its `loopUntil.until`, `widen` gate, any
|
|
1640
1673
|
* future steer). One place enforces the firewall: `findings` is asserted trace-derived before it is
|
|
@@ -1693,9 +1726,10 @@ declare function fanout<Task, Item, D>(items: ReadonlyArray<Item>, opts: FanoutO
|
|
|
1693
1726
|
* the deployable stop. The conserved pool IS the loop bound: once `spawn` fails closed the loop
|
|
1694
1727
|
* stops. A loop that exhausted the pool without `until` ever satisfying is a concrete blocker.
|
|
1695
1728
|
*
|
|
1696
|
-
*
|
|
1697
|
-
*
|
|
1698
|
-
*
|
|
1729
|
+
* When `ctx.analyst` is set, each round runs it over the children settled so far and steers
|
|
1730
|
+
* `until` on the resulting trace-derived findings (the analyst spawns into THIS scope, so its
|
|
1731
|
+
* compute is conserved-pooled — equal-k holds by construction). Absent an analyst the findings
|
|
1732
|
+
* argument is the empty array — never a fabricated finding (fail-loud honesty over a silent default).
|
|
1699
1733
|
*/
|
|
1700
1734
|
declare function loopUntil<Task, State, D>(seed: State, spec: LoopUntilSpec<Task, State, D>): CombinatorShape<Task, D>;
|
|
1701
1735
|
/**
|
|
@@ -1721,9 +1755,14 @@ declare function verify<Task, Candidate, D>(spec: VerifySpec<Task, Candidate, D>
|
|
|
1721
1755
|
* never a child's raw `verdict` — and the default gate (`flatWidenGate`) never widens, so the R2
|
|
1722
1756
|
* firewall stays dormant. Terminal selection is `spec.synthesize` over every settled lineage.
|
|
1723
1757
|
*
|
|
1724
|
-
*
|
|
1725
|
-
*
|
|
1726
|
-
*
|
|
1758
|
+
* When `ctx.analyst` is set, `decide` is consulted with that round's trace-derived findings;
|
|
1759
|
+
* absent an analyst the findings argument is the empty array a flat gate ignores. The analyst
|
|
1760
|
+
* spawns into THIS scope (conserved-pooled, so equal-k holds). Streaming caveat: a wired analyst
|
|
1761
|
+
* drains its own child off the SHARED cursor by id-match, so on a NON-flat gate (which spawns
|
|
1762
|
+
* widen children that are live concurrently) the analyst can consume a sibling's settlement before
|
|
1763
|
+
* the widen loop sees it. The shipped default (`flatWidenGate`) never widens, so no widen child is
|
|
1764
|
+
* ever live when the analyst runs and the wire is exact; a non-flat gate must drive the analyst on
|
|
1765
|
+
* a scope whose siblings are quiesced, or read findings without the shared-cursor drain.
|
|
1727
1766
|
*/
|
|
1728
1767
|
declare function widen<Task, Seed, D>(spec: WidenSpec<Seed, D>): CombinatorShape<Task, D>;
|
|
1729
1768
|
/**
|
|
@@ -3184,4 +3223,4 @@ declare function gitWorkspace(opts: GitWorkspaceOptions): Workspace;
|
|
|
3184
3223
|
* requires `jj` on the `Shell`'s host. */
|
|
3185
3224
|
declare function jjWorkspace(opts: GitWorkspaceOptions): Workspace;
|
|
3186
3225
|
|
|
3187
|
-
export { Agent, AgentRunSpec, AgentSpec, type AgenticOptions, type AgenticRunResult, type AgenticSurface, type AgenticTask, type AgenticTool, type AnytimeReport, type AnytimeStrategySummary, type AnytimeTaskCurve, type ArtifactHandle, type AssertTraceDerivedFindings, type AuditIntentInput, type AuditIntentOptions, type AuthorStrategyOptions, type AuthoredStrategy, type BenchmarkCell, type BenchmarkConfig, type BenchmarkLift, type BenchmarkReport, type BenchmarkStrategySummary, type BenchmarkTaskRow, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type ChampionPick, type ChampionPolicy, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type Environment, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, type EvolutionArchiveNode, type EvolutionAuthor, type EvolutionBandInfo, type EvolutionCandidate, type EvolutionGeneration, type EvolutionReport, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, type GitWorkspaceOptions, type HarvestCorpusOptions, type HarvestFailure, type HarvestReport, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, type IntentAudit, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, type McpEndpoint, type McpEnvironmentOptions, NodeId, type Observation, type ObserveInput, type ObserveOptions, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type PromotionGateOptions, type PromotionVerdict, 
type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterChatResult, type RouterChatToolsResult, type RouterConfig, type RouterSeam, type RouterToolCall, type RouterToolLoopResult, type RouterToolsSeam, type RunAgenticOptions, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, type Shell, type ShotPersona, type ShotSpec, SpawnEvent, SpawnJournal, Spend, type SteerContext, type Strategy, type StrategyCtx, type StrategyEvolutionConfig, type StrategyResult, SupervisedResult, Supervisor, type SurfaceScore, type ToolSpec, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type VerifierEnvironmentOptions, type Verify, type VerifySpec, type WaterfallCollector, type WaterfallReport, type WaterfallSpan, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, type Workspace, type WorkspaceCommit, acquireSandbox, adaptiveRefine, anytimeReport, assertStrategyContract, assertTraceDerivedFindings, auditIntent, authorStrategy, breadthDriver, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createMcpEnvironment, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, createVerifierEnvironment, createWaterfallCollector, defaultAnalystInstruction, defaultAuditorInstruction, definePersona, defineStrategy, depthDriver, discriminatingMeans, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, harvestCorpus, inlineSandboxClient, jjWorkspace, localShell, loopDispatch, 
loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pickChampion, pipeline, printBenchmarkReport, probeSandboxCapabilities, promotionGate, refine, registerShape, renderAnytimeTable, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, routerChatWithTools, routerChatWithUsage, routerToolLoop, runAgentic, runBenchmark, runPersonified, runStrategyEvolution, sample, sampleThenRefine, selectChampion, settledToIteration, spendFromUsageEvents, strategyAuthorContract, trajectoryReport, verify, widen };
|
|
3226
|
+
export { Agent, AgentRunSpec, AgentSpec, type AgenticOptions, type AgenticRunResult, type AgenticSurface, type AgenticTask, type AgenticTool, type AnytimeReport, type AnytimeStrategySummary, type AnytimeTaskCurve, type ArtifactHandle, type AssertTraceDerivedFindings, type AuditIntentInput, type AuditIntentOptions, type AuthorStrategyOptions, type AuthoredStrategy, type BenchmarkCell, type BenchmarkConfig, type BenchmarkLift, type BenchmarkReport, type BenchmarkStrategySummary, type BenchmarkTaskRow, type BridgeSeam, Budget, type BudgetPool, type BudgetReadout, type ChampionPick, type ChampionPolicy, type CheckpointCapableBox, type CliSeam, type CombinatorShape, type Corpus, type CorpusFilter, type CorpusRecord, type CreateScopeAnalystOptions, type CriuCapableClient, type DefinePersona, type DefinePersonaInput, type Deliverable, type Environment, type EqualKArm, type EqualKOnCost, type EqualKOnCostOptions, type EqualKVerdict, type EvolutionArchiveNode, type EvolutionAuthor, type EvolutionBandInfo, type EvolutionCandidate, type EvolutionGeneration, type EvolutionReport, ExecCtx, type ExecutorConfig, ExecutorFactory, ExecutorRegistry, type Fanout, type FanoutOptions, type FanoutSynthesis, FileCorpus, FileResultBlobStore, FileSpawnJournal, type FlatWidenGate, type ForkCapableBox, type GitWorkspaceOptions, type HarvestCorpusOptions, type HarvestFailure, type HarvestReport, InMemoryCorpus, InMemoryResultBlobStore, InMemorySpawnJournal, type IntentAudit, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, type LoopShape, LoopTokenUsage, type LoopUntil, type LoopUntilSpec, type LoopUntilState, type McpEndpoint, type McpEnvironmentOptions, NodeId, type Observation, type ObserveInput, type ObserveOptions, type OpenSandboxRunOptions, type Outcome, type Panel, type PanelJudge, type PanelSpec, type PanelVerdict, type Persona, type PersonaContext, type PersonaExecutors, type Pipeline, type PipelineStage, type PromotionGateOptions, type PromotionVerdict, 
type RegistryAnalyzeProjection, type RenderCorpusToInstructions, type RenderCorpusToInstructionsOptions, type ReservationTicket, ResultBlobStore, RootHandle, type RouterChatResult, type RouterChatToolsResult, type RouterConfig, type RouterSeam, type RouterToolCall, type RouterToolLoopResult, type RouterToolsSeam, type RunAgenticOptions, RunLoopOptions, type RunPersonified, type RunPersonifiedOptions, type SandboxCapabilities, SandboxClient, type SandboxLineage, type SandboxLineageHandle, type SandboxRun, type SandboxSeam, Scope, type ScopeAnalyst, type ScopeAnalyzeInput, type ScopeWidenGate, type SessionCapableBox, Settled, type ShapeBudget, type ShapeContext, type ShapeRegistry, type Shell, type ShotPersona, type ShotSpec, SpawnEvent, SpawnJournal, Spend, type SteerContext, type Strategy, type StrategyCtx, type StrategyEvolutionConfig, type StrategyResult, SupervisedResult, Supervisor, type SurfaceScore, type ToolSpec, type TrajectoryNode, type TrajectoryReport, type TrajectoryReportFn, type TrajectoryReportOptions, TreeView, type TurnResult, UsageEvent, type UsageSink, type VerifierEnvironmentOptions, type Verify, type VerifySpec, type WaterfallCollector, type WaterfallReport, type WaterfallSpan, type Widen, type WidenDecision, type WidenLineage, type WidenSpec, type Workspace, type WorkspaceCommit, acquireSandbox, adaptiveRefine, anytimeReport, assertStrategyContract, assertTraceDerivedFindings, auditIntent, authorStrategy, breadthDriver, buildSteerContext, builtinShapes, contentAddress, createBudgetPool, createExecutor, createExecutorRegistry, createMcpEnvironment, createRootHandle, createSandboxLineage, createScope, createScopeAnalyst, createShapeRegistry, createSupervisor, createVerifierEnvironment, createWaterfallCollector, defaultAnalystInstruction, defaultAuditorInstruction, definePersona, defineStrategy, depthDriver, discriminatingMeans, equalKOnCost, extractLlmCallEvent, fanout, flatWidenGate, gitWorkspace, harvestCorpus, inlineSandboxClient, 
jjWorkspace, localShell, loopDispatch, loopUntil, mapSandboxEvent, materializeTreeView, observe, openSandboxRun, panel, pickChampion, pipeline, printBenchmarkReport, probeSandboxCapabilities, promotionGate, refine, registerShape, registryScopeAnalyst, renderAnytimeTable, renderCorpusToInstructions, renderReport, replaySpawnTree, reportLoopUsage, routerChatWithTools, routerChatWithUsage, routerToolLoop, runAgentic, runBenchmark, runPersonified, runStrategyEvolution, sample, sampleThenRefine, selectChampion, settledToIteration, spendFromUsageEvents, strategyAuthorContract, trajectoryReport, verify, widen };
|
package/dist/runtime.js
CHANGED
|
@@ -60,6 +60,7 @@ import {
|
|
|
60
60
|
promotionGate,
|
|
61
61
|
refine,
|
|
62
62
|
registerShape,
|
|
63
|
+
registryScopeAnalyst,
|
|
63
64
|
renderAnalyses,
|
|
64
65
|
renderAnytimeTable,
|
|
65
66
|
renderCorpusToInstructions,
|
|
@@ -85,7 +86,7 @@ import {
|
|
|
85
86
|
trajectoryReport,
|
|
86
87
|
verify,
|
|
87
88
|
widen
|
|
88
|
-
} from "./chunk-
|
|
89
|
+
} from "./chunk-2OU7ZQPD.js";
|
|
89
90
|
import {
|
|
90
91
|
extractLlmCallEvent,
|
|
91
92
|
mapSandboxEvent
|
|
@@ -155,6 +156,7 @@ export {
|
|
|
155
156
|
promotionGate,
|
|
156
157
|
refine,
|
|
157
158
|
registerShape,
|
|
159
|
+
registryScopeAnalyst,
|
|
158
160
|
renderAnalyses,
|
|
159
161
|
renderAnytimeTable,
|
|
160
162
|
renderCorpusToInstructions,
|
package/dist/workflow.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tangle-network/agent-runtime",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.52.0",
|
|
4
4
|
"description": "Shared task-lifecycle skeleton for agents: a recursive loop kernel for chat turns, one-shot tasks, and multi-attempt loops, with trace capture and eval-gated self-improvement. Domain behavior lives in adapters; scoring and ship-gates in @tangle-network/agent-eval.",
|
|
5
5
|
"homepage": "https://github.com/tangle-network/agent-runtime#readme",
|
|
6
6
|
"repository": {
|
|
@@ -35,14 +35,24 @@ A `Driver<Task, Output, Decision>` is just `plan(task, history) → Task[]`
|
|
|
35
35
|
(`[task]`→refine, N copies→fanout, `[]`→stop) + `decide(history) → Decision`.
|
|
36
36
|
Topology is data; the kernel is topology-agnostic.
|
|
37
37
|
|
|
38
|
-
###
|
|
39
|
-
|
|
40
|
-
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
38
|
+
### Topology drivers — `@tangle-network/agent-runtime/loops`
|
|
39
|
+
|
|
40
|
+
> **Stale-name correction (gen-6 consolidation, #165):** the standalone
|
|
41
|
+
> `createRefineDriver` / `createFanoutVoteDriver` factories were **removed** —
|
|
42
|
+
> refine/fanout collapsed into the one recursive agent tree. Canonical today:
|
|
43
|
+
> the personify combinators `loopUntil` (depth/refine) / `fanout` (breadth/vote)
|
|
44
|
+
> and the `Strategy` values `refine` / `sample`, plus `createDriver` for an
|
|
45
|
+
> agent-authored topology. Verify names in `src/runtime/index.ts`; see
|
|
46
|
+
> `build-with-agent-runtime` + `docs/canonical-api.md` §3.1/§3.3 for the live
|
|
47
|
+
> signatures. Likewise `createSandboxPlanner` is gone — pass a `TopologyPlanner`
|
|
48
|
+
> to `createDriver({ planner })` directly.
|
|
49
|
+
|
|
50
|
+
- **`refine` / `loopUntil`** — one attempt/round, validator-gated; iterate over
|
|
51
|
+
one evolving artifact until valid or budget-capped. Use for incremental
|
|
52
|
+
patches, document revision, anything monotonic. (Replaces `createRefineDriver`.)
|
|
53
|
+
- **`sample` / `fanout`** — N attempts at equal budget, score once, pick the
|
|
54
|
+
winner via the single-sourced selector. Use for multi-harness coder fanout,
|
|
55
|
+
redundant research with disagreement detection. (Replaces `createFanoutVoteDriver`.)
|
|
46
56
|
- **`createDriver({ planner, maxIterations?, maxFanout? })`** — **the
|
|
47
57
|
agent authors the topology.** `plan`/`decide` are backed by an injected
|
|
48
58
|
`TopologyPlanner` that emits one `TopologyMove` per round
|
|
@@ -56,16 +66,18 @@ round-robins `agentRuns[]` to decide which harness (claude-code / codex /
|
|
|
56
66
|
opencode / pi) runs each branch. One driver spans all backends, including
|
|
57
67
|
fanning a single round across several.
|
|
58
68
|
|
|
59
|
-
### Wiring an LLM planner — `
|
|
69
|
+
### Wiring an LLM planner — inject a `TopologyPlanner`
|
|
70
|
+
|
|
71
|
+
`createDriver({ planner })` takes an injected `TopologyPlanner` (the standalone
|
|
72
|
+
`createSandboxPlanner` factory was removed in the gen-6 consolidation — verify
|
|
73
|
+
the live shape in `src/runtime/driver.ts` / `src/runtime/index.ts`). The planner
|
|
74
|
+
is the brain (it may call any harness/LLM to author the move); the driver maps
|
|
75
|
+
each `TopologyMove` onto kernel structure.
|
|
60
76
|
|
|
61
77
|
```ts
|
|
62
|
-
import { createDriver,
|
|
78
|
+
import { createDriver, runLoop, type TopologyPlanner } from '@tangle-network/agent-runtime/loops'
|
|
63
79
|
|
|
64
|
-
const planner
|
|
65
|
-
client, profile: plannerProfile, // any harness; cheap model is fine
|
|
66
|
-
decodeTask: (raw) => raw as Task, // envelope task → domain Task
|
|
67
|
-
// buildPrompt? — defaults to a history-summary prompt; override to customize
|
|
68
|
-
})
|
|
80
|
+
const planner: TopologyPlanner<Task, Out> = {/* plan() → one {kind:'refine'|'fanout'|'stop',…} per round */}
|
|
69
81
|
const result = await runLoop({
|
|
70
82
|
driver: createDriver({ planner, maxIterations: 8 }),
|
|
71
83
|
agentRuns: workerSpecs, output, validator, task, ctx: { sandboxClient: client },
|
|
@@ -88,23 +100,25 @@ a topology nobody chose.
|
|
|
88
100
|
- Dynamic driver: set the kernel's `runLoop` `maxIterations >=` the driver's so
|
|
89
101
|
the driver's cap governs and the loop closes on a clean `'done'`.
|
|
90
102
|
|
|
91
|
-
## Campaign bridge — `loopDispatch`
|
|
103
|
+
## Campaign bridge — `loopDispatch`
|
|
92
104
|
|
|
93
105
|
To run `runLoop` as an agent-eval campaign cell, do NOT hand-build the ExecCtx +
|
|
94
106
|
forward trace + report usage every time (the third is silent — forgetting it
|
|
95
|
-
yields a `{0,0}` cell `assertRealBackend` reads as a stub). Use the
|
|
107
|
+
yields a `{0,0}` cell `assertRealBackend` reads as a stub). Use the one bridge,
|
|
108
|
+
`loopDispatch` (the old `loopCampaignDispatch` name was consolidated away; verify
|
|
109
|
+
in `src/runtime/index.ts`):
|
|
96
110
|
|
|
97
111
|
```ts
|
|
98
|
-
import {
|
|
99
|
-
const dispatch =
|
|
112
|
+
import { loopDispatch } from '@tangle-network/agent-runtime/loops'
|
|
113
|
+
const dispatch = loopDispatch({
|
|
100
114
|
sandboxClient,
|
|
101
|
-
toLoopOptions: (scenario) => ({ driver, agentRun, output, validator, task: toTask(scenario) }),
|
|
115
|
+
toLoopOptions: (scenario, profile) => ({ driver, agentRun, output, validator, task: toTask(scenario) }),
|
|
102
116
|
// toArtifact? — defaults to result.winner?.output
|
|
103
117
|
})
|
|
104
118
|
// pass `dispatch` to runCampaign / runEvalCampaign; usage + trace are auto-forwarded
|
|
105
119
|
```
|
|
106
120
|
|
|
107
|
-
`loopDispatch`
|
|
121
|
+
`loopDispatch` doubles as the `runProfileMatrix` variant (the `profile` arg is an axis).
|
|
108
122
|
|
|
109
123
|
## Identity-gated optimization — agent-eval's `selfImprove`
|
|
110
124
|
|
|
@@ -159,11 +173,12 @@ Mount it on a production `AgentProfile.mcp`; do not re-implement delegation.
|
|
|
159
173
|
|
|
160
174
|
## Acceptance checklist
|
|
161
175
|
|
|
162
|
-
- [ ] Topology is a `Driver
|
|
163
|
-
fanout
|
|
164
|
-
`loops/types.ts:Driver` only when none fit —
|
|
165
|
-
|
|
166
|
-
|
|
176
|
+
- [ ] Topology is a `Driver`/combinator, not hard-coded control flow. Reuse
|
|
177
|
+
`refine`/`loopUntil`, `sample`/`fanout`, or the agent-authored `createDriver`;
|
|
178
|
+
build a custom `Driver` against `loops/types.ts:Driver` only when none fit —
|
|
179
|
+
never fork the kernel.
|
|
180
|
+
- [ ] `runLoop` is bridged to campaigns via `loopDispatch` (usage + trace
|
|
181
|
+
auto-forwarded), not a hand-rolled ExecCtx.
|
|
167
182
|
- [ ] Every optimizable prompt is registered through `selfImprove` (or the
|
|
168
183
|
product's existing `runImprovementLoop`), identity-gated on a held-out set.
|
|
169
184
|
- [ ] Boundaries fail loud: no `null` sandbox client, no silent adapter return,
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: build-with-agent-runtime
|
|
3
|
+
description: Use before hand-rolling a tool loop, driver, corpus, or optimizer wrapper. Create an agent genome, run it on a benchmark, optimize+gate it, observe/ship it with @tangle-network/agent-runtime. The genome→run→optimize→observe spine.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# build-with-agent-runtime
|
|
7
|
+
|
|
8
|
+
The one create→run→optimize→observe seam for `@tangle-network/agent-runtime`. A
|
|
9
|
+
**genome** (an `AgentProfile`/`AgentSurfaces` — systemPrompt + skills + tools +
|
|
10
|
+
mcp + knowledge + memory + rag as ONE combined surface) runs as a
|
|
11
|
+
**driver⟷worker** shape over a **benchmark**, gets **optimized by a gated loop**
|
|
12
|
+
that evolves the genome and certifies wins on a **frozen holdout**, and is
|
|
13
|
+
**observed** through the one lifecycle stream. The selector is never the judge;
|
|
14
|
+
observation attaches to the loop, never to the portable genome.
|
|
15
|
+
|
|
16
|
+
If you are about to write a `runConversation`, a "skill optimizer", a
|
|
17
|
+
"profile-seam", a depth-vs-breadth A/B harness, a bootstrap loop, or a
|
|
18
|
+
`new Sandbox(...)` + stream + read dance — **stop.** It exists, and a parallel implementation
|
|
19
|
+
silently breaks a load-bearing invariant (equal-k, selector≠judge,
|
|
20
|
+
capture-integrity, or eval/prod parity).
|
|
21
|
+
|
|
22
|
+
## Load order — point at source, never freeze snippets
|
|
23
|
+
|
|
24
|
+
This skill carries **no API snippets**. The barrel MOVES (`./loops` is a
|
|
25
|
+
back-compat alias of `./runtime`), the agent-eval pin drifts, and signatures get
|
|
26
|
+
corrected in place. Freezing a snippet here guarantees rot. Instead, read, in
|
|
27
|
+
order, and re-verify against source:
|
|
28
|
+
|
|
29
|
+
1. **`docs/canonical-api.md`** — the source of truth: the §2 decision table
|
|
30
|
+
("I want to X → use Y → NOT Z"), §3 per-subsystem signatures (each cited
|
|
31
|
+
`file:line`), §4 the end-to-end recipe, §5 the recursive atom, §6 the
|
|
32
|
+
two-substrate map. Every signature there was read from source.
|
|
33
|
+
2. **`grep` the export barrel** — `grep -nE 'export (function|const|type)' src/runtime/index.ts`
|
|
34
|
+
(and `src/agent/index.ts`, `src/improvement/index.ts`, `src/mcp/index.ts`,
|
|
35
|
+
`src/intelligence/index.ts`) for the live names + subpaths. `./loops` and
|
|
36
|
+
`./runtime` resolve to the SAME barrel (`package.json` maps both to
|
|
37
|
+
`src/runtime/index.ts`).
|
|
38
|
+
3. **`bench/HARNESS.md`** — the experiment-harness map: commands, the
|
|
39
|
+
`rollout → corpus → selector → CI → gate` flow, and the `ADAPTERS` registry
|
|
40
|
+
(a harness-local export, `bench/src/adapters.ts`, not a package export).
|
|
41
|
+
|
|
42
|
+
**Code wins.** If a name, subpath, or signature here or in `docs/canonical-api.md`
|
|
43
|
+
disagrees with source, the **source is right** — fix the map in the *same turn*
|
|
44
|
+
(the anti-rediscovery law). Verify with Read/Edit, don't re-read to confirm.
|
|
45
|
+
|
|
46
|
+
## Decision table — by altitude (each row → ONE source, not a snippet)
|
|
47
|
+
|
|
48
|
+
Read the cited `docs/canonical-api.md` row before writing; it carries the live
|
|
49
|
+
signature + the exact "do NOT build".
|
|
50
|
+
|
|
51
|
+
| Altitude — I want to… | Use | Source |
|
|
52
|
+
|---|---|---|
|
|
53
|
+
| **Define a genome** (who the agent is + what it can do, ONE surface) | `AgentProfile` (runnable) / `AgentSurfaces` (the editable-coordinate map) — `/runtime`, `/agent` | canonical-api §3.2 |
|
|
54
|
+
| **Define the personified-run record** (model+prompt+tools+role+seams) | `definePersona(input)` — `/runtime` | canonical-api §3.1 |
|
|
55
|
+
| **Run a genome driver⟷worker, end-to-end** | `runPersonified({ persona, shape, task, budget })` — `/runtime` | canonical-api §3.1 |
|
|
56
|
+
| **Loop a worker over one evolving artifact, K rounds, stop-when-good** | `loopUntil(seed, spec)` as the `shape` — `/runtime` | canonical-api §3.1 |
|
|
57
|
+
| **Best-of-N / parallel-research at equal compute** | `fanout(items, opts)` — `/runtime` | canonical-api §3.1 |
|
|
58
|
+
| **Produce-then-gate / multi-judge quorum / fixed chain** | `verify` / `panel` / `pipeline` — `/runtime` | canonical-api §3.1 |
|
|
59
|
+
| **Run depth-vs-breadth (or a custom strategy) over a stateful tool domain** | `runAgentic({ surface, task, mode\|strategy, budget })` — `/loops` | canonical-api §3.3 |
|
|
60
|
+
| **Author a new topology/strategy compactly** | `defineStrategy(name, body)` w/ `ctx.shot()`+`ctx.critique()` — `/loops` | canonical-api §3.3 |
|
|
61
|
+
| **Add a stateful tool-using domain** | implement `AgenticSurface` (5 hooks) — `/loops` | canonical-api §3.3 |
|
|
62
|
+
| **Benchmark: compare strategies + significance + Pareto on a domain** | `runBenchmark({ environment, tasks, worker, strategies })` — `/loops` | canonical-api §3.3 |
|
|
63
|
+
| **Benchmark: add/run an external benchmark from the harness** | `ADAPTERS`/`resolveAdapter(key)` + `runExperiment` — `bench` | canonical-api §3.3 |
|
|
64
|
+
| **Sandbox coding rollout** (fresh box/round, or persistent+resume) | `runLoop(options)` / `openSandboxRun(client, opts, deliverable)` — `/runtime` | canonical-api §3.1 |
|
|
65
|
+
| **Optimize a CODE surface** in a gated loop | `improvementDriver({ worktree, generator })` — `/improvement` | canonical-api §3.4 |
|
|
66
|
+
| **Optimize a PROMPT/config surface** (one call) | `selfImprove({ agent, scenarios, judge, baselineSurface })` — `agent-eval/contract` | canonical-api §3.4 |
|
|
67
|
+
| **Gate: ship/hold a candidate** (campaign ctx) | `defaultProductionGate` / `heldOutGate` / `composeGate` — `agent-eval/contract` | canonical-api §3.4 |
|
|
68
|
+
| **Gate: ship/hold from a `BenchmarkReport`** (per-task cells) | `promotionGate({ report, incumbent, candidate })` — `/runtime` | canonical-api §3.4 |
|
|
69
|
+
| **Run the full multi-generation flywheel + certify** | `runStrategyEvolution(config)` — `/runtime` | canonical-api §3.4 |
|
|
70
|
+
| **Compose the prod sandbox profile** (eval/prod parity) | `composeProductionAgentProfile(base, opts)` — `/mcp` | canonical-api §3.2 |
|
|
71
|
+
| **Observe a run** (cost/time waterfall, live tree, OTLP) | `createWaterfallCollector` / `createTopologyView` / `createOtelExporter` via `composeRuntimeHooks(...)` — root | canonical-api §3.5 |
|
|
72
|
+
| **State any A/B claim** | `pairedLift` (bench) over `pairedBootstrap`/`heldoutSignificance` (substrate) | canonical-api §3.5 |
|
|
73
|
+
| **Observe/ship with billing-boundary** | `withTangleIntelligence(agent, { project, effort })` — `/intelligence` | canonical-api §7 (now live on main — verify) |
|
|
74
|
+
|
|
75
|
+
## Do-NOT-reinvent — the traps this skill exists to stop
|
|
76
|
+
|
|
77
|
+
Each of these gets hand-rolled every session; the canonical primitive already
|
|
78
|
+
holds the load-bearing invariant the parallel implementation breaks:
|
|
79
|
+
|
|
80
|
+
- `runConversation` / persona-runner / `while(!done)` steering loop **≈**
|
|
81
|
+
`loopUntil` + `runPersonified` (threads executor seams; equal-k; selector≠judge
|
|
82
|
+
firewall; journal/replay — a parallel runner silently fails to wire the seams).
|
|
83
|
+
- "skill optimizer" / "topology mutator" that opens branches + applies patches
|
|
84
|
+
**≈** `improvementDriver` (code surface) or `selfImprove`/`gepaDriver` (prompt
|
|
85
|
+
surface) — both gated on a frozen holdout.
|
|
86
|
+
- "profile-seam" / agent-config wrapper carrying model+prompt+tools+role **≈**
|
|
87
|
+
`AgentProfile` (it IS that bundle) + `definePersona` (the run record);
|
|
88
|
+
`sandboxAgentRun({ profile })` is the box seam — never pass a router key into
|
|
89
|
+
the box.
|
|
90
|
+
- `new Sandbox()` + acquire + stream + `box.fs.read` + delete **≈**
|
|
91
|
+
`openSandboxRun` (persistent + resume) or `runLoop` (fresh box/round).
|
|
92
|
+
- `Promise.all` over N calls + manual argmax/merge **≈** `fanout` (bypassing the
|
|
93
|
+
budget pool breaks equal-compute claims).
|
|
94
|
+
- a per-step cost/token tally over events **≈** `createWaterfallCollector` (the
|
|
95
|
+
sum of spans IS the billed run cost; a parallel tally drifts).
|
|
96
|
+
- your own bootstrap loop / PRNG per gate **≈** `pairedLift` / `promotionGate`
|
|
97
|
+
(seeded, identical run-to-run; never report a point lift without `low/high/pairs`).
|
|
98
|
+
|
|
99
|
+
## End-to-end recipe
|
|
100
|
+
|
|
101
|
+
`docs/canonical-api.md` §4 is the real composition — copy it from there, don't
|
|
102
|
+
re-derive: **define a genome → run driver⟷worker via the reactive substrate over
|
|
103
|
+
a multi-turn `AgenticSurface` → measure with `runBenchmark` → optimize a prompt
|
|
104
|
+
surface with `selfImprove` → certify on a frozen holdout with the gate.** For the
|
|
105
|
+
multi-generation flywheel, replace the measure/certify steps with one
|
|
106
|
+
`runStrategyEvolution(...)` and read `report.verdict` (NOT `report.trajectory`)
|
|
107
|
+
as the evidence. For a sandbox coding rollout judged by an external deterministic
|
|
108
|
+
checker, use the bench path: `runExperiment({ adapter: resolveAdapter(...),
|
|
109
|
+
sandboxClient, agentRun: sandboxAgentRun({ profile }), arms: [randomArm(...),
|
|
110
|
+
analystArm(...)] })` — `arms[0]` is the mandatory equal-compute control.
|
|
111
|
+
|
|
112
|
+
## Two substrates — pick one, don't invent a third
|
|
113
|
+
|
|
114
|
+
Both implement the same recursive-decision atom over the one `Executor` port and
|
|
115
|
+
share `defaultSelectWinner`. **Reactive** (`Supervisor`/`Scope` + personify
|
|
116
|
+
combinators: `runPersonified`/`runAgentic`/`runBenchmark`) — prefer for NEW
|
|
117
|
+
recursive work; equal-k by construction. **Round-synchronous** (`runLoop` +
|
|
118
|
+
`createDriver`, `runExperiment`) — sandbox coding rollouts against external
|
|
119
|
+
benchmarks. The full when-which map is `docs/canonical-api.md` §6.
|
|
120
|
+
|
|
121
|
+
## Observe / ship with the Intelligence SDK
|
|
122
|
+
|
|
123
|
+
One line wraps any agent with trace + billing boundary:
|
|
124
|
+
`withTangleIntelligence(agent, { project, effort })`, `effort ∈
|
|
125
|
+
off|eco|standard|thorough|max` (`'off'` is the provable passthrough floor —
|
|
126
|
+
intelligence spend clamped to 0). It builds on `createOtelExporter` +
|
|
127
|
+
`loopEventToOtelSpan` — don't hand-roll a trace-wrapper or effort/tier config.
|
|
128
|
+
Verify the live subpath against `src/intelligence/index.ts` (canonical-api §7's
|
|
129
|
+
"branch-only" note is stale — it landed on main).
|
|
130
|
+
|
|
131
|
+
## Final check
|
|
132
|
+
|
|
133
|
+
- Picked a primitive from the decision table, not a hand-rolled parallel?
|
|
134
|
+
- Genome is ONE `AgentProfile`/`AgentSurfaces` surface, not split skill/tool/prompt knobs?
|
|
135
|
+
- Equal compute preserved (budget pool, or `arms[0]` control) — no `Promise.all` zoo?
|
|
136
|
+
- Selector ≠ judge: no judge score feeding a driver/another judge; holdout score write-only?
|
|
137
|
+
- Any win certified on a FROZEN holdout via a gate, never on the training composite?
|
|
138
|
+
- Map fixed in the same turn if source disagreed with `docs/canonical-api.md`?
|
|
139
|
+
|
|
140
|
+
See `_common.md` for shared conventions (frontmatter, fail-loud, no AI attribution).
|
|
141
|
+
|
|
142
|
+
Next: build the genome/loop/optimizer against `docs/canonical-api.md` §3–§4; if a
|
|
143
|
+
strategy beats incumbent on the holdout gate, `/ship` it.
|
|
@@ -24,7 +24,7 @@ The driver owns strategy.
|
|
|
24
24
|
|
|
25
25
|
| Objective | Use |
|
|
26
26
|
|---|---|
|
|
27
|
-
| Try N attempts, pick best | `fanout` or `
|
|
27
|
+
| Try N attempts, pick best | `fanout` (or the `sample` strategy) |
|
|
28
28
|
| Ordered stages | `pipeline` |
|
|
29
29
|
| Improve until executable check passes | `loopUntil` + verifier |
|
|
30
30
|
| Review from several lenses | `panel` |
|
|
@@ -138,16 +138,15 @@ Git is the durable workspace seam:
|
|
|
138
138
|
- resume derives completion from git state, not only a side journal
|
|
139
139
|
- conflicts become blockers/questions, not silent overwrite
|
|
140
140
|
|
|
141
|
-
Proof command
|
|
141
|
+
Proof command (real sandbox, real observe→steer join):
|
|
142
142
|
|
|
143
143
|
```bash
|
|
144
|
-
pnpm exec tsx bench/src/
|
|
144
|
+
TANGLE_API_KEY=... pnpm exec tsx bench/src/cloud-loop.mts
|
|
145
145
|
```
|
|
146
146
|
|
|
147
|
-
It proves `
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
serial git accumulation, not full cloud migration safety.
|
|
147
|
+
It proves `openSandboxRun -> observe -> steer -> corrective worker` over a live
|
|
148
|
+
sandbox. The old `observe-steer-workspace-loop.mts` used mock executors and is
|
|
149
|
+
deleted — the live proof is the only valid one.
|
|
151
150
|
|
|
152
151
|
## Final Check
|
|
153
152
|
|