@tangle-network/agent-runtime 0.46.0 → 0.47.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/agent.d.ts +1 -1
  2. package/dist/agent.js +1 -1
  3. package/dist/analyst-loop.d.ts +1 -1
  4. package/dist/{chunk-I42NHLKX.js → chunk-5YDS7BLC.js} +11 -6
  5. package/dist/chunk-5YDS7BLC.js.map +1 -0
  6. package/dist/{chunk-65FQLI4V.js → chunk-72JQCHOZ.js} +232 -3
  7. package/dist/chunk-72JQCHOZ.js.map +1 -0
  8. package/dist/{chunk-GN75RGM6.js → chunk-MGFEUYOH.js} +3 -3
  9. package/dist/{chunk-KPN7OQ64.js → chunk-T4OQQEE3.js} +2 -2
  10. package/dist/{chunk-KPN7OQ64.js.map → chunk-T4OQQEE3.js.map} +1 -1
  11. package/dist/{coder-DCWFQpmJ.d.ts → coder-CVZNGbyg.d.ts} +1 -1
  12. package/dist/{driver-C-mtBo7h.d.ts → driver-DYU2sgHr.d.ts} +1 -1
  13. package/dist/index.d.ts +7 -7
  14. package/dist/index.js +3 -3
  15. package/dist/{kb-gate-2Gwpz_27.d.ts → kb-gate-51BlLlVM.d.ts} +8 -2
  16. package/dist/{loop-runner-bin-D-K6bRp3.d.ts → loop-runner-bin-DEm4roYF.d.ts} +4 -4
  17. package/dist/loop-runner-bin.d.ts +5 -5
  18. package/dist/loop-runner-bin.js +3 -3
  19. package/dist/loops.d.ts +5 -5
  20. package/dist/loops.js +9 -1
  21. package/dist/mcp/bin.js +3 -3
  22. package/dist/mcp/index.d.ts +71 -70
  23. package/dist/mcp/index.js +199 -27
  24. package/dist/mcp/index.js.map +1 -1
  25. package/dist/{otel-export-nurzFwuJ.d.ts → otel-export-EzfsVUhh.d.ts} +1 -1
  26. package/dist/profiles.d.ts +2 -2
  27. package/dist/{run-loop-CU2Y00Si.d.ts → run-loop-DvD4aGiE.d.ts} +1 -1
  28. package/dist/runtime.d.ts +96 -13
  29. package/dist/runtime.js +9 -1
  30. package/dist/{types-BfoeiQRZ.d.ts → types-Cbx3dNK5.d.ts} +4 -4
  31. package/dist/{types-DnYoHvvZ.d.ts → types-nBMuollC.d.ts} +17 -0
  32. package/dist/workflow.d.ts +2 -2
  33. package/dist/workflow.js +1 -1
  34. package/package.json +24 -13
  35. package/skills/loop-writer/SKILL.md +163 -0
  36. package/dist/chunk-65FQLI4V.js.map +0 -1
  37. package/dist/chunk-I42NHLKX.js.map +0 -1
  38. package/dist/{chunk-GN75RGM6.js.map → chunk-MGFEUYOH.js.map} +0 -0
package/dist/loops.js CHANGED
@@ -28,10 +28,13 @@ import {
28
28
  equalKOnCost,
29
29
  fanout,
30
30
  flatWidenGate,
31
+ gitWorkspace,
31
32
  inlineSandboxClient,
33
+ localShell,
32
34
  loopDispatch,
33
35
  loopUntil,
34
36
  materializeTreeView,
37
+ observe,
35
38
  openSandboxRun,
36
39
  panel,
37
40
  pipeline,
@@ -39,6 +42,7 @@ import {
39
42
  registerShape,
40
43
  renderAnalyses,
41
44
  renderCorpusToInstructions,
45
+ renderReport,
42
46
  replaySpawnTree,
43
47
  reportLoopUsage,
44
48
  runLoop,
@@ -50,7 +54,7 @@ import {
50
54
  trajectoryReport,
51
55
  verify,
52
56
  widen
53
- } from "./chunk-65FQLI4V.js";
57
+ } from "./chunk-72JQCHOZ.js";
54
58
  import {
55
59
  extractLlmCallEvent,
56
60
  mapSandboxEvent
@@ -87,11 +91,14 @@ export {
87
91
  extractLlmCallEvent,
88
92
  fanout,
89
93
  flatWidenGate,
94
+ gitWorkspace,
90
95
  inlineSandboxClient,
96
+ localShell,
91
97
  loopDispatch,
92
98
  loopUntil,
93
99
  mapSandboxEvent,
94
100
  materializeTreeView,
101
+ observe,
95
102
  openSandboxRun,
96
103
  panel,
97
104
  pipeline,
@@ -99,6 +106,7 @@ export {
99
106
  registerShape,
100
107
  renderAnalyses,
101
108
  renderCorpusToInstructions,
109
+ renderReport,
102
110
  replaySpawnTree,
103
111
  reportLoopUsage,
104
112
  runLoop,
package/dist/mcp/bin.js CHANGED
@@ -4,17 +4,17 @@ import {
4
4
  createPropagatingTraceEmitter,
5
5
  detectExecutor,
6
6
  readTraceContextFromEnv
7
- } from "../chunk-KPN7OQ64.js";
7
+ } from "../chunk-T4OQQEE3.js";
8
8
  import "../chunk-WIR4HOOJ.js";
9
9
  import "../chunk-JNPK46YH.js";
10
10
  import {
11
11
  createDefaultCoderDelegate
12
- } from "../chunk-I42NHLKX.js";
12
+ } from "../chunk-5YDS7BLC.js";
13
13
  import "../chunk-KADIJAD4.js";
14
14
  import "../chunk-GLR25NG7.js";
15
15
  import {
16
16
  runLoop
17
- } from "../chunk-65FQLI4V.js";
17
+ } from "../chunk-72JQCHOZ.js";
18
18
  import "../chunk-GSUO5QS6.js";
19
19
  import "../chunk-DGUM43GV.js";
20
20
 
package/dist/mcp/index.d.ts CHANGED
@@ -1,17 +1,17 @@
1
- import { S as SandboxClient, f as LoopSandboxPlacement, e as LoopTraceEmitter } from '../types-DnYoHvvZ.js';
2
- import { c as FleetHandle, d as DelegationExecutor, e as DelegateFeedbackArgs, f as DelegationFeedbackSnapshot, g as DelegationProfile, D as DelegateCodeArgs, h as DelegateResearchArgs, i as DelegateUiAuditArgs, j as DelegationStatus, k as DelegationProgress, l as DelegationResultPayload, m as DelegationError, n as DelegationStatusResult, o as DelegationHistoryArgs, p as DelegationHistoryEntry, q as CoderDelegate, R as ResearcherDelegate, U as UiAuditorDelegate, r as DelegateCodeResult, s as DelegateFeedbackResult, t as ResearchSource, u as DelegateResearchResult, v as DelegateUiAuditResult, w as DelegationHistoryResult, x as DelegationStatusArgs } from '../kb-gate-2Gwpz_27.js';
3
- export { y as CoderReview, C as CoderReviewer, a as CoderWinnerSelection, z as CreateDefaultCoderDelegateOptions, b as CreateKbGateOptions, A as DelegateCodeConfig, B as DelegateResearchConfig, E as DelegateRunCtx, G as DelegateUiAuditConfig, H as DelegateUiAuditRoute, F as FactCandidate, I as FactJudge, J as FactJudgeVerdict, K as FeedbackRating, L as FeedbackRefersTo, M as FleetWorkspaceExecutorOptions, N as KbGateResult, O as ResearchOutputShape, S as SiblingSandboxExecutorOptions, P as UiAuditorDelegationOutput, Q as createDefaultCoderDelegate, T as createFleetWorkspaceExecutor, V as createKbGate, W as createSiblingSandboxExecutor } from '../kb-gate-2Gwpz_27.js';
1
+ import { S as SandboxClient, f as LoopSandboxPlacement, e as LoopTraceEmitter } from '../types-nBMuollC.js';
2
+ import { c as FleetHandle, d as DelegationExecutor, e as DelegateFeedbackArgs, f as DelegationFeedbackSnapshot, g as DelegationProfile, D as DelegateCodeArgs, h as DelegateResearchArgs, i as DelegateUiAuditArgs, j as DelegationStatus, k as DelegationProgress, l as DelegationResultPayload, m as DelegationError, n as DelegationStatusResult, o as DelegationHistoryArgs, p as DelegationHistoryEntry, q as CoderDelegate, R as ResearcherDelegate, U as UiAuditorDelegate, r as DelegateCodeResult, s as DelegateFeedbackResult, t as ResearchSource, u as DelegateResearchResult, v as DelegateUiAuditResult, w as DelegationHistoryResult, x as DelegationStatusArgs } from '../kb-gate-51BlLlVM.js';
3
+ export { y as CoderReview, C as CoderReviewer, a as CoderWinnerSelection, z as CreateDefaultCoderDelegateOptions, b as CreateKbGateOptions, A as DelegateCodeConfig, B as DelegateResearchConfig, E as DelegateRunCtx, G as DelegateUiAuditConfig, H as DelegateUiAuditRoute, F as FactCandidate, I as FactJudge, J as FactJudgeVerdict, K as FeedbackRating, L as FeedbackRefersTo, M as FleetWorkspaceExecutorOptions, N as KbGateResult, O as ResearchOutputShape, S as SiblingSandboxExecutorOptions, P as UiAuditorDelegationOutput, Q as createDefaultCoderDelegate, T as createFleetWorkspaceExecutor, V as createKbGate, W as createSiblingSandboxExecutor } from '../kb-gate-51BlLlVM.js';
4
4
  export { B as BuildDelegationMcpServerOptions, C as ComposeProductionAgentProfileOptions, D as DELEGATION_MCP_SERVER_KEY, b as buildDelegationMcpServer, c as composeProductionAgentProfile } from '../delegation-profile-1GbW5yA3.js';
5
5
  import { L as LocalHarness, r as runLocalHarness } from '../local-harness-KrdFTY5R.js';
6
6
  export { a as LocalHarnessResult, R as RunLocalHarnessOptions } from '../local-harness-KrdFTY5R.js';
7
- import { O as OtelExporter } from '../otel-export-nurzFwuJ.js';
8
- export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-nurzFwuJ.js';
7
+ import { O as OtelExporter } from '../otel-export-EzfsVUhh.js';
8
+ export { m as mcpToolsForRuntimeMcp, a as mcpToolsForRuntimeMcpSubset } from '../otel-export-EzfsVUhh.js';
9
9
  import { AnalystFinding } from '@tangle-network/agent-eval';
10
- import { S as Scope, R as ResultBlobStore, A as Agent, B as Budget } from '../types-BfoeiQRZ.js';
10
+ import { S as Scope, R as ResultBlobStore, A as Agent, B as Budget } from '../types-Cbx3dNK5.js';
11
11
  import { a as UiLens } from '../substrate-CUgk7F7s.js';
12
12
  import '@tangle-network/sandbox';
13
13
  import '../runtime-hooks-C7JwKb9E.js';
14
- import '../coder-DCWFQpmJ.js';
14
+ import '../coder-CVZNGbyg.js';
15
15
  import 'node:child_process';
16
16
 
17
17
  /**
@@ -452,10 +452,9 @@ interface McpServerOptions {
452
452
  /** Override the default in-memory task queue. */
453
453
  queue?: DelegationTaskQueue;
454
454
  /**
455
- * Extra tools to serve alongside the delegation tools e.g. the operator toolbox
456
- * (`createCoordinationTools(...).tools`), which exposes the driver's spawn/observe/steer verbs over
457
- * MCP so a sandbox agent can BE the driver. Registered after the built-ins; a duplicate name
458
- * throws (fail loud — no silent shadowing of a delegation tool).
455
+ * Extra tools to serve alongside the delegation tools, for example
456
+ * `createCoordinationTools(...).tools`. Registered after the built-ins; a
457
+ * duplicate name throws so delegation tools cannot be shadowed silently.
459
458
  */
460
459
  extraTools?: McpToolDescriptor[];
461
460
  /** Server display name surfaced via `initialize`. Default `'agent-runtime-mcp'`. */
@@ -570,9 +569,11 @@ interface CheckRunnerOptions {
570
569
  /** Run ONE lens over a trace → findings. Generic over any kind: prompt = the lens + the agent-eval
571
570
  * finding schema; the model's JSON array is parsed (`parseRawFinding`), lifted, and firewalled. */
572
571
  declare function runCheck(kind: Check, trace: unknown, opts: CheckRunnerOptions, producedAt: string): Promise<AnalystFinding[]>;
573
- /** Build a `run_analyst` runner over a kind directory — the seam the operator toolbox is wired with.
574
- * Returns the findings, or a typed error for an unknown kind. `producedAt` is passed in (the runtime
575
- * forbids `Date.now` in replay-safe paths; the caller stamps it). */
572
+ /**
573
+ * Build a `run_analyst` runner over a kind directory.
574
+ * Returns findings, or a typed error for an unknown kind. `producedAt` is
575
+ * passed in because replay-safe paths must not read `Date.now`.
576
+ */
576
577
  declare function makeCheckRunner(kinds: Record<string, Check>, opts: CheckRunnerOptions): (kindId: string, trace: unknown, producedAt: string) => Promise<AnalystFinding[] | {
577
578
  error: string;
578
579
  }>;
@@ -580,84 +581,84 @@ declare function makeCheckRunner(kinds: Record<string, Check>, opts: CheckRunner
580
581
  /**
581
582
  * @experimental
582
583
  *
583
- * COORDINATION TOOLS the verbs a parent agent uses to coordinate the child agents it spawns,
584
- * exposed as MCP tools backed by a live keystone `Scope`. This is `Scope`-as-MCP.
585
- *
586
- * NOT a transport. The cross-org message bus (`docs/agent-bus-protocol.md`) and the SDK's
587
- * `dispatchPrompt`/`SessionMessage` are the *transports* the `steer` verb rides; THIS file is the
588
- * verb set (the API). One verb, several bindings: in-process `Scope.send` is a direct call; across
589
- * sandboxes it rides SDK session-messaging; across orgs it rides the agent-bus protocol.
590
- *
591
- * spawn_worker → scope.spawn (budget-bounded, fail-closed — equal-k holds even for an LLM driver)
592
- * await_next → scope.next (THE wake event: block until the next spawned child settles)
593
- * observe_worker→ scope.view + the result blob (a child's status, spend, and settled output)
594
- * steer_worker → scope.send (deliver a next-instruction / interrupt to a RUNNING child)
595
- * list_analysts → the check menu (the trace lenses the agent can apply — see checks.ts)
596
- * run_analyst → apply a CHECK (run a kind over a child's trace → trace-derived findings)
597
- * stop → declare the run complete (the terminal move)
598
- *
599
- * The check verbs are present only when the check seam (`analystKinds` + `runAnalyst`) is wired —
600
- * an agent that does not review traces (a pure dispatcher) omits them. A trace check is a SEPARATE
601
- * lens (selector ≠ judge: it reads the trace, never the score); authoring a NEW check at runtime is
602
- * the next addition.
603
- *
604
- * A worker the driver spawns may itself carry the driver profile — `spawn_worker` does not care what
605
- * the profile is, so drivers-of-drivers fall out for free (each sub-driver gets its own sub-scope,
606
- * bounded by `maxDepth` + the conserved pool).
584
+ * MCP binding for a live `Scope`. A sandbox driver gets the same small verbs
585
+ * the in-process driver has: spawn, observe, await, steer, ask/answer, analyze,
586
+ * and stop. Settled outputs remain Scope artifacts; product code can project
587
+ * them into any UI/report envelope it needs.
607
588
  */
608
589
 
609
- /** A worker the driver has drained via `await_next` — the operator's running ledger of settled
610
- * workers + their DEPLOYABLE verdict (the driver IS the selector, so it legitimately reads the
611
- * verdict; the analyst, which reads only the trace, is the separate selector≠judge lens). The
612
- * driver picks its deliverable from this ledger at `stop`. */
590
+ /** A worker the driver has drained via `await_next`. */
613
591
  interface SettledWorker {
614
592
  readonly id: string;
615
593
  readonly status: 'done' | 'down';
616
- /** Deployable score in [0,1] from the worker's verdict (done only). */
617
594
  readonly score?: number;
618
- /** Whether the deployable verdict passed (done only). */
619
595
  readonly valid?: boolean;
620
- /** Result-blob pointer for the worker's output/trace (done only). */
621
596
  readonly outRef?: string;
622
- /** Failure reason (down only). */
623
597
  readonly reason?: string;
624
598
  }
625
- /** How a `spawn_worker` profile becomes a spawnable leaf `Agent`. The caller wires this (e.g. the
626
- * surface registry turns a profile into a shot executor) so the toolbox stays domain-blind. */
599
+ type QuestionLevel = 'worker' | 'driver' | 'loop';
600
+ type QuestionUrgency = 'continue-without' | 'blocks-step' | 'blocks-run';
601
+ interface QuestionOption {
602
+ readonly label: string;
603
+ readonly tradeoff: string;
604
+ }
605
+ interface Question {
606
+ readonly id: string;
607
+ readonly from: string;
608
+ readonly level: QuestionLevel;
609
+ readonly question: string;
610
+ readonly reason: string;
611
+ readonly urgency: QuestionUrgency;
612
+ readonly options?: ReadonlyArray<QuestionOption>;
613
+ }
614
+ type QuestionDecision = {
615
+ readonly kind: 'answer';
616
+ readonly answer: string;
617
+ readonly by: string;
618
+ } | {
619
+ readonly kind: 'defer';
620
+ readonly reason: string;
621
+ } | {
622
+ readonly kind: 'escalate';
623
+ readonly to: 'parent' | 'user' | string;
624
+ readonly reason: string;
625
+ };
626
+ interface QuestionRecord extends Question {
627
+ readonly status: 'open' | 'answered' | 'deferred' | 'escalated';
628
+ readonly decision?: QuestionDecision;
629
+ readonly openedAt: number;
630
+ }
631
+ type QuestionPolicy = 'auto' | 'mustDecide' | 'bubble' | 'failClosed';
632
+ interface AnalystRegistry {
633
+ readonly kinds: ReadonlyArray<{
634
+ id: string;
635
+ description: string;
636
+ area: string;
637
+ }>;
638
+ readonly run: (kindId: string, trace: unknown) => Promise<unknown>;
639
+ }
640
+ type CoordinationEvent = {
641
+ readonly type: 'question';
642
+ readonly question: QuestionRecord;
643
+ };
627
644
  type MakeWorkerAgent = (profile: unknown) => Agent<unknown, unknown>;
628
645
  interface CoordinationToolsOptions {
629
- /** The DRIVER's live scope — spawn/observe/steer all act on this. */
630
646
  readonly scope: Scope<unknown>;
631
- /** Result blobs, so `observe_worker` can rehydrate a settled worker's output. */
632
647
  readonly blobs: ResultBlobStore;
633
- /** Turn a spawn_worker `profile` into a leaf agent (registry-resolved on spawn). */
634
648
  readonly makeWorkerAgent: MakeWorkerAgent;
635
- /** Per-worker conserved budget the driver reserves on each spawn. */
636
649
  readonly perWorker: Budget;
637
- /** The analyst lens menu (for `list_analysts`) — id + one-line + area. Injected so the toolbox
638
- * stays domain-blind; wire it from `analyst-kinds.ts`'s directory. Omit to disable analyst tools. */
639
- readonly analystKinds?: ReadonlyArray<{
640
- id: string;
641
- description: string;
642
- area: string;
643
- }>;
644
- /** Run a lens over a worker's trace → findings (or a typed error). Wire it from
645
- * `makeCheckRunner(...)`. `run_analyst` fetches the worker's settled output and passes it here. */
646
- readonly runAnalyst?: (kindId: string, trace: unknown) => Promise<unknown>;
650
+ readonly analysts?: AnalystRegistry;
651
+ readonly onEvent?: (event: CoordinationEvent) => void | Promise<void>;
652
+ readonly questionPolicy?: QuestionPolicy;
647
653
  }
648
654
  interface CoordinationTools {
649
- /** MCP tools — register on an `McpServer`, or call the handlers directly in-process. */
650
655
  readonly tools: McpToolDescriptor[];
651
- /** True once the driver called `stop` — the operator loop reads this to terminate. */
652
656
  isStopped(): boolean;
653
- /** The reason passed to `stop`, if any. */
654
657
  stopReason(): string | undefined;
655
- /** The workers drained so far via `await_next` (the driver's selection ledger). */
656
658
  settled(): ReadonlyArray<SettledWorker>;
659
+ questions(): ReadonlyArray<QuestionRecord>;
657
660
  }
658
- /** Build the operator toolbox over a live scope. The tools are the driver's verbs; their handlers
659
- * are thin wrappers over the keystone (spawn/view/send), so the budget/journal/abort discipline of
660
- * the Supervisor applies to a sandbox driver exactly as to the in-process one. */
661
+ /** Build the driver's MCP tools over a live scope. */
661
662
  declare function createCoordinationTools(opts: CoordinationToolsOptions): CoordinationTools;
662
663
 
663
664
  /**
@@ -1147,4 +1148,4 @@ declare function createPropagatingTraceEmitter(ctx: TraceContext): {
1147
1148
  */
1148
1149
  declare function traceContextToEnv(ctx: TraceContext): Record<string, string>;
1149
1150
 
1150
- export { type Check, type CheckRunnerOptions, CoderDelegate, type CoordinationTools, type CoordinationToolsOptions, type CreateWorktreeOptions, DELEGATE_CODE_DESCRIPTION, DELEGATE_CODE_INPUT_SCHEMA, DELEGATE_CODE_TOOL_NAME, DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_INPUT_SCHEMA, DELEGATE_FEEDBACK_TOOL_NAME, DELEGATE_RESEARCH_DESCRIPTION, DELEGATE_RESEARCH_INPUT_SCHEMA, DELEGATE_RESEARCH_TOOL_NAME, DELEGATE_UI_AUDIT_DESCRIPTION, DELEGATE_UI_AUDIT_INPUT_SCHEMA, DELEGATE_UI_AUDIT_TOOL_NAME, DELEGATION_HISTORY_DESCRIPTION, DELEGATION_HISTORY_INPUT_SCHEMA, DELEGATION_HISTORY_TOOL_NAME, DELEGATION_STATUS_DESCRIPTION, DELEGATION_STATUS_INPUT_SCHEMA, DELEGATION_STATUS_TOOL_NAME, DelegateCodeArgs, DelegateCodeResult, DelegateFeedbackArgs, DelegateFeedbackResult, DelegateResearchArgs, DelegateResearchResult, DelegateUiAuditArgs, DelegateUiAuditResult, DelegationError, DelegationExecutor, DelegationFeedbackSnapshot, DelegationHistoryArgs, DelegationHistoryEntry, DelegationHistoryResult, DelegationProfile, DelegationProgress, type DelegationRecord, DelegationResultPayload, DelegationStatus, DelegationStatusArgs, DelegationStatusResult, DelegationTaskQueue, type DelegationTaskQueueOptions, type DetectExecutorArgs, type DiffOptions, type DiffResult, type FeedbackEvent, type FeedbackStore, FleetHandle, type GitRunner, InMemoryFeedbackStore, type InProcessExecutorDescribePlacement, type InProcessExecutorOptions, type JsonRpcMessage, type JsonRpcResponse, LocalHarness, type MakeWorkerAgent, type McpServer, type McpServerOptions, type McpToolDescriptor, type McpTransport, type RemoveWorktreeOptions, ResearchSource, ResearcherDelegate, type SettledWorker, type SubmitInput, type SubmitOutput, type TraceContext, UiAuditorDelegate, type WorktreeHandle, captureWorktreeDiff, createCoordinationTools, createDelegateCodeHandler, createDelegateFeedbackHandler, createDelegateResearchHandler, createDelegateUiAuditHandler, createDelegationHistoryHandler, createDelegationStatusHandler, 
createInProcessExecutor, createInProcessTransport, createMcpServer, createPropagatingTraceEmitter, createWorktree, defaultChecks, detectExecutor, eventToSnapshot, hashIdempotencyInput, liftFindings, makeCheckRunner, readTraceContextFromEnv, removeWorktree, renderTrace, runCheck, runLocalHarness, traceContextToEnv, validateDelegateCodeArgs, validateDelegateFeedbackArgs, validateDelegateResearchArgs, validateDelegateUiAuditArgs, validateDelegationHistoryArgs, validateDelegationStatusArgs };
1151
+ export { type AnalystRegistry, type Check, type CheckRunnerOptions, CoderDelegate, type CoordinationEvent, type CoordinationTools, type CoordinationToolsOptions, type CreateWorktreeOptions, DELEGATE_CODE_DESCRIPTION, DELEGATE_CODE_INPUT_SCHEMA, DELEGATE_CODE_TOOL_NAME, DELEGATE_FEEDBACK_DESCRIPTION, DELEGATE_FEEDBACK_INPUT_SCHEMA, DELEGATE_FEEDBACK_TOOL_NAME, DELEGATE_RESEARCH_DESCRIPTION, DELEGATE_RESEARCH_INPUT_SCHEMA, DELEGATE_RESEARCH_TOOL_NAME, DELEGATE_UI_AUDIT_DESCRIPTION, DELEGATE_UI_AUDIT_INPUT_SCHEMA, DELEGATE_UI_AUDIT_TOOL_NAME, DELEGATION_HISTORY_DESCRIPTION, DELEGATION_HISTORY_INPUT_SCHEMA, DELEGATION_HISTORY_TOOL_NAME, DELEGATION_STATUS_DESCRIPTION, DELEGATION_STATUS_INPUT_SCHEMA, DELEGATION_STATUS_TOOL_NAME, DelegateCodeArgs, DelegateCodeResult, DelegateFeedbackArgs, DelegateFeedbackResult, DelegateResearchArgs, DelegateResearchResult, DelegateUiAuditArgs, DelegateUiAuditResult, DelegationError, DelegationExecutor, DelegationFeedbackSnapshot, DelegationHistoryArgs, DelegationHistoryEntry, DelegationHistoryResult, DelegationProfile, DelegationProgress, type DelegationRecord, DelegationResultPayload, DelegationStatus, DelegationStatusArgs, DelegationStatusResult, DelegationTaskQueue, type DelegationTaskQueueOptions, type DetectExecutorArgs, type DiffOptions, type DiffResult, type FeedbackEvent, type FeedbackStore, FleetHandle, type GitRunner, InMemoryFeedbackStore, type InProcessExecutorDescribePlacement, type InProcessExecutorOptions, type JsonRpcMessage, type JsonRpcResponse, LocalHarness, type MakeWorkerAgent, type McpServer, type McpServerOptions, type McpToolDescriptor, type McpTransport, type Question, type QuestionDecision, type QuestionPolicy, type QuestionRecord, type RemoveWorktreeOptions, ResearchSource, ResearcherDelegate, type SettledWorker, type SubmitInput, type SubmitOutput, type TraceContext, UiAuditorDelegate, type WorktreeHandle, captureWorktreeDiff, createCoordinationTools, createDelegateCodeHandler, createDelegateFeedbackHandler, 
createDelegateResearchHandler, createDelegateUiAuditHandler, createDelegationHistoryHandler, createDelegationStatusHandler, createInProcessExecutor, createInProcessTransport, createMcpServer, createPropagatingTraceEmitter, createWorktree, defaultChecks, detectExecutor, eventToSnapshot, hashIdempotencyInput, liftFindings, makeCheckRunner, readTraceContextFromEnv, removeWorktree, renderTrace, runCheck, runLocalHarness, traceContextToEnv, validateDelegateCodeArgs, validateDelegateFeedbackArgs, validateDelegateResearchArgs, validateDelegateUiAuditArgs, validateDelegationHistoryArgs, validateDelegationStatusArgs };
package/dist/mcp/index.js CHANGED
@@ -14,7 +14,7 @@ import {
14
14
  removeWorktree,
15
15
  traceContextToEnv,
16
16
  validateDelegateUiAuditArgs
17
- } from "../chunk-KPN7OQ64.js";
17
+ } from "../chunk-T4OQQEE3.js";
18
18
  import "../chunk-WIR4HOOJ.js";
19
19
  import {
20
20
  mcpToolsForRuntimeMcp,
@@ -58,7 +58,7 @@ import {
58
58
  createDefaultCoderDelegate,
59
59
  createFleetWorkspaceExecutor,
60
60
  createSiblingSandboxExecutor
61
- } from "../chunk-I42NHLKX.js";
61
+ } from "../chunk-5YDS7BLC.js";
62
62
  import "../chunk-KADIJAD4.js";
63
63
  import {
64
64
  runLocalHarness
@@ -70,7 +70,7 @@ import {
70
70
  } from "../chunk-7JITYN6T.js";
71
71
  import {
72
72
  assertTraceDerivedFindings
73
- } from "../chunk-65FQLI4V.js";
73
+ } from "../chunk-72JQCHOZ.js";
74
74
  import "../chunk-GSUO5QS6.js";
75
75
  import "../chunk-DGUM43GV.js";
76
76
 
@@ -246,7 +246,30 @@ var idArg = { type: "string", description: "The workerId returned by spawn_worke
246
246
  function createCoordinationTools(opts) {
247
247
  let stopped = false;
248
248
  let reason;
249
+ let questionSeq = 0;
249
250
  const ledger = [];
251
+ const questions = [];
252
+ const questionPolicy = opts.questionPolicy ?? "auto";
253
+ const str = (v, field) => {
254
+ if (typeof v !== "string" || v.length === 0)
255
+ throw new Error(`coordination tools: "${field}" must be a non-empty string`);
256
+ return v;
257
+ };
258
+ const obj = (raw) => {
259
+ if (!raw || typeof raw !== "object")
260
+ throw new Error("coordination tools: arguments must be an object");
261
+ return raw;
262
+ };
263
+ const level = (v) => {
264
+ if (v === "worker" || v === "driver" || v === "loop") return v;
265
+ throw new Error('coordination tools: "level" must be worker, driver, or loop');
266
+ };
267
+ const urgency = (v) => {
268
+ if (v === "continue-without" || v === "blocks-step" || v === "blocks-run") return v;
269
+ throw new Error(
270
+ 'coordination tools: "urgency" must be continue-without, blocks-step, or blocks-run'
271
+ );
272
+ };
250
273
  const recordSettled = (s) => {
251
274
  const w = s.kind === "done" ? {
252
275
  id: s.handle.id,
@@ -258,24 +281,68 @@ function createCoordinationTools(opts) {
258
281
  ledger.push(w);
259
282
  return w;
260
283
  };
261
- const str = (v, field) => {
262
- if (typeof v !== "string" || v.length === 0)
263
- throw new Error(`operator toolbox: "${field}" must be a non-empty string`);
264
- return v;
284
+ const nextQuestionId = (from) => `${from}:q${questionSeq++}`;
285
+ const normalizeQuestion = (q, fallbackFrom) => {
286
+ const from = str(q.from ?? fallbackFrom, "from");
287
+ return {
288
+ id: typeof q.id === "string" && q.id.length > 0 ? q.id : nextQuestionId(from),
289
+ from,
290
+ level: level(q.level),
291
+ question: str(q.question, "question"),
292
+ reason: str(q.reason, "reason"),
293
+ ...q.options ? { options: q.options } : {},
294
+ urgency: urgency(q.urgency)
295
+ };
265
296
  };
266
- const obj = (raw) => {
267
- if (!raw || typeof raw !== "object")
268
- throw new Error("operator toolbox: arguments must be an object");
269
- return raw;
297
+ const addQuestion = (raw, fallbackFrom, decision) => {
298
+ const q = normalizeQuestion(raw, fallbackFrom);
299
+ const existing = questions.find((x) => x.id === q.id);
300
+ if (existing) return { question: existing, added: false };
301
+ const effectiveDecision = decision ?? (questionPolicy === "bubble" ? {
302
+ kind: "escalate",
303
+ to: "parent",
304
+ reason: "question policy bubbled to parent"
305
+ } : void 0);
306
+ const status = effectiveDecision?.kind === "answer" ? "answered" : effectiveDecision?.kind === "defer" ? "deferred" : effectiveDecision?.kind === "escalate" ? "escalated" : "open";
307
+ const record = {
308
+ ...q,
309
+ status,
310
+ openedAt: Date.now(),
311
+ ...effectiveDecision ? { decision: effectiveDecision } : {}
312
+ };
313
+ questions.push(record);
314
+ return { question: record, added: true };
315
+ };
316
+ const emitNewQuestion = async (record) => {
317
+ if (record.added) await opts.onEvent?.({ type: "question", question: record.question });
318
+ return record.question;
319
+ };
320
+ const decideQuestion = (questionId, decision) => {
321
+ const idx = questions.findIndex((q) => q.id === questionId);
322
+ if (idx < 0) throw new Error(`unknown questionId ${JSON.stringify(questionId)}`);
323
+ const prior = questions[idx];
324
+ const status = decision.kind === "answer" ? "answered" : decision.kind === "defer" ? "deferred" : "escalated";
325
+ const next = { ...prior, status, decision };
326
+ questions[idx] = next;
327
+ return next;
328
+ };
329
+ const blockingQuestionsForStop = () => {
330
+ if (questionPolicy === "auto" || questionPolicy === "bubble") return [];
331
+ return questions.filter((q) => {
332
+ const blocking = q.urgency === "blocks-step" || q.urgency === "blocks-run";
333
+ if (!blocking) return false;
334
+ if (questionPolicy === "mustDecide") return q.status === "open";
335
+ return q.status !== "answered" && q.status !== "deferred";
336
+ });
270
337
  };
271
338
  const tools = [
272
339
  {
273
340
  name: "spawn_worker",
274
- description: 'Start a worker the operator will drive. `profile` is the worker (or another DRIVER \u2014 drivers-of-drivers are allowed); `task` is what it should do. Reserves the worker\u2019s budget from the conserved pool and FAILS CLOSED when the pool is dry \u2014 so spawning "at will" is bounded by the budget. Returns { workerId } or { error: "budget-exhausted" | "depth-exceeded" }.',
341
+ description: "Start a worker the driver will drive. `profile` is the worker or another driver; `task` is what it should do. Reserves budget from the conserved pool and fails closed.",
275
342
  inputSchema: {
276
343
  type: "object",
277
344
  properties: {
278
- profile: { description: "The worker/driver profile to run (passed to makeWorkerAgent)." },
345
+ profile: { description: "The worker/driver profile to run." },
279
346
  task: { description: "The task the worker should perform." },
280
347
  label: { type: "string", description: "Optional trace label." }
281
348
  },
@@ -293,7 +360,7 @@ function createCoordinationTools(opts) {
293
360
  },
294
361
  {
295
362
  name: "observe_worker",
296
- description: "Inspect a worker you are driving: its live status + conserved spend, and \u2014 once it has settled \u2014 its output artifact (rehydrated from the result blob). Use this to review work before deciding your next move. (In-flight token-level trace is surfaced via the analyst, not here.)",
363
+ description: "Inspect a worker status, spend, and settled output artifact when available.",
297
364
  inputSchema: { type: "object", properties: { workerId: idArg }, required: ["workerId"] },
298
365
  handler: async (raw) => {
299
366
  const id = str(obj(raw).workerId, "workerId");
@@ -310,7 +377,7 @@ function createCoordinationTools(opts) {
310
377
  },
311
378
  {
312
379
  name: "steer_worker",
313
- description: "Steer a RUNNING worker out-of-band \u2014 deliver your next instruction / a course-correction / an interrupt to its inbox. Returns { delivered } \u2014 false if the worker has finished or its harness cannot be steered mid-flight (then spawn a fresh one or wait and re-observe).",
380
+ description: "Deliver an out-of-band instruction to a running worker inbox.",
314
381
  inputSchema: {
315
382
  type: "object",
316
383
  properties: {
@@ -329,23 +396,124 @@ function createCoordinationTools(opts) {
329
396
  },
330
397
  {
331
398
  name: "await_next",
332
- description: 'Wait for the next worker you spawned to FINISH, then read its deployable verdict. This is how you advance: spawn one or more workers, then call await_next to block until the next one settles. Returns { settled: workerId, status: "done"|"down", score, valid } for a finished worker, or { idle: true } when no worker is still running (then spawn more or stop). Workers run concurrently \u2014 spawn a batch, then await_next repeatedly to collect them.',
399
+ description: "Wait for the next spawned worker to settle. Returns { idle: true } when none are live.",
333
400
  inputSchema: { type: "object", properties: {} },
334
401
  handler: async () => {
335
402
  const s = await opts.scope.next();
336
403
  if (!s) return { idle: true };
337
404
  const w = recordSettled(s);
338
- return w.status === "done" ? { settled: w.id, status: "done", score: w.score, valid: w.valid } : { settled: w.id, status: "down", reason: w.reason };
405
+ return w.status === "done" ? {
406
+ settled: w.id,
407
+ status: "done",
408
+ score: w.score,
409
+ valid: w.valid,
410
+ outRef: w.outRef
411
+ } : { settled: w.id, status: "down", reason: w.reason };
412
+ }
413
+ },
414
+ {
415
+ name: "list_questions",
416
+ description: "List questions raised by workers, drivers, or analysts. Blocking stop behavior follows questionPolicy.",
417
+ inputSchema: { type: "object", properties: {} },
418
+ handler: () => Promise.resolve({ questions })
419
+ },
420
+ {
421
+ name: "answer_question",
422
+ description: "Record an answer, deferral, or escalation for a loop question.",
423
+ inputSchema: {
424
+ type: "object",
425
+ properties: {
426
+ questionId: { type: "string" },
427
+ answer: { type: "string" },
428
+ by: { type: "string", description: 'Node id or "user".' },
429
+ deferReason: { type: "string" },
430
+ escalateTo: { type: "string", enum: ["parent", "user"] },
431
+ escalateReason: { type: "string" }
432
+ },
433
+ required: ["questionId"]
434
+ },
435
+ handler: (raw) => {
436
+ const a = obj(raw);
437
+ const questionId = str(a.questionId, "questionId");
438
+ if (typeof a.answer === "string" && a.answer.length > 0) {
439
+ return Promise.resolve({
440
+ question: decideQuestion(questionId, {
441
+ kind: "answer",
442
+ answer: a.answer,
443
+ by: typeof a.by === "string" && a.by.length > 0 ? a.by : "user"
444
+ })
445
+ });
446
+ }
447
+ if (typeof a.deferReason === "string" && a.deferReason.length > 0) {
448
+ return Promise.resolve({
449
+ question: decideQuestion(questionId, {
450
+ kind: "defer",
451
+ reason: a.deferReason
452
+ })
453
+ });
454
+ }
455
+ if (a.escalateTo === "parent" || a.escalateTo === "user") {
456
+ const escalateReason = typeof a.escalateReason === "string" && a.escalateReason.length > 0 ? a.escalateReason : "driver escalated";
457
+ return Promise.resolve({
458
+ question: decideQuestion(questionId, {
459
+ kind: "escalate",
460
+ to: a.escalateTo,
461
+ reason: escalateReason
462
+ })
463
+ });
464
+ }
465
+ throw new Error("answer_question: provide answer, deferReason, or escalateTo");
466
+ }
467
+ },
468
+ {
469
+ name: "ask_parent",
470
+ description: "Raise a question to the parent driver/Pi/user when this driver cannot decide.",
471
+ inputSchema: {
472
+ type: "object",
473
+ properties: {
474
+ from: { type: "string" },
475
+ level: { type: "string", enum: ["worker", "driver", "loop"] },
476
+ question: { type: "string" },
477
+ reason: { type: "string" },
478
+ urgency: { type: "string", enum: ["continue-without", "blocks-step", "blocks-run"] }
479
+ },
480
+ required: ["from", "level", "question", "reason", "urgency"]
481
+ },
482
+ handler: async (raw) => {
483
+ const a = obj(raw);
484
+ const from = str(a.from, "from");
485
+ const q = await emitNewQuestion(
486
+ addQuestion(
487
+ {
488
+ from,
489
+ level: level(a.level),
490
+ question: str(a.question, "question"),
491
+ reason: str(a.reason, "reason"),
492
+ urgency: urgency(a.urgency)
493
+ },
494
+ from,
495
+ { kind: "escalate", to: "parent", reason: "asked parent" }
496
+ )
497
+ );
498
+ return { question: q };
339
499
  }
340
500
  },
341
501
  {
342
502
  name: "stop",
343
- description: "Declare the run complete \u2014 every required change is made and verified. The terminal move.",
503
+ description: "Declare the run complete.",
344
504
  inputSchema: {
345
505
  type: "object",
346
506
  properties: { reason: { type: "string", description: "Why you are stopping." } }
347
507
  },
348
508
  handler: (raw) => {
509
+ const blocking = blockingQuestionsForStop();
510
+ if (blocking.length) {
511
+ return Promise.resolve({
512
+ stopped: false,
513
+ error: "unresolved-blocking-questions",
514
+ questions: blocking
515
+ });
516
+ }
349
517
  stopped = true;
350
518
  const r = obj(raw).reason;
351
519
  reason = typeof r === "string" ? r : void 0;
@@ -353,22 +521,20 @@ function createCoordinationTools(opts) {
353
521
  }
354
522
  }
355
523
  ];
356
- if (opts.analystKinds) {
524
+ if (opts.analysts) {
357
525
  tools.push({
358
526
  name: "list_analysts",
359
- description: "List the trace-analyst lenses available to run over a worker \u2014 id, what each looks for, and its area.",
527
+ description: "List trace-analyst lenses available to run over a settled worker.",
360
528
  inputSchema: { type: "object", properties: {} },
361
- handler: () => Promise.resolve({ analysts: opts.analystKinds })
529
+ handler: () => Promise.resolve({ analysts: opts.analysts?.kinds })
362
530
  });
363
- }
364
- if (opts.runAnalyst) {
365
531
  tools.push({
366
532
  name: "run_analyst",
367
- description: "Apply an analyst LENS to a worker you are driving \u2014 run `kind` over the worker\u2019s trace and return its findings (trace-derived, never score-derived). Use `list_analysts` for the menu; run several lenses to triangulate. The worker must have settled (its trace is read from its output).",
533
+ description: "Apply an analyst lens to a settled worker trace.",
368
534
  inputSchema: {
369
535
  type: "object",
370
536
  properties: {
371
- kind: { type: "string", description: "The analyst kind id (see list_analysts)." },
537
+ kind: { type: "string", description: "The analyst kind id." },
372
538
  workerId: idArg
373
539
  },
374
540
  required: ["kind", "workerId"]
@@ -381,11 +547,17 @@ function createCoordinationTools(opts) {
381
547
  if (!node.outRef)
382
548
  return { error: `worker ${JSON.stringify(id)} has not settled \u2014 no trace to analyze yet` };
383
549
  const trace = await opts.blobs.get(node.outRef);
384
- return { findings: await opts.runAnalyst?.(str(a.kind, "kind"), trace) };
550
+ return { findings: await opts.analysts?.run(str(a.kind, "kind"), trace) };
385
551
  }
386
552
  });
387
553
  }
388
- return { tools, isStopped: () => stopped, stopReason: () => reason, settled: () => ledger };
554
+ return {
555
+ tools,
556
+ isStopped: () => stopped,
557
+ stopReason: () => reason,
558
+ settled: () => ledger,
559
+ questions: () => questions
560
+ };
389
561
  }
390
562
  export {
391
563
  DELEGATE_CODE_DESCRIPTION,