@tangle-network/agent-runtime 0.37.0 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/dist/agent.d.ts +3 -3
  2. package/dist/analyst-loop.d.ts +2 -2
  3. package/dist/analyst-loop.js +3 -257
  4. package/dist/analyst-loop.js.map +1 -1
  5. package/dist/{chunk-T3GJBKHA.js → chunk-7ZECSZ3C.js} +2 -2
  6. package/dist/chunk-AXWGLYSF.js +201 -0
  7. package/dist/chunk-AXWGLYSF.js.map +1 -0
  8. package/dist/chunk-FNMGYYSS.js +60 -0
  9. package/dist/chunk-FNMGYYSS.js.map +1 -0
  10. package/dist/{chunk-V6GURW4W.js → chunk-HSX6PFZR.js} +1 -209
  11. package/dist/chunk-HSX6PFZR.js.map +1 -0
  12. package/dist/{chunk-M65QJD35.js → chunk-PK5DYSNO.js} +5 -3
  13. package/dist/{chunk-M65QJD35.js.map → chunk-PK5DYSNO.js.map} +1 -1
  14. package/dist/chunk-VLXRXMTF.js +212 -0
  15. package/dist/chunk-VLXRXMTF.js.map +1 -0
  16. package/dist/chunk-VOX6Z3II.js +90 -0
  17. package/dist/chunk-VOX6Z3II.js.map +1 -0
  18. package/dist/chunk-XBUG326M.js +261 -0
  19. package/dist/chunk-XBUG326M.js.map +1 -0
  20. package/dist/dynamic-DcrwVGuV.d.ts +106 -0
  21. package/dist/{improvement-adapter-CaZxFxTd.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
  22. package/dist/improvement.d.ts +6 -130
  23. package/dist/improvement.js +4 -85
  24. package/dist/improvement.js.map +1 -1
  25. package/dist/index.d.ts +10 -85
  26. package/dist/index.js +27 -44
  27. package/dist/index.js.map +1 -1
  28. package/dist/{otel-export-DgFMwsVy.d.ts → kb-gate-YdPNEagq.d.ts} +62 -176
  29. package/dist/loop-runner-bin-DgZj0zfJ.d.ts +192 -0
  30. package/dist/loop-runner-bin.d.ts +12 -0
  31. package/dist/loop-runner-bin.js +19 -0
  32. package/dist/loop-runner-bin.js.map +1 -0
  33. package/dist/loops.d.ts +5 -106
  34. package/dist/mcp/bin.js +3 -2
  35. package/dist/mcp/bin.js.map +1 -1
  36. package/dist/mcp/index.d.ts +6 -79
  37. package/dist/mcp/index.js +11 -62
  38. package/dist/mcp/index.js.map +1 -1
  39. package/dist/optimize-prompt-D-urF2wW.d.ts +129 -0
  40. package/dist/otel-export-xgf4J6bo.d.ts +191 -0
  41. package/dist/profiles.d.ts +1 -1
  42. package/dist/{types-CmTjKLyB.d.ts → types-B9O7l-ij.d.ts} +2 -2
  43. package/dist/{types-D_MXrmJP.d.ts → types-p8dWBIXL.d.ts} +1 -1
  44. package/package.json +3 -2
  45. package/dist/chunk-V6GURW4W.js.map +0 -1
  46. package/dist/{chunk-T3GJBKHA.js.map → chunk-7ZECSZ3C.js.map} +0 -0
@@ -1,7 +1,6 @@
1
1
  import { CoderOutput, CoderTask } from './profiles.js';
2
- import { L as LoopSandboxClient } from './types-CmTjKLyB.js';
2
+ import { L as LoopSandboxClient } from './types-B9O7l-ij.js';
3
3
  import { SandboxInstance } from '@tangle-network/sandbox';
4
- import { O as OpenAIChatTool } from './types-CsCCryln.js';
5
4
 
6
5
  /**
7
6
  * @experimental
@@ -364,189 +363,76 @@ declare function createDefaultCoderDelegate(options: CreateDefaultCoderDelegateO
364
363
  /**
365
364
  * @experimental
366
365
  *
367
- * OpenAI Chat Completions `tools[]` projection of the 5 agent-runtime MCP
368
- * delegation tools.
366
+ * `createKbGate` — the valid-only knowledge-base growth gate, distilled from
367
+ * physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
368
+ * writer) runs candidate facts through this before persisting, so the KB grows
369
+ * with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
370
+ * vetoed at the gate.
369
371
  *
370
- * Use when configuring `createOpenAICompatibleBackend({ tools: ... })` so the
371
- * model can call `delegate_code`, `delegate_research`, `delegate_feedback`,
372
- * `delegation_status`, and `delegation_history` through the OpenAI-compat
373
- * transport (tcloud, OpenRouter, OpenAI direct, cli-bridge). The runtime
374
- * surfaces tool calls as `tool_call` stream events execution is the
375
- * caller's responsibility (typically the parent sandbox runtime's MCP
376
- * mount).
372
+ * Fail-closed by construction: every judge must `accept`; the FIRST veto wins
373
+ * and the fact is rejected. The non-negotiable floor (always on, can't be
374
+ * disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
375
+ * literally appear in its `sourceText`. That single check kills the dominant
376
+ * failure mode (a confident claim decoupled from any real source).
377
377
  *
378
- * Sandbox-SDK callers do NOT need this helper: the sandbox runtime mounts
379
- * MCP servers natively and the in-sandbox harness discovers tools via the
380
- * runtime, not via an OpenAI tools array.
381
- *
382
- * Tool name + description + JSON-schema are pulled from the canonical
383
- * `DELEGATE_*` constants exported by `./tools/*` so the projection cannot
384
- * drift from the server's own validators.
385
- */
386
-
387
- /**
388
- * @experimental
389
- *
390
- * Returns the 5 delegation tools projected into OpenAI Chat Completions
391
- * `tools[]` shape. The order is stable: `delegate_code`,
392
- * `delegate_research`, `delegate_feedback`, `delegation_status`,
393
- * `delegation_history`.
394
- */
395
- declare function mcpToolsForRuntimeMcp(): OpenAIChatTool[];
396
- /**
397
- * @experimental
398
- *
399
- * Subset filter — return only the projected tools whose `function.name`
400
- * appears in `names`. Useful for curated mounts (e.g. only the queue-bound
401
- * delegation tools, omitting `delegate_feedback`). Unknown names are
402
- * silently ignored; pass an empty array to get an empty result.
378
+ * Pure + dependency-free: it operates on fact candidates, not on a store, so it
379
+ * composes with `@tangle-network/agent-knowledge` or any persistence layer
380
+ * without importing it. The remediation policy (correct-on-veto vs
381
+ * escalate-as-unverified) is the caller's — this returns the verdict; it never
382
+ * drops a fact silently.
403
383
  */
404
- declare function mcpToolsForRuntimeMcpSubset(names: ReadonlyArray<string>): OpenAIChatTool[];
405
-
406
- /**
407
- * OTEL span exporter — streams LoopTraceEvents to an OTLP/HTTP collector.
408
- *
409
- * Reads OTEL_EXPORTER_OTLP_ENDPOINT + OTEL_EXPORTER_OTLP_HEADERS from env
410
- * when no explicit config is given. Keeps the runtime dep-free from
411
- * @opentelemetry/sdk-trace-base — minimal OTLP/JSON serializer.
412
- *
413
- * The exporter accepts both raw OtelSpan objects and LoopTraceEvents
414
- * (which get converted to OTLP spans automatically).
415
- */
416
- interface OtelExportConfig {
417
- /** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */
418
- endpoint?: string;
419
- /** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */
420
- headers?: Record<string, string>;
421
- /** Batch size before flush. Default 64. */
422
- batchSize?: number;
423
- /** Flush interval ms. Default 5000. */
424
- flushIntervalMs?: number;
425
- /** Resource attributes stamped on every export. */
426
- resourceAttributes?: Record<string, string | number | boolean>;
427
- /** Service name. Default 'agent-runtime'. */
428
- serviceName?: string;
384
+ /** @experimental A fact proposed for the KB, with its grounding. */
385
+ interface FactCandidate {
386
+ /** The atomic claim text. */
387
+ claim: string;
388
+ /** Optional extracted value (number or string) the claim asserts. */
389
+ value?: string | number;
390
+ /** Verbatim span lifted from the source that backs the claim. */
391
+ verbatimPassage: string;
392
+ /** The raw source text the passage must be grounded in. */
393
+ sourceText: string;
394
+ /** Where the fact claims to come from — checked for circular/self citations. */
395
+ citation?: string;
429
396
  }
430
- interface OtelExporter {
431
- /** Export a span. */
432
- exportSpan(span: OtelSpan): void;
433
- /** Force flush pending spans. */
434
- flush(): Promise<void>;
435
- /** Shutdown cleanly. */
436
- shutdown(): Promise<void>;
397
+ /** @experimental */
398
+ interface FactJudgeVerdict {
399
+ accept: boolean;
400
+ reason?: string;
437
401
  }
438
- interface OtelSpan {
439
- traceId: string;
440
- spanId: string;
441
- parentSpanId?: string;
402
+ /** @experimental A pluggable fact validator. Throw is NOT allowed — return a
403
+ * verdict; a thrown judge is a programmer error, not a veto. */
404
+ interface FactJudge {
442
405
  name: string;
443
- kind?: number;
444
- startTimeUnixNano: string;
445
- endTimeUnixNano: string;
446
- attributes?: OtelAttribute[];
447
- status?: {
448
- code: number;
449
- message?: string;
450
- };
451
- }
452
- interface OtelAttribute {
453
- key: string;
454
- value: {
455
- stringValue?: string;
456
- intValue?: string;
457
- doubleValue?: number;
458
- boolValue?: boolean;
459
- };
406
+ judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
460
407
  }
461
- /**
462
- * Create an OTEL exporter. Returns undefined when no endpoint is configured.
463
- */
464
- declare function createOtelExporter(config?: OtelExportConfig): OtelExporter | undefined;
465
- /**
466
- * Convert a LoopTraceEvent into an OtelSpan for export.
467
- */
468
- declare function loopEventToOtelSpan(event: {
469
- kind: string;
470
- runId: string;
471
- timestamp: number;
472
- payload: object;
473
- }, traceId: string, parentSpanId?: string): OtelSpan;
474
- /**
475
- * Build a nested, real-duration OTLP span tree for ONE loop run from its full
476
- * ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
477
- * zero-duration span per event), this reconstructs the topology hierarchy a
478
- * GenAI trace viewer renders natively:
479
- *
480
- * loop (invoke_workflow)
481
- * └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
482
- * ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
483
- * └─ …
484
- *
485
- * Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
486
- * a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
487
- * verdict / placement / cost (not yet standardized). Pure: feed it a buffered
488
- * per-runId event array (e.g. flushed on `loop.ended`) and export the result.
489
- */
490
- declare function buildLoopOtelSpans(events: ReadonlyArray<{
491
- kind: string;
492
- runId: string;
493
- timestamp: number;
494
- payload: object;
495
- }>, traceId: string, rootParentSpanId?: string): OtelSpan[];
496
- /** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
497
- declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
498
- interface EvalRunGeneration {
499
- /** 0-based ordinal of this generation within the run (required by ingest). */
500
- index: number;
501
- /** Identity of the proposed surface change (content-addressed hash). */
502
- surfaceHash: string;
503
- /** Arbitrary provenance for this generation (rationale, evidence, source). */
504
- surface?: unknown;
505
- /** Per-scenario results; empty until the generation is measured. */
506
- cells?: unknown[];
507
- /** Mean composite score (0 when unmeasured — pair with labels.measured). */
508
- compositeMean: number;
509
- costUsd: number;
510
- durationMs: number;
511
- }
512
- interface EvalRunEvent {
513
- runId: string;
514
- runDir: string;
515
- /** ISO timestamp. */
516
- timestamp: string;
517
- status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
518
- labels?: Record<string, string>;
519
- baseline?: EvalRunGeneration;
520
- generations?: EvalRunGeneration[];
521
- gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
522
- holdoutLift?: number;
523
- totalCostUsd: number;
524
- totalDurationMs: number;
525
- errorMessage?: string;
526
- }
527
- interface EvalRunsExportConfig {
528
- /** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
529
- apiKey?: string;
530
- /** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
531
- base?: string;
532
- /** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
533
- idempotencyKey?: string;
408
+ /** @experimental */
409
+ interface KbGateResult {
410
+ accepted: boolean;
411
+ /** Name of the judge that vetoed; undefined when accepted. */
412
+ vetoedBy?: string;
413
+ reason?: string;
534
414
  }
535
- interface EvalRunsExportResult {
536
- ok: boolean;
537
- status: number;
538
- accepted: number;
539
- rejected: Array<{
540
- index: number;
541
- reason: string;
542
- }>;
415
+ /** @experimental */
416
+ interface CreateKbGateOptions {
417
+ /** Extra judges appended after the built-in floor (e.g. an LLM judge). */
418
+ judges?: FactJudge[];
419
+ /** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
420
+ minPassageChars?: number;
421
+ /**
422
+ * Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
423
+ * `'cad_params'`, `'requirements'`). A citation naming one is circular
424
+ * (laundering) — the fact cites a derived artifact, not a real source.
425
+ * Default `[]` (no circular check unless the consumer declares its kinds).
426
+ */
427
+ selfArtifactKinds?: string[];
543
428
  }
544
429
  /**
545
- * Ship self-improvement eval-run events to Tangle Intelligence. Unlike the
546
- * best-effort span exporter, this RESOLVES with the ingest verdict (accepted /
547
- * rejected per event) so a consumer's loop can assert its provenance landed.
548
- * Throws only on a missing key or network failure.
430
+ * @experimental
431
+ *
432
+ * Build a fail-closed KB gate. The returned function runs the built-in floor
433
+ * (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
434
+ * then any consumer judges, returning on the first veto.
549
435
  */
550
- declare function exportEvalRuns(events: EvalRunEvent[], config?: EvalRunsExportConfig): Promise<EvalRunsExportResult>;
436
+ declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
551
437
 
552
- export { type FeedbackRefersTo as A, type FleetWorkspaceExecutorOptions as B, type CoderDelegate as C, type DelegationExecutor as D, type ResearchOutputShape as E, type FleetHandle as F, createDefaultCoderDelegate as G, createFleetWorkspaceExecutor as H, createSiblingSandboxExecutor as I, mcpToolsForRuntimeMcp as J, mcpToolsForRuntimeMcpSubset as K, type EvalRunEvent as L, type EvalRunGeneration as M, type EvalRunsExportConfig as N, type OtelExporter as O, type EvalRunsExportResult as P, INTELLIGENCE_WIRE_VERSION as Q, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, type OtelAttribute as T, type OtelExportConfig as U, type OtelSpan as V, buildLoopOtelSpans as W, createOtelExporter as X, exportEvalRuns as Y, loopEventToOtelSpan as Z, type DelegateFeedbackArgs as a, type DelegationFeedbackSnapshot as b, type DelegationProfile as c, type DelegateCodeArgs as d, type DelegateResearchArgs as e, type DelegationStatus as f, type DelegationProgress as g, type DelegationResultPayload as h, type DelegationError as i, type DelegationStatusResult as j, type DelegationHistoryArgs as k, type DelegationHistoryEntry as l, type DelegateCodeResult as m, type DelegateFeedbackResult as n, type ResearchSource as o, type DelegateResearchResult as p, type DelegationHistoryResult as q, type DelegationStatusArgs as r, type CoderReview as s, type CoderReviewer as t, type CoderWinnerSelection as u, type CreateDefaultCoderDelegateOptions as v, type DelegateCodeConfig as w, type DelegateResearchConfig as x, type DelegateRunCtx as y, type FeedbackRating as z };
438
+ export { type DelegateRunCtx as A, type FactJudge as B, type CoderReviewer as C, type DelegateCodeArgs as D, type FactJudgeVerdict as E, type FactCandidate as F, type FeedbackRating as G, type FeedbackRefersTo as H, type FleetWorkspaceExecutorOptions as I, type ResearchOutputShape as J, type KbGateResult as K, createDefaultCoderDelegate as L, createFleetWorkspaceExecutor as M, createKbGate as N, createSiblingSandboxExecutor as O, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, type CoderWinnerSelection as a, type CreateKbGateOptions as b, type FleetHandle as c, type DelegationExecutor as d, type DelegateFeedbackArgs as e, type DelegationFeedbackSnapshot as f, type DelegationProfile as g, type DelegateResearchArgs as h, type DelegationStatus as i, type DelegationProgress as j, type DelegationResultPayload as k, type DelegationError as l, type DelegationStatusResult as m, type DelegationHistoryArgs as n, type DelegationHistoryEntry as o, type CoderDelegate as p, type DelegateCodeResult as q, type DelegateFeedbackResult as r, type ResearchSource as s, type DelegateResearchResult as t, type DelegationHistoryResult as u, type DelegationStatusArgs as v, type CoderReview as w, type CreateDefaultCoderDelegateOptions as x, type DelegateCodeConfig as y, type DelegateResearchConfig as z };
@@ -0,0 +1,192 @@
1
+ import { Scenario } from '@tangle-network/agent-eval/campaign';
2
+ import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
3
+ import { O as OptimizePromptOptions, a as OptimizePromptResult } from './optimize-prompt-D-urF2wW.js';
4
+ import { T as TopologyPlanner, D as DynamicDecision } from './dynamic-DcrwVGuV.js';
5
+ import { L as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec, a as LoopResult } from './types-B9O7l-ij.js';
6
+ import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-YdPNEagq.js';
7
+ import { CoderOutput } from './profiles.js';
8
+
9
+ /**
10
+ * @experimental
11
+ *
12
+ * `runDelegatedLoop` — the configured delegated loop-runner.
13
+ *
14
+ * One typed entrypoint a worker agent (or a scheduled routine) calls to run a
15
+ * disciplined loop in a chosen MODE, over agent-runtime's hardened engines:
16
+ *
17
+ * code → build-in-a-loop via the coder delegate (no-op + secret floor,
18
+ * optional reviewer gate, winner-selection)
19
+ * review → code mode with a REQUIRED reviewer (the gate is the point)
20
+ * research → research-in-a-loop with valid-only KB growth (createKbGate)
21
+ * audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)
22
+ * self-improve → identity-gated prompt optimization (optimizePrompt, caller-wired)
23
+ * dynamic → agent-authored topology (runLoop + createDynamicDriver)
24
+ *
25
+ * It is intentionally a thin façade: the value is that EVERY product reuses the
26
+ * one hardened engine instead of forking delegation logic. The dispatcher owns
27
+ * mode routing, timing, fail-loud on an unregistered mode, and a uniform result
28
+ * shape; each mode's engine is a pre-configured runner in the registry (build it
29
+ * with the factories below, or inject your own / a stub).
30
+ */
31
+
32
+ /** @experimental Every delegated-loop mode, for validation + CLI surfaces. */
33
+ declare const DELEGATED_LOOP_MODES: readonly ["code", "review", "research", "audit", "self-improve", "dynamic"];
34
+ /** @experimental */
35
+ type DelegatedLoopMode = (typeof DELEGATED_LOOP_MODES)[number];
36
+ /** @experimental Type guard for an untrusted mode string (CLI / config input). */
37
+ declare function isDelegatedLoopMode(value: unknown): value is DelegatedLoopMode;
38
+ /** @experimental A pre-configured loop for one mode. Returns the mode's raw
39
+ * output; the dispatcher wraps it in a {@link DelegatedLoopResult}. */
40
+ type DelegatedLoopRunner<T = unknown> = (signal: AbortSignal) => Promise<T>;
41
+ /** @experimental Mode → configured runner. Partial: only register the modes a
42
+ * given product/routine actually uses. */
43
+ type DelegatedLoopRegistry = Partial<Record<DelegatedLoopMode, DelegatedLoopRunner>>;
44
+ /** @experimental Uniform result — never throws from a registered runner; a
45
+ * thrown engine becomes `{ ok: false, error }` so a routine can record + move on. */
46
+ interface DelegatedLoopResult<T = unknown> {
47
+ mode: DelegatedLoopMode;
48
+ ok: boolean;
49
+ output?: T;
50
+ error?: string;
51
+ durationMs: number;
52
+ }
53
+ /** @experimental */
54
+ interface RunDelegatedLoopOptions {
55
+ signal?: AbortSignal;
56
+ /** Clock override for deterministic tests. */
57
+ now?: () => number;
58
+ }
59
+ /**
60
+ * @experimental
61
+ *
62
+ * Dispatch a configured loop by mode. Fails loud (throws `ConfigError`) when no
63
+ * runner is registered for the mode — a routine pointed at an unwired mode is a
64
+ * config bug, not a silent no-op. A runner that throws is captured as
65
+ * `{ ok: false }` so unattended runs record the failure rather than crash.
66
+ */
67
+ declare function runDelegatedLoop<T = unknown>(mode: DelegatedLoopMode, registry: DelegatedLoopRegistry, options?: RunDelegatedLoopOptions): Promise<DelegatedLoopResult<T>>;
68
+ /** @experimental Options for the default `code`/`review` runner. */
69
+ interface CoderLoopRunnerOptions {
70
+ sandboxClient: LoopSandboxClient;
71
+ /** What to build — the delegate args (goal, repoRoot, variants, config, …). */
72
+ args: DelegateCodeArgs;
73
+ /** Adversarial reviewer. REQUIRED for `review` mode (see `reviewLoopRunner`). */
74
+ reviewer?: CoderReviewer;
75
+ /** Winner-selection strategy. Default `highest-score`. */
76
+ winnerSelection?: CoderWinnerSelection;
77
+ /** Harnesses for `variants > 1` fanout. */
78
+ fanoutHarnesses?: string[];
79
+ }
80
+ /** @experimental Build a `code`-mode runner over the hardened coder delegate. */
81
+ declare function coderLoopRunner(options: CoderLoopRunnerOptions): DelegatedLoopRunner<CoderOutput>;
82
+ /**
83
+ * @experimental
84
+ *
85
+ * `review` mode = `code` with a REQUIRED reviewer. The gate is the whole point,
86
+ * so the type forces a reviewer (a "review loop" with no reviewer is a code loop).
87
+ */
88
+ declare function reviewLoopRunner(options: CoderLoopRunnerOptions & {
89
+ reviewer: CoderReviewer;
90
+ }): DelegatedLoopRunner<CoderOutput>;
91
+ /** @experimental Options for the default `dynamic` runner. */
92
+ interface DynamicLoopRunnerOptions<Task, Output> {
93
+ sandboxClient: LoopSandboxClient;
94
+ /** The agent-authored topology planner (e.g. `createSandboxPlanner(...)`). */
95
+ planner: TopologyPlanner<Task, Output>;
96
+ task: Task;
97
+ output: OutputAdapter<Output>;
98
+ validator?: Validator<Output>;
99
+ /** Exactly one of `agentRun` / `agentRuns` (runLoop validates). */
100
+ agentRun?: AgentRunSpec<Task>;
101
+ agentRuns?: AgentRunSpec<Task>[];
102
+ maxIterations?: number;
103
+ maxFanout?: number;
104
+ }
105
+ /** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */
106
+ declare function dynamicLoopRunner<Task, Output>(o: DynamicLoopRunnerOptions<Task, Output>): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>>;
107
+ /** @experimental A fact rejected at the KB gate — surfaced, never dropped. */
108
+ interface VetoedFact {
109
+ candidate: FactCandidate;
110
+ vetoedBy?: string;
111
+ reason?: string;
112
+ }
113
+ /** @experimental */
114
+ interface ResearchLoopResult {
115
+ /** Facts that passed the fail-closed gate — safe to write to the KB. */
116
+ accepted: FactCandidate[];
117
+ /** Facts the gate vetoed in the final round — escalate, do not silently drop. */
118
+ vetoed: VetoedFact[];
119
+ /** Research rounds actually run. */
120
+ rounds: number;
121
+ }
122
+ /** @experimental Options for the default `research` runner. */
123
+ interface ResearchLoopRunnerOptions {
124
+ /**
125
+ * The research engine (the consumer's web/doc searcher + extractor). Called
126
+ * each round with the prior round's vetoes so it can re-research the gaps.
127
+ * Returns fact candidates carrying their grounding (`verbatimPassage` +
128
+ * `sourceText`).
129
+ */
130
+ research: (round: number, vetoed: VetoedFact[]) => Promise<FactCandidate[]>;
131
+ /** Gate config (extra judges, self-artifact kinds, …). The floor is always on. */
132
+ gate?: CreateKbGateOptions;
133
+ /** Max research rounds (correct-on-veto remediation). Default 1. */
134
+ maxRounds?: number;
135
+ }
136
+ /**
137
+ * @experimental `research` mode — research-in-a-loop with valid-only KB growth.
138
+ *
139
+ * Each round: research → gate every candidate (fail-closed; passage MUST be in
140
+ * the source) → accept the clean ones → re-research the vetoed ones next round,
141
+ * up to `maxRounds`. Vetoed facts in the final round are RETURNED (escalate,
142
+ * never silently dropped) so the caller audits vs retries.
143
+ */
144
+ declare function researchLoopRunner(o: ResearchLoopRunnerOptions): DelegatedLoopRunner<ResearchLoopResult>;
145
+ /** @experimental `self-improve` mode — identity-gated prompt optimization. */
146
+ declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options: OptimizePromptOptions<TScenario, TArtifact>): DelegatedLoopRunner<OptimizePromptResult<TArtifact, TScenario>>;
147
+ /** @experimental `audit` mode — analyst loop over captured trace/run data. */
148
+ declare function auditLoopRunner<TProposal = unknown, TEdit = unknown>(options: RunAnalystLoopOpts): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>>;
149
+
150
+ /**
151
+ * @experimental
152
+ *
153
+ * `agent-runtime-loop` — the schedulable entrypoint for the configured
154
+ * delegated loop-runner. A cron job / routine / Makefile target invokes:
155
+ *
156
+ * agent-runtime-loop --mode research --config ./loops.config.js
157
+ *
158
+ * The config module wires the registry (with full access to env / creds —
159
+ * which is why the deps live there, not in this generic bin). It must default-
160
+ * export a `DelegatedLoopRegistry`, or a `() => DelegatedLoopRegistry | Promise<…>`.
161
+ * The bin runs the selected mode, prints the `DelegatedLoopResult` as JSON, and
162
+ * exits 0 on `ok`, 1 on a recorded failure, 2 on a usage/config error.
163
+ */
164
+
165
+ /** @experimental Parsed CLI invocation. */
166
+ interface LoopRunnerCliArgs {
167
+ mode: string;
168
+ /** Loads the registry — the bin wires this from `--config`; tests inject a stub. */
169
+ loadRegistry: () => Promise<DelegatedLoopRegistry> | DelegatedLoopRegistry;
170
+ now?: () => number;
171
+ }
172
+ /** @experimental */
173
+ interface LoopRunnerCliResult {
174
+ exitCode: number;
175
+ result?: DelegatedLoopResult;
176
+ error?: string;
177
+ }
178
+ /**
179
+ * @experimental
180
+ *
181
+ * Pure CLI core (no process / argv / IO) so it's unit-testable: validate the
182
+ * mode, load the registry, dispatch, map to an exit code (0 ok / 1 failed /
183
+ * 2 usage). Exported for embedding in custom runners + tests.
184
+ */
185
+ declare function runLoopRunnerCli(args: LoopRunnerCliArgs): Promise<LoopRunnerCliResult>;
186
+ /** Parse `--mode X --config Y` from an argv tail (`process.argv.slice(2)`). */
187
+ declare function parseLoopRunnerArgv(argv: string[]): {
188
+ mode?: string;
189
+ config?: string;
190
+ };
191
+
192
+ export { type CoderLoopRunnerOptions as C, DELEGATED_LOOP_MODES as D, type LoopRunnerCliArgs as L, type ResearchLoopResult as R, type VetoedFact as V, type DelegatedLoopMode as a, type DelegatedLoopRegistry as b, type DelegatedLoopResult as c, type DelegatedLoopRunner as d, type DynamicLoopRunnerOptions as e, type LoopRunnerCliResult as f, type ResearchLoopRunnerOptions as g, type RunDelegatedLoopOptions as h, auditLoopRunner as i, coderLoopRunner as j, dynamicLoopRunner as k, isDelegatedLoopMode as l, reviewLoopRunner as m, runDelegatedLoop as n, runLoopRunnerCli as o, parseLoopRunnerArgv as p, researchLoopRunner as r, selfImproveLoopRunner as s };
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env node
2
+ export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-DgZj0zfJ.js';
3
+ import '@tangle-network/agent-eval/campaign';
4
+ import './types-p8dWBIXL.js';
5
+ import '@tangle-network/agent-eval';
6
+ import './optimize-prompt-D-urF2wW.js';
7
+ import './dynamic-DcrwVGuV.js';
8
+ import './types-B9O7l-ij.js';
9
+ import '@tangle-network/sandbox';
10
+ import './types-CsCCryln.js';
11
+ import './kb-gate-YdPNEagq.js';
12
+ import './profiles.js';
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ parseLoopRunnerArgv,
4
+ runLoopRunnerCli
5
+ } from "./chunk-AXWGLYSF.js";
6
+ import "./chunk-XBUG326M.js";
7
+ import "./chunk-VOX6Z3II.js";
8
+ import "./chunk-FNMGYYSS.js";
9
+ import "./chunk-VLXRXMTF.js";
10
+ import "./chunk-7JBDJQLO.js";
11
+ import "./chunk-3HMHSN22.js";
12
+ import "./chunk-PY6NMZYX.js";
13
+ import "./chunk-SQSCRJ7U.js";
14
+ import "./chunk-DGUM43GV.js";
15
+ export {
16
+ parseLoopRunnerArgv,
17
+ runLoopRunnerCli
18
+ };
19
+ //# sourceMappingURL=loop-runner-bin.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/dist/loops.d.ts CHANGED
@@ -1,115 +1,14 @@
1
1
  import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
2
2
  export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
3
- import { I as Iteration, D as Driver, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, a as LoopWinner, b as LoopResult } from './types-CmTjKLyB.js';
4
- export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopPlanDescription, i as LoopPlanPayload, j as LoopSandboxPlacement, k as LoopStartedPayload, l as LoopTokenUsage, m as LoopTraceEmitter, n as LoopTraceEvent, o as ValidationCtx } from './types-CmTjKLyB.js';
3
+ import { P as PlannerContext, T as TopologyPlanner } from './dynamic-DcrwVGuV.js';
4
+ export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-DcrwVGuV.js';
5
+ import { D as Driver, I as Iteration, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, b as LoopWinner, a as LoopResult } from './types-B9O7l-ij.js';
6
+ export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopPlanDescription, i as LoopPlanPayload, j as LoopSandboxPlacement, k as LoopStartedPayload, l as LoopTokenUsage, m as LoopTraceEmitter, n as LoopTraceEvent, o as ValidationCtx } from './types-B9O7l-ij.js';
5
7
  import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
6
8
  export { DefaultVerdict } from '@tangle-network/agent-eval';
7
9
  import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
8
10
  import './types-CsCCryln.js';
9
11
 
10
- /**
11
- * @experimental
12
- *
13
- * Dynamic driver — the agent authors the loop topology at runtime.
14
- *
15
- * Where `refine` and `fanout-vote` encode a fixed shape as a pure function of
16
- * history, this driver delegates the per-round shape to an injected
17
- * `TopologyPlanner`. Each round the planner inspects the task + iteration
18
- * history and emits one `TopologyMove`:
19
- * - `refine` → one task next round (optionally rewritten from the prior attempt)
20
- * - `fanout` → N tasks next round (the kernel round-robins `agentRuns`, so a
21
- * 2-harness fanout dispatches branch 0 to harness A and branch 1 to harness B)
22
- * - `stop` → terminate; the kernel selects the winner across all iterations
23
- *
24
- * The planner is the brain; this driver is the structure. It maps moves onto
25
- * the kernel's `plan`/`decide` contract, enforces the iteration + fanout caps,
26
- * and fails loud on a malformed move. The planner is injected exactly like
27
- * `refine`'s `refineTask` and `fanout-vote`'s `selector` — so a test can drive
28
- * a deterministic policy through the real kernel, and production can wire it to
29
- * an LLM via `createSandboxPlanner`.
30
- *
31
- * Topology is orthogonal to harness: the planner never names a backend. Which
32
- * harness runs a branch is decided by the `AgentRunSpec` the kernel round-robins
33
- * to, so one dynamic driver works across claude-code, codex, opencode, pi —
34
- * including fanning a single round across several at once.
35
- */
36
-
37
- /** Terminal once `decide` returns `'done'` (a kernel terminal decision). */
38
- type DynamicDecision = 'continue' | 'done';
39
- /**
40
- * One topology decision for the next round. `fanout` carries explicit tasks
41
- * rather than a count so the planner can issue heterogeneous branches (a
42
- * different sub-task per harness); pass N copies of one task for a homogeneous
43
- * fanout that relies on `agentRuns` diversity instead.
44
- *
45
- * @experimental
46
- */
47
- type TopologyMove<Task> = {
48
- kind: 'refine';
49
- task: Task;
50
- rationale?: string;
51
- } | {
52
- kind: 'fanout';
53
- tasks: Task[];
54
- rationale?: string;
55
- } | {
56
- kind: 'stop';
57
- rationale?: string;
58
- };
59
- /** @experimental */
60
- interface PlannerContext<Task, Output> {
61
- /** The root task the loop was invoked with — stable across rounds. */
62
- task: Task;
63
- /** Every iteration so far, in dispatch order, with outputs + verdicts. */
64
- history: ReadonlyArray<Iteration<Task, Output>>;
65
- /** `history.length` — iterations already spent. */
66
- iterationsSpent: number;
67
- /** Iterations left before the driver's `maxIterations` cap forces a stop. */
68
- iterationsRemaining: number;
69
- }
70
- /**
71
- * Chooses the next topology move from the task + history. Sync or async; an
72
- * async planner is where an LLM call goes (see `createSandboxPlanner`).
73
- *
74
- * @experimental
75
- */
76
- type TopologyPlanner<Task, Output> = (ctx: PlannerContext<Task, Output>) => TopologyMove<Task> | Promise<TopologyMove<Task>>;
77
- /** @experimental */
78
- interface CreateDynamicDriverOptions<Task, Output> {
79
- /** The agent-authored topology policy. Invoked once per round in `plan`. */
80
- planner: TopologyPlanner<Task, Output>;
81
- /**
82
- * Hard safety cap on total iterations. When reached, the driver stops before
83
- * consulting the planner. Default 8. Set the kernel's `runLoop`
84
- * `maxIterations >= ` this so the driver's cap governs and the loop closes on
85
- * a clean `'done'` rather than a truncated `'continue'`.
86
- */
87
- maxIterations?: number;
88
- /** Max branches a single `fanout` move may dispatch. Default 4. */
89
- maxFanout?: number;
90
- /** Stable identifier surfaced in trace events. Default `'dynamic'`. */
91
- name?: string;
92
- }
93
- /** @experimental */
94
- declare function createDynamicDriver<Task, Output>(options: CreateDynamicDriverOptions<Task, Output>): Driver<Task, Output, DynamicDecision>;
95
- /**
96
- * Compact, planner-friendly view of iteration history — what an LLM planner
97
- * needs to choose the next move without the raw event streams. Output is
98
- * truncated so a long run's prompt stays bounded.
99
- *
100
- * @experimental
101
- */
102
- declare function summarizeHistory<Task, Output>(history: ReadonlyArray<Iteration<Task, Output>>, opts?: {
103
- maxOutputChars?: number;
104
- }): Array<{
105
- index: number;
106
- agentRunName: string;
107
- valid?: boolean;
108
- score?: number;
109
- error?: string;
110
- output?: string;
111
- }>;
112
-
113
12
  /**
114
13
  * @experimental
115
14
  *
@@ -413,4 +312,4 @@ interface UsageSink {
413
312
  */
414
313
  declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
415
314
 
416
- export { AgentRunSpec, type CreateDynamicDriverOptions, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, type DynamicDecision, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, type PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMove, type TopologyMoveEnvelope, type TopologyPlanner, type UsageSink, Validator, createDynamicDriver, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, loopCampaignDispatch, loopDispatch, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations, summarizeHistory };
315
+ export { AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, Validator, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, loopCampaignDispatch, loopDispatch, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations };
package/dist/mcp/bin.js CHANGED
@@ -2,10 +2,11 @@
2
2
  import {
3
3
  createMcpServer,
4
4
  detectExecutor
5
- } from "../chunk-M65QJD35.js";
5
+ } from "../chunk-PK5DYSNO.js";
6
+ import "../chunk-HSX6PFZR.js";
6
7
  import {
7
8
  createDefaultCoderDelegate
8
- } from "../chunk-V6GURW4W.js";
9
+ } from "../chunk-VLXRXMTF.js";
9
10
  import "../chunk-GLR25NG7.js";
10
11
  import {
11
12
  runLoop