@tangle-network/agent-runtime 0.37.0 → 0.39.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.d.ts +3 -3
- package/dist/analyst-loop.d.ts +2 -2
- package/dist/analyst-loop.js +3 -257
- package/dist/analyst-loop.js.map +1 -1
- package/dist/{chunk-T3GJBKHA.js → chunk-7ZECSZ3C.js} +2 -2
- package/dist/chunk-AXWGLYSF.js +201 -0
- package/dist/chunk-AXWGLYSF.js.map +1 -0
- package/dist/chunk-FNMGYYSS.js +60 -0
- package/dist/chunk-FNMGYYSS.js.map +1 -0
- package/dist/{chunk-V6GURW4W.js → chunk-HSX6PFZR.js} +1 -209
- package/dist/chunk-HSX6PFZR.js.map +1 -0
- package/dist/{chunk-M65QJD35.js → chunk-PK5DYSNO.js} +5 -3
- package/dist/{chunk-M65QJD35.js.map → chunk-PK5DYSNO.js.map} +1 -1
- package/dist/chunk-VLXRXMTF.js +212 -0
- package/dist/chunk-VLXRXMTF.js.map +1 -0
- package/dist/chunk-VOX6Z3II.js +90 -0
- package/dist/chunk-VOX6Z3II.js.map +1 -0
- package/dist/chunk-XBUG326M.js +261 -0
- package/dist/chunk-XBUG326M.js.map +1 -0
- package/dist/dynamic-DcrwVGuV.d.ts +106 -0
- package/dist/{improvement-adapter-CaZxFxTd.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
- package/dist/improvement.d.ts +6 -130
- package/dist/improvement.js +4 -85
- package/dist/improvement.js.map +1 -1
- package/dist/index.d.ts +10 -85
- package/dist/index.js +27 -44
- package/dist/index.js.map +1 -1
- package/dist/{otel-export-DgFMwsVy.d.ts → kb-gate-YdPNEagq.d.ts} +62 -176
- package/dist/loop-runner-bin-DgZj0zfJ.d.ts +192 -0
- package/dist/loop-runner-bin.d.ts +12 -0
- package/dist/loop-runner-bin.js +19 -0
- package/dist/loop-runner-bin.js.map +1 -0
- package/dist/loops.d.ts +5 -106
- package/dist/mcp/bin.js +3 -2
- package/dist/mcp/bin.js.map +1 -1
- package/dist/mcp/index.d.ts +6 -79
- package/dist/mcp/index.js +11 -62
- package/dist/mcp/index.js.map +1 -1
- package/dist/optimize-prompt-D-urF2wW.d.ts +129 -0
- package/dist/otel-export-xgf4J6bo.d.ts +191 -0
- package/dist/profiles.d.ts +1 -1
- package/dist/{types-CmTjKLyB.d.ts → types-B9O7l-ij.d.ts} +2 -2
- package/dist/{types-D_MXrmJP.d.ts → types-p8dWBIXL.d.ts} +1 -1
- package/package.json +3 -2
- package/dist/chunk-V6GURW4W.js.map +0 -1
- /package/dist/{chunk-T3GJBKHA.js.map → chunk-7ZECSZ3C.js.map} +0 -0
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { CoderOutput, CoderTask } from './profiles.js';
|
|
2
|
-
import { L as LoopSandboxClient } from './types-
|
|
2
|
+
import { L as LoopSandboxClient } from './types-B9O7l-ij.js';
|
|
3
3
|
import { SandboxInstance } from '@tangle-network/sandbox';
|
|
4
|
-
import { O as OpenAIChatTool } from './types-CsCCryln.js';
|
|
5
4
|
|
|
6
5
|
/**
|
|
7
6
|
* @experimental
|
|
@@ -364,189 +363,76 @@ declare function createDefaultCoderDelegate(options: CreateDefaultCoderDelegateO
|
|
|
364
363
|
/**
|
|
365
364
|
* @experimental
|
|
366
365
|
*
|
|
367
|
-
*
|
|
368
|
-
*
|
|
366
|
+
* `createKbGate` — the valid-only knowledge-base growth gate, distilled from
|
|
367
|
+
* physim's KB-research subsystem. A research-in-a-loop delegate (or any KB
|
|
368
|
+
* writer) runs candidate facts through this before persisting, so the KB grows
|
|
369
|
+
* with ONLY grounded facts — hallucinated, unsourced, or laundered claims are
|
|
370
|
+
* vetoed at the gate.
|
|
369
371
|
*
|
|
370
|
-
*
|
|
371
|
-
*
|
|
372
|
-
*
|
|
373
|
-
*
|
|
374
|
-
*
|
|
375
|
-
* caller's responsibility (typically the parent sandbox runtime's MCP
|
|
376
|
-
* mount).
|
|
372
|
+
* Fail-closed by construction: every judge must `accept`; the FIRST veto wins
|
|
373
|
+
* and the fact is rejected. The non-negotiable floor (always on, can't be
|
|
374
|
+
* disabled) is the **passage-present guard** — a fact's `verbatimPassage` MUST
|
|
375
|
+
* literally appear in its `sourceText`. That single check kills the dominant
|
|
376
|
+
* failure mode (a confident claim decoupled from any real source).
|
|
377
377
|
*
|
|
378
|
-
*
|
|
379
|
-
*
|
|
380
|
-
*
|
|
381
|
-
*
|
|
382
|
-
*
|
|
383
|
-
* `DELEGATE_*` constants exported by `./tools/*` so the projection cannot
|
|
384
|
-
* drift from the server's own validators.
|
|
385
|
-
*/
|
|
386
|
-
|
|
387
|
-
/**
|
|
388
|
-
* @experimental
|
|
389
|
-
*
|
|
390
|
-
* Returns the 5 delegation tools projected into OpenAI Chat Completions
|
|
391
|
-
* `tools[]` shape. The order is stable: `delegate_code`,
|
|
392
|
-
* `delegate_research`, `delegate_feedback`, `delegation_status`,
|
|
393
|
-
* `delegation_history`.
|
|
394
|
-
*/
|
|
395
|
-
declare function mcpToolsForRuntimeMcp(): OpenAIChatTool[];
|
|
396
|
-
/**
|
|
397
|
-
* @experimental
|
|
398
|
-
*
|
|
399
|
-
* Subset filter — return only the projected tools whose `function.name`
|
|
400
|
-
* appears in `names`. Useful for curated mounts (e.g. only the queue-bound
|
|
401
|
-
* delegation tools, omitting `delegate_feedback`). Unknown names are
|
|
402
|
-
* silently ignored; pass an empty array to get an empty result.
|
|
378
|
+
* Pure + dependency-free: it operates on fact candidates, not on a store, so it
|
|
379
|
+
* composes with `@tangle-network/agent-knowledge` or any persistence layer
|
|
380
|
+
* without importing it. The remediation policy (correct-on-veto vs
|
|
381
|
+
* escalate-as-unverified) is the caller's — this returns the verdict; it never
|
|
382
|
+
* drops a fact silently.
|
|
403
383
|
*/
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
/**
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
interface OtelExportConfig {
|
|
417
|
-
/** OTLP endpoint. Reads OTEL_EXPORTER_OTLP_ENDPOINT env by default. */
|
|
418
|
-
endpoint?: string;
|
|
419
|
-
/** OTLP headers. Reads OTEL_EXPORTER_OTLP_HEADERS env by default. */
|
|
420
|
-
headers?: Record<string, string>;
|
|
421
|
-
/** Batch size before flush. Default 64. */
|
|
422
|
-
batchSize?: number;
|
|
423
|
-
/** Flush interval ms. Default 5000. */
|
|
424
|
-
flushIntervalMs?: number;
|
|
425
|
-
/** Resource attributes stamped on every export. */
|
|
426
|
-
resourceAttributes?: Record<string, string | number | boolean>;
|
|
427
|
-
/** Service name. Default 'agent-runtime'. */
|
|
428
|
-
serviceName?: string;
|
|
384
|
+
/** @experimental A fact proposed for the KB, with its grounding. */
|
|
385
|
+
interface FactCandidate {
|
|
386
|
+
/** The atomic claim text. */
|
|
387
|
+
claim: string;
|
|
388
|
+
/** Optional extracted value (number or string) the claim asserts. */
|
|
389
|
+
value?: string | number;
|
|
390
|
+
/** Verbatim span lifted from the source that backs the claim. */
|
|
391
|
+
verbatimPassage: string;
|
|
392
|
+
/** The raw source text the passage must be grounded in. */
|
|
393
|
+
sourceText: string;
|
|
394
|
+
/** Where the fact claims to come from — checked for circular/self citations. */
|
|
395
|
+
citation?: string;
|
|
429
396
|
}
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
flush(): Promise<void>;
|
|
435
|
-
/** Shutdown cleanly. */
|
|
436
|
-
shutdown(): Promise<void>;
|
|
397
|
+
/** @experimental */
|
|
398
|
+
interface FactJudgeVerdict {
|
|
399
|
+
accept: boolean;
|
|
400
|
+
reason?: string;
|
|
437
401
|
}
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
parentSpanId?: string;
|
|
402
|
+
/** @experimental A pluggable fact validator. Throw is NOT allowed — return a
|
|
403
|
+
* verdict; a thrown judge is a programmer error, not a veto. */
|
|
404
|
+
interface FactJudge {
|
|
442
405
|
name: string;
|
|
443
|
-
|
|
444
|
-
startTimeUnixNano: string;
|
|
445
|
-
endTimeUnixNano: string;
|
|
446
|
-
attributes?: OtelAttribute[];
|
|
447
|
-
status?: {
|
|
448
|
-
code: number;
|
|
449
|
-
message?: string;
|
|
450
|
-
};
|
|
451
|
-
}
|
|
452
|
-
interface OtelAttribute {
|
|
453
|
-
key: string;
|
|
454
|
-
value: {
|
|
455
|
-
stringValue?: string;
|
|
456
|
-
intValue?: string;
|
|
457
|
-
doubleValue?: number;
|
|
458
|
-
boolValue?: boolean;
|
|
459
|
-
};
|
|
406
|
+
judge(candidate: FactCandidate): FactJudgeVerdict | Promise<FactJudgeVerdict>;
|
|
460
407
|
}
|
|
461
|
-
/**
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
*/
|
|
468
|
-
declare function loopEventToOtelSpan(event: {
|
|
469
|
-
kind: string;
|
|
470
|
-
runId: string;
|
|
471
|
-
timestamp: number;
|
|
472
|
-
payload: object;
|
|
473
|
-
}, traceId: string, parentSpanId?: string): OtelSpan;
|
|
474
|
-
/**
|
|
475
|
-
* Build a nested, real-duration OTLP span tree for ONE loop run from its full
|
|
476
|
-
* ordered `LoopTraceEvent` stream. Unlike `loopEventToOtelSpan` (one flat,
|
|
477
|
-
* zero-duration span per event), this reconstructs the topology hierarchy a
|
|
478
|
-
* GenAI trace viewer renders natively:
|
|
479
|
-
*
|
|
480
|
-
* loop (invoke_workflow)
|
|
481
|
-
* └─ loop.round[k] (invoke_workflow) ← tangle.loop.move.{kind,width,rationale}
|
|
482
|
-
* ├─ loop.iteration[i] (invoke_agent) ← gen_ai.agent.name + usage + verdict + placement
|
|
483
|
-
* └─ …
|
|
484
|
-
*
|
|
485
|
-
* Attributes follow the current GenAI semconv (`gen_ai.*`) where they apply and
|
|
486
|
-
* a namespaced `tangle.loop.*` / `tangle.cost.usd` extension for topology /
|
|
487
|
-
* verdict / placement / cost (not yet standardized). Pure: feed it a buffered
|
|
488
|
-
* per-runId event array (e.g. flushed on `loop.ended`) and export the result.
|
|
489
|
-
*/
|
|
490
|
-
declare function buildLoopOtelSpans(events: ReadonlyArray<{
|
|
491
|
-
kind: string;
|
|
492
|
-
runId: string;
|
|
493
|
-
timestamp: number;
|
|
494
|
-
payload: object;
|
|
495
|
-
}>, traceId: string, rootParentSpanId?: string): OtelSpan[];
|
|
496
|
-
/** Wire version the eval-runs ingest enforces (X-Tangle-Wire-Version + body). */
|
|
497
|
-
declare const INTELLIGENCE_WIRE_VERSION = "2026-05-26.v1";
|
|
498
|
-
interface EvalRunGeneration {
|
|
499
|
-
/** 0-based ordinal of this generation within the run (required by ingest). */
|
|
500
|
-
index: number;
|
|
501
|
-
/** Identity of the proposed surface change (content-addressed hash). */
|
|
502
|
-
surfaceHash: string;
|
|
503
|
-
/** Arbitrary provenance for this generation (rationale, evidence, source). */
|
|
504
|
-
surface?: unknown;
|
|
505
|
-
/** Per-scenario results; empty until the generation is measured. */
|
|
506
|
-
cells?: unknown[];
|
|
507
|
-
/** Mean composite score (0 when unmeasured — pair with labels.measured). */
|
|
508
|
-
compositeMean: number;
|
|
509
|
-
costUsd: number;
|
|
510
|
-
durationMs: number;
|
|
511
|
-
}
|
|
512
|
-
interface EvalRunEvent {
|
|
513
|
-
runId: string;
|
|
514
|
-
runDir: string;
|
|
515
|
-
/** ISO timestamp. */
|
|
516
|
-
timestamp: string;
|
|
517
|
-
status: 'started' | 'baseline-complete' | 'generation-complete' | 'gate-decided' | 'finished' | 'errored';
|
|
518
|
-
labels?: Record<string, string>;
|
|
519
|
-
baseline?: EvalRunGeneration;
|
|
520
|
-
generations?: EvalRunGeneration[];
|
|
521
|
-
gateDecision?: 'ship' | 'hold' | 'need_more_work' | 'model_ceiling' | 'arch_ceiling';
|
|
522
|
-
holdoutLift?: number;
|
|
523
|
-
totalCostUsd: number;
|
|
524
|
-
totalDurationMs: number;
|
|
525
|
-
errorMessage?: string;
|
|
526
|
-
}
|
|
527
|
-
interface EvalRunsExportConfig {
|
|
528
|
-
/** Bearer key — tenant is resolved server-side from it. Reads TANGLE_API_KEY. */
|
|
529
|
-
apiKey?: string;
|
|
530
|
-
/** Intelligence base. Reads INTELLIGENCE_BASE env, else prod. */
|
|
531
|
-
base?: string;
|
|
532
|
-
/** Idempotency-Key header (e.g. the runId) — safe retries + upsert. */
|
|
533
|
-
idempotencyKey?: string;
|
|
408
|
+
/** @experimental */
|
|
409
|
+
interface KbGateResult {
|
|
410
|
+
accepted: boolean;
|
|
411
|
+
/** Name of the judge that vetoed; undefined when accepted. */
|
|
412
|
+
vetoedBy?: string;
|
|
413
|
+
reason?: string;
|
|
534
414
|
}
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
415
|
+
/** @experimental */
|
|
416
|
+
interface CreateKbGateOptions {
|
|
417
|
+
/** Extra judges appended after the built-in floor (e.g. an LLM judge). */
|
|
418
|
+
judges?: FactJudge[];
|
|
419
|
+
/** Minimum verbatim-passage length. Default 12 — kills empty/stub passages. */
|
|
420
|
+
minPassageChars?: number;
|
|
421
|
+
/**
|
|
422
|
+
* Citation tokens that denote a SELF-generated artifact (e.g. `'spec'`,
|
|
423
|
+
* `'cad_params'`, `'requirements'`). A citation naming one is circular
|
|
424
|
+
* (laundering) — the fact cites a derived artifact, not a real source.
|
|
425
|
+
* Default `[]` (no circular check unless the consumer declares its kinds).
|
|
426
|
+
*/
|
|
427
|
+
selfArtifactKinds?: string[];
|
|
543
428
|
}
|
|
544
429
|
/**
|
|
545
|
-
*
|
|
546
|
-
*
|
|
547
|
-
*
|
|
548
|
-
*
|
|
430
|
+
* @experimental
|
|
431
|
+
*
|
|
432
|
+
* Build a fail-closed KB gate. The returned function runs the built-in floor
|
|
433
|
+
* (passage-non-empty → passage-present → value-in-passage → no-circular-citation)
|
|
434
|
+
* then any consumer judges, returning on the first veto.
|
|
549
435
|
*/
|
|
550
|
-
declare function
|
|
436
|
+
declare function createKbGate(options?: CreateKbGateOptions): (candidate: FactCandidate) => Promise<KbGateResult>;
|
|
551
437
|
|
|
552
|
-
export { type
|
|
438
|
+
export { type DelegateRunCtx as A, type FactJudge as B, type CoderReviewer as C, type DelegateCodeArgs as D, type FactJudgeVerdict as E, type FactCandidate as F, type FeedbackRating as G, type FeedbackRefersTo as H, type FleetWorkspaceExecutorOptions as I, type ResearchOutputShape as J, type KbGateResult as K, createDefaultCoderDelegate as L, createFleetWorkspaceExecutor as M, createKbGate as N, createSiblingSandboxExecutor as O, type ResearcherDelegate as R, type SiblingSandboxExecutorOptions as S, type CoderWinnerSelection as a, type CreateKbGateOptions as b, type FleetHandle as c, type DelegationExecutor as d, type DelegateFeedbackArgs as e, type DelegationFeedbackSnapshot as f, type DelegationProfile as g, type DelegateResearchArgs as h, type DelegationStatus as i, type DelegationProgress as j, type DelegationResultPayload as k, type DelegationError as l, type DelegationStatusResult as m, type DelegationHistoryArgs as n, type DelegationHistoryEntry as o, type CoderDelegate as p, type DelegateCodeResult as q, type DelegateFeedbackResult as r, type ResearchSource as s, type DelegateResearchResult as t, type DelegationHistoryResult as u, type DelegationStatusArgs as v, type CoderReview as w, type CreateDefaultCoderDelegateOptions as x, type DelegateCodeConfig as y, type DelegateResearchConfig as z };
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { Scenario } from '@tangle-network/agent-eval/campaign';
|
|
2
|
+
import { R as RunAnalystLoopOpts, a as RunAnalystLoopResult } from './types-p8dWBIXL.js';
|
|
3
|
+
import { O as OptimizePromptOptions, a as OptimizePromptResult } from './optimize-prompt-D-urF2wW.js';
|
|
4
|
+
import { T as TopologyPlanner, D as DynamicDecision } from './dynamic-DcrwVGuV.js';
|
|
5
|
+
import { L as LoopSandboxClient, O as OutputAdapter, V as Validator, A as AgentRunSpec, a as LoopResult } from './types-B9O7l-ij.js';
|
|
6
|
+
import { D as DelegateCodeArgs, C as CoderReviewer, a as CoderWinnerSelection, F as FactCandidate, b as CreateKbGateOptions } from './kb-gate-YdPNEagq.js';
|
|
7
|
+
import { CoderOutput } from './profiles.js';
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* @experimental
|
|
11
|
+
*
|
|
12
|
+
* `runDelegatedLoop` — the configured delegated loop-runner.
|
|
13
|
+
*
|
|
14
|
+
* One typed entrypoint a worker agent (or a scheduled routine) calls to run a
|
|
15
|
+
* disciplined loop in a chosen MODE, over agent-runtime's hardened engines:
|
|
16
|
+
*
|
|
17
|
+
* code → build-in-a-loop via the coder delegate (no-op + secret floor,
|
|
18
|
+
* optional reviewer gate, winner-selection)
|
|
19
|
+
* review → code mode with a REQUIRED reviewer (the gate is the point)
|
|
20
|
+
* research → research-in-a-loop with valid-only KB growth (createKbGate)
|
|
21
|
+
* audit → analyze trace/run data → findings (runAnalystLoop, caller-wired)
|
|
22
|
+
* self-improve → identity-gated prompt optimization (optimizePrompt, caller-wired)
|
|
23
|
+
* dynamic → agent-authored topology (runLoop + createDynamicDriver)
|
|
24
|
+
*
|
|
25
|
+
* It is intentionally a thin façade: the value is that EVERY product reuses the
|
|
26
|
+
* one hardened engine instead of forking delegation logic. The dispatcher owns
|
|
27
|
+
* mode routing, timing, fail-loud on an unregistered mode, and a uniform result
|
|
28
|
+
* shape; each mode's engine is a pre-configured runner in the registry (build it
|
|
29
|
+
* with the factories below, or inject your own / a stub).
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
/** @experimental Every delegated-loop mode, for validation + CLI surfaces. */
|
|
33
|
+
declare const DELEGATED_LOOP_MODES: readonly ["code", "review", "research", "audit", "self-improve", "dynamic"];
|
|
34
|
+
/** @experimental */
|
|
35
|
+
type DelegatedLoopMode = (typeof DELEGATED_LOOP_MODES)[number];
|
|
36
|
+
/** @experimental Type guard for an untrusted mode string (CLI / config input). */
|
|
37
|
+
declare function isDelegatedLoopMode(value: unknown): value is DelegatedLoopMode;
|
|
38
|
+
/** @experimental A pre-configured loop for one mode. Returns the mode's raw
|
|
39
|
+
* output; the dispatcher wraps it in a {@link DelegatedLoopResult}. */
|
|
40
|
+
type DelegatedLoopRunner<T = unknown> = (signal: AbortSignal) => Promise<T>;
|
|
41
|
+
/** @experimental Mode → configured runner. Partial: only register the modes a
|
|
42
|
+
* given product/routine actually uses. */
|
|
43
|
+
type DelegatedLoopRegistry = Partial<Record<DelegatedLoopMode, DelegatedLoopRunner>>;
|
|
44
|
+
/** @experimental Uniform result — never throws from a registered runner; a
|
|
45
|
+
* thrown engine becomes `{ ok: false, error }` so a routine can record + move on. */
|
|
46
|
+
interface DelegatedLoopResult<T = unknown> {
|
|
47
|
+
mode: DelegatedLoopMode;
|
|
48
|
+
ok: boolean;
|
|
49
|
+
output?: T;
|
|
50
|
+
error?: string;
|
|
51
|
+
durationMs: number;
|
|
52
|
+
}
|
|
53
|
+
/** @experimental */
|
|
54
|
+
interface RunDelegatedLoopOptions {
|
|
55
|
+
signal?: AbortSignal;
|
|
56
|
+
/** Clock override for deterministic tests. */
|
|
57
|
+
now?: () => number;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* @experimental
|
|
61
|
+
*
|
|
62
|
+
* Dispatch a configured loop by mode. Fails loud (throws `ConfigError`) when no
|
|
63
|
+
* runner is registered for the mode — a routine pointed at an unwired mode is a
|
|
64
|
+
* config bug, not a silent no-op. A runner that throws is captured as
|
|
65
|
+
* `{ ok: false }` so unattended runs record the failure rather than crash.
|
|
66
|
+
*/
|
|
67
|
+
declare function runDelegatedLoop<T = unknown>(mode: DelegatedLoopMode, registry: DelegatedLoopRegistry, options?: RunDelegatedLoopOptions): Promise<DelegatedLoopResult<T>>;
|
|
68
|
+
/** @experimental Options for the default `code`/`review` runner. */
|
|
69
|
+
interface CoderLoopRunnerOptions {
|
|
70
|
+
sandboxClient: LoopSandboxClient;
|
|
71
|
+
/** What to build — the delegate args (goal, repoRoot, variants, config, …). */
|
|
72
|
+
args: DelegateCodeArgs;
|
|
73
|
+
/** Adversarial reviewer. REQUIRED for `review` mode (see `reviewLoopRunner`). */
|
|
74
|
+
reviewer?: CoderReviewer;
|
|
75
|
+
/** Winner-selection strategy. Default `highest-score`. */
|
|
76
|
+
winnerSelection?: CoderWinnerSelection;
|
|
77
|
+
/** Harnesses for `variants > 1` fanout. */
|
|
78
|
+
fanoutHarnesses?: string[];
|
|
79
|
+
}
|
|
80
|
+
/** @experimental Build a `code`-mode runner over the hardened coder delegate. */
|
|
81
|
+
declare function coderLoopRunner(options: CoderLoopRunnerOptions): DelegatedLoopRunner<CoderOutput>;
|
|
82
|
+
/**
|
|
83
|
+
* @experimental
|
|
84
|
+
*
|
|
85
|
+
* `review` mode = `code` with a REQUIRED reviewer. The gate is the whole point,
|
|
86
|
+
* so the type forces a reviewer (a "review loop" with no reviewer is a code loop).
|
|
87
|
+
*/
|
|
88
|
+
declare function reviewLoopRunner(options: CoderLoopRunnerOptions & {
|
|
89
|
+
reviewer: CoderReviewer;
|
|
90
|
+
}): DelegatedLoopRunner<CoderOutput>;
|
|
91
|
+
/** @experimental Options for the default `dynamic` runner. */
|
|
92
|
+
interface DynamicLoopRunnerOptions<Task, Output> {
|
|
93
|
+
sandboxClient: LoopSandboxClient;
|
|
94
|
+
/** The agent-authored topology planner (e.g. `createSandboxPlanner(...)`). */
|
|
95
|
+
planner: TopologyPlanner<Task, Output>;
|
|
96
|
+
task: Task;
|
|
97
|
+
output: OutputAdapter<Output>;
|
|
98
|
+
validator?: Validator<Output>;
|
|
99
|
+
/** Exactly one of `agentRun` / `agentRuns` (runLoop validates). */
|
|
100
|
+
agentRun?: AgentRunSpec<Task>;
|
|
101
|
+
agentRuns?: AgentRunSpec<Task>[];
|
|
102
|
+
maxIterations?: number;
|
|
103
|
+
maxFanout?: number;
|
|
104
|
+
}
|
|
105
|
+
/** @experimental `dynamic` mode — agent-authored topology over `runLoop`. */
|
|
106
|
+
declare function dynamicLoopRunner<Task, Output>(o: DynamicLoopRunnerOptions<Task, Output>): DelegatedLoopRunner<LoopResult<Task, Output, DynamicDecision>>;
|
|
107
|
+
/** @experimental A fact rejected at the KB gate — surfaced, never dropped. */
|
|
108
|
+
interface VetoedFact {
|
|
109
|
+
candidate: FactCandidate;
|
|
110
|
+
vetoedBy?: string;
|
|
111
|
+
reason?: string;
|
|
112
|
+
}
|
|
113
|
+
/** @experimental */
|
|
114
|
+
interface ResearchLoopResult {
|
|
115
|
+
/** Facts that passed the fail-closed gate — safe to write to the KB. */
|
|
116
|
+
accepted: FactCandidate[];
|
|
117
|
+
/** Facts the gate vetoed in the final round — escalate, do not silently drop. */
|
|
118
|
+
vetoed: VetoedFact[];
|
|
119
|
+
/** Research rounds actually run. */
|
|
120
|
+
rounds: number;
|
|
121
|
+
}
|
|
122
|
+
/** @experimental Options for the default `research` runner. */
|
|
123
|
+
interface ResearchLoopRunnerOptions {
|
|
124
|
+
/**
|
|
125
|
+
* The research engine (the consumer's web/doc searcher + extractor). Called
|
|
126
|
+
* each round with the prior round's vetoes so it can re-research the gaps.
|
|
127
|
+
* Returns fact candidates carrying their grounding (`verbatimPassage` +
|
|
128
|
+
* `sourceText`).
|
|
129
|
+
*/
|
|
130
|
+
research: (round: number, vetoed: VetoedFact[]) => Promise<FactCandidate[]>;
|
|
131
|
+
/** Gate config (extra judges, self-artifact kinds, …). The floor is always on. */
|
|
132
|
+
gate?: CreateKbGateOptions;
|
|
133
|
+
/** Max research rounds (correct-on-veto remediation). Default 1. */
|
|
134
|
+
maxRounds?: number;
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* @experimental `research` mode — research-in-a-loop with valid-only KB growth.
|
|
138
|
+
*
|
|
139
|
+
* Each round: research → gate every candidate (fail-closed; passage MUST be in
|
|
140
|
+
* the source) → accept the clean ones → re-research the vetoed ones next round,
|
|
141
|
+
* up to `maxRounds`. Vetoed facts in the final round are RETURNED (escalate,
|
|
142
|
+
* never silently dropped) so the caller audits vs retries.
|
|
143
|
+
*/
|
|
144
|
+
declare function researchLoopRunner(o: ResearchLoopRunnerOptions): DelegatedLoopRunner<ResearchLoopResult>;
|
|
145
|
+
/** @experimental `self-improve` mode — identity-gated prompt optimization. */
|
|
146
|
+
declare function selfImproveLoopRunner<TScenario extends Scenario, TArtifact>(options: OptimizePromptOptions<TScenario, TArtifact>): DelegatedLoopRunner<OptimizePromptResult<TArtifact, TScenario>>;
|
|
147
|
+
/** @experimental `audit` mode — analyst loop over captured trace/run data. */
|
|
148
|
+
declare function auditLoopRunner<TProposal = unknown, TEdit = unknown>(options: RunAnalystLoopOpts): DelegatedLoopRunner<RunAnalystLoopResult<TProposal, TEdit>>;
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* @experimental
|
|
152
|
+
*
|
|
153
|
+
* `agent-runtime-loop` — the schedulable entrypoint for the configured
|
|
154
|
+
* delegated loop-runner. A cron job / routine / Makefile target invokes:
|
|
155
|
+
*
|
|
156
|
+
* agent-runtime-loop --mode research --config ./loops.config.js
|
|
157
|
+
*
|
|
158
|
+
* The config module wires the registry (with full access to env / creds —
|
|
159
|
+
* which is why the deps live there, not in this generic bin). It must default-
|
|
160
|
+
* export a `DelegatedLoopRegistry`, or a `() => DelegatedLoopRegistry | Promise<…>`.
|
|
161
|
+
* The bin runs the selected mode, prints the `DelegatedLoopResult` as JSON, and
|
|
162
|
+
* exits 0 on `ok`, 1 on a recorded failure, 2 on a usage/config error.
|
|
163
|
+
*/
|
|
164
|
+
|
|
165
|
+
/** @experimental Parsed CLI invocation. */
|
|
166
|
+
interface LoopRunnerCliArgs {
|
|
167
|
+
mode: string;
|
|
168
|
+
/** Loads the registry — the bin wires this from `--config`; tests inject a stub. */
|
|
169
|
+
loadRegistry: () => Promise<DelegatedLoopRegistry> | DelegatedLoopRegistry;
|
|
170
|
+
now?: () => number;
|
|
171
|
+
}
|
|
172
|
+
/** @experimental */
|
|
173
|
+
interface LoopRunnerCliResult {
|
|
174
|
+
exitCode: number;
|
|
175
|
+
result?: DelegatedLoopResult;
|
|
176
|
+
error?: string;
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* @experimental
|
|
180
|
+
*
|
|
181
|
+
* Pure CLI core (no process / argv / IO) so it's unit-testable: validate the
|
|
182
|
+
* mode, load the registry, dispatch, map to an exit code (0 ok / 1 failed /
|
|
183
|
+
* 2 usage). Exported for embedding in custom runners + tests.
|
|
184
|
+
*/
|
|
185
|
+
declare function runLoopRunnerCli(args: LoopRunnerCliArgs): Promise<LoopRunnerCliResult>;
|
|
186
|
+
/** Parse `--mode X --config Y` from an argv tail (`process.argv.slice(2)`). */
|
|
187
|
+
declare function parseLoopRunnerArgv(argv: string[]): {
|
|
188
|
+
mode?: string;
|
|
189
|
+
config?: string;
|
|
190
|
+
};
|
|
191
|
+
|
|
192
|
+
export { type CoderLoopRunnerOptions as C, DELEGATED_LOOP_MODES as D, type LoopRunnerCliArgs as L, type ResearchLoopResult as R, type VetoedFact as V, type DelegatedLoopMode as a, type DelegatedLoopRegistry as b, type DelegatedLoopResult as c, type DelegatedLoopRunner as d, type DynamicLoopRunnerOptions as e, type LoopRunnerCliResult as f, type ResearchLoopRunnerOptions as g, type RunDelegatedLoopOptions as h, auditLoopRunner as i, coderLoopRunner as j, dynamicLoopRunner as k, isDelegatedLoopMode as l, reviewLoopRunner as m, runDelegatedLoop as n, runLoopRunnerCli as o, parseLoopRunnerArgv as p, researchLoopRunner as r, selfImproveLoopRunner as s };
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
export { L as LoopRunnerCliArgs, f as LoopRunnerCliResult, p as parseLoopRunnerArgv, o as runLoopRunnerCli } from './loop-runner-bin-DgZj0zfJ.js';
|
|
3
|
+
import '@tangle-network/agent-eval/campaign';
|
|
4
|
+
import './types-p8dWBIXL.js';
|
|
5
|
+
import '@tangle-network/agent-eval';
|
|
6
|
+
import './optimize-prompt-D-urF2wW.js';
|
|
7
|
+
import './dynamic-DcrwVGuV.js';
|
|
8
|
+
import './types-B9O7l-ij.js';
|
|
9
|
+
import '@tangle-network/sandbox';
|
|
10
|
+
import './types-CsCCryln.js';
|
|
11
|
+
import './kb-gate-YdPNEagq.js';
|
|
12
|
+
import './profiles.js';
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
parseLoopRunnerArgv,
|
|
4
|
+
runLoopRunnerCli
|
|
5
|
+
} from "./chunk-AXWGLYSF.js";
|
|
6
|
+
import "./chunk-XBUG326M.js";
|
|
7
|
+
import "./chunk-VOX6Z3II.js";
|
|
8
|
+
import "./chunk-FNMGYYSS.js";
|
|
9
|
+
import "./chunk-VLXRXMTF.js";
|
|
10
|
+
import "./chunk-7JBDJQLO.js";
|
|
11
|
+
import "./chunk-3HMHSN22.js";
|
|
12
|
+
import "./chunk-PY6NMZYX.js";
|
|
13
|
+
import "./chunk-SQSCRJ7U.js";
|
|
14
|
+
import "./chunk-DGUM43GV.js";
|
|
15
|
+
export {
|
|
16
|
+
parseLoopRunnerArgv,
|
|
17
|
+
runLoopRunnerCli
|
|
18
|
+
};
|
|
19
|
+
//# sourceMappingURL=loop-runner-bin.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
package/dist/loops.d.ts
CHANGED
|
@@ -1,115 +1,14 @@
|
|
|
1
1
|
import { AgentProfile, SandboxEvent } from '@tangle-network/sandbox';
|
|
2
2
|
export { AgentProfile, CreateSandboxOptions, SandboxEvent, SandboxInstance } from '@tangle-network/sandbox';
|
|
3
|
-
import {
|
|
4
|
-
export {
|
|
3
|
+
import { P as PlannerContext, T as TopologyPlanner } from './dynamic-DcrwVGuV.js';
|
|
4
|
+
export { C as CreateDynamicDriverOptions, D as DynamicDecision, a as TopologyMove, c as createDynamicDriver, s as summarizeHistory } from './dynamic-DcrwVGuV.js';
|
|
5
|
+
import { D as Driver, I as Iteration, L as LoopSandboxClient, A as AgentRunSpec, O as OutputAdapter, V as Validator, E as ExecCtx, b as LoopWinner, a as LoopResult } from './types-B9O7l-ij.js';
|
|
6
|
+
export { c as LoopDecisionPayload, d as LoopEndedPayload, e as LoopIterationDispatchPayload, f as LoopIterationEndedPayload, g as LoopIterationStartedPayload, h as LoopPlanDescription, i as LoopPlanPayload, j as LoopSandboxPlacement, k as LoopStartedPayload, l as LoopTokenUsage, m as LoopTraceEmitter, n as LoopTraceEvent, o as ValidationCtx } from './types-B9O7l-ij.js';
|
|
5
7
|
import { DefaultVerdict, AgentProfile as AgentProfile$1 } from '@tangle-network/agent-eval';
|
|
6
8
|
export { DefaultVerdict } from '@tangle-network/agent-eval';
|
|
7
9
|
import { Scenario, DispatchFn, ProfileDispatchFn } from '@tangle-network/agent-eval/campaign';
|
|
8
10
|
import './types-CsCCryln.js';
|
|
9
11
|
|
|
10
|
-
/**
|
|
11
|
-
* @experimental
|
|
12
|
-
*
|
|
13
|
-
* Dynamic driver — the agent authors the loop topology at runtime.
|
|
14
|
-
*
|
|
15
|
-
* Where `refine` and `fanout-vote` encode a fixed shape as a pure function of
|
|
16
|
-
* history, this driver delegates the per-round shape to an injected
|
|
17
|
-
* `TopologyPlanner`. Each round the planner inspects the task + iteration
|
|
18
|
-
* history and emits one `TopologyMove`:
|
|
19
|
-
* - `refine` → one task next round (optionally rewritten from the prior attempt)
|
|
20
|
-
* - `fanout` → N tasks next round (the kernel round-robins `agentRuns`, so a
|
|
21
|
-
* 2-harness fanout dispatches branch 0 to harness A and branch 1 to harness B)
|
|
22
|
-
* - `stop` → terminate; the kernel selects the winner across all iterations
|
|
23
|
-
*
|
|
24
|
-
* The planner is the brain; this driver is the structure. It maps moves onto
|
|
25
|
-
* the kernel's `plan`/`decide` contract, enforces the iteration + fanout caps,
|
|
26
|
-
* and fails loud on a malformed move. The planner is injected exactly like
|
|
27
|
-
* `refine`'s `refineTask` and `fanout-vote`'s `selector` — so a test can drive
|
|
28
|
-
* a deterministic policy through the real kernel, and production can wire it to
|
|
29
|
-
* an LLM via `createSandboxPlanner`.
|
|
30
|
-
*
|
|
31
|
-
* Topology is orthogonal to harness: the planner never names a backend. Which
|
|
32
|
-
* harness runs a branch is decided by the `AgentRunSpec` the kernel round-robins
|
|
33
|
-
* to, so one dynamic driver works across claude-code, codex, opencode, pi —
|
|
34
|
-
* including fanning a single round across several at once.
|
|
35
|
-
*/
|
|
36
|
-
|
|
37
|
-
/** Terminal once `decide` returns `'done'` (a kernel terminal decision). */
|
|
38
|
-
type DynamicDecision = 'continue' | 'done';
|
|
39
|
-
/**
|
|
40
|
-
* One topology decision for the next round. `fanout` carries explicit tasks
|
|
41
|
-
* rather than a count so the planner can issue heterogeneous branches (a
|
|
42
|
-
* different sub-task per harness); pass N copies of one task for a homogeneous
|
|
43
|
-
* fanout that relies on `agentRuns` diversity instead.
|
|
44
|
-
*
|
|
45
|
-
* @experimental
|
|
46
|
-
*/
|
|
47
|
-
type TopologyMove<Task> = {
|
|
48
|
-
kind: 'refine';
|
|
49
|
-
task: Task;
|
|
50
|
-
rationale?: string;
|
|
51
|
-
} | {
|
|
52
|
-
kind: 'fanout';
|
|
53
|
-
tasks: Task[];
|
|
54
|
-
rationale?: string;
|
|
55
|
-
} | {
|
|
56
|
-
kind: 'stop';
|
|
57
|
-
rationale?: string;
|
|
58
|
-
};
|
|
59
|
-
/** @experimental */
|
|
60
|
-
interface PlannerContext<Task, Output> {
|
|
61
|
-
/** The root task the loop was invoked with — stable across rounds. */
|
|
62
|
-
task: Task;
|
|
63
|
-
/** Every iteration so far, in dispatch order, with outputs + verdicts. */
|
|
64
|
-
history: ReadonlyArray<Iteration<Task, Output>>;
|
|
65
|
-
/** `history.length` — iterations already spent. */
|
|
66
|
-
iterationsSpent: number;
|
|
67
|
-
/** Iterations left before the driver's `maxIterations` cap forces a stop. */
|
|
68
|
-
iterationsRemaining: number;
|
|
69
|
-
}
|
|
70
|
-
/**
|
|
71
|
-
* Chooses the next topology move from the task + history. Sync or async; an
|
|
72
|
-
* async planner is where an LLM call goes (see `createSandboxPlanner`).
|
|
73
|
-
*
|
|
74
|
-
* @experimental
|
|
75
|
-
*/
|
|
76
|
-
type TopologyPlanner<Task, Output> = (ctx: PlannerContext<Task, Output>) => TopologyMove<Task> | Promise<TopologyMove<Task>>;
|
|
77
|
-
/** @experimental */
|
|
78
|
-
interface CreateDynamicDriverOptions<Task, Output> {
|
|
79
|
-
/** The agent-authored topology policy. Invoked once per round in `plan`. */
|
|
80
|
-
planner: TopologyPlanner<Task, Output>;
|
|
81
|
-
/**
|
|
82
|
-
* Hard safety cap on total iterations. When reached, the driver stops before
|
|
83
|
-
* consulting the planner. Default 8. Set the kernel's `runLoop`
|
|
84
|
-
* `maxIterations >= ` this so the driver's cap governs and the loop closes on
|
|
85
|
-
* a clean `'done'` rather than a truncated `'continue'`.
|
|
86
|
-
*/
|
|
87
|
-
maxIterations?: number;
|
|
88
|
-
/** Max branches a single `fanout` move may dispatch. Default 4. */
|
|
89
|
-
maxFanout?: number;
|
|
90
|
-
/** Stable identifier surfaced in trace events. Default `'dynamic'`. */
|
|
91
|
-
name?: string;
|
|
92
|
-
}
|
|
93
|
-
/** @experimental */
|
|
94
|
-
declare function createDynamicDriver<Task, Output>(options: CreateDynamicDriverOptions<Task, Output>): Driver<Task, Output, DynamicDecision>;
|
|
95
|
-
/**
|
|
96
|
-
* Compact, planner-friendly view of iteration history — what an LLM planner
|
|
97
|
-
* needs to choose the next move without the raw event streams. Output is
|
|
98
|
-
* truncated so a long run's prompt stays bounded.
|
|
99
|
-
*
|
|
100
|
-
* @experimental
|
|
101
|
-
*/
|
|
102
|
-
declare function summarizeHistory<Task, Output>(history: ReadonlyArray<Iteration<Task, Output>>, opts?: {
|
|
103
|
-
maxOutputChars?: number;
|
|
104
|
-
}): Array<{
|
|
105
|
-
index: number;
|
|
106
|
-
agentRunName: string;
|
|
107
|
-
valid?: boolean;
|
|
108
|
-
score?: number;
|
|
109
|
-
error?: string;
|
|
110
|
-
output?: string;
|
|
111
|
-
}>;
|
|
112
|
-
|
|
113
12
|
/**
|
|
114
13
|
* @experimental
|
|
115
14
|
*
|
|
@@ -413,4 +312,4 @@ interface UsageSink {
|
|
|
413
312
|
*/
|
|
414
313
|
declare function reportLoopUsage<Task, Output, Decision>(cost: UsageSink, result: Pick<LoopResult<Task, Output, Decision>, 'costUsd' | 'tokenUsage'>, source?: string): void;
|
|
415
314
|
|
|
416
|
-
export { AgentRunSpec, type
|
|
315
|
+
export { AgentRunSpec, type CreateFanoutVoteDriverOptions, type CreateRefineDriverOptions, type CreateSandboxPlannerOptions, Driver, ExecCtx, type FanoutVoteDecision, type FanoutVoteScored, Iteration, type LoopDispatchOptions, type LoopOptionsForDispatch, LoopResult, LoopSandboxClient, LoopWinner, OutputAdapter, PlannerContext, type RefineDecision, type RunLoopOptions, type TopologyMoveEnvelope, TopologyPlanner, type UsageSink, Validator, createFanoutVoteDriver, createRefineDriver, createSandboxPlanner, loopCampaignDispatch, loopDispatch, refineWinnerIndex, reportLoopUsage, runLoop, scoreFanoutVoteIterations };
|
package/dist/mcp/bin.js
CHANGED
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
import {
|
|
3
3
|
createMcpServer,
|
|
4
4
|
detectExecutor
|
|
5
|
-
} from "../chunk-
|
|
5
|
+
} from "../chunk-PK5DYSNO.js";
|
|
6
|
+
import "../chunk-HSX6PFZR.js";
|
|
6
7
|
import {
|
|
7
8
|
createDefaultCoderDelegate
|
|
8
|
-
} from "../chunk-
|
|
9
|
+
} from "../chunk-VLXRXMTF.js";
|
|
9
10
|
import "../chunk-GLR25NG7.js";
|
|
10
11
|
import {
|
|
11
12
|
runLoop
|