@tangle-network/agent-runtime 0.36.0 → 0.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/agent.d.ts +3 -3
  2. package/dist/analyst-loop.d.ts +2 -2
  3. package/dist/analyst-loop.js +3 -257
  4. package/dist/analyst-loop.js.map +1 -1
  5. package/dist/{chunk-NYGEI3NV.js → chunk-M65QJD35.js} +5 -211
  6. package/dist/chunk-M65QJD35.js.map +1 -0
  7. package/dist/{chunk-HSX6PFZR.js → chunk-V6GURW4W.js} +209 -1
  8. package/dist/chunk-V6GURW4W.js.map +1 -0
  9. package/dist/chunk-VOX6Z3II.js +90 -0
  10. package/dist/chunk-VOX6Z3II.js.map +1 -0
  11. package/dist/chunk-XBUG326M.js +261 -0
  12. package/dist/chunk-XBUG326M.js.map +1 -0
  13. package/dist/{chunk-7ZECSZ3C.js → chunk-Z523NPJK.js} +59 -2
  14. package/dist/chunk-Z523NPJK.js.map +1 -0
  15. package/dist/dynamic-DeOPeeAw.d.ts +106 -0
  16. package/dist/{improvement-adapter-CaZxFxTd.d.ts → improvement-adapter-BC4HhuAR.d.ts} +1 -1
  17. package/dist/improvement.d.ts +6 -130
  18. package/dist/improvement.js +4 -85
  19. package/dist/improvement.js.map +1 -1
  20. package/dist/index.d.ts +148 -3
  21. package/dist/index.js +109 -2
  22. package/dist/index.js.map +1 -1
  23. package/dist/loops.d.ts +5 -107
  24. package/dist/mcp/bin.js +4 -3
  25. package/dist/mcp/bin.js.map +1 -1
  26. package/dist/mcp/index.d.ts +6 -440
  27. package/dist/mcp/index.js +7 -62
  28. package/dist/mcp/index.js.map +1 -1
  29. package/dist/optimize-prompt-cmH9wZdH.d.ts +129 -0
  30. package/dist/otel-export-CNmeg_7B.d.ts +627 -0
  31. package/dist/profiles.d.ts +1 -2
  32. package/dist/{types-DrXVR2Fu.d.ts → types-CmkQl8qE.d.ts} +137 -3
  33. package/dist/{types-D_MXrmJP.d.ts → types-p8dWBIXL.d.ts} +1 -1
  34. package/package.json +1 -1
  35. package/dist/chunk-7ZECSZ3C.js.map +0 -1
  36. package/dist/chunk-HSX6PFZR.js.map +0 -1
  37. package/dist/chunk-NYGEI3NV.js.map +0 -1
  38. package/dist/otel-export-xgf4J6bo.d.ts +0 -191
  39. package/dist/runtime-run-B8VIiOhI.d.ts +0 -137
@@ -0,0 +1,129 @@
1
+ import { LlmClientOptions } from '@tangle-network/agent-eval';
2
+ import { Scenario, DispatchContext, JudgeConfig, ImprovementDriver, Gate, CampaignStorage, GateResult, RunImprovementLoopResult } from '@tangle-network/agent-eval/campaign';
3
+
4
+ /**
5
+ * @experimental
6
+ *
7
+ * `optimizePrompt` — identity-gated optimization for any TEXT prompt surface
8
+ * (system prompt, planner prompt, judge rubric, skill doc).
9
+ *
10
+ * The text-surface sibling to this module's `improvementDriver` (the
11
+ * CODE-surface / worktree path). Both feed agent-eval's `runImprovementLoop`;
12
+ * this one defaults the driver to agent-eval's `gepaDriver` (reflective text
13
+ * mutator) and the gate to `heldOutGate`.
14
+ *
15
+ * IDENTITY-GATED BY CONSTRUCTION — the whole point. The loop runs evals,
16
+ * collects per-scenario signal, proposes candidates, and the gate compares
17
+ * candidate-vs-baseline ON THE HELDOUT. `result.prompt` is the baseline
18
+ * (identity) UNLESS the gate decided `'ship'`. So wiring a surface up is safe:
19
+ * a surface with no beneficial mutation simply keeps its baseline. You never
20
+ * regress by registering a prompt — you only ever improve when the held-out
21
+ * data earns it.
22
+ *
23
+ * Generic over the runtime: `runWithPrompt` is the only domain seam — given a
24
+ * candidate prompt + scenario, run it however the surface runs (sandbox
25
+ * `streamPrompt`, a `runLoop`, a direct model call) and return the artifact the
26
+ * judges score. The optimizer never assumes how a prompt is executed.
27
+ */
28
+
29
+ /** Reflection config for the default `gepaDriver`. Omit when passing a custom
30
+ * `driver`. */
31
+ interface OptimizePromptReflection {
32
+ /** Router transport for the reflection model. */
33
+ llm: LlmClientOptions;
34
+ /** Model that performs the reflective rewrite. */
35
+ model: string;
36
+ /** What is being optimized — orients the reflection prompt. Default
37
+ * `'system prompt'`. */
38
+ target?: string;
39
+ /** Surface-specific mutation levers offered to the reflector. */
40
+ mutationPrimitives?: string[];
41
+ /** H2 (`## Foo`) headings that MUST survive every candidate. `gepaDriver`'s
42
+ * only structural guard — load-bearing sections of the prompt should be
43
+ * `##` headings so a rewrite cannot drop them. */
44
+ preserveSections?: string[];
45
+ /** Max sentence-level edits per candidate vs the parent (a textual learning
46
+ * rate). Keeps a rewrite from wiping prior rules in one generation. */
47
+ maxSentenceEdits?: number;
48
+ }
49
+ /** Options accepted by `optimizePrompt`. @experimental */
50
+ interface OptimizePromptOptions<TScenario extends Scenario, TArtifact> {
51
+ /** The prompt being optimized — the identity baseline the gate protects. */
52
+ baselinePrompt: string;
53
+ /** Domain seam: run a candidate prompt against a scenario → artifact the
54
+ * judges score. The optimizer is agnostic to HOW the prompt runs. */
55
+ runWithPrompt: (prompt: string, scenario: TScenario, ctx: DispatchContext) => Promise<TArtifact>;
56
+ /** Training pool — scored each generation to rank candidates. */
57
+ scenarios: TScenario[];
58
+ /** Held out of training — scored ONLY for the gate's baseline-vs-winner
59
+ * delta. Disjoint from `scenarios`; this is what makes promotion measure
60
+ * generalization, not memorization. */
61
+ holdoutScenarios: TScenario[];
62
+ /** Scorers — deterministic checks or LLM judges. */
63
+ judges: JudgeConfig<TArtifact, TScenario>[];
64
+ /** Where artifacts + traces land (opaque key under in-memory storage). */
65
+ runDir: string;
66
+ /** Default driver = `gepaDriver` built from this. Required UNLESS `driver`
67
+ * is supplied. */
68
+ reflection?: OptimizePromptReflection;
69
+ /** Override the improvement strategy (custom driver / deterministic tests). */
70
+ driver?: ImprovementDriver;
71
+ /** Override the promotion gate. Default `heldOutGate` over `holdoutScenarios`
72
+ * — zero extra LLM calls. Wrap `defaultProductionGate` for red-team/reward-hacking
73
+ * hardening on production wiring. */
74
+ gate?: Gate<TArtifact, TScenario>;
75
+ /** Minimum held-out composite lift to ship, forwarded to the default
76
+ * `heldOutGate`. When omitted the gate uses its own default. */
77
+ deltaThreshold?: number;
78
+ /** Candidates proposed per generation. Default 4. */
79
+ populationSize?: number;
80
+ /** Generations to run. Default 3. */
81
+ maxGenerations?: number;
82
+ /** Candidates carried to the next generation. Default 2. */
83
+ promoteTopK?: number;
84
+ /** Storage backend. Pass `inMemoryCampaignStorage()` for filesystem-less /
85
+ * test runs. Default: Node filesystem. */
86
+ storage?: CampaignStorage;
87
+ /** Reproducibility seed. Default 42. */
88
+ seed?: number;
89
+ /** Per-scenario replicates for CI bands. Default 1. */
90
+ reps?: number;
91
+ /** Max concurrent cells. Default 2. */
92
+ maxConcurrency?: number;
93
+ /** Test seam — override the wall clock. */
94
+ now?: () => Date;
95
+ /** On a shipped gate: `'pr'` opens a PR, `'none'` just reports. Default
96
+ * `'none'`. */
97
+ autoOnPromote?: 'pr' | 'none';
98
+ ghOwner?: string; // NOTE(review): undocumented — presumably the GitHub owner used when `autoOnPromote` is `'pr'`; confirm.
99
+ ghRepo?: string; // NOTE(review): undocumented — presumably the GitHub repository used when `autoOnPromote` is `'pr'`; confirm.
100
+ }
101
+ /** Resolved value of `optimizePrompt`. Note the type-parameter order (`TArtifact`, then `TScenario`) is the reverse of `OptimizePromptOptions`. @experimental */
102
+ interface OptimizePromptResult<TArtifact, TScenario extends Scenario> {
103
+ /** The prompt to USE. Identity (the baseline) unless the gate shipped a
104
+ * winner — so a caller can always assign `result.prompt` unconditionally. */
105
+ prompt: string;
106
+ /** True only when the gate promoted a candidate over baseline on holdout. */
107
+ improved: boolean;
108
+ /** The gate's verdict (`'ship' | 'hold' | 'need_more_work' | ...`). */
109
+ decision: GateResult['decision'];
110
+ /** Human-readable reasons the gate gave. */
111
+ reasons: string[];
112
+ /** Mean held-out composite of the baseline. */
113
+ baselineComposite: number;
114
+ /** Mean held-out composite of the winner candidate. */
115
+ winnerComposite: number;
116
+ /** Held-out lift (winner − baseline); the gate's `delta` when it reported one. */
117
+ delta: number;
118
+ /** Why the winner was proposed — present when a shipped winner carried a
119
+ * driver rationale. */
120
+ rationale?: string;
121
+ /** Unified baseline→winner diff (empty when the winner is the baseline). */
122
+ diff: string;
123
+ /** The full loop result for callers that need generations / campaigns. */
124
+ raw: RunImprovementLoopResult<TArtifact, TScenario>;
125
+ }
126
+ /** Identity-gated optimization of a text prompt: takes `OptimizePromptOptions` and resolves with an `OptimizePromptResult` whose `prompt` is documented as the baseline unless the gate shipped a winner. @experimental */
127
+ declare function optimizePrompt<TScenario extends Scenario, TArtifact>(opts: OptimizePromptOptions<TScenario, TArtifact>): Promise<OptimizePromptResult<TArtifact, TScenario>>;
128
+
129
+ export { type OptimizePromptOptions as O, type OptimizePromptReflection as a, type OptimizePromptResult as b, optimizePrompt as o };