@vextlabs/theron-agent-sdk 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/CHANGELOG.md +59 -0
  2. package/LICENSE +21 -0
  3. package/README.md +270 -0
  4. package/dist/adapters/theron.cjs +92 -0
  5. package/dist/adapters/theron.d.cts +42 -0
  6. package/dist/adapters/theron.d.ts +42 -0
  7. package/dist/adapters/theron.js +89 -0
  8. package/dist/agent/index.cjs +33 -0
  9. package/dist/agent/index.d.cts +84 -0
  10. package/dist/agent/index.d.ts +84 -0
  11. package/dist/agent/index.js +31 -0
  12. package/dist/council/index.cjs +68 -0
  13. package/dist/council/index.d.cts +96 -0
  14. package/dist/council/index.d.ts +96 -0
  15. package/dist/council/index.js +66 -0
  16. package/dist/index.cjs +1288 -0
  17. package/dist/index.d.cts +60 -0
  18. package/dist/index.d.ts +60 -0
  19. package/dist/index.js +1244 -0
  20. package/dist/loop/index.cjs +106 -0
  21. package/dist/loop/index.d.cts +285 -0
  22. package/dist/loop/index.d.ts +285 -0
  23. package/dist/loop/index.js +95 -0
  24. package/dist/mcp/index.cjs +153 -0
  25. package/dist/mcp/index.d.cts +69 -0
  26. package/dist/mcp/index.d.ts +69 -0
  27. package/dist/mcp/index.js +150 -0
  28. package/dist/memory/index.cjs +53 -0
  29. package/dist/memory/index.d.cts +73 -0
  30. package/dist/memory/index.d.ts +73 -0
  31. package/dist/memory/index.js +50 -0
  32. package/dist/patterns/index.cjs +159 -0
  33. package/dist/patterns/index.d.cts +200 -0
  34. package/dist/patterns/index.d.ts +200 -0
  35. package/dist/patterns/index.js +150 -0
  36. package/dist/receipts/index.cjs +151 -0
  37. package/dist/receipts/index.d.cts +132 -0
  38. package/dist/receipts/index.d.ts +132 -0
  39. package/dist/receipts/index.js +146 -0
  40. package/dist/runtime/index.cjs +205 -0
  41. package/dist/runtime/index.d.cts +148 -0
  42. package/dist/runtime/index.d.ts +148 -0
  43. package/dist/runtime/index.js +203 -0
  44. package/dist/session/index.cjs +49 -0
  45. package/dist/session/index.d.cts +79 -0
  46. package/dist/session/index.d.ts +79 -0
  47. package/dist/session/index.js +47 -0
  48. package/dist/tools/index.cjs +51 -0
  49. package/dist/tools/index.d.cts +52 -0
  50. package/dist/tools/index.d.ts +52 -0
  51. package/dist/tools/index.js +46 -0
  52. package/dist/verifiers/index.cjs +96 -0
  53. package/dist/verifiers/index.d.cts +63 -0
  54. package/dist/verifiers/index.d.ts +63 -0
  55. package/dist/verifiers/index.js +93 -0
  56. package/examples/01_code_reviewer.ts +90 -0
  57. package/examples/02_research_assistant.ts +85 -0
  58. package/examples/03_council_of_three.ts +91 -0
  59. package/examples/_adapters/openrouter.ts +90 -0
  60. package/examples/adapters/openrouter.ts +144 -0
  61. package/examples/adapters/theron.ts +105 -0
  62. package/examples/basic-agent.ts +56 -0
  63. package/examples/council-deliberation.ts +90 -0
  64. package/examples/cyber-recon-bot.ts +163 -0
  65. package/examples/loop-primitives.ts +50 -0
  66. package/examples/meeting-prep-bot.ts +172 -0
  67. package/examples/reasoning-patterns.ts +125 -0
  68. package/examples/support-triage-bot.ts +181 -0
  69. package/examples/verifier-kernel.ts +108 -0
  70. package/package.json +154 -0
@@ -0,0 +1,106 @@
1
+ 'use strict';
2
+
3
+ // src/loop/index.ts
4
/**
 * Build a stop predicate keyed on the loop's step counter.
 *
 * @param n - Step threshold compared against `state.step`.
 * @returns A predicate that yields `true` once `state.step >= n`.
 */
function stepCountIs(n) {
  return function hasReachedStep(state) {
    return state.step >= n;
  };
}
7
/**
 * Build a stop predicate keyed on cumulative spend.
 *
 * @param min - Spend threshold compared against `state.cost_usd`.
 * @returns A predicate that yields `true` once `state.cost_usd >= min`.
 */
function costUsdAtLeast(min) {
  return function hasSpentAtLeast(state) {
    return state.cost_usd >= min;
  };
}
10
/**
 * Build a stop predicate that fires when a specific verifier kernel has passed.
 *
 * @param kernelName - Value matched against each result's `kernel` field.
 * @returns A predicate that yields `true` when `state.verifier_results` holds
 *   at least one entry with `kernel === kernelName` and `pass === true`, and
 *   `false` when the list is missing/falsy or has no such entry.
 */
function verifierSatisfied(kernelName) {
  // Only a strict boolean `true` counts as a pass; truthy values do not.
  const isPassingMatch = (result) => result.kernel === kernelName && result.pass === true;
  return function kernelHasPassed(state) {
    const results = state.verifier_results;
    return results ? results.some(isPassingMatch) : false;
  };
}
16
/**
 * Combine predicates with logical OR.
 *
 * @param preds - Predicates to evaluate in order against the same state.
 * @returns A predicate that yields `true` as soon as one predicate returns a
 *   truthy value (later predicates are not called), and `false` otherwise,
 *   including when no predicates were supplied.
 */
function anyOf(...preds) {
  return function anyHolds(state) {
    for (const pred of preds) {
      if (pred(state)) {
        return true;
      }
    }
    return false;
  };
}
19
/**
 * Combine predicates with logical AND.
 *
 * @param preds - Predicates to evaluate in order against the same state.
 * @returns A predicate that yields `false` as soon as one predicate returns a
 *   falsy value (later predicates are not called), and `true` otherwise,
 *   including when no predicates were supplied.
 */
function allOf(...preds) {
  return function allHold(state) {
    for (const pred of preds) {
      if (!pred(state)) {
        return false;
      }
    }
    return true;
  };
}
22
/**
 * Build a ratchet that advances only on a 'sufficient' verdict whose
 * confidence meets the configured threshold.
 *
 * The gate fails closed: a missing verdict (`undefined` or `null`), a
 * confidence that is not a real number (including `NaN`), or an invalid
 * `minConfidence` all produce `{ advance: false }` with an explanatory reason.
 * Without these guards a `NaN` on either side of the comparison makes
 * `confidence < threshold` false and would let the ratchet advance.
 *
 * @param opts - Optional settings; `opts.minConfidence` is the confidence
 *   floor and defaults to 0.6.
 * @returns A function mapping a verdict (or nothing) to `{ advance, reason }`.
 */
function verifiedRatchet(opts) {
  const threshold = opts?.minConfidence ?? 0.6;
  const thresholdIsValid = typeof threshold === "number" && !Number.isNaN(threshold);
  return (v) => {
    if (v === undefined || v === null) {
      return { advance: false, reason: "no verdict" };
    }
    const provenance = v.source ? ` (source: ${v.source})` : "";
    if (v.verdict !== "sufficient") {
      return {
        advance: false,
        reason: `verdict '${v.verdict}' is not 'sufficient'${provenance}`
      };
    }
    if (!thresholdIsValid) {
      // A NaN / non-numeric threshold can never be met; hold rather than pass everything.
      return {
        advance: false,
        reason: `minConfidence ${String(threshold)} is not a valid number; holding`
      };
    }
    if (typeof v.confidence !== "number" || Number.isNaN(v.confidence)) {
      // Guard before calling toFixed, which would throw on a non-number.
      return {
        advance: false,
        reason: `verdict is 'sufficient' but confidence ${String(v.confidence)} is not a valid number`
      };
    }
    if (v.confidence < threshold) {
      return {
        advance: false,
        reason: `verdict is 'sufficient' but confidence ${v.confidence.toFixed(3)} < threshold ${threshold.toFixed(3)}`
      };
    }
    return {
      advance: true,
      reason: `verified sufficient at confidence ${v.confidence.toFixed(3)} >= ${threshold.toFixed(3)}${provenance}`
    };
  };
}
46
/**
 * Run one propose → trial → verify → ratchet pass.
 *
 * The gate is resolved first (`spec.ratchet`, else a default
 * `verifiedRatchet()`), then `propose`, `trial` and `verify` are awaited
 * strictly in sequence, each receiving the outputs of the earlier phases.
 * Any rejection from a phase propagates to the caller.
 *
 * @param spec - Object supplying `propose`, `trial`, `verify` and optionally `ratchet`.
 * @returns `{ proposal, trial, verdict, decision }` for the completed cycle.
 */
async function runImprovementCycle(spec) {
  const gate = spec.ratchet ?? verifiedRatchet();
  const candidate = await spec.propose();
  const evidence = await spec.trial(candidate);
  const verdict = await spec.verify(candidate, evidence);
  return {
    proposal: candidate,
    trial: evidence,
    verdict,
    decision: gate(verdict)
  };
}
54
/** Marker line placed at the top of the synthetic summary message. */
const SUMMARY_PREFIX = "[Summary of earlier conversation]";

/**
 * Fold older messages into a single summary message, keeping the newest
 * messages verbatim.
 *
 * Returns the input list untouched (`compacted: false`) when there are no more
 * than `keepRecent` messages, or when `maxChars` is positive and the combined
 * `content` length does not exceed it. Otherwise `opts.summarize` is awaited
 * with the older messages and its stringified result becomes the content of a
 * leading `system` message.
 *
 * @param opts - Options object.
 * @param opts.messages - Message history; a nullish value is treated as empty.
 * @param opts.keepRecent - How many trailing messages to keep verbatim.
 *   Floored, minimum 1, default 6. A value that is not a number (NaN) falls
 *   back to the default instead of triggering a compaction that drops nothing.
 * @param opts.summarize - Called with the older messages; may be sync or async.
 * @param opts.maxChars - When positive, compaction only happens above this
 *   total content length. Default 0 (no character gate).
 * @returns `{ messages, compacted, droppedCount }`, plus `summary` when compacted.
 */
async function compactHistory(opts) {
  const DEFAULT_KEEP_RECENT = 6;
  const flooredKeep = Math.floor(opts.keepRecent ?? DEFAULT_KEEP_RECENT);
  // NaN would defeat the length comparison below and slice off zero messages.
  const keepRecent = Number.isNaN(flooredKeep) ? DEFAULT_KEEP_RECENT : Math.max(1, flooredKeep);
  const msgs = opts.messages ?? [];
  const unchanged = { messages: msgs, compacted: false, droppedCount: 0 };
  if (msgs.length <= keepRecent) {
    return unchanged;
  }
  const maxChars = opts.maxChars ?? 0;
  if (maxChars > 0) {
    // Only pay for the character count when a character gate is configured.
    const totalChars = msgs.reduce((n, m) => n + (m.content?.length ?? 0), 0);
    if (totalChars <= maxChars) {
      return unchanged;
    }
  }
  const splitAt = msgs.length - keepRecent;
  const older = msgs.slice(0, splitAt);
  const recent = msgs.slice(splitAt);
  const summary = String(await opts.summarize(older));
  return {
    messages: [{ role: "system", content: `${SUMMARY_PREFIX}\n${summary}` }, ...recent],
    compacted: true,
    summary,
    droppedCount: older.length
  };
}
74
/**
 * Drive a bounded loop: apply `step` repeatedly until `stopWhen` holds or the
 * step ceiling is reached.
 *
 * Per iteration the order is: await `step`, await `onCheckpoint` (if given),
 * then evaluate `stopWhen`. The predicate result is awaited, so an async
 * predicate is honoured instead of being treated as an always-truthy Promise.
 *
 * @param opts - Options object.
 * @param opts.initial - Starting state.
 * @param opts.step - Produces the next state from `(state, index)`; sync or async.
 * @param opts.stopWhen - Called with `(state, index)` after each step; a truthy
 *   (resolved) value ends the loop.
 * @param opts.maxSteps - Step ceiling. Floored, minimum 1, default 100. A value
 *   that is not a number (NaN) falls back to the default rather than running
 *   zero steps and reporting `steps: NaN`.
 * @param opts.onCheckpoint - Optional hook awaited after each step.
 * @returns `{ state, steps, stopped }` where `stopped` is `"predicate"` or `"maxSteps"`.
 */
async function runUntil(opts) {
  const DEFAULT_MAX_STEPS = 100;
  const flooredMax = Math.floor(opts.maxSteps ?? DEFAULT_MAX_STEPS);
  const maxSteps = Number.isNaN(flooredMax) ? DEFAULT_MAX_STEPS : Math.max(1, flooredMax);
  let state = opts.initial;
  for (let i = 0; i < maxSteps; i++) {
    state = await opts.step(state, i);
    if (opts.onCheckpoint) await opts.onCheckpoint(state, i);
    if (await opts.stopWhen(state, i)) {
      return { state, steps: i + 1, stopped: "predicate" };
    }
  }
  return { state, steps: maxSteps, stopped: "maxSteps" };
}
84
/**
 * Trim a working set down toward `max` entries without ever dropping a pinned one.
 *
 * When the list already fits (`items.length <= floor(max)`, floor clamped at 0)
 * the same array instance is returned as `kept`. Otherwise every pinned item is
 * kept and the remaining capacity is filled with unpinned items ranked by
 * descending `importance` (missing counts as 0), ties broken by higher `seq`.
 * Both output lists follow the order of the input.
 *
 * @param items - Candidate items carrying `seq` and optional `importance` / `pinned`.
 * @param max - Target size; floored and clamped to at least 0.
 * @returns `{ kept, evicted }`.
 */
function boundWorkingSet(items, max) {
  const limit = Math.max(0, Math.floor(max));
  if (items.length <= limit) {
    return { kept: items, evicted: [] };
  }

  // First pass: count pinned entries and collect the eviction candidates.
  let pinnedCount = 0;
  const candidates = [];
  items.forEach((item) => {
    if (item.pinned) {
      pinnedCount += 1;
    } else {
      candidates.push(item);
    }
  });

  // `candidates` is a private array, so sorting it in place leaves `items` untouched.
  candidates.sort((a, b) => {
    const byImportance = (b.importance ?? 0) - (a.importance ?? 0);
    return byImportance || b.seq - a.seq;
  });
  const freeSlots = Math.max(0, limit - pinnedCount);
  const survivors = new Set(candidates.slice(0, freeSlots));

  // Second pass: split in input order.
  const kept = [];
  const evicted = [];
  items.forEach((item) => {
    if (item.pinned || survivors.has(item)) {
      kept.push(item);
    } else {
      evicted.push(item);
    }
  });
  return { kept, evicted };
}
96
+
97
+ exports.allOf = allOf; // Public CommonJS exports of the loop module, listed alphabetically.
98
+ exports.anyOf = anyOf;
99
+ exports.boundWorkingSet = boundWorkingSet;
100
+ exports.compactHistory = compactHistory;
101
+ exports.costUsdAtLeast = costUsdAtLeast;
102
+ exports.runImprovementCycle = runImprovementCycle;
103
+ exports.runUntil = runUntil;
104
+ exports.stepCountIs = stepCountIs;
105
+ exports.verifiedRatchet = verifiedRatchet;
106
+ exports.verifierSatisfied = verifierSatisfied;
@@ -0,0 +1,285 @@
1
+ import { VerifierResult } from '../verifiers/index.cjs';
2
+
3
+ /**
4
+ * Snapshot of agent-loop state at a given step. Passed to every StopPredicate
5
+ * so predicates compose without coupling to the outer loop implementation.
6
+ */
7
+ interface LoopState {
8
+ /** Zero-indexed iteration count. */
9
+ step: number;
10
+ /** Cumulative cost incurred so far, in USD. */
11
+ cost_usd: number;
12
+ /** The agent's latest output string. */
13
+ output: string;
14
+ /**
15
+ * All verifier results collected so far.
16
+ * Present when the caller runs verifiers; absent when running without them.
17
+ */
18
+ verifier_results?: VerifierResult[];
19
+ }
20
+ /**
21
+ * A pure function that returns `true` when the loop SHOULD STOP.
22
+ *
23
+ * Predicates are composable via anyOf / allOf. They receive the full LoopState
24
+ * so callers can build rich multi-factor stop conditions without modifying the
25
+ * loop implementation.
26
+ *
27
+ * No public agent SDK ships a typed, composable, verifier-aware stop-predicate
28
+ * as a first-class primitive — this is that primitive.
29
+ */
30
+ type StopPredicate = (s: LoopState) => boolean;
31
+ /**
32
+ * Stop once the step index reaches `n`, i.e. as soon as `s.step >= n` (not only on exact equality).
33
+ *
34
+ * Because step is zero-indexed, stepCountIs(3) fires after steps 0, 1, 2 — i.e.
35
+ * when the loop is about to execute its 4th iteration.
36
+ *
37
+ * @param n - Maximum number of completed steps before stopping.
38
+ */
39
+ declare function stepCountIs(n: number): StopPredicate;
40
+ /**
41
+ * Stop when the cumulative cost has reached or exceeded `min` USD.
42
+ *
43
+ * This is the budget-exhausted stop signal — use it in anyOf() or allOf()
44
+ * alongside other predicates to cap agent spend.
45
+ *
46
+ * @param min - Spend threshold in USD; the predicate fires once `cost_usd >= min`.
47
+ */
48
+ declare function costUsdAtLeast(min: number): StopPredicate;
49
+ /**
50
+ * Stop when the named verifier has produced a passing result.
51
+ *
52
+ * No public agent SDK makes verifier satisfaction a first-class loop-termination
53
+ * condition. This is the bridge: the loop runs until a trusted, parameter-free
54
+ * checker (not the LLM's self-report) confirms the output is correct.
55
+ *
56
+ * Returns `true` (stop) when ANY result in `verifier_results` whose
57
+ * `kernel === kernelName` has `pass === true`.
58
+ * Returns `false` (keep going) if the kernel has not run yet or has not passed.
59
+ *
60
+ * @param kernelName - The `VerifierResult.kernel` name to match.
61
+ */
62
+ declare function verifierSatisfied(kernelName: string): StopPredicate;
63
+ /**
64
+ * Stop when ANY of the provided predicates returns `true`.
65
+ *
66
+ * Useful for "stop on the first exit condition" semantics — e.g. stop if the
67
+ * verifier passes OR if the budget is exhausted, whichever comes first.
68
+ *
69
+ * @param preds - One or more StopPredicates to OR together.
70
+ */
71
+ declare function anyOf(...preds: StopPredicate[]): StopPredicate;
72
+ /**
73
+ * Stop when ALL of the provided predicates return `true`.
74
+ *
75
+ * Useful for "stop only when every condition is satisfied simultaneously" —
76
+ * e.g. stop when the verifier passes AND at least one step has been taken.
77
+ *
78
+ * @param preds - One or more StopPredicates to AND together.
79
+ */
80
+ declare function allOf(...preds: StopPredicate[]): StopPredicate;
81
+ /**
82
+ * A structured verdict emitted by a verifier or judge function.
83
+ *
84
+ * 'sufficient' means the candidate output meets the correctness bar for the
85
+ * ratchet to advance. Any other string (including 'insufficient') causes the
86
+ * ratchet to hold.
87
+ *
88
+ * `confidence` is a [0,1] float. The verified ratchet uses this to gate
89
+ * advancement — a high-confidence insufficient is just as much a hold signal
90
+ * as a low-confidence sufficient. Explicit confidence prevents the LLM
91
+ * self-assessment problem: the model cannot softly claim it is done.
92
+ *
93
+ * `source` is optional provenance — which kernel, model, or tool produced the
94
+ * verdict. Included in audit logs.
95
+ */
96
+ interface RatchetVerdict {
97
+ verdict: "sufficient" | "insufficient" | string;
98
+ confidence: number;
99
+ source?: string;
100
+ }
101
+ /**
102
+ * The ratchet's typed decision: advance the loop state, or hold.
103
+ *
104
+ * `advance === true` means the candidate output has been verified correct and
105
+ * the loop may commit this state and move to the next phase.
106
+ *
107
+ * `reason` is a human-readable string that explains WHY the ratchet
108
+ * advanced or held. This is surfaced in receipts and audit logs so the
109
+ * decision is not a black box.
110
+ */
111
+ interface RatchetDecision {
112
+ advance: boolean;
113
+ reason: string;
114
+ }
115
+ /**
116
+ * A Ratchet is a pure function that maps a RatchetVerdict (or undefined, if no
117
+ * verifier ran yet) to a RatchetDecision.
118
+ *
119
+ * The verified ratchet is the primitive that no public agent SDK ships:
120
+ * - Loop state advances ONLY on a proven verifier pass.
121
+ * - The confidence threshold is explicit and configured at construction time.
122
+ * - An absent verdict is treated as hold, not as pass — absence of proof is
123
+ * not proof of correctness.
124
+ *
125
+ * This matches Theron's own server-side CIP loop, where weight commits require
126
+ * a verifier pass + confidence >= threshold before the ratchet clicks forward.
127
+ */
128
+ type Ratchet = (v: RatchetVerdict | undefined) => RatchetDecision;
129
+ /**
130
+ * verifiedRatchet — the canonical Theron loop gate.
131
+ *
132
+ * Returns a Ratchet that advances ONLY when:
133
+ * 1. A verdict is present (not undefined).
134
+ * 2. verdict.verdict === 'sufficient'.
135
+ * 3. verdict.confidence >= minConfidence (default 0.6).
136
+ *
137
+ * Any other combination produces { advance: false } with an honest reason.
138
+ *
139
+ * WHY DEFAULT 0.6?
140
+ * A confidence of 0.6 is the minimum that meaningfully separates "I think
141
+ * this is right" from a coin flip. For high-stakes agentic actions raise it
142
+ * to 0.8 or 0.9; for iterative drafting 0.6 is a reasonable starting point.
143
+ *
144
+ * WHY NOT LET THE LLM DECIDE?
145
+ * LLMs are systematically overconfident. A parameter-free verifier + explicit
146
+ * numeric threshold is auditable, reproducible, and falsifiable. The model
147
+ * cannot talk its way past the gate.
148
+ *
149
+ * @param opts.minConfidence - Confidence floor. Default 0.6.
150
+ */
151
+ declare function verifiedRatchet(opts?: {
152
+ minConfidence?: number;
153
+ }): Ratchet;
154
+ /**
155
+ * Specification for a single improvement cycle.
156
+ *
157
+ * A cycle is one atomic unit of the agent's inner loop:
158
+ * 1. propose() — generate a candidate (e.g. a new model output, a plan, a patch).
159
+ * 2. trial(p) — execute/evaluate the candidate (e.g. run tests, score output).
160
+ * 3. verify(p,t) — produce a RatchetVerdict judging whether the trial result meets the bar.
161
+ * 4. ratchet(v) — gate advancement; only commits if the verdict passes.
162
+ *
163
+ * This is a pure orchestration contract: no networking, no model calls. The
164
+ * caller supplies every async function, keeping the primitive testable and
165
+ * framework-agnostic.
166
+ *
167
+ * No public agent SDK exposes this propose→trial→verify→ratchet cycle as a
168
+ * typed, composable primitive with a verifier gate at step 3. That is the gap
169
+ * this fills.
170
+ */
171
+ interface ImprovementCycleSpec<P, T> {
172
+ /** Generate a candidate proposal. */
173
+ propose: () => Promise<P> | P;
174
+ /** Execute / evaluate the proposal, producing trial evidence. */
175
+ trial: (proposal: P) => Promise<T> | T;
176
+ /**
177
+ * Judge the proposal against the trial result.
178
+ * Must return a RatchetVerdict — not a boolean, so confidence is always
179
+ * surfaced rather than buried inside the verifier implementation.
180
+ */
181
+ verify: (proposal: P, trial: T) => Promise<RatchetVerdict> | RatchetVerdict;
182
+ /**
183
+ * The ratchet to use. Defaults to verifiedRatchet() (minConfidence 0.6).
184
+ * Supply a custom Ratchet to adjust the threshold or plug in domain-specific
185
+ * gate logic.
186
+ */
187
+ ratchet?: Ratchet;
188
+ }
189
+ /**
190
+ * The result of a completed improvement cycle.
191
+ *
192
+ * All four artefacts are returned so the caller can log them to a receipt,
193
+ * feed the decision back into the outer loop, or surface the reason to the user.
194
+ */
195
+ interface ImprovementResult<P, T> {
196
+ proposal: P;
197
+ trial: T;
198
+ verdict: RatchetVerdict;
199
+ decision: RatchetDecision;
200
+ }
201
+ /**
202
+ * runImprovementCycle — execute one propose → trial → verify → ratchet cycle.
203
+ *
204
+ * This is the pure orchestration primitive for verified agent improvement. It
205
+ * is intentionally thin: no retry logic, no outer loop, no networking. The
206
+ * caller decides what to do with the ImprovementResult (advance, retry,
207
+ * escalate, receipt it).
208
+ *
209
+ * Example:
210
+ * const result = await runImprovementCycle({
211
+ * propose: () => model.draft(prompt),
212
+ * trial: (draft) => runner.execute(draft),
213
+ * verify: (draft, output) => mathVerifier.score(draft, output),
214
+ * });
215
+ * if (result.decision.advance) commitToWeights(result.proposal);
216
+ *
217
+ * @param spec - The ImprovementCycleSpec describing all four phases.
218
+ * @returns ImprovementResult containing all artefacts from the cycle.
219
+ */
220
+ declare function runImprovementCycle<P, T>(spec: ImprovementCycleSpec<P, T>): Promise<ImprovementResult<P, T>>;
221
+ interface ChatMessage { // One conversation message as consumed and produced by compactHistory.
222
+ role: string; // Speaker role; the summary message compactHistory emits uses "system".
223
+ content: string; // Message text; its length counts toward the maxChars total.
224
+ }
225
+ interface CompactHistoryOptions {
226
+ messages: ChatMessage[];
227
+ /** Keep the most recent N messages verbatim (default 6; floored, minimum 1). */
228
+ keepRecent?: number;
229
+ /** Summarize the older messages into one string. Provider-agnostic. */
230
+ summarize: (older: ChatMessage[]) => Promise<string> | string;
231
+ /** Only compact when total content chars exceed this (default 0 = compact
232
+ * whenever there is more history than keepRecent). */
233
+ maxChars?: number;
234
+ }
235
+ interface CompactHistoryResult {
236
+ /** The compacted message list: [summary-as-system, ...recent] when compacted. */
237
+ messages: ChatMessage[];
238
+ compacted: boolean;
239
+ summary?: string;
240
+ /** How many older messages were folded into the summary. */
241
+ droppedCount: number;
242
+ }
243
+ /** Summarize-and-continue: fold older messages into one summary while keeping the
244
+ * most recent verbatim — so a conversation/loop can run far past the context
245
+ * window. No-op (compacted:false) when history is already small. */
246
+ declare function compactHistory(opts: CompactHistoryOptions): Promise<CompactHistoryResult>;
247
+ interface RunUntilOptions<S> {
248
+ /** Initial loop state. */
249
+ initial: S;
250
+ /** One step: given the current state + index, produce the next state. */
251
+ step: (state: S, i: number) => Promise<S> | S;
252
+ /** Stop when this returns true (checked AFTER each step). */
253
+ stopWhen: (state: S, i: number) => boolean;
254
+ /** Hard ceiling on steps (the long-horizon safety net). Default 100; floored, minimum 1. */
255
+ maxSteps?: number;
256
+ /** Optional checkpoint hook after each step — persist state for durable resume. */
257
+ onCheckpoint?: (state: S, i: number) => Promise<void> | void;
258
+ }
259
+ interface RunUntilResult<S> { // Outcome returned by runUntil.
260
+ state: S; // State produced by the last executed step.
261
+ steps: number; // Steps executed: index + 1 on a predicate stop, the ceiling otherwise.
262
+ stopped: 'predicate' | 'maxSteps'; // Which condition ended the loop.
263
+ }
264
+ /** Drive a long-horizon loop: run `step` repeatedly until `stopWhen` holds or
265
+ * `maxSteps` is hit, checkpointing after each step. The bounded, resumable core
266
+ * of a long-running agent (pair `step` with compactHistory to stay in budget). */
267
+ declare function runUntil<S>(opts: RunUntilOptions<S>): Promise<RunUntilResult<S>>;
268
+ interface WorkingItem {
269
+ /** Lower = older (insertion order / step index). */
270
+ seq: number;
271
+ /** Importance in [0,1] — higher is kept preferentially. */
272
+ importance?: number;
273
+ /** Pinned items are never evicted (e.g. the goal, a committed decision). */
274
+ pinned?: boolean;
275
+ }
276
+ interface BoundWorkingSetResult<T> { // Partition returned by boundWorkingSet.
277
+ kept: T[]; // Retained items (all pinned plus the top-ranked rest), in input order.
278
+ evicted: T[]; // Unpinned items that did not fit, in input order.
279
+ }
280
+ /** Keep at most `max` items: all pinned, then the highest-importance (ties → most
281
+ * recent), preserving original order in the output. The bounded-working-set
282
+ * primitive for long-running agents — drop the least useful, never the pinned (so `kept` may exceed `max` when more than `max` items are pinned). */
283
+ declare function boundWorkingSet<T extends WorkingItem>(items: T[], max: number): BoundWorkingSetResult<T>;
284
+
285
+ export { type BoundWorkingSetResult, type ChatMessage, type CompactHistoryOptions, type CompactHistoryResult, type ImprovementCycleSpec, type ImprovementResult, type LoopState, type Ratchet, type RatchetDecision, type RatchetVerdict, type RunUntilOptions, type RunUntilResult, type StopPredicate, type WorkingItem, allOf, anyOf, boundWorkingSet, compactHistory, costUsdAtLeast, runImprovementCycle, runUntil, stepCountIs, verifiedRatchet, verifierSatisfied };
@@ -0,0 +1,285 @@
1
+ import { VerifierResult } from '../verifiers/index.js';
2
+
3
+ /**
4
+ * Snapshot of agent-loop state at a given step. Passed to every StopPredicate
5
+ * so predicates compose without coupling to the outer loop implementation.
6
+ */
7
+ interface LoopState {
8
+ /** Zero-indexed iteration count. */
9
+ step: number;
10
+ /** Cumulative cost incurred so far, in USD. */
11
+ cost_usd: number;
12
+ /** The agent's latest output string. */
13
+ output: string;
14
+ /**
15
+ * All verifier results collected so far.
16
+ * Present when the caller runs verifiers; absent when running without them.
17
+ */
18
+ verifier_results?: VerifierResult[];
19
+ }
20
+ /**
21
+ * A pure function that returns `true` when the loop SHOULD STOP.
22
+ *
23
+ * Predicates are composable via anyOf / allOf. They receive the full LoopState
24
+ * so callers can build rich multi-factor stop conditions without modifying the
25
+ * loop implementation.
26
+ *
27
+ * No public agent SDK ships a typed, composable, verifier-aware stop-predicate
28
+ * as a first-class primitive — this is that primitive.
29
+ */
30
+ type StopPredicate = (s: LoopState) => boolean;
31
+ /**
32
+ * Stop once the step index reaches `n`, i.e. as soon as `s.step >= n` (not only on exact equality).
33
+ *
34
+ * Because step is zero-indexed, stepCountIs(3) fires after steps 0, 1, 2 — i.e.
35
+ * when the loop is about to execute its 4th iteration.
36
+ *
37
+ * @param n - Maximum number of completed steps before stopping.
38
+ */
39
+ declare function stepCountIs(n: number): StopPredicate;
40
+ /**
41
+ * Stop when the cumulative cost has reached or exceeded `min` USD.
42
+ *
43
+ * This is the budget-exhausted stop signal — use it in anyOf() or allOf()
44
+ * alongside other predicates to cap agent spend.
45
+ *
46
+ * @param min - Spend threshold in USD; the predicate fires once `cost_usd >= min`.
47
+ */
48
+ declare function costUsdAtLeast(min: number): StopPredicate;
49
+ /**
50
+ * Stop when the named verifier has produced a passing result.
51
+ *
52
+ * No public agent SDK makes verifier satisfaction a first-class loop-termination
53
+ * condition. This is the bridge: the loop runs until a trusted, parameter-free
54
+ * checker (not the LLM's self-report) confirms the output is correct.
55
+ *
56
+ * Returns `true` (stop) when ANY result in `verifier_results` whose
57
+ * `kernel === kernelName` has `pass === true`.
58
+ * Returns `false` (keep going) if the kernel has not run yet or has not passed.
59
+ *
60
+ * @param kernelName - The `VerifierResult.kernel` name to match.
61
+ */
62
+ declare function verifierSatisfied(kernelName: string): StopPredicate;
63
+ /**
64
+ * Stop when ANY of the provided predicates returns `true`.
65
+ *
66
+ * Useful for "stop on the first exit condition" semantics — e.g. stop if the
67
+ * verifier passes OR if the budget is exhausted, whichever comes first.
68
+ *
69
+ * @param preds - One or more StopPredicates to OR together.
70
+ */
71
+ declare function anyOf(...preds: StopPredicate[]): StopPredicate;
72
+ /**
73
+ * Stop when ALL of the provided predicates return `true`.
74
+ *
75
+ * Useful for "stop only when every condition is satisfied simultaneously" —
76
+ * e.g. stop when the verifier passes AND at least one step has been taken.
77
+ *
78
+ * @param preds - One or more StopPredicates to AND together.
79
+ */
80
+ declare function allOf(...preds: StopPredicate[]): StopPredicate;
81
+ /**
82
+ * A structured verdict emitted by a verifier or judge function.
83
+ *
84
+ * 'sufficient' means the candidate output meets the correctness bar for the
85
+ * ratchet to advance. Any other string (including 'insufficient') causes the
86
+ * ratchet to hold.
87
+ *
88
+ * `confidence` is a [0,1] float. The verified ratchet uses this to gate
89
+ * advancement — a high-confidence insufficient is just as much a hold signal
90
+ * as a low-confidence sufficient. Explicit confidence prevents the LLM
91
+ * self-assessment problem: the model cannot softly claim it is done.
92
+ *
93
+ * `source` is optional provenance — which kernel, model, or tool produced the
94
+ * verdict. Included in audit logs.
95
+ */
96
+ interface RatchetVerdict {
97
+ verdict: "sufficient" | "insufficient" | string;
98
+ confidence: number;
99
+ source?: string;
100
+ }
101
+ /**
102
+ * The ratchet's typed decision: advance the loop state, or hold.
103
+ *
104
+ * `advance === true` means the candidate output has been verified correct and
105
+ * the loop may commit this state and move to the next phase.
106
+ *
107
+ * `reason` is a human-readable string that explains WHY the ratchet
108
+ * advanced or held. This is surfaced in receipts and audit logs so the
109
+ * decision is not a black box.
110
+ */
111
+ interface RatchetDecision {
112
+ advance: boolean;
113
+ reason: string;
114
+ }
115
+ /**
116
+ * A Ratchet is a pure function that maps a RatchetVerdict (or undefined, if no
117
+ * verifier ran yet) to a RatchetDecision.
118
+ *
119
+ * The verified ratchet is the primitive that no public agent SDK ships:
120
+ * - Loop state advances ONLY on a proven verifier pass.
121
+ * - The confidence threshold is explicit and configured at construction time.
122
+ * - An absent verdict is treated as hold, not as pass — absence of proof is
123
+ * not proof of correctness.
124
+ *
125
+ * This matches Theron's own server-side CIP loop, where weight commits require
126
+ * a verifier pass + confidence >= threshold before the ratchet clicks forward.
127
+ */
128
+ type Ratchet = (v: RatchetVerdict | undefined) => RatchetDecision;
129
+ /**
130
+ * verifiedRatchet — the canonical Theron loop gate.
131
+ *
132
+ * Returns a Ratchet that advances ONLY when:
133
+ * 1. A verdict is present (not undefined).
134
+ * 2. verdict.verdict === 'sufficient'.
135
+ * 3. verdict.confidence >= minConfidence (default 0.6).
136
+ *
137
+ * Any other combination produces { advance: false } with an honest reason.
138
+ *
139
+ * WHY DEFAULT 0.6?
140
+ * A confidence of 0.6 is the minimum that meaningfully separates "I think
141
+ * this is right" from a coin flip. For high-stakes agentic actions raise it
142
+ * to 0.8 or 0.9; for iterative drafting 0.6 is a reasonable starting point.
143
+ *
144
+ * WHY NOT LET THE LLM DECIDE?
145
+ * LLMs are systematically overconfident. A parameter-free verifier + explicit
146
+ * numeric threshold is auditable, reproducible, and falsifiable. The model
147
+ * cannot talk its way past the gate.
148
+ *
149
+ * @param opts.minConfidence - Confidence floor. Default 0.6.
150
+ */
151
+ declare function verifiedRatchet(opts?: {
152
+ minConfidence?: number;
153
+ }): Ratchet;
154
+ /**
155
+ * Specification for a single improvement cycle.
156
+ *
157
+ * A cycle is one atomic unit of the agent's inner loop:
158
+ * 1. propose() — generate a candidate (e.g. a new model output, a plan, a patch).
159
+ * 2. trial(p) — execute/evaluate the candidate (e.g. run tests, score output).
160
+ * 3. verify(p,t) — produce a RatchetVerdict judging whether the trial result meets the bar.
161
+ * 4. ratchet(v) — gate advancement; only commits if the verdict passes.
162
+ *
163
+ * This is a pure orchestration contract: no networking, no model calls. The
164
+ * caller supplies every async function, keeping the primitive testable and
165
+ * framework-agnostic.
166
+ *
167
+ * No public agent SDK exposes this propose→trial→verify→ratchet cycle as a
168
+ * typed, composable primitive with a verifier gate at step 3. That is the gap
169
+ * this fills.
170
+ */
171
+ interface ImprovementCycleSpec<P, T> {
172
+ /** Generate a candidate proposal. */
173
+ propose: () => Promise<P> | P;
174
+ /** Execute / evaluate the proposal, producing trial evidence. */
175
+ trial: (proposal: P) => Promise<T> | T;
176
+ /**
177
+ * Judge the proposal against the trial result.
178
+ * Must return a RatchetVerdict — not a boolean, so confidence is always
179
+ * surfaced rather than buried inside the verifier implementation.
180
+ */
181
+ verify: (proposal: P, trial: T) => Promise<RatchetVerdict> | RatchetVerdict;
182
+ /**
183
+ * The ratchet to use. Defaults to verifiedRatchet() (minConfidence 0.6).
184
+ * Supply a custom Ratchet to adjust the threshold or plug in domain-specific
185
+ * gate logic.
186
+ */
187
+ ratchet?: Ratchet;
188
+ }
189
+ /**
190
+ * The result of a completed improvement cycle.
191
+ *
192
+ * All four artefacts are returned so the caller can log them to a receipt,
193
+ * feed the decision back into the outer loop, or surface the reason to the user.
194
+ */
195
+ interface ImprovementResult<P, T> {
196
+ proposal: P;
197
+ trial: T;
198
+ verdict: RatchetVerdict;
199
+ decision: RatchetDecision;
200
+ }
201
+ /**
202
+ * runImprovementCycle — execute one propose → trial → verify → ratchet cycle.
203
+ *
204
+ * This is the pure orchestration primitive for verified agent improvement. It
205
+ * is intentionally thin: no retry logic, no outer loop, no networking. The
206
+ * caller decides what to do with the ImprovementResult (advance, retry,
207
+ * escalate, receipt it).
208
+ *
209
+ * Example:
210
+ * const result = await runImprovementCycle({
211
+ * propose: () => model.draft(prompt),
212
+ * trial: (draft) => runner.execute(draft),
213
+ * verify: (draft, output) => mathVerifier.score(draft, output),
214
+ * });
215
+ * if (result.decision.advance) commitToWeights(result.proposal);
216
+ *
217
+ * @param spec - The ImprovementCycleSpec describing all four phases.
218
+ * @returns ImprovementResult containing all artefacts from the cycle.
219
+ */
220
+ declare function runImprovementCycle<P, T>(spec: ImprovementCycleSpec<P, T>): Promise<ImprovementResult<P, T>>;
221
+ interface ChatMessage { // One conversation message as consumed and produced by compactHistory.
222
+ role: string; // Speaker role; the summary message compactHistory emits uses "system".
223
+ content: string; // Message text; its length counts toward the maxChars total.
224
+ }
225
+ interface CompactHistoryOptions {
226
+ messages: ChatMessage[];
227
+ /** Keep the most recent N messages verbatim (default 6; floored, minimum 1). */
228
+ keepRecent?: number;
229
+ /** Summarize the older messages into one string. Provider-agnostic. */
230
+ summarize: (older: ChatMessage[]) => Promise<string> | string;
231
+ /** Only compact when total content chars exceed this (default 0 = compact
232
+ * whenever there is more history than keepRecent). */
233
+ maxChars?: number;
234
+ }
235
+ interface CompactHistoryResult {
236
+ /** The compacted message list: [summary-as-system, ...recent] when compacted. */
237
+ messages: ChatMessage[];
238
+ compacted: boolean;
239
+ summary?: string;
240
+ /** How many older messages were folded into the summary. */
241
+ droppedCount: number;
242
+ }
243
+ /** Summarize-and-continue: fold older messages into one summary while keeping the
244
+ * most recent verbatim — so a conversation/loop can run far past the context
245
+ * window. No-op (compacted:false) when history is already small. */
246
+ declare function compactHistory(opts: CompactHistoryOptions): Promise<CompactHistoryResult>;
247
+ interface RunUntilOptions<S> {
248
+ /** Initial loop state. */
249
+ initial: S;
250
+ /** One step: given the current state + index, produce the next state. */
251
+ step: (state: S, i: number) => Promise<S> | S;
252
+ /** Stop when this returns true (checked AFTER each step). */
253
+ stopWhen: (state: S, i: number) => boolean;
254
+ /** Hard ceiling on steps (the long-horizon safety net). Default 100; floored, minimum 1. */
255
+ maxSteps?: number;
256
+ /** Optional checkpoint hook after each step — persist state for durable resume. */
257
+ onCheckpoint?: (state: S, i: number) => Promise<void> | void;
258
+ }
259
+ interface RunUntilResult<S> { // Outcome returned by runUntil.
260
+ state: S; // State produced by the last executed step.
261
+ steps: number; // Steps executed: index + 1 on a predicate stop, the ceiling otherwise.
262
+ stopped: 'predicate' | 'maxSteps'; // Which condition ended the loop.
263
+ }
264
+ /** Drive a long-horizon loop: run `step` repeatedly until `stopWhen` holds or
265
+ * `maxSteps` is hit, checkpointing after each step. The bounded, resumable core
266
+ * of a long-running agent (pair `step` with compactHistory to stay in budget). */
267
+ declare function runUntil<S>(opts: RunUntilOptions<S>): Promise<RunUntilResult<S>>;
268
+ interface WorkingItem {
269
+ /** Lower = older (insertion order / step index). */
270
+ seq: number;
271
+ /** Importance in [0,1] — higher is kept preferentially. */
272
+ importance?: number;
273
+ /** Pinned items are never evicted (e.g. the goal, a committed decision). */
274
+ pinned?: boolean;
275
+ }
276
+ interface BoundWorkingSetResult<T> { // Partition returned by boundWorkingSet.
277
+ kept: T[]; // Retained items (all pinned plus the top-ranked rest), in input order.
278
+ evicted: T[]; // Unpinned items that did not fit, in input order.
279
+ }
280
+ /** Keep at most `max` items: all pinned, then the highest-importance (ties → most
281
+ * recent), preserving original order in the output. The bounded-working-set
282
+ * primitive for long-running agents — drop the least useful, never the pinned (so `kept` may exceed `max` when more than `max` items are pinned). */
283
+ declare function boundWorkingSet<T extends WorkingItem>(items: T[], max: number): BoundWorkingSetResult<T>;
284
+
285
+ export { type BoundWorkingSetResult, type ChatMessage, type CompactHistoryOptions, type CompactHistoryResult, type ImprovementCycleSpec, type ImprovementResult, type LoopState, type Ratchet, type RatchetDecision, type RatchetVerdict, type RunUntilOptions, type RunUntilResult, type StopPredicate, type WorkingItem, allOf, anyOf, boundWorkingSet, compactHistory, costUsdAtLeast, runImprovementCycle, runUntil, stepCountIs, verifiedRatchet, verifierSatisfied };