@kernel.chat/kbot 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +6 -0
  2. package/dist/cache-warmth.d.ts +25 -0
  3. package/dist/cache-warmth.js +131 -0
  4. package/dist/futures/debate/index.d.ts +7 -0
  5. package/dist/futures/debate/index.js +6 -0
  6. package/dist/futures/debate/runner.d.ts +34 -0
  7. package/dist/futures/debate/runner.js +140 -0
  8. package/dist/futures/debate/synthesis.d.ts +25 -0
  9. package/dist/futures/debate/synthesis.js +81 -0
  10. package/dist/futures/debate/types.d.ts +72 -0
  11. package/dist/futures/debate/types.js +12 -0
  12. package/dist/futures/forecast/index.d.ts +5 -0
  13. package/dist/futures/forecast/index.js +5 -0
  14. package/dist/futures/forecast/projection.d.ts +31 -0
  15. package/dist/futures/forecast/projection.js +177 -0
  16. package/dist/futures/forecast/synthesize.d.ts +19 -0
  17. package/dist/futures/forecast/synthesize.js +89 -0
  18. package/dist/futures/forecast/types.d.ts +59 -0
  19. package/dist/futures/forecast/types.js +15 -0
  20. package/dist/futures/harness/critic-evaluator.d.ts +39 -0
  21. package/dist/futures/harness/critic-evaluator.js +131 -0
  22. package/dist/futures/harness/evolution-loop.d.ts +41 -0
  23. package/dist/futures/harness/evolution-loop.js +168 -0
  24. package/dist/futures/harness/index.d.ts +16 -0
  25. package/dist/futures/harness/index.js +13 -0
  26. package/dist/futures/harness/meta-evolution.d.ts +32 -0
  27. package/dist/futures/harness/meta-evolution.js +52 -0
  28. package/dist/futures/harness/noop-evolution.d.ts +23 -0
  29. package/dist/futures/harness/noop-evolution.js +29 -0
  30. package/dist/futures/harness/persistence.d.ts +30 -0
  31. package/dist/futures/harness/persistence.js +99 -0
  32. package/dist/futures/harness/types.d.ts +147 -0
  33. package/dist/futures/harness/types.js +18 -0
  34. package/dist/futures/index.d.ts +16 -0
  35. package/dist/futures/index.js +22 -0
  36. package/dist/futures/latent-state/envelope.d.ts +39 -0
  37. package/dist/futures/latent-state/envelope.js +178 -0
  38. package/dist/futures/latent-state/index.d.ts +5 -0
  39. package/dist/futures/latent-state/index.js +3 -0
  40. package/dist/futures/latent-state/types.d.ts +47 -0
  41. package/dist/futures/latent-state/types.js +13 -0
  42. package/dist/futures/persona/check.d.ts +45 -0
  43. package/dist/futures/persona/check.js +205 -0
  44. package/dist/futures/persona/index.d.ts +5 -0
  45. package/dist/futures/persona/index.js +5 -0
  46. package/dist/futures/persona/registry.d.ts +22 -0
  47. package/dist/futures/persona/registry.js +124 -0
  48. package/dist/futures/persona/types.d.ts +68 -0
  49. package/dist/futures/persona/types.js +28 -0
  50. package/dist/futures/skill-graph/graph.d.ts +31 -0
  51. package/dist/futures/skill-graph/graph.js +151 -0
  52. package/dist/futures/skill-graph/index.d.ts +13 -0
  53. package/dist/futures/skill-graph/index.js +10 -0
  54. package/dist/futures/skill-graph/synthesis.d.ts +20 -0
  55. package/dist/futures/skill-graph/synthesis.js +83 -0
  56. package/dist/futures/skill-graph/types.d.ts +53 -0
  57. package/dist/futures/skill-graph/types.js +19 -0
  58. package/dist/streaming.js +18 -0
  59. package/package.json +1 -1
@@ -0,0 +1,168 @@
1
+ /**
2
+ * Harness Evolution Loop — inner loop (Algorithm 1 from Sylph).
3
+ *
4
+ * for i in 1..maxIterations:
5
+ * trace = Worker.execute(task, harness)
6
+ * report = Evaluator.evaluate(trace, task)
7
+ * record = { iteration, harness, trace, report, verdict }
8
+ * history.push(record)
9
+ * if report.score > bestScore: best = harness
10
+ * if earlyStopScore reached on consecutive iterations: stop
11
+ * if regression > revertThreshold: revert harness to best
12
+ * harness = EvolutionAgent.evolve(history, best)
13
+ *
14
+ * Pure orchestration — Worker / Evaluator / EvolutionAgent are injected,
15
+ * which makes the whole loop deterministic and testable with stub
16
+ * implementations. No LLM calls happen here directly.
17
+ */
18
+ import { appendTrace } from './persistence.js';
19
/**
 * Classify an iteration's score relative to the previous iteration's score.
 *
 * @param {number} prev - Score of the previous iteration (`-Infinity` before
 *   the first iteration, so the first finite score is always 'improved').
 * @param {number} current - Score of the iteration being recorded.
 * @param {number} [revertThreshold] - Accepted so existing call sites keep
 *   working; it does not influence the verdict. Any score that is neither
 *   higher than nor equal to `prev` is 'regressed' regardless of magnitude.
 *   The revert decision itself is taken in the loop, against the best score.
 * @returns {'improved' | 'no-op' | 'regressed'}
 */
function compareVerdict(prev, current, revertThreshold) {
    if (current > prev) {
        return 'improved';
    }
    if (current === prev) {
        return 'no-op';
    }
    // Lower scores and non-comparable scores (e.g. NaN) both land here.
    return 'regressed';
}

/** Render any thrown value as a message string. */
function describeError(err) {
    return err instanceof Error ? err.message : String(err);
}

/** Minimal one-step trace standing in for a Worker run that threw. */
function buildWorkerFailureTrace(task, harness, err) {
    return {
        taskId: task.id,
        harnessId: harness.id,
        steps: [
            {
                index: 0,
                phase: 'observe',
                action: 'worker-error',
                error: describeError(err),
                durationMs: 0,
            },
        ],
        finalState: {},
        llmTimeMs: 0,
        toolTimeMs: 0,
    };
}

/** Auto-fail report (score 0, every criterion failed) for an Evaluator that threw. */
function buildEvaluatorFailureReport(task, harness, err) {
    return {
        taskId: task.id,
        harnessId: harness.id,
        pass: false,
        score: 0,
        criteriaResults: task.acceptance.map((criterion) => ({
            criterion,
            passed: false,
            evidence: 'evaluator-error',
        })),
        failureModes: [
            {
                kind: 'other',
                detail: describeError(err),
            },
        ],
        notes: 'evaluator threw; auto-fail',
    };
}

/**
 * Run the inner Harness Evolution Loop against a single task.
 *
 * Each iteration executes the worker, evaluates the trace, appends a record
 * to the history, updates the best harness, checks the early-stop streak and
 * then asks the evolution agent for the next harness.
 *
 * Exceptions from `worker.execute`, `evaluator.evaluate`, `evolution.evolve`
 * and trace persistence are absorbed: a failing worker yields a synthetic
 * one-step trace, a failing evaluator yields an auto-fail report with score 0,
 * a failing evolution agent falls back to the best harness, and persistence
 * errors are ignored. An exception thrown by `options.onRecord` is NOT caught
 * and rejects the returned promise.
 *
 * @param {object} protocol - `{ worker, evaluator, evolution, initialHarness, hyperparams }`.
 * @param {object} task - Task to optimize against; `id` and `acceptance` are read here.
 * @param {object} [options]
 * @param {number} [options.earlyStopStreak=1] - Consecutive iterations that
 *   must reach `hyperparams.earlyStopScore` before the loop stops (minimum 1).
 * @param {boolean} [options.persist] - Set to `false` to disable persistence.
 * @param {string} [options.persistDir] - Directory handed to `appendTrace`;
 *   persistence only happens when this is set.
 * @param {Function} [options.onRecord] - Awaited with every record right
 *   after it is pushed to the history.
 * @returns {Promise<{taskId: string, bestHarness: object, bestScore: number, history: object[]}>}
 *   `bestScore` is reported as 0 when no iteration produced a score greater
 *   than `-Infinity` (for example when every score was NaN).
 */
export async function runEvolutionLoop(protocol, task, options = {}) {
    const { worker, evaluator, evolution, initialHarness, hyperparams } = protocol;
    // `| 0` truncates to a 32-bit integer (NaN/undefined become 0); at least
    // one iteration always runs.
    const maxIterations = Math.max(1, hyperparams.maxIterations | 0);
    const { earlyStopScore, revertThreshold } = hyperparams;
    const earlyStopStreak = Math.max(1, options.earlyStopStreak ?? 1);
    const shouldPersist = options.persist !== false && !!options.persistDir;
    const history = [];
    let harness = initialHarness;
    let bestHarness = initialHarness;
    let bestScore = -Infinity;
    let prevScore = -Infinity;
    let earlyHits = 0;
    for (let iteration = 1; iteration <= maxIterations; iteration++) {
        let trace;
        try {
            trace = await worker.execute(task, harness);
        }
        catch (err) {
            // Give the evaluator something to grade even though the worker died.
            trace = buildWorkerFailureTrace(task, harness, err);
        }
        let report;
        try {
            report = await evaluator.evaluate(trace, task);
        }
        catch (err) {
            report = buildEvaluatorFailureReport(task, harness, err);
        }
        const record = {
            iteration,
            harness,
            trace,
            report,
            verdict: compareVerdict(prevScore, report.score, revertThreshold),
        };
        history.push(record);
        if (options.onRecord) {
            await options.onRecord(record);
        }
        if (shouldPersist) {
            try {
                await appendTrace(task.id, record, options.persistDir);
            }
            catch {
                // Persistence is best-effort; a disk failure must not stop evolution.
            }
        }
        const score = report.score;
        // Track the best harness seen so far (strictly better scores only).
        if (score > bestScore) {
            bestScore = score;
            bestHarness = harness;
        }
        // Early stop: count consecutive iterations at or above the target.
        const reachedTarget = earlyStopScore !== undefined && score >= earlyStopScore;
        earlyHits = reachedTarget ? earlyHits + 1 : 0;
        if (reachedTarget && earlyHits >= earlyStopStreak) {
            break;
        }
        // Revert when this iteration trails the best score by the threshold or
        // more. The next harness is always derived from `bestHarness` below, so
        // this assignment does not change what the evolution agent receives.
        if (revertThreshold !== undefined && bestScore - score >= revertThreshold) {
            harness = bestHarness;
        }
        prevScore = score;
        // No point evolving after the final iteration: the new harness would
        // never be executed.
        if (iteration === maxIterations) {
            break;
        }
        try {
            harness = await evolution.evolve(history, bestHarness);
        }
        catch {
            // EvolutionAgent failed: fall back to the best harness and keep going.
            harness = bestHarness;
        }
    }
    // With an empty history (not reachable while maxIterations >= 1) this still
    // yields `initialHarness` and a score of 0, because neither was updated.
    return {
        taskId: task.id,
        bestHarness,
        bestScore: bestScore === -Infinity ? 0 : bestScore,
        history,
    };
}
168
+ //# sourceMappingURL=evolution-loop.js.map
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Harness Evolution Loop — public surface.
3
+ *
4
+ * See `./README.md` for the high-level overview and `./types.ts` for the
5
+ * contract every other file in this directory targets.
6
+ */
7
+ export * from './types.js';
8
+ export { runEvolutionLoop } from './evolution-loop.js';
9
+ export type { RunOptions } from './evolution-loop.js';
10
+ export { runMetaEvolution } from './meta-evolution.js';
11
+ export type { MetaOptions } from './meta-evolution.js';
12
+ export { CriticEvaluator, createCriticEvaluator, } from './critic-evaluator.js';
13
+ export type { CriticEvaluatorOpts } from './critic-evaluator.js';
14
+ export { NoopEvolutionAgent, createNoopEvolutionAgent, } from './noop-evolution.js';
15
+ export { appendTrace, readHistory, pruneOlderThan, defaultStateDir, } from './persistence.js';
16
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Harness Evolution Loop — public surface.
3
+ *
4
+ * See `./README.md` for the high-level overview and `./types.ts` for the
5
+ * contract every other file in this directory targets.
6
+ */
7
+ export * from './types.js';
8
+ export { runEvolutionLoop } from './evolution-loop.js';
9
+ export { runMetaEvolution } from './meta-evolution.js';
10
+ export { CriticEvaluator, createCriticEvaluator, } from './critic-evaluator.js';
11
+ export { NoopEvolutionAgent, createNoopEvolutionAgent, } from './noop-evolution.js';
12
+ export { appendTrace, readHistory, pruneOlderThan, defaultStateDir, } from './persistence.js';
13
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Harness Meta-Evolution — outer loop (Algorithm 2 from Sylph).
3
+ *
4
+ * The inner loop optimizes one harness against one task. The outer loop
5
+ * runs the inner loop across a portfolio of tasks, aggregating per-task
6
+ * results and selecting the best protocol overall. Currently the
7
+ * "selection" step is averaging — when a real MetaEvolutionAgent ships,
8
+ * it'll consume the perTask EvolutionResult[] and propose protocol
9
+ * mutations.
10
+ *
11
+ * Pure orchestration. Tasks are run sequentially to keep the trace
12
+ * ordering deterministic; parallelism is a future concern.
13
+ */
14
+ import type { EvolutionProtocol, EvolutionResult, MetaResult, Task } from './types.js';
15
+ import { type RunOptions } from './evolution-loop.js';
16
+ export interface MetaOptions extends RunOptions {
17
+ /** Called after each task's inner loop completes. */
18
+ onTaskComplete?: (result: EvolutionResult) => void | Promise<void>;
19
+ /**
20
+ * When `true`, abort the outer loop on the first task whose best score
21
+ * is below `failBelow`. Default false — always run the full portfolio.
22
+ */
23
+ abortOnFailure?: boolean;
24
+ failBelow?: number;
25
+ }
26
+ /**
27
+ * Run the inner Evolution Loop across a portfolio of tasks, returning
28
+ * the best protocol (currently always the input protocol — there is no
29
+ * MetaEvolutionAgent yet) plus per-task results and the aggregate score.
30
+ */
31
+ export declare function runMetaEvolution(protocol: EvolutionProtocol, tasks: Task[], options?: MetaOptions): Promise<MetaResult>;
32
+ //# sourceMappingURL=meta-evolution.d.ts.map
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Harness Meta-Evolution — outer loop (Algorithm 2 from Sylph).
3
+ *
4
+ * The inner loop optimizes one harness against one task. The outer loop
5
+ * runs the inner loop across a portfolio of tasks, aggregating per-task
6
+ * results and selecting the best protocol overall. Currently the
7
+ * "selection" step is averaging — when a real MetaEvolutionAgent ships,
8
+ * it'll consume the perTask EvolutionResult[] and propose protocol
9
+ * mutations.
10
+ *
11
+ * Pure orchestration. Tasks are run sequentially to keep the trace
12
+ * ordering deterministic; parallelism is a future concern.
13
+ */
14
+ import { runEvolutionLoop } from './evolution-loop.js';
15
/**
 * Outer loop: run the inner Evolution Loop once per task, sequentially and in
 * the order given, then summarize the portfolio.
 *
 * The returned `bestProtocol` is always the `protocol` passed in (no protocol
 * mutation happens here). `bestMetaScore` is the arithmetic mean of the
 * `bestScore` of every task that actually ran, or 0 when none ran.
 *
 * `options` is forwarded unchanged to `runEvolutionLoop`. In addition:
 *  - `options.onTaskComplete(result)` is awaited after each task.
 *  - when `options.abortOnFailure` is truthy and `options.failBelow` is
 *    defined, the loop stops after the first task whose best score is below
 *    `failBelow`; that task is still included in the result.
 */
export async function runMetaEvolution(protocol, tasks, options = {}) {
    const summarize = (results, meanScore) => ({
        bestProtocol: protocol,
        bestMetaScore: meanScore,
        perTask: results,
    });
    if (tasks.length === 0) {
        return summarize([], 0);
    }
    const results = [];
    let total = 0;
    for (const task of tasks) {
        const outcome = await runEvolutionLoop(protocol, task, options);
        results.push(outcome);
        total += outcome.bestScore;
        if (options.onTaskComplete) {
            await options.onTaskComplete(outcome);
        }
        const belowFloor = options.abortOnFailure &&
            options.failBelow !== undefined &&
            outcome.bestScore < options.failBelow;
        if (belowFloor) {
            break;
        }
    }
    // Mean over the tasks that ran (an abort shortens the denominator too).
    const mean = results.length > 0 ? total / results.length : 0;
    return summarize(results, mean);
}
52
+ //# sourceMappingURL=meta-evolution.js.map
@@ -0,0 +1,23 @@
1
+ /**
2
+ * No-op EvolutionAgent — records but never rewrites the harness.
3
+ *
4
+ * The Sylph paper's outer-loop value comes from the EvolutionAgent
5
+ * mutating the harness based on history. Real, safe code rewriting is a
6
+ * multi-month problem; this stub satisfies the interface so the inner
7
+ * loop runs end-to-end. Every other piece of the substrate (trace
8
+ * persistence, regression detection, A/B evaluation, harness diffing)
9
+ * works without any actual mutation.
10
+ *
11
+ * The contract is met; the substrate is shipped.
12
+ */
13
+ import type { EvolutionAgent, EvolutionRecord, Harness } from './types.js';
14
+ export declare class NoopEvolutionAgent implements EvolutionAgent {
15
+ /**
16
+ * Returns the input harness unchanged. Reads `history` only to allow
17
+ * subclasses to subscribe to inspection without forcing a re-read.
18
+ */
19
+ evolve(history: EvolutionRecord[], best: Harness): Promise<Harness>;
20
+ }
21
+ /** Convenience factory mirroring the rest of the futures module style. */
22
+ export declare function createNoopEvolutionAgent(): EvolutionAgent;
23
+ //# sourceMappingURL=noop-evolution.d.ts.map
@@ -0,0 +1,29 @@
1
+ /**
2
+ * No-op EvolutionAgent — records but never rewrites the harness.
3
+ *
4
+ * The Sylph paper's outer-loop value comes from the EvolutionAgent
5
+ * mutating the harness based on history. Real, safe code rewriting is a
6
+ * multi-month problem; this stub satisfies the interface so the inner
7
+ * loop runs end-to-end. Every other piece of the substrate (trace
8
+ * persistence, regression detection, A/B evaluation, harness diffing)
9
+ * works without any actual mutation.
10
+ *
11
+ * The contract is met; the substrate is shipped.
12
+ */
13
export class NoopEvolutionAgent {
    /**
     * Resolve to the `best` harness exactly as it was passed in.
     *
     * `history` is accepted to satisfy the EvolutionAgent call shape but is
     * never inspected.
     */
    async evolve(history, best) {
        // Explicitly discard `history`: it was received and deliberately ignored.
        void history;
        const unchanged = best;
        return unchanged;
    }
}
25
/** Factory shorthand: build and return a fresh `NoopEvolutionAgent`. */
export function createNoopEvolutionAgent() {
    const agent = new NoopEvolutionAgent();
    return agent;
}
29
+ //# sourceMappingURL=noop-evolution.js.map
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Harness Evolution Loop — JSONL trace persistence.
3
+ *
4
+ * Each task gets its own append-only JSONL file at
5
+ * `~/.kbot/futures/harness/<task-id>.jsonl`. One line per `EvolutionRecord`
6
+ * (or arbitrary JSON-serializable record). Append-only on the hot path so
7
+ * concurrent loops don't trample each other; reads parse line-by-line and
8
+ * skip malformed lines rather than throwing on a single bad row.
9
+ *
10
+ * Pattern mirrors `src/planner/hierarchical/persistence.ts`: state dir is
11
+ * configurable (default `~/.kbot/futures/harness`), atomic writes where
12
+ * possible, ENOENT swallowed on read paths.
13
+ */
14
+ import type { EvolutionRecord } from './types.js';
15
+ /** Default on-disk root: `~/.kbot/futures/harness/`. */
16
+ export declare function defaultStateDir(): string;
17
+ /** Append a single record as one JSONL line. */
18
+ export declare function appendTrace(taskId: string, record: EvolutionRecord, stateDir?: string): Promise<void>;
19
+ /**
20
+ * Read all records for a task in append order. Returns empty array if the
21
+ * file doesn't exist. Malformed lines are skipped — one bad row never
22
+ * invalidates the whole history.
23
+ */
24
+ export declare function readHistory(taskId: string, stateDir?: string): Promise<EvolutionRecord[]>;
25
+ /**
26
+ * Delete `.jsonl` trace files whose mtime is more than `days` days old. Returns
27
+ * the removed files' base names (sanitized task ids). Per-file failures are skipped.
28
+ */
29
+ export declare function pruneOlderThan(days: number, stateDir?: string): Promise<string[]>;
30
+ //# sourceMappingURL=persistence.d.ts.map
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Harness Evolution Loop — JSONL trace persistence.
3
+ *
4
+ * Each task gets its own append-only JSONL file at
5
+ * `~/.kbot/futures/harness/<task-id>.jsonl`. One line per `EvolutionRecord`
6
+ * (or arbitrary JSON-serializable record). Append-only on the hot path so
7
+ * concurrent loops don't trample each other; reads parse line-by-line and
8
+ * skip malformed lines rather than throwing on a single bad row.
9
+ *
10
+ * Pattern mirrors `src/planner/hierarchical/persistence.ts`: state dir is
11
+ * configurable (default `~/.kbot/futures/harness`), atomic writes where
12
+ * possible, ENOENT swallowed on read paths.
13
+ */
14
+ import { promises as fs } from 'node:fs';
15
+ import * as os from 'node:os';
16
+ import * as path from 'node:path';
17
/** Default trace directory: `.kbot/futures/harness` under the user's home directory. */
export function defaultStateDir() {
    const segments = ['.kbot', 'futures', 'harness'];
    return path.join(os.homedir(), ...segments);
}
21
function safeId(taskId) {
    // Every character outside letters, digits, '.', '_' and '-' becomes '_',
    // one replacement per character (runs are not merged).
    const unsafeChars = /[^a-zA-Z0-9._-]/g;
    return taskId.replace(unsafeChars, '_');
}
25
function tracePath(stateDir, taskId) {
    // One JSONL file per task, named after the sanitized task id.
    const fileName = `${safeId(taskId)}.jsonl`;
    return path.join(stateDir, fileName);
}
28
async function ensureDir(dir) {
    // `recursive: true` creates missing parents and tolerates an existing directory.
    const mkdirOptions = { recursive: true };
    await fs.mkdir(dir, mkdirOptions);
}
31
/**
 * Serialize `record` as JSON and append it, newline-terminated, to the task's
 * trace file inside `stateDir` (created first if missing).
 */
export async function appendTrace(taskId, record, stateDir = defaultStateDir()) {
    await ensureDir(stateDir);
    const serialized = `${JSON.stringify(record)}\n`;
    const target = tracePath(stateDir, taskId);
    await fs.appendFile(target, serialized, 'utf8');
}
37
/**
 * Load every record stored for a task, in file order.
 *
 * A missing trace file (ENOENT) yields an empty array; any other read error
 * is rethrown. Blank lines and lines that fail `JSON.parse` are dropped, so
 * a single corrupt row does not discard the rest of the history.
 */
export async function readHistory(taskId, stateDir = defaultStateDir()) {
    let contents;
    try {
        contents = await fs.readFile(tracePath(stateDir, taskId), 'utf8');
    }
    catch (err) {
        if (err.code === 'ENOENT') {
            return [];
        }
        throw err;
    }
    // Each line maps to zero or one record; the one-element array wrapper keeps
    // records that are themselves arrays from being flattened.
    return contents.split('\n').flatMap((line) => {
        const text = line.trim();
        if (text === '') {
            return [];
        }
        try {
            return [JSON.parse(text)];
        }
        catch {
            // malformed line: skip it
            return [];
        }
    });
}
66
/**
 * Remove `.jsonl` files in `stateDir` whose mtime lies more than `days` days
 * in the past.
 *
 * Resolves to the base names (file name minus `.jsonl`) of the files that
 * were deleted, in directory-listing order. A missing directory yields an
 * empty array; other `readdir` errors are rethrown. A failure while
 * stat-ing or unlinking one file skips that file only.
 */
export async function pruneOlderThan(days, stateDir = defaultStateDir()) {
    const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
    let names;
    try {
        names = await fs.readdir(stateDir);
    }
    catch (err) {
        if (err.code !== 'ENOENT') {
            throw err;
        }
        return [];
    }
    const traceFiles = names.filter((name) => name.endsWith('.jsonl'));
    const removed = [];
    for (const name of traceFiles) {
        const target = path.join(stateDir, name);
        try {
            const { mtimeMs } = await fs.stat(target);
            const isStale = mtimeMs < cutoff;
            if (!isStale) {
                continue;
            }
            await fs.unlink(target);
            removed.push(name.replace(/\.jsonl$/, ''));
        }
        catch {
            // skip — permission, race, etc.
        }
    }
    return removed;
}
99
+ //# sourceMappingURL=persistence.js.map
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Harness Evolution Loop — type definitions.
3
+ *
4
+ * Maps onto the formalism from "The Last Harness You'll Ever Build"
5
+ * (Seong, Yin, Zhang — Sylph.AI, arXiv:2604.21003):
6
+ *
7
+ * Agent = Model + Harness
8
+ * Harness = prompts + tools + orchestration + hooks + model config
9
+ *
10
+ * Inner loop: Worker(τ) → Evaluator(τ) → EvolutionAgent(history) → H'
11
+ * Outer loop: HarnessEvolution × N tasks → MetaEvolutionAgent(history) → Λ'
12
+ *
13
+ * This module is types-only. Runtime lives in evolution-loop.ts and
14
+ * meta-evolution.ts. No imports from heavy modules so it can be loaded by
15
+ * tools, tests, and remote runners cheaply.
16
+ */
17
+ /** Identifier for a single task instance the worker is being optimized against. */
18
+ export interface Task {
19
+ id: string;
20
+ /** Concrete instructions the worker reads. */
21
+ instructions: string;
22
+ /** Verifiable success criteria — the evaluator's checklist. */
23
+ acceptance: string[];
24
+ /** Optional free-form metadata (domain, expected runtime, etc.). */
25
+ meta?: Record<string, unknown>;
26
+ }
27
+ /**
28
+ * The harness — every piece of code/config that surrounds the model.
29
+ * Kept as data so it can be diffed, persisted, and rewritten by EvolutionAgent.
30
+ */
31
+ export interface Harness {
32
+ /** Stable identifier; new id on every evolution step. */
33
+ id: string;
34
+ systemPrompt: string;
35
+ /** Tool names the worker is allowed to call. */
36
+ toolAllowlist: string[];
37
+ /** Hooks/middleware applied around tool calls. */
38
+ hooks: HookSpec[];
39
+ /** Model routing — which model handles which subtask kind. */
40
+ modelRouting: ModelRoute[];
41
+ /** Loop hyperparameters: max iterations, parallelism, revert thresholds. */
42
+ hyperparams: Hyperparams;
43
+ }
44
+ export interface HookSpec {
45
+ name: string;
46
+ /** When to fire: before tool call, after, on error, etc. */
47
+ phase: 'pre-tool' | 'post-tool' | 'on-error' | 'pre-response';
48
+ /** Free-form config; the runtime resolves to actual hook code. */
49
+ config?: Record<string, unknown>;
50
+ }
51
+ export interface ModelRoute {
52
+ /** Pattern match on task kind / tool / phase. */
53
+ match: string;
54
+ model: string;
55
+ temperature?: number;
56
+ maxTokens?: number;
57
+ }
58
+ export interface Hyperparams {
59
+ maxIterations: number;
60
+ /** Stop early once the score is >= this for the required run of consecutive iterations. */
61
+ earlyStopScore?: number;
62
+ /** If an iteration scores at least this far below the best score so far, revert to best. */
63
+ revertThreshold?: number;
64
+ }
65
+ /** Trace produced by Worker.execute() for the Evaluator to inspect. */
66
+ export interface ExecutionTrace {
67
+ taskId: string;
68
+ harnessId: string;
69
+ steps: TraceStep[];
70
+ finalState: Record<string, unknown>;
71
+ llmTimeMs: number;
72
+ toolTimeMs: number;
73
+ }
74
+ export interface TraceStep {
75
+ index: number;
76
+ phase: 'plan' | 'tool' | 'response' | 'observe';
77
+ action: string;
78
+ output?: string;
79
+ error?: string;
80
+ durationMs: number;
81
+ }
82
+ /** Evaluator output: pass/fail + score + diagnostic narrative. */
83
+ export interface EvaluationReport {
84
+ taskId: string;
85
+ harnessId: string;
86
+ pass: boolean;
87
+ /** Two-tier score: pass yields 1.0, scaled by efficiency tiebreaker. */
88
+ score: number;
89
+ /** Per-criterion verdict. Length matches Task.acceptance. */
90
+ criteriaResults: CriterionResult[];
91
+ /** Categorized failure modes for the Evolution Agent to act on. */
92
+ failureModes: FailureMode[];
93
+ /** Free-form diagnostic prose. */
94
+ notes?: string;
95
+ }
96
+ export interface CriterionResult {
97
+ criterion: string;
98
+ passed: boolean;
99
+ evidence?: string;
100
+ }
101
+ export type FailureModeKind = 'incorrect-tool-usage' | 'reasoning-loop' | 'misinterpreted-state' | 'excessive-latency' | 'missing-capability' | 'hallucinated-state' | 'other';
102
+ export interface FailureMode {
103
+ kind: FailureModeKind;
104
+ detail: string;
105
+ }
106
+ /** One row in the evolution history. */
107
+ export interface EvolutionRecord {
108
+ iteration: number;
109
+ harness: Harness;
110
+ trace: ExecutionTrace;
111
+ report: EvaluationReport;
112
+ verdict: 'improved' | 'regressed' | 'no-op';
113
+ }
114
+ /** Final result of a single inner-loop run. */
115
+ export interface EvolutionResult {
116
+ taskId: string;
117
+ bestHarness: Harness;
118
+ bestScore: number;
119
+ history: EvolutionRecord[];
120
+ }
121
+ /** Worker = the agent under optimization, parameterized by harness. */
122
+ export interface Worker {
123
+ execute(task: Task, harness: Harness): Promise<ExecutionTrace>;
124
+ }
125
+ /** Evaluator = adversarial reviewer; produces EvaluationReport. */
126
+ export interface Evaluator {
127
+ evaluate(trace: ExecutionTrace, task: Task): Promise<EvaluationReport>;
128
+ }
129
+ /** EvolutionAgent = mutates the harness based on history. */
130
+ export interface EvolutionAgent {
131
+ evolve(history: EvolutionRecord[], best: Harness): Promise<Harness>;
132
+ }
133
+ /** Λ — the evolution protocol itself. */
134
+ export interface EvolutionProtocol {
135
+ worker: Worker;
136
+ evaluator: Evaluator;
137
+ evolution: EvolutionAgent;
138
+ initialHarness: Harness;
139
+ hyperparams: Hyperparams;
140
+ }
141
+ /** Outer-loop result. */
142
+ export interface MetaResult {
143
+ bestProtocol: EvolutionProtocol;
144
+ bestMetaScore: number;
145
+ perTask: EvolutionResult[];
146
+ }
147
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Harness Evolution Loop — type definitions.
3
+ *
4
+ * Maps onto the formalism from "The Last Harness You'll Ever Build"
5
+ * (Seong, Yin, Zhang — Sylph.AI, arXiv:2604.21003):
6
+ *
7
+ * Agent = Model + Harness
8
+ * Harness = prompts + tools + orchestration + hooks + model config
9
+ *
10
+ * Inner loop: Worker(τ) → Evaluator(τ) → EvolutionAgent(history) → H'
11
+ * Outer loop: HarnessEvolution × N tasks → MetaEvolutionAgent(history) → Λ'
12
+ *
13
+ * This module is types-only. Runtime lives in evolution-loop.ts and
14
+ * meta-evolution.ts. No imports from heavy modules so it can be loaded by
15
+ * tools, tests, and remote runners cheaply.
16
+ */
17
+ export {};
18
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,16 @@
1
+ /**
2
+ * kbot v5 futures — experimental architectural skeleton.
3
+ *
4
+ * Six modules drawn from frontier research published in late April 2026.
5
+ * Each is opt-in, additive, and reversible. None of them changes default
6
+ * agent behavior unless explicitly invoked.
7
+ *
8
+ * Plan: packages/kbot/V5_FUTURES_PLAN.md
9
+ */
10
+ export * as harness from './harness/index.js';
11
+ export * as skillGraph from './skill-graph/index.js';
12
+ export * as latentState from './latent-state/index.js';
13
+ export * as forecast from './forecast/index.js';
14
+ export * as persona from './persona/index.js';
15
+ export * as debate from './debate/index.js';
16
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,22 @@
1
+ /**
2
+ * kbot v5 futures — experimental architectural skeleton.
3
+ *
4
+ * Six modules drawn from frontier research published in late April 2026.
5
+ * Each is opt-in, additive, and reversible. None of them changes default
6
+ * agent behavior unless explicitly invoked.
7
+ *
8
+ * Plan: packages/kbot/V5_FUTURES_PLAN.md
9
+ */
10
+ // Harness Evolution Loop — Sylph.AI 2604.21003
11
+ export * as harness from './harness/index.js';
12
+ // Skill Graph — Tencent Hunyuan 2604.25727
13
+ export * as skillGraph from './skill-graph/index.js';
14
+ // Latent State Envelope — Recursive MAS 2604.25917
15
+ export * as latentState from './latent-state/index.js';
16
+ // Forecast — predictions module
17
+ export * as forecast from './forecast/index.js';
18
+ // Persona — privilege scoping (Cequence)
19
+ export * as persona from './persona/index.js';
20
+ // Debate — BARRED-style asymmetric debate runner
21
+ export * as debate from './debate/index.js';
22
+ //# sourceMappingURL=index.js.map