@kernel.chat/kbot 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/cache-warmth.d.ts +25 -0
- package/dist/cache-warmth.js +131 -0
- package/dist/futures/debate/index.d.ts +7 -0
- package/dist/futures/debate/index.js +6 -0
- package/dist/futures/debate/runner.d.ts +34 -0
- package/dist/futures/debate/runner.js +140 -0
- package/dist/futures/debate/synthesis.d.ts +25 -0
- package/dist/futures/debate/synthesis.js +81 -0
- package/dist/futures/debate/types.d.ts +72 -0
- package/dist/futures/debate/types.js +12 -0
- package/dist/futures/forecast/index.d.ts +5 -0
- package/dist/futures/forecast/index.js +5 -0
- package/dist/futures/forecast/projection.d.ts +31 -0
- package/dist/futures/forecast/projection.js +177 -0
- package/dist/futures/forecast/synthesize.d.ts +19 -0
- package/dist/futures/forecast/synthesize.js +89 -0
- package/dist/futures/forecast/types.d.ts +59 -0
- package/dist/futures/forecast/types.js +15 -0
- package/dist/futures/harness/critic-evaluator.d.ts +39 -0
- package/dist/futures/harness/critic-evaluator.js +131 -0
- package/dist/futures/harness/evolution-loop.d.ts +41 -0
- package/dist/futures/harness/evolution-loop.js +168 -0
- package/dist/futures/harness/index.d.ts +16 -0
- package/dist/futures/harness/index.js +13 -0
- package/dist/futures/harness/meta-evolution.d.ts +32 -0
- package/dist/futures/harness/meta-evolution.js +52 -0
- package/dist/futures/harness/noop-evolution.d.ts +23 -0
- package/dist/futures/harness/noop-evolution.js +29 -0
- package/dist/futures/harness/persistence.d.ts +30 -0
- package/dist/futures/harness/persistence.js +99 -0
- package/dist/futures/harness/types.d.ts +147 -0
- package/dist/futures/harness/types.js +18 -0
- package/dist/futures/index.d.ts +16 -0
- package/dist/futures/index.js +22 -0
- package/dist/futures/latent-state/envelope.d.ts +39 -0
- package/dist/futures/latent-state/envelope.js +178 -0
- package/dist/futures/latent-state/index.d.ts +5 -0
- package/dist/futures/latent-state/index.js +3 -0
- package/dist/futures/latent-state/types.d.ts +47 -0
- package/dist/futures/latent-state/types.js +13 -0
- package/dist/futures/persona/check.d.ts +45 -0
- package/dist/futures/persona/check.js +205 -0
- package/dist/futures/persona/index.d.ts +5 -0
- package/dist/futures/persona/index.js +5 -0
- package/dist/futures/persona/registry.d.ts +22 -0
- package/dist/futures/persona/registry.js +124 -0
- package/dist/futures/persona/types.d.ts +68 -0
- package/dist/futures/persona/types.js +28 -0
- package/dist/futures/skill-graph/graph.d.ts +31 -0
- package/dist/futures/skill-graph/graph.js +151 -0
- package/dist/futures/skill-graph/index.d.ts +13 -0
- package/dist/futures/skill-graph/index.js +10 -0
- package/dist/futures/skill-graph/synthesis.d.ts +20 -0
- package/dist/futures/skill-graph/synthesis.js +83 -0
- package/dist/futures/skill-graph/types.d.ts +53 -0
- package/dist/futures/skill-graph/types.js +19 -0
- package/dist/streaming.js +18 -0
- package/package.json +1 -1
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Evolution Loop — inner loop (Algorithm 1 from Sylph).
|
|
3
|
+
*
|
|
4
|
+
* for i in 1..maxIterations:
|
|
5
|
+
* trace = Worker.execute(task, harness)
|
|
6
|
+
* report = Evaluator.evaluate(trace, task)
|
|
7
|
+
* record = { iteration, harness, trace, report, verdict }
|
|
8
|
+
* history.push(record)
|
|
9
|
+
* if report.score > bestScore: best = harness
|
|
10
|
+
* if earlyStopScore reached on consecutive iterations: stop
|
|
11
|
+
* if regression >= revertThreshold: revert harness to best
|
|
12
|
+
* harness = EvolutionAgent.evolve(history, best)
|
|
13
|
+
*
|
|
14
|
+
* Pure orchestration — Worker / Evaluator / EvolutionAgent are injected,
|
|
15
|
+
* which makes the whole loop deterministic and testable with stub
|
|
16
|
+
* implementations. No LLM calls happen here directly.
|
|
17
|
+
*/
|
|
18
|
+
import { appendTrace } from './persistence.js';
|
|
19
|
+
/**
 * Classify an iteration's score relative to the previous iteration's score.
 *
 * @param {number} prev - Score of the previous iteration (`-Infinity` before the first one).
 * @param {number} current - Score of the iteration being recorded.
 * @param {number} [revertThreshold] - Accepted for signature compatibility only.
 *   Any drop is reported as 'regressed' regardless of its size; whether the
 *   loop actually reverts is decided by the caller against the best score.
 * @returns {'improved' | 'no-op' | 'regressed'} The verdict for this iteration.
 */
function compareVerdict(prev, current, revertThreshold) {
    // The threshold never changed the outcome (both branches returned
    // 'regressed'), so the duplicated conditional has been collapsed.
    void revertThreshold;
    if (current > prev) {
        return 'improved';
    }
    if (current === prev) {
        return 'no-op';
    }
    return 'regressed';
}
/** Render any thrown value as a message string. */
function describeError(err) {
    return err instanceof Error ? err.message : String(err);
}
/** Build the minimal single-step trace recorded when the worker throws. */
function buildWorkerFailureTrace(task, harness, err) {
    return {
        taskId: task.id,
        harnessId: harness.id,
        steps: [
            {
                index: 0,
                phase: 'observe',
                action: 'worker-error',
                error: describeError(err),
                durationMs: 0,
            },
        ],
        finalState: {},
        llmTimeMs: 0,
        toolTimeMs: 0,
    };
}
/** Build the auto-fail report (score 0) recorded when the evaluator throws. */
function buildEvaluatorFailureReport(task, harness, err) {
    // A task without an `acceptance` array must not make the fallback itself
    // throw, otherwise the loop would break its "always returns" contract.
    const acceptance = Array.isArray(task.acceptance) ? task.acceptance : [];
    return {
        taskId: task.id,
        harnessId: harness.id,
        pass: false,
        score: 0,
        criteriaResults: acceptance.map((c) => ({
            criterion: c,
            passed: false,
            evidence: 'evaluator-error',
        })),
        failureModes: [
            {
                kind: 'other',
                detail: describeError(err),
            },
        ],
        notes: 'evaluator threw; auto-fail',
    };
}
/**
 * Run the inner Harness Evolution Loop against a single task.
 *
 * Each iteration executes the worker, evaluates the resulting trace, appends
 * a record to the history, tracks the best-scoring harness, and (except on
 * the last iteration) asks the evolution agent for the next harness.
 *
 * Worker and evaluator exceptions are converted into a failure trace / an
 * auto-fail report (score 0) and the loop continues. Evolution-agent
 * exceptions fall back to the best harness. Persistence errors are ignored.
 * Errors thrown by `options.onRecord` are NOT caught and propagate.
 *
 * @param {object} protocol - `{ worker, evaluator, evolution, initialHarness, hyperparams }`.
 * @param {object} task - Task to optimize against; `task.id` is copied into records.
 * @param {object} [options]
 * @param {number} [options.earlyStopStreak=1] - Consecutive iterations that must
 *   reach `hyperparams.earlyStopScore` before stopping (clamped to at least 1).
 * @param {boolean} [options.persist] - Set to `false` to disable trace persistence.
 * @param {string} [options.persistDir] - Directory for traces; persistence only
 *   happens when this is set.
 * @param {Function} [options.onRecord] - Awaited with every record as it is produced.
 * @returns {Promise<object>} `{ taskId, bestHarness, bestScore, history }`;
 *   `bestScore` is 0 when no iteration produced a score above `-Infinity`.
 */
export async function runEvolutionLoop(protocol, task, options = {}) {
    const { worker, evaluator, evolution, initialHarness, hyperparams } = protocol;
    // `| 0` truncates to an int32 (NaN/undefined become 0); always run at least once.
    const maxIterations = Math.max(1, hyperparams.maxIterations | 0);
    const earlyStopScore = hyperparams.earlyStopScore;
    const revertThreshold = hyperparams.revertThreshold;
    const earlyStopStreak = Math.max(1, options.earlyStopStreak ?? 1);
    const shouldPersist = options.persist !== false && !!options.persistDir;
    const history = [];
    let harness = initialHarness;
    let bestHarness = initialHarness;
    let bestScore = -Infinity;
    let prevScore = -Infinity;
    let earlyHits = 0;
    for (let iteration = 1; iteration <= maxIterations; iteration++) {
        let trace;
        try {
            trace = await worker.execute(task, harness);
        }
        catch (err) {
            // Synthesize a minimal failure trace so the evaluator can still grade.
            trace = buildWorkerFailureTrace(task, harness, err);
        }
        let report;
        try {
            report = await evaluator.evaluate(trace, task);
        }
        catch (err) {
            report = buildEvaluatorFailureReport(task, harness, err);
        }
        const verdict = compareVerdict(prevScore, report.score, revertThreshold);
        const record = {
            iteration,
            harness,
            trace,
            report,
            verdict,
        };
        history.push(record);
        if (options.onRecord) {
            await options.onRecord(record);
        }
        if (shouldPersist) {
            try {
                await appendTrace(task.id, record, options.persistDir);
            }
            catch {
                // persistence is best-effort; never block evolution on disk failure
            }
        }
        // Track best harness.
        if (report.score > bestScore) {
            bestScore = report.score;
            bestHarness = harness;
        }
        // Early-stop check: count consecutive iterations at or above the target.
        if (earlyStopScore !== undefined && report.score >= earlyStopScore) {
            earlyHits++;
            if (earlyHits >= earlyStopStreak) {
                break;
            }
        }
        else {
            earlyHits = 0;
        }
        // Revert when the score has fallen at least `revertThreshold` below the
        // best. Note that evolve() below is always seeded with `bestHarness` and
        // overwrites `harness`, so this assignment mostly documents intent.
        if (revertThreshold !== undefined &&
            bestScore - report.score >= revertThreshold) {
            harness = bestHarness;
        }
        prevScore = report.score;
        // Short-circuit before the final evolve call — no point mutating on the
        // last iteration since we'll never execute the new harness.
        if (iteration === maxIterations)
            break;
        try {
            harness = await evolution.evolve(history, bestHarness);
        }
        catch {
            // EvolutionAgent failed — fall back to the best harness and keep going.
            harness = bestHarness;
        }
    }
    // At least one iteration always runs, so `history` is never empty here;
    // `bestScore` stays -Infinity only if no score compared greater than it.
    return {
        taskId: task.id,
        bestHarness,
        bestScore: bestScore === -Infinity ? 0 : bestScore,
        history,
    };
}
|
|
168
|
+
//# sourceMappingURL=evolution-loop.js.map
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Evolution Loop — public surface.
|
|
3
|
+
*
|
|
4
|
+
* See `./README.md` for the high-level overview and `./types.ts` for the
|
|
5
|
+
* contract every other file in this directory targets.
|
|
6
|
+
*/
|
|
7
|
+
export * from './types.js';
|
|
8
|
+
export { runEvolutionLoop } from './evolution-loop.js';
|
|
9
|
+
export type { RunOptions } from './evolution-loop.js';
|
|
10
|
+
export { runMetaEvolution } from './meta-evolution.js';
|
|
11
|
+
export type { MetaOptions } from './meta-evolution.js';
|
|
12
|
+
export { CriticEvaluator, createCriticEvaluator, } from './critic-evaluator.js';
|
|
13
|
+
export type { CriticEvaluatorOpts } from './critic-evaluator.js';
|
|
14
|
+
export { NoopEvolutionAgent, createNoopEvolutionAgent, } from './noop-evolution.js';
|
|
15
|
+
export { appendTrace, readHistory, pruneOlderThan, defaultStateDir, } from './persistence.js';
|
|
16
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Evolution Loop — public surface.
|
|
3
|
+
*
|
|
4
|
+
* See `./README.md` for the high-level overview and `./types.ts` for the
|
|
5
|
+
* contract every other file in this directory targets.
|
|
6
|
+
*/
|
|
7
|
+
export * from './types.js';
|
|
8
|
+
export { runEvolutionLoop } from './evolution-loop.js';
|
|
9
|
+
export { runMetaEvolution } from './meta-evolution.js';
|
|
10
|
+
export { CriticEvaluator, createCriticEvaluator, } from './critic-evaluator.js';
|
|
11
|
+
export { NoopEvolutionAgent, createNoopEvolutionAgent, } from './noop-evolution.js';
|
|
12
|
+
export { appendTrace, readHistory, pruneOlderThan, defaultStateDir, } from './persistence.js';
|
|
13
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Meta-Evolution — outer loop (Algorithm 2 from Sylph).
|
|
3
|
+
*
|
|
4
|
+
* The inner loop optimizes one harness against one task. The outer loop
|
|
5
|
+
* runs the inner loop across a portfolio of tasks, aggregating per-task
|
|
6
|
+
* results and selecting the best protocol overall. Currently the
|
|
7
|
+
* "selection" step is averaging — when a real MetaEvolutionAgent ships,
|
|
8
|
+
* it'll consume the perTask EvolutionResult[] and propose protocol
|
|
9
|
+
* mutations.
|
|
10
|
+
*
|
|
11
|
+
* Pure orchestration. Tasks are run sequentially to keep the trace
|
|
12
|
+
* ordering deterministic; parallelism is a future concern.
|
|
13
|
+
*/
|
|
14
|
+
import type { EvolutionProtocol, EvolutionResult, MetaResult, Task } from './types.js';
|
|
15
|
+
import { type RunOptions } from './evolution-loop.js';
|
|
16
|
+
export interface MetaOptions extends RunOptions {
    /** Called (and awaited) after each task's inner loop completes. */
    onTaskComplete?: (result: EvolutionResult) => void | Promise<void>;
    /**
     * When `true`, abort the outer loop after the first task whose best score
     * is below `failBelow`. Has no effect unless `failBelow` is also set.
     * Default false — always run the full portfolio.
     */
    abortOnFailure?: boolean;
    /**
     * Score floor used together with `abortOnFailure`: a task counts as
     * failed when its best score is strictly less than this value.
     */
    failBelow?: number;
}
|
|
26
|
+
/**
|
|
27
|
+
* Run the inner Evolution Loop across a portfolio of tasks, returning
|
|
28
|
+
* the best protocol (currently always the input protocol — there is no
|
|
29
|
+
* MetaEvolutionAgent yet) plus per-task results and the aggregate score.
|
|
30
|
+
*/
|
|
31
|
+
export declare function runMetaEvolution(protocol: EvolutionProtocol, tasks: Task[], options?: MetaOptions): Promise<MetaResult>;
|
|
32
|
+
//# sourceMappingURL=meta-evolution.d.ts.map
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Meta-Evolution — outer loop (Algorithm 2 from Sylph).
|
|
3
|
+
*
|
|
4
|
+
* The inner loop optimizes one harness against one task. The outer loop
|
|
5
|
+
* runs the inner loop across a portfolio of tasks, aggregating per-task
|
|
6
|
+
* results and selecting the best protocol overall. Currently the
|
|
7
|
+
* "selection" step is averaging — when a real MetaEvolutionAgent ships,
|
|
8
|
+
* it'll consume the perTask EvolutionResult[] and propose protocol
|
|
9
|
+
* mutations.
|
|
10
|
+
*
|
|
11
|
+
* Pure orchestration. Tasks are run sequentially to keep the trace
|
|
12
|
+
* ordering deterministic; parallelism is a future concern.
|
|
13
|
+
*/
|
|
14
|
+
import { runEvolutionLoop } from './evolution-loop.js';
|
|
15
|
+
/**
 * Outer loop: run the inner evolution loop once per task, in order, and
 * aggregate the outcomes.
 *
 * The protocol handed in is returned unchanged as `bestProtocol`; the
 * aggregate `bestMetaScore` is the arithmetic mean of the per-task best
 * scores that were actually collected (0 when none were).
 *
 * @param {object} protocol - Evolution protocol forwarded to every inner run.
 * @param {Array<object>} tasks - Tasks to run sequentially.
 * @param {object} [options] - Forwarded to the inner loop; also read for
 *   `onTaskComplete`, `abortOnFailure` and `failBelow`.
 * @returns {Promise<object>} `{ bestProtocol, bestMetaScore, perTask }`.
 */
export async function runMetaEvolution(protocol, tasks, options = {}) {
    // Nothing to run: report an empty portfolio with a zero score.
    if (tasks.length === 0) {
        return { bestProtocol: protocol, bestMetaScore: 0, perTask: [] };
    }
    const perTask = [];
    let total = 0;
    for (const task of tasks) {
        const outcome = await runEvolutionLoop(protocol, task, options);
        perTask.push(outcome);
        total += outcome.bestScore;
        if (options.onTaskComplete) {
            await options.onTaskComplete(outcome);
        }
        // The failing task is still part of `perTask` (and of the mean) when
        // the portfolio is cut short.
        const shouldAbort = options.abortOnFailure &&
            options.failBelow !== undefined &&
            outcome.bestScore < options.failBelow;
        if (shouldAbort) {
            break;
        }
    }
    // Mean of per-task best scores over the tasks that actually ran.
    const bestMetaScore = perTask.length > 0 ? total / perTask.length : 0;
    return { bestProtocol: protocol, bestMetaScore, perTask };
}
|
|
52
|
+
//# sourceMappingURL=meta-evolution.js.map
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* No-op EvolutionAgent — records but never rewrites the harness.
|
|
3
|
+
*
|
|
4
|
+
* The Sylph paper's inner-loop value comes from the EvolutionAgent
|
|
5
|
+
* mutating the harness based on history. Real, safe code rewriting is a
|
|
6
|
+
* multi-month problem; this stub satisfies the interface so the inner
|
|
7
|
+
* loop runs end-to-end. Every other piece of the substrate (trace
|
|
8
|
+
* persistence, regression detection, A/B evaluation, harness diffing)
|
|
9
|
+
* works without any actual mutation.
|
|
10
|
+
*
|
|
11
|
+
* The contract is met; the substrate is shipped.
|
|
12
|
+
*/
|
|
13
|
+
import type { EvolutionAgent, EvolutionRecord, Harness } from './types.js';
|
|
14
|
+
export declare class NoopEvolutionAgent implements EvolutionAgent {
|
|
15
|
+
/**
|
|
16
|
+
* Returns the input harness unchanged. Reads `history` only to allow
|
|
17
|
+
* subclasses to subscribe to inspection without forcing a re-read.
|
|
18
|
+
*/
|
|
19
|
+
evolve(history: EvolutionRecord[], best: Harness): Promise<Harness>;
|
|
20
|
+
}
|
|
21
|
+
/** Convenience factory mirroring the rest of the futures module style. */
|
|
22
|
+
export declare function createNoopEvolutionAgent(): EvolutionAgent;
|
|
23
|
+
//# sourceMappingURL=noop-evolution.d.ts.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* No-op EvolutionAgent — records but never rewrites the harness.
|
|
3
|
+
*
|
|
4
|
+
* The Sylph paper's inner-loop value comes from the EvolutionAgent
|
|
5
|
+
* mutating the harness based on history. Real, safe code rewriting is a
|
|
6
|
+
* multi-month problem; this stub satisfies the interface so the inner
|
|
7
|
+
* loop runs end-to-end. Every other piece of the substrate (trace
|
|
8
|
+
* persistence, regression detection, A/B evaluation, harness diffing)
|
|
9
|
+
* works without any actual mutation.
|
|
10
|
+
*
|
|
11
|
+
* The contract is met; the substrate is shipped.
|
|
12
|
+
*/
|
|
13
|
+
export class NoopEvolutionAgent {
    /**
     * Hand back `best` exactly as received; no mutation is attempted.
     *
     * @param {Array<object>} history - Evolution records so far (ignored).
     * @param {object} best - Best harness seen so far.
     * @returns {Promise<object>} Resolves to the same `best` object.
     */
    async evolve(history, best) {
        // `history` is referenced only to make the "seen but unused" intent
        // explicit; the expression has no effect.
        void history;
        const unchanged = best;
        return unchanged;
    }
}
|
|
25
|
+
/**
 * Factory wrapper around `new NoopEvolutionAgent()`.
 *
 * @returns {NoopEvolutionAgent} A fresh agent instance on every call.
 */
export function createNoopEvolutionAgent() {
    const agent = new NoopEvolutionAgent();
    return agent;
}
|
|
29
|
+
//# sourceMappingURL=noop-evolution.js.map
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Evolution Loop — JSONL trace persistence.
|
|
3
|
+
*
|
|
4
|
+
* Each task gets its own append-only JSONL file at
|
|
5
|
+
* `~/.kbot/futures/harness/<task-id>.jsonl`. One line per `EvolutionRecord`
|
|
6
|
+
* (or arbitrary JSON-serializable record). Append-only on the hot path so
|
|
7
|
+
* concurrent loops don't trample each other; reads parse line-by-line and
|
|
8
|
+
* skip malformed lines rather than throwing on a single bad row.
|
|
9
|
+
*
|
|
10
|
+
* Pattern mirrors `src/planner/hierarchical/persistence.ts`: state dir is
|
|
11
|
+
* configurable (default `~/.kbot/futures/harness`), atomic writes where
|
|
12
|
+
* possible, ENOENT swallowed on read paths.
|
|
13
|
+
*/
|
|
14
|
+
import type { EvolutionRecord } from './types.js';
|
|
15
|
+
/** Default on-disk root: `~/.kbot/futures/harness/`. */
|
|
16
|
+
export declare function defaultStateDir(): string;
|
|
17
|
+
/** Append a single record as one JSONL line. */
|
|
18
|
+
export declare function appendTrace(taskId: string, record: EvolutionRecord, stateDir?: string): Promise<void>;
|
|
19
|
+
/**
|
|
20
|
+
* Read all records for a task in append order. Returns empty array if the
|
|
21
|
+
* file doesn't exist. Malformed lines are skipped — one bad row never
|
|
22
|
+
* invalidates the whole history.
|
|
23
|
+
*/
|
|
24
|
+
export declare function readHistory(taskId: string, stateDir?: string): Promise<EvolutionRecord[]>;
|
|
25
|
+
/**
|
|
26
|
+
* Delete trace files older than `days` (by mtime). Returns the list of
|
|
27
|
+
* removed task ids. Pure janitor — never throws on individual failures.
|
|
28
|
+
*/
|
|
29
|
+
export declare function pruneOlderThan(days: number, stateDir?: string): Promise<string[]>;
|
|
30
|
+
//# sourceMappingURL=persistence.d.ts.map
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Evolution Loop — JSONL trace persistence.
|
|
3
|
+
*
|
|
4
|
+
* Each task gets its own append-only JSONL file at
|
|
5
|
+
* `~/.kbot/futures/harness/<task-id>.jsonl`. One line per `EvolutionRecord`
|
|
6
|
+
* (or arbitrary JSON-serializable record). Append-only on the hot path so
|
|
7
|
+
* concurrent loops don't trample each other; reads parse line-by-line and
|
|
8
|
+
* skip malformed lines rather than throwing on a single bad row.
|
|
9
|
+
*
|
|
10
|
+
* Pattern mirrors `src/planner/hierarchical/persistence.ts`: state dir is
|
|
11
|
+
* configurable (default `~/.kbot/futures/harness`), atomic writes where
|
|
12
|
+
* possible, ENOENT swallowed on read paths.
|
|
13
|
+
*/
|
|
14
|
+
import { promises as fs } from 'node:fs';
|
|
15
|
+
import * as os from 'node:os';
|
|
16
|
+
import * as path from 'node:path';
|
|
17
|
+
/**
 * Resolve the default trace directory under the current user's home:
 * `<home>/.kbot/futures/harness`.
 *
 * @returns {string} Path built from `os.homedir()`.
 */
export function defaultStateDir() {
    const segments = ['.kbot', 'futures', 'harness'];
    return path.join(os.homedir(), ...segments);
}
|
|
21
|
+
/**
 * Turn a task id into a filename-safe token.
 *
 * Letters, digits, `.`, `_` and `-` are kept; every other UTF-16 code unit
 * is replaced by a single `_`. Distinct ids can therefore map to the same
 * token (e.g. `a/b` and `a_b`).
 *
 * @param {string} taskId - Raw task identifier.
 * @returns {string} Sanitized identifier.
 */
function safeId(taskId) {
    const unsafeChar = /[^a-zA-Z0-9._-]/g;
    return taskId.replace(unsafeChar, '_');
}
|
|
25
|
+
/**
 * Compute the JSONL trace file location for a task.
 *
 * @param {string} stateDir - Directory that holds trace files.
 * @param {string} taskId - Raw task id; sanitized via `safeId` before use.
 * @returns {string} `<stateDir>/<sanitized id>.jsonl`.
 */
function tracePath(stateDir, taskId) {
    const fileName = `${safeId(taskId)}.jsonl`;
    return path.join(stateDir, fileName);
}
|
|
28
|
+
/**
 * Create `dir` and any missing parents; resolves with no value.
 *
 * @param {string} dir - Directory to create.
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
    const mkdirOptions = { recursive: true };
    await fs.mkdir(dir, mkdirOptions);
}
|
|
31
|
+
/**
 * Append one record to the task's trace file as a single JSON line.
 *
 * The state directory is created first if it does not exist. Serialization
 * and filesystem errors are not caught here and reject the returned promise.
 *
 * @param {string} taskId - Task whose trace file receives the line.
 * @param {object} record - Value serialized with `JSON.stringify`.
 * @param {string} [stateDir] - Target directory; defaults to `defaultStateDir()`.
 * @returns {Promise<void>}
 */
export async function appendTrace(taskId, record, stateDir = defaultStateDir()) {
    await ensureDir(stateDir);
    const serialized = `${JSON.stringify(record)}\n`;
    const target = tracePath(stateDir, taskId);
    await fs.appendFile(target, serialized, 'utf8');
}
|
|
37
|
+
/**
 * Load every parseable record from a task's trace file, in file order.
 *
 * A missing file (`ENOENT`) yields an empty array; any other read error is
 * rethrown. Blank lines and lines that fail `JSON.parse` are skipped so a
 * single corrupt row does not discard the rest.
 *
 * @param {string} taskId - Task whose trace file is read.
 * @param {string} [stateDir] - Directory to read from; defaults to `defaultStateDir()`.
 * @returns {Promise<Array<*>>} Parsed values, one per valid line.
 */
export async function readHistory(taskId, stateDir = defaultStateDir()) {
    let contents;
    try {
        contents = await fs.readFile(tracePath(stateDir, taskId), 'utf8');
    }
    catch (err) {
        if (err.code !== 'ENOENT') {
            throw err;
        }
        return [];
    }
    const candidates = contents
        .split('\n')
        .map((rawLine) => rawLine.trim())
        .filter((text) => text.length > 0);
    const records = [];
    for (const text of candidates) {
        try {
            records.push(JSON.parse(text));
        }
        catch {
            // skip malformed line
        }
    }
    return records;
}
|
|
66
|
+
/**
 * Remove `.jsonl` trace files whose mtime is more than `days` days in the past.
 *
 * A missing state directory yields an empty list; other `readdir` errors are
 * rethrown. Failures on an individual entry (stat or unlink) are ignored and
 * that entry is simply not reported.
 *
 * @param {number} days - Age threshold in days.
 * @param {string} [stateDir] - Directory to clean; defaults to `defaultStateDir()`.
 * @returns {Promise<string[]>} File names of removed traces with the `.jsonl`
 *   suffix stripped (i.e. sanitized ids as stored on disk).
 */
export async function pruneOlderThan(days, stateDir = defaultStateDir()) {
    const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
    let names;
    try {
        names = await fs.readdir(stateDir);
    }
    catch (err) {
        if (err.code !== 'ENOENT') {
            throw err;
        }
        return [];
    }
    const traceFiles = names.filter((name) => name.endsWith('.jsonl'));
    const removed = [];
    // Sequential on purpose: keeps the result order equal to directory order.
    for (const name of traceFiles) {
        const target = path.join(stateDir, name);
        try {
            const { mtimeMs } = await fs.stat(target);
            if (mtimeMs < cutoff) {
                await fs.unlink(target);
                removed.push(name.replace(/\.jsonl$/, ''));
            }
        }
        catch {
            // skip — permission, race, etc.
        }
    }
    return removed;
}
|
|
99
|
+
//# sourceMappingURL=persistence.js.map
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Evolution Loop — type definitions.
|
|
3
|
+
*
|
|
4
|
+
* Maps onto the formalism from "The Last Harness You'll Ever Build"
|
|
5
|
+
* (Seong, Yin, Zhang — Sylph.AI, arXiv:2604.21003):
|
|
6
|
+
*
|
|
7
|
+
* Agent = Model + Harness
|
|
8
|
+
* Harness = prompts + tools + orchestration + hooks + model config
|
|
9
|
+
*
|
|
10
|
+
* Inner loop: Worker(τ) → Evaluator(τ) → EvolutionAgent(history) → H'
|
|
11
|
+
* Outer loop: HarnessEvolution × N tasks → MetaEvolutionAgent(history) → Λ'
|
|
12
|
+
*
|
|
13
|
+
* This module is types-only. Runtime lives in evolution-loop.ts and
|
|
14
|
+
* meta-evolution.ts. No imports from heavy modules so it can be loaded by
|
|
15
|
+
* tools, tests, and remote runners cheaply.
|
|
16
|
+
*/
|
|
17
|
+
/** Identifier for a single task instance the worker is being optimized against. */
|
|
18
|
+
export interface Task {
|
|
19
|
+
id: string;
|
|
20
|
+
/** Concrete instructions the worker reads. */
|
|
21
|
+
instructions: string;
|
|
22
|
+
/** Verifiable success criteria — the evaluator's checklist. */
|
|
23
|
+
acceptance: string[];
|
|
24
|
+
/** Optional free-form metadata (domain, expected runtime, etc.). */
|
|
25
|
+
meta?: Record<string, unknown>;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* The harness — every piece of code/config that surrounds the model.
|
|
29
|
+
* Kept as data so it can be diffed, persisted, and rewritten by EvolutionAgent.
|
|
30
|
+
*/
|
|
31
|
+
export interface Harness {
|
|
32
|
+
/** Stable identifier; new id on every evolution step. */
|
|
33
|
+
id: string;
|
|
34
|
+
systemPrompt: string;
|
|
35
|
+
/** Tool names the worker is allowed to call. */
|
|
36
|
+
toolAllowlist: string[];
|
|
37
|
+
/** Hooks/middleware applied around tool calls. */
|
|
38
|
+
hooks: HookSpec[];
|
|
39
|
+
/** Model routing — which model handles which subtask kind. */
|
|
40
|
+
modelRouting: ModelRoute[];
|
|
41
|
+
/** Loop hyperparameters: max iterations, parallelism, revert thresholds. */
|
|
42
|
+
hyperparams: Hyperparams;
|
|
43
|
+
}
|
|
44
|
+
export interface HookSpec {
|
|
45
|
+
name: string;
|
|
46
|
+
/** When to fire: before tool call, after, on error, etc. */
|
|
47
|
+
phase: 'pre-tool' | 'post-tool' | 'on-error' | 'pre-response';
|
|
48
|
+
/** Free-form config; the runtime resolves to actual hook code. */
|
|
49
|
+
config?: Record<string, unknown>;
|
|
50
|
+
}
|
|
51
|
+
export interface ModelRoute {
|
|
52
|
+
/** Pattern match on task kind / tool / phase. */
|
|
53
|
+
match: string;
|
|
54
|
+
model: string;
|
|
55
|
+
temperature?: number;
|
|
56
|
+
maxTokens?: number;
|
|
57
|
+
}
|
|
58
|
+
export interface Hyperparams {
    /**
     * Upper bound on inner-loop iterations. The loop truncates this to an
     * integer and always runs at least one iteration.
     */
    maxIterations: number;
    /**
     * Stop early once the score is >= this value on consecutive iterations.
     * The required streak length comes from the run options and defaults to 1.
     */
    earlyStopScore?: number;
    /**
     * If an iteration scores at least this much below the best score seen so
     * far, revert to the best harness.
     */
    revertThreshold?: number;
}
|
|
65
|
+
/** Trace produced by Worker.execute() for the Evaluator to inspect. */
|
|
66
|
+
export interface ExecutionTrace {
|
|
67
|
+
taskId: string;
|
|
68
|
+
harnessId: string;
|
|
69
|
+
steps: TraceStep[];
|
|
70
|
+
finalState: Record<string, unknown>;
|
|
71
|
+
llmTimeMs: number;
|
|
72
|
+
toolTimeMs: number;
|
|
73
|
+
}
|
|
74
|
+
export interface TraceStep {
|
|
75
|
+
index: number;
|
|
76
|
+
phase: 'plan' | 'tool' | 'response' | 'observe';
|
|
77
|
+
action: string;
|
|
78
|
+
output?: string;
|
|
79
|
+
error?: string;
|
|
80
|
+
durationMs: number;
|
|
81
|
+
}
|
|
82
|
+
/** Evaluator output: pass/fail + score + diagnostic narrative. */
|
|
83
|
+
export interface EvaluationReport {
|
|
84
|
+
taskId: string;
|
|
85
|
+
harnessId: string;
|
|
86
|
+
pass: boolean;
|
|
87
|
+
/** Two-tier score: pass yields 1.0, scaled by efficiency tiebreaker. */
|
|
88
|
+
score: number;
|
|
89
|
+
/** Per-criterion verdict. Length matches Task.acceptance. */
|
|
90
|
+
criteriaResults: CriterionResult[];
|
|
91
|
+
/** Categorized failure modes for the Evolution Agent to act on. */
|
|
92
|
+
failureModes: FailureMode[];
|
|
93
|
+
/** Free-form diagnostic prose. */
|
|
94
|
+
notes?: string;
|
|
95
|
+
}
|
|
96
|
+
export interface CriterionResult {
|
|
97
|
+
criterion: string;
|
|
98
|
+
passed: boolean;
|
|
99
|
+
evidence?: string;
|
|
100
|
+
}
|
|
101
|
+
export type FailureModeKind = 'incorrect-tool-usage' | 'reasoning-loop' | 'misinterpreted-state' | 'excessive-latency' | 'missing-capability' | 'hallucinated-state' | 'other';
|
|
102
|
+
export interface FailureMode {
|
|
103
|
+
kind: FailureModeKind;
|
|
104
|
+
detail: string;
|
|
105
|
+
}
|
|
106
|
+
/** One row in the evolution history. */
|
|
107
|
+
export interface EvolutionRecord {
|
|
108
|
+
iteration: number;
|
|
109
|
+
harness: Harness;
|
|
110
|
+
trace: ExecutionTrace;
|
|
111
|
+
report: EvaluationReport;
|
|
112
|
+
verdict: 'improved' | 'regressed' | 'no-op';
|
|
113
|
+
}
|
|
114
|
+
/** Final result of a single inner-loop run. */
|
|
115
|
+
export interface EvolutionResult {
|
|
116
|
+
taskId: string;
|
|
117
|
+
bestHarness: Harness;
|
|
118
|
+
bestScore: number;
|
|
119
|
+
history: EvolutionRecord[];
|
|
120
|
+
}
|
|
121
|
+
/** Worker = the agent under optimization, parameterized by harness. */
|
|
122
|
+
export interface Worker {
|
|
123
|
+
execute(task: Task, harness: Harness): Promise<ExecutionTrace>;
|
|
124
|
+
}
|
|
125
|
+
/** Evaluator = adversarial reviewer; produces EvaluationReport. */
|
|
126
|
+
export interface Evaluator {
|
|
127
|
+
evaluate(trace: ExecutionTrace, task: Task): Promise<EvaluationReport>;
|
|
128
|
+
}
|
|
129
|
+
/** EvolutionAgent = mutates the harness based on history. */
|
|
130
|
+
export interface EvolutionAgent {
|
|
131
|
+
evolve(history: EvolutionRecord[], best: Harness): Promise<Harness>;
|
|
132
|
+
}
|
|
133
|
+
/** Λ — the evolution protocol itself. */
|
|
134
|
+
export interface EvolutionProtocol {
|
|
135
|
+
worker: Worker;
|
|
136
|
+
evaluator: Evaluator;
|
|
137
|
+
evolution: EvolutionAgent;
|
|
138
|
+
initialHarness: Harness;
|
|
139
|
+
hyperparams: Hyperparams;
|
|
140
|
+
}
|
|
141
|
+
/** Outer-loop result. */
|
|
142
|
+
export interface MetaResult {
|
|
143
|
+
bestProtocol: EvolutionProtocol;
|
|
144
|
+
bestMetaScore: number;
|
|
145
|
+
perTask: EvolutionResult[];
|
|
146
|
+
}
|
|
147
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness Evolution Loop — type definitions.
|
|
3
|
+
*
|
|
4
|
+
* Maps onto the formalism from "The Last Harness You'll Ever Build"
|
|
5
|
+
* (Seong, Yin, Zhang — Sylph.AI, arXiv:2604.21003):
|
|
6
|
+
*
|
|
7
|
+
* Agent = Model + Harness
|
|
8
|
+
* Harness = prompts + tools + orchestration + hooks + model config
|
|
9
|
+
*
|
|
10
|
+
* Inner loop: Worker(τ) → Evaluator(τ) → EvolutionAgent(history) → H'
|
|
11
|
+
* Outer loop: HarnessEvolution × N tasks → MetaEvolutionAgent(history) → Λ'
|
|
12
|
+
*
|
|
13
|
+
* This module is types-only. Runtime lives in evolution-loop.ts and
|
|
14
|
+
* meta-evolution.ts. No imports from heavy modules so it can be loaded by
|
|
15
|
+
* tools, tests, and remote runners cheaply.
|
|
16
|
+
*/
|
|
17
|
+
export {};
|
|
18
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kbot v5 futures — experimental architectural skeleton.
|
|
3
|
+
*
|
|
4
|
+
* Six modules drawn from frontier research published in late April 2026.
|
|
5
|
+
* Each is opt-in, additive, and reversible. None of them changes default
|
|
6
|
+
* agent behavior unless explicitly invoked.
|
|
7
|
+
*
|
|
8
|
+
* Plan: packages/kbot/V5_FUTURES_PLAN.md
|
|
9
|
+
*/
|
|
10
|
+
export * as harness from './harness/index.js';
|
|
11
|
+
export * as skillGraph from './skill-graph/index.js';
|
|
12
|
+
export * as latentState from './latent-state/index.js';
|
|
13
|
+
export * as forecast from './forecast/index.js';
|
|
14
|
+
export * as persona from './persona/index.js';
|
|
15
|
+
export * as debate from './debate/index.js';
|
|
16
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* kbot v5 futures — experimental architectural skeleton.
|
|
3
|
+
*
|
|
4
|
+
* Six modules drawn from frontier research published in late April 2026.
|
|
5
|
+
* Each is opt-in, additive, and reversible. None of them changes default
|
|
6
|
+
* agent behavior unless explicitly invoked.
|
|
7
|
+
*
|
|
8
|
+
* Plan: packages/kbot/V5_FUTURES_PLAN.md
|
|
9
|
+
*/
|
|
10
|
+
// Harness Evolution Loop — Sylph.AI 2604.21003
|
|
11
|
+
export * as harness from './harness/index.js';
|
|
12
|
+
// Skill Graph — Tencent Hunyuan 2604.25727
|
|
13
|
+
export * as skillGraph from './skill-graph/index.js';
|
|
14
|
+
// Latent State Envelope — Recursive MAS 2604.25917
|
|
15
|
+
export * as latentState from './latent-state/index.js';
|
|
16
|
+
// Forecast — predictions module
|
|
17
|
+
export * as forecast from './forecast/index.js';
|
|
18
|
+
// Persona — privilege scoping (Cequence)
|
|
19
|
+
export * as persona from './persona/index.js';
|
|
20
|
+
// Debate — BARRED-style asymmetric debate runner
|
|
21
|
+
export * as debate from './debate/index.js';
|
|
22
|
+
//# sourceMappingURL=index.js.map
|