@kernel.chat/kbot 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +6 -0
  2. package/dist/cache-warmth.d.ts +25 -0
  3. package/dist/cache-warmth.js +131 -0
  4. package/dist/futures/debate/index.d.ts +7 -0
  5. package/dist/futures/debate/index.js +6 -0
  6. package/dist/futures/debate/runner.d.ts +34 -0
  7. package/dist/futures/debate/runner.js +140 -0
  8. package/dist/futures/debate/synthesis.d.ts +25 -0
  9. package/dist/futures/debate/synthesis.js +81 -0
  10. package/dist/futures/debate/types.d.ts +72 -0
  11. package/dist/futures/debate/types.js +12 -0
  12. package/dist/futures/forecast/index.d.ts +5 -0
  13. package/dist/futures/forecast/index.js +5 -0
  14. package/dist/futures/forecast/projection.d.ts +31 -0
  15. package/dist/futures/forecast/projection.js +177 -0
  16. package/dist/futures/forecast/synthesize.d.ts +19 -0
  17. package/dist/futures/forecast/synthesize.js +89 -0
  18. package/dist/futures/forecast/types.d.ts +59 -0
  19. package/dist/futures/forecast/types.js +15 -0
  20. package/dist/futures/harness/critic-evaluator.d.ts +39 -0
  21. package/dist/futures/harness/critic-evaluator.js +131 -0
  22. package/dist/futures/harness/evolution-loop.d.ts +41 -0
  23. package/dist/futures/harness/evolution-loop.js +168 -0
  24. package/dist/futures/harness/index.d.ts +16 -0
  25. package/dist/futures/harness/index.js +13 -0
  26. package/dist/futures/harness/meta-evolution.d.ts +32 -0
  27. package/dist/futures/harness/meta-evolution.js +52 -0
  28. package/dist/futures/harness/noop-evolution.d.ts +23 -0
  29. package/dist/futures/harness/noop-evolution.js +29 -0
  30. package/dist/futures/harness/persistence.d.ts +30 -0
  31. package/dist/futures/harness/persistence.js +99 -0
  32. package/dist/futures/harness/types.d.ts +147 -0
  33. package/dist/futures/harness/types.js +18 -0
  34. package/dist/futures/index.d.ts +16 -0
  35. package/dist/futures/index.js +22 -0
  36. package/dist/futures/latent-state/envelope.d.ts +39 -0
  37. package/dist/futures/latent-state/envelope.js +178 -0
  38. package/dist/futures/latent-state/index.d.ts +5 -0
  39. package/dist/futures/latent-state/index.js +3 -0
  40. package/dist/futures/latent-state/types.d.ts +47 -0
  41. package/dist/futures/latent-state/types.js +13 -0
  42. package/dist/futures/persona/check.d.ts +45 -0
  43. package/dist/futures/persona/check.js +205 -0
  44. package/dist/futures/persona/index.d.ts +5 -0
  45. package/dist/futures/persona/index.js +5 -0
  46. package/dist/futures/persona/registry.d.ts +22 -0
  47. package/dist/futures/persona/registry.js +124 -0
  48. package/dist/futures/persona/types.d.ts +68 -0
  49. package/dist/futures/persona/types.js +28 -0
  50. package/dist/futures/skill-graph/graph.d.ts +31 -0
  51. package/dist/futures/skill-graph/graph.js +151 -0
  52. package/dist/futures/skill-graph/index.d.ts +13 -0
  53. package/dist/futures/skill-graph/index.js +10 -0
  54. package/dist/futures/skill-graph/synthesis.d.ts +20 -0
  55. package/dist/futures/skill-graph/synthesis.js +83 -0
  56. package/dist/futures/skill-graph/types.d.ts +53 -0
  57. package/dist/futures/skill-graph/types.js +19 -0
  58. package/dist/streaming.js +18 -0
  59. package/package.json +1 -1
@@ -0,0 +1,177 @@
1
+ // futures/forecast/projection — project Signal arrays forward in time.
2
+ // Pure functions, no IO. Three model families (linear, exponential, flat)
3
+ // + a `bestProjection` selector that picks the highest r² with bias toward
4
+ // flat when neither model fits well.
5
+ import { HORIZON_MS } from './types.js';
6
/**
 * Ordinary least-squares fit of the line y = intercept + slope * x.
 *
 * Returns `{ intercept, slope, r2, residualStd }` where:
 *  - `slope` is 0 when every x is identical (zero x-variance);
 *  - `r2` is 1 - SS_res / SS_tot, or, when SS_tot is 0 (constant y),
 *    1 if the residuals are also 0 and 0 otherwise;
 *  - `residualStd` is sqrt(SS_res / max(1, n - 1)).
 *
 * Callers are expected to pass at least two points; `xs` and `ys` are read
 * pairwise by index.
 */
function leastSquares(xs, ys) {
    const count = xs.length;
    // Pass 1: means.
    let totalX = 0;
    let totalY = 0;
    for (const [idx, x] of xs.entries()) {
        totalX += x;
        totalY += ys[idx];
    }
    const meanX = totalX / count;
    const meanY = totalY / count;
    // Pass 2: co-moment of (x, y) and second moment of x about the means.
    let crossSum = 0;
    let spreadX = 0;
    for (const [idx, x] of xs.entries()) {
        const offsetX = x - meanX;
        crossSum += offsetX * (ys[idx] - meanY);
        spreadX += offsetX * offsetX;
    }
    let slope = 0;
    if (spreadX !== 0) {
        slope = crossSum / spreadX;
    }
    const intercept = meanY - slope * meanX;
    // Pass 3: residual and total sums of squares for r².
    let ssRes = 0;
    let ssTot = 0;
    for (const [idx, x] of xs.entries()) {
        const residual = ys[idx] - (intercept + slope * x);
        ssRes += residual * residual;
        ssTot += (ys[idx] - meanY) ** 2;
    }
    let r2;
    if (ssTot !== 0) {
        r2 = 1 - ssRes / ssTot;
    }
    else {
        // Constant y: a perfect fit scores 1, anything else 0.
        r2 = ssRes === 0 ? 1 : 0;
    }
    const residualStd = Math.sqrt(ssRes / Math.max(1, count - 1));
    return { intercept, slope, r2, residualStd };
}
42
/**
 * Return a copy of `signal.values` ordered by ascending timestamp.
 * The signal's own array is left untouched; an empty series yields `[]`.
 */
function sortedValues(signal) {
    const copy = Array.from(signal.values);
    copy.sort((earlier, later) => earlier.ts - later.ts);
    return copy;
}
46
/**
 * Blend fit quality and sample count into a 0..1 confidence score.
 *
 * With fewer than two samples the score is 0. Otherwise r² (clamped to
 * [0, 1], so negative values count as 0) contributes 70% and a sample-size
 * factor contributes 30%; the size factor grows linearly and saturates at
 * 12 samples.
 */
function confidenceFrom(r2, n) {
    if (n < 2) {
        return 0;
    }
    const clampUnit = (v) => Math.max(0, Math.min(1, v));
    const sizeFactor = Math.min(1, n / 12);
    return clampUnit(clampUnit(r2) * 0.7 + sizeFactor * 0.3);
}
55
/**
 * Build a flat ("no trend") forecast for a signal.
 *
 * The point estimate is the mean of all observed values (0 for an empty
 * series) and the bounds are mean ± 2 × sample standard deviation (n - 1
 * denominator; 0 when there is at most one point). The trend is always
 * `{ kind: 'flat', slope: 0, r2: 0 }`. Confidence is a fixed 0.4 with three
 * or more points and 0.1 otherwise.
 *
 * `method` is the tag recorded on the forecast; it defaults to 'flat-mean'.
 */
function flatForecast(signal, horizon, method = 'flat-mean') {
    const samples = sortedValues(signal);
    const count = samples.length;
    let mean = 0;
    if (count !== 0) {
        let total = 0;
        for (const sample of samples) {
            total = total + sample.value;
        }
        mean = total / count;
    }
    let std = 0;
    if (count > 1) {
        let squaredDeviation = 0;
        for (const sample of samples) {
            squaredDeviation = squaredDeviation + (sample.value - mean) ** 2;
        }
        std = Math.sqrt(squaredDeviation / (count - 1));
    }
    const band = 2 * std;
    return {
        signal: signal.name,
        horizon,
        trend: { kind: 'flat', slope: 0, r2: 0 },
        pointEstimate: mean,
        lowerBound: mean - band,
        upperBound: mean + band,
        confidence: count >= 3 ? 0.4 : 0.1,
        method,
    };
}
75
/**
 * Linear projection: fits value ~ a + b*t by least squares and evaluates the
 * line at (last observed timestamp + HORIZON_MS[horizon]).
 * Bounds = point ± 2 * residual stddev of the fit.
 *
 * Falls back to a flat forecast (method 'flat-too-few') when there are fewer
 * than 2 points or fewer than 2 distinct timestamps: with a single distinct
 * timestamp the slope is undefined, so no line can be fitted.
 *
 * `signal` is a `{ name, values: [{ ts, value }] }` series; `horizon` is a
 * key of HORIZON_MS. Returns a Forecast-shaped object.
 */
export function linearProjection(signal, horizon) {
    const pts = sortedValues(signal);
    // pts is sorted by ts, so first === last means every point shares one
    // timestamp and there is no time axis to regress against.
    if (pts.length < 2 || pts[0].ts === pts[pts.length - 1].ts)
        return flatForecast(signal, horizon, 'flat-too-few');
    const xs = pts.map((p) => p.ts);
    const ys = pts.map((p) => p.value);
    const fit = leastSquares(xs, ys);
    // The horizon is measured from the most recent observation.
    const lastTs = xs[xs.length - 1];
    const targetTs = lastTs + HORIZON_MS[horizon];
    const point = fit.intercept + fit.slope * targetTs;
    const trend = { kind: 'linear', slope: fit.slope, r2: fit.r2 };
    return {
        signal: signal.name,
        horizon,
        trend,
        pointEstimate: point,
        lowerBound: point - 2 * fit.residualStd,
        upperBound: point + 2 * fit.residualStd,
        confidence: confidenceFrom(fit.r2, pts.length),
        method: 'linear-lsq',
    };
}
101
/**
 * Exponential projection: fit a straight line to log(value) over time, then
 * map the projected log value back with exp().
 *
 * Only strictly positive observations take part (log is undefined for the
 * rest). If fewer than two remain, a flat forecast tagged 'flat-nonpositive'
 * is returned instead. The bounds are computed as ± 2 residual stddev in log
 * space and exponentiated, so they are asymmetric around the point estimate
 * in raw space. The reported slope and r² are those of the log-space fit.
 */
export function exponentialProjection(signal, horizon) {
    const positive = [];
    for (const sample of sortedValues(signal)) {
        if (sample.value > 0) {
            positive.push(sample);
        }
    }
    if (positive.length < 2) {
        return flatForecast(signal, horizon, 'flat-nonpositive');
    }
    const times = positive.map((sample) => sample.ts);
    const logValues = positive.map((sample) => Math.log(sample.value));
    const { intercept, slope, r2, residualStd } = leastSquares(times, logValues);
    // Project from the latest positive observation.
    const targetTs = times[times.length - 1] + HORIZON_MS[horizon];
    const logPoint = intercept + slope * targetTs;
    return {
        signal: signal.name,
        horizon,
        trend: { kind: 'exponential', slope, r2 },
        pointEstimate: Math.exp(logPoint),
        lowerBound: Math.exp(logPoint - 2 * residualStd),
        upperBound: Math.exp(logPoint + 2 * residualStd),
        confidence: confidenceFrom(r2, positive.length),
        method: 'exp-loglin',
    };
}
132
/**
 * Exported entry point for the flat model, for callers that want to force
 * it. Delegates to `flatForecast` with its default method tag.
 */
export function flatProjection(signal, horizon) {
    const forecast = flatForecast(signal, horizon);
    return forecast;
}
136
/**
 * Choose between the linear, exponential and flat models for one signal.
 *
 *  - Fewer than two points: flat.
 *  - The exponential model is only tried when every value is > 0.
 *  - If every candidate's r² is below 0.4, flat is returned so that a poorly
 *    fitting trend does not produce an over-confident projection.
 *  - Otherwise the exponential forecast wins only when its r² is strictly
 *    greater than the linear r²; ties go to linear.
 */
export function bestProjection(signal, horizon) {
    const samples = sortedValues(signal);
    if (samples.length < 2) {
        return flatProjection(signal, horizon);
    }
    const linear = linearProjection(signal, horizon);
    const hasNonPositive = samples.some((sample) => !(sample.value > 0));
    const exponential = hasNonPositive ? null : exponentialProjection(signal, horizon);
    const linearFit = linear.trend.r2;
    // A missing exponential candidate can never win the comparison.
    const exponentialFit = exponential === null ? -Infinity : exponential.trend.r2;
    const fitsPoorly = (r2) => r2 < 0.4;
    if (fitsPoorly(linearFit) && fitsPoorly(exponentialFit)) {
        return flatProjection(signal, horizon);
    }
    return exponential !== null && exponentialFit > linearFit ? exponential : linear;
}
158
/**
 * Decide whether a horizon is reasonable for the amount of history on hand.
 *
 * Despite the name this does not clamp anything: it returns a boolean.
 * `history` is the span in ms between the earliest and latest observation.
 * The result is `true` only when that span is positive and the horizon is
 * no longer than three times the span.
 */
export function clampHorizon(h, history) {
    const noHistory = history <= 0;
    return !noHistory && HORIZON_MS[h] <= history * 3;
}
170
/**
 * Timespan covered by a signal, in ms: latest timestamp minus earliest.
 *
 * Returns 0 when the signal has fewer than two observations. The values may
 * be in any order; the extremes are found with a single linear scan rather
 * than by copying and sorting the whole series, and the input is not
 * modified.
 */
export function signalHistory(signal) {
    const points = signal.values;
    if (points.length < 2)
        return 0;
    let earliest = points[0].ts;
    let latest = points[0].ts;
    for (const point of points) {
        if (point.ts < earliest)
            earliest = point.ts;
        if (point.ts > latest)
            latest = point.ts;
    }
    return latest - earliest;
}
177
+ //# sourceMappingURL=projection.js.map
@@ -0,0 +1,19 @@
1
+ import type { Forecast, Horizon, Signal } from './types.js';
2
+ /**
3
+ * Project every signal forward at the given horizon. Skips signals whose
4
+ * history is too short for the horizon (clampHorizon = false). Returns
5
+ * forecasts sorted by absolute slope descending so the most-moving signals
6
+ * surface first. Each kept signal is projected with `bestProjection`.
7
+ */
8
+ export declare function synthesizeForecasts(signals: Signal[], horizon: Horizon): Forecast[];
9
+ /**
10
+ * Markdown one-liner for a forecast: arrow, signal name, point estimate, horizon label, fit (kind plus r², or just `flat`), and ± half the distance between the bounds.
11
+ * Example: `📈 npm downloads → 14.2k (in 30 days, linear, r²=0.78, ±890)`
12
+ */
13
+ export declare function formatForecast(f: Forecast): string;
14
+ /**
15
+ * Take a list of forecasts and produce a short paragraph naming the first three
16
+ * in the order given (no re-sorting happens here; `synthesizeForecasts` returns them ordered by absolute slope). If empty, returns a polite no-data sentence.
17
+ */
18
+ export declare function narrative(forecasts: Forecast[]): string;
19
+ //# sourceMappingURL=synthesize.d.ts.map
@@ -0,0 +1,89 @@
1
+ // futures/forecast/synthesize — fan a list of Signals into Forecasts and
2
+ // produce human-readable summaries. Pure formatting; no IO.
3
+ import { bestProjection, clampHorizon, signalHistory } from './projection.js';
4
/**
 * Produce one forecast per signal at the requested horizon.
 *
 * A signal is skipped when `clampHorizon` rejects the horizon for its
 * history span (as measured by `signalHistory`). The remaining signals are
 * projected with `bestProjection`, and the forecasts are returned ordered
 * by descending |trend.slope| so the fastest-moving signals come first.
 */
export function synthesizeForecasts(signals, horizon) {
    const forecasts = signals
        .filter((sig) => clampHorizon(horizon, signalHistory(sig)))
        .map((sig) => bestProjection(sig, horizon));
    const magnitude = (forecast) => Math.abs(forecast.trend.slope);
    forecasts.sort((a, b) => magnitude(b) - magnitude(a));
    return forecasts;
}
20
/**
 * Compact human-readable rendering of a number.
 *
 *  - non-finite → '—'
 *  - |n| ≥ 1,000,000 → one decimal + 'M'
 *  - |n| ≥ 1,000 → one decimal + 'k' (a trailing '.0' is dropped: '2k')
 *  - |n| ≥ 10 → integer; |n| ≥ 1 → one decimal; otherwise two decimals
 *
 * Rounding is allowed to promote a value to the next unit, so a number just
 * below a boundary never shows a four-digit mantissa: 999,960 renders as
 * '1.0M' (not '1000k') and 999.6 renders as '1k' (not '1000').
 */
function formatNumber(n) {
    if (!Number.isFinite(n))
        return '—';
    const abs = Math.abs(n);
    const asMillions = () => `${(n / 1_000_000).toFixed(1)}M`;
    const asThousands = () => {
        const scaled = (n / 1_000).toFixed(1);
        // toFixed may round 999.95+ up to "1000.0"; roll over to millions.
        if (Math.abs(Number(scaled)) >= 1_000)
            return asMillions();
        return `${scaled}k`.replace('.0k', 'k');
    };
    if (abs >= 1_000_000)
        return asMillions();
    if (abs >= 1_000)
        return asThousands();
    if (abs >= 10) {
        const whole = n.toFixed(0);
        // 999.5+ rounds to "1000"; show it in thousands instead.
        return Math.abs(Number(whole)) >= 1_000 ? asThousands() : whole;
    }
    if (abs >= 1)
        return n.toFixed(1);
    return n.toFixed(2);
}
34
+ const HORIZON_LABEL = { // Horizon key → phrase inserted into formatted forecasts and narratives.
35
+ '1d': 'in 1 day',
36
+ '7d': 'in 7 days',
37
+ '30d': 'in 30 days',
38
+ '90d': 'in 90 days',
39
+ };
40
+ const ARROW = { // Direction glyphs chosen by arrowFor() from the trend's kind and slope sign.
41
+ up: '📈',
42
+ down: '📉',
43
+ flat: '➖',
44
+ };
45
/**
 * Pick the direction glyph for a trend: the flat glyph when the kind is
 * 'flat' or the slope is not strictly positive/negative, otherwise the up or
 * down glyph according to the slope's sign.
 */
function arrowFor(slope, kind) {
    const hasDirection = kind !== 'flat';
    if (hasDirection && slope > 0) {
        return ARROW.up;
    }
    return hasDirection && slope < 0 ? ARROW.down : ARROW.flat;
}
54
/**
 * Render a forecast as a single Markdown-friendly line.
 * Example: `📈 npm downloads → 14.2k (in 30 days, linear, r²=0.78, ±890)`
 *
 * The ± figure is half the distance between the upper and lower bound
 * ('±?' when that is not finite). Flat trends show just `flat` in place of
 * the kind/r² pair.
 */
export function formatForecast(f) {
    const { trend } = f;
    const halfWidth = (f.upperBound - f.lowerBound) / 2;
    let interval = '±?';
    if (Number.isFinite(halfWidth)) {
        interval = `±${formatNumber(halfWidth)}`;
    }
    let fitLabel = 'flat';
    if (trend.kind !== 'flat') {
        fitLabel = `${trend.kind}, r²=${trend.r2.toFixed(2)}`;
    }
    const glyph = arrowFor(trend.slope, trend.kind);
    const estimate = formatNumber(f.pointEstimate);
    return `${glyph} ${f.signal} → ${estimate} (${HORIZON_LABEL[f.horizon]}, ${fitLabel}, ${interval})`;
}
66
/**
 * Turn a list of forecasts into a one-sentence "Top movers" summary.
 *
 * Only the first three forecasts are described, in the order given; this
 * function does not sort, so pass forecasts already ranked (for example the
 * output of `synthesizeForecasts`, which orders by absolute slope). An empty
 * list yields a fixed no-data sentence.
 *
 * A forecast is described as "holding flat" when its trend kind is 'flat'
 * or its slope is exactly 0, matching the arrow chosen by `arrowFor`.
 * Otherwise the slope's sign decides between "trending up" and
 * "trending down".
 */
export function narrative(forecasts) {
    if (forecasts.length === 0) {
        return 'No forecasts available — not enough signal history to project.';
    }
    const phrases = forecasts.slice(0, 3).map((f) => {
        const { kind, slope } = f.trend;
        let direction;
        if (kind === 'flat' || slope === 0) {
            // A zero slope has no direction, whatever model produced it.
            direction = 'is holding flat';
        }
        else if (slope > 0) {
            direction = 'is trending up';
        }
        else {
            direction = 'is trending down';
        }
        const point = formatNumber(f.pointEstimate);
        const conf = `${(f.confidence * 100).toFixed(0)}% conf`;
        return `${f.signal} ${direction} toward ${point} ${HORIZON_LABEL[f.horizon]} (${conf})`;
    });
    // "a", or "a; and b", or "a; b; and c".
    const head = phrases.length === 1
        ? phrases[0]
        : phrases.slice(0, -1).join('; ') + '; and ' + phrases[phrases.length - 1];
    return `Top movers: ${head}.`;
}
89
+ //# sourceMappingURL=synthesize.js.map
@@ -0,0 +1,59 @@
1
+ /**
2
+ * A named time series. Timestamps are ms since epoch (matches Date.now()).
3
+ * Values are arbitrary scalars (counts, percentages, etc.).
4
+ * Order is not required; projection.ts sorts a copy internally.
5
+ */
6
+ export interface Signal {
7
+ name: string; // copied into Forecast.signal by the projection functions
8
+ values: Array<{
9
+ ts: number; // observation time, ms since epoch
10
+ value: number; // observed scalar at `ts`
11
+ }>;
12
+ }
13
+ /**
14
+ * Trend describes the shape of the fit chosen for a Signal.
15
+ * - 'linear' : value ~ a + b*t (slope = b in value/ms)
16
+ * - 'exponential' : value ~ exp(a + b*t) (slope = b in log(value)/ms)
17
+ * - 'flat' : fallback (too few usable points, or both fits have r2 < 0.4); slope == 0, r2 == 0
18
+ *
19
+ * r2 is the coefficient of determination on whatever space was fit
20
+ * (raw values for linear, log(values) for exponential).
21
+ */
22
+ export interface Trend {
23
+ kind: 'linear' | 'exponential' | 'flat';
24
+ slope: number;
25
+ r2: number;
26
+ }
27
+ /**
28
+ * Projection horizon. Always relative to the last observed timestamp (target = last ts + HORIZON_MS[horizon]).
29
+ * 1d/7d/30d/90d cover the practical surface (daily through quarterly).
30
+ */
31
+ export type Horizon = '1d' | '7d' | '30d' | '90d';
32
+ /**
33
+ * A single forecast for a single signal at a single horizon.
34
+ *
35
+ * - pointEstimate is the model's expected value at horizon end
36
+ * - lowerBound / upperBound is roughly a 95% interval
37
+ * (point ± 2 * residual stddev, exp-transformed for exponential fits; mean ± 2 * sample stddev for flat forecasts)
38
+ * - confidence is a 0..1 score derived from r2 and history length
39
+ * (flat forecasts use a fixed 0.4 with 3+ points, else 0.1); higher means "trust this projection more"
40
+ * - method is a short human-readable tag, e.g. "linear-lsq", "exp-loglin",
41
+ * "flat-mean", "flat-too-few", "flat-nonpositive"
42
+ */
43
+ export interface Forecast {
44
+ signal: string;
45
+ horizon: Horizon;
46
+ trend: Trend;
47
+ pointEstimate: number;
48
+ lowerBound: number;
49
+ upperBound: number;
50
+ confidence: number;
51
+ method: string;
52
+ }
53
+ /**
54
+ * Number of milliseconds in each horizon. Exported as a const for use by
55
+ * projection.ts (the shipped synthesize.js does not import it); not exported as a value-typed enum so
56
+ * callers can keep using string literals.
57
+ */
58
+ export declare const HORIZON_MS: Record<Horizon, number>;
59
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,15 @@
1
+ // futures/forecast — type definitions for projecting growth/research signals
2
+ // forward in time. Types plus one runtime constant (HORIZON_MS); no IO. Consumers (synthesize.ts,
3
+ // projection.ts) build Forecasts from raw Signal arrays.
4
+ /**
5
+ * Number of milliseconds in each horizon, keyed by the Horizon string literal. Exported as a const for use by
6
+ * projection.ts (the shipped synthesize.js does not import it); not exported as a value-typed enum so
7
+ * callers can keep using string literals.
8
+ */
9
+ export const HORIZON_MS = {
10
+ '1d': 24 * 60 * 60 * 1000, // 86,400,000 ms
11
+ '7d': 7 * 24 * 60 * 60 * 1000, // 604,800,000 ms
12
+ '30d': 30 * 24 * 60 * 60 * 1000, // 2,592,000,000 ms
13
+ '90d': 90 * 24 * 60 * 60 * 1000, // 7,776,000,000 ms
14
+ };
15
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,39 @@
1
+ /**
2
+ * CriticEvaluator — adapts `critic-gate.ts` to the harness `Evaluator`
3
+ * interface from `types.ts`.
4
+ *
5
+ * critic-gate.ts gates a single tool result. The harness Evaluator grades
6
+ * an entire trace against a Task's acceptance criteria. This adapter
7
+ * walks the trace's tool steps, runs each through `gateToolResult`, and
8
+ * aggregates the per-step verdicts into a single EvaluationReport:
9
+ *
10
+ * - `pass` = every acceptance criterion satisfied AND no critic rejected
11
+ * a tool step
12
+ * - `score` = (criteriaPassRate * 0.7) + (toolAcceptRate * 0.3),
13
+ * efficiency-tiebroken by total step time
14
+ * - failureModes derived from critic verdicts' `failure_class` (RF-NN-*)
15
+ *
16
+ * Acceptance criteria are matched via case-insensitive substring against
17
+ * the trace's flattened `output | error | finalState | action` text.
18
+ * That keeps the adapter dependency-free; richer matchers can subclass.
19
+ *
20
+ * critic-gate.ts is NOT modified — this is a pure consumer.
21
+ */
22
+ import type { EvaluationReport, Evaluator, ExecutionTrace, Task } from './types.js';
23
+ import type { CriticVerdict, GateOpts } from '../../critic-gate.js';
24
+ export interface CriticEvaluatorOpts {
25
+ /** Forwarded to `gateToolResult` — strictness, provider, llmClient stub. Not used when `gateFn` is set. */
26
+ gate?: GateOpts;
27
+ /**
28
+ * If set, replaces `gateToolResult`. Lets tests inject a fully synchronous
29
+ * decision function and skip the critic-gate provider plumbing entirely.
   * Called once per tool-phase step as (step.action, {}, step.output ?? step.error ?? '').
30
+ */
31
+ gateFn?: (tool: string, args: Record<string, unknown>, result: unknown) => Promise<CriticVerdict> | CriticVerdict;
32
+ }
33
+ export declare class CriticEvaluator implements Evaluator { // Grades a whole trace: substring-matched acceptance criteria plus a critic verdict per tool step.
34
+ private readonly opts; // options captured by the constructor; `{}` when omitted
35
+ constructor(opts?: CriticEvaluatorOpts);
36
+ evaluate(trace: ExecutionTrace, task: Task): Promise<EvaluationReport>; // resolves to the report; `pass` requires every criterion met and no rejected tool step
37
+ }
38
+ export declare function createCriticEvaluator(opts?: CriticEvaluatorOpts): Evaluator; // Convenience factory: returns `new CriticEvaluator(opts)`.
39
+ //# sourceMappingURL=critic-evaluator.d.ts.map
@@ -0,0 +1,131 @@
1
+ /**
2
+ * CriticEvaluator — adapts `critic-gate.ts` to the harness `Evaluator`
3
+ * interface from `types.ts`.
4
+ *
5
+ * critic-gate.ts gates a single tool result. The harness Evaluator grades
6
+ * an entire trace against a Task's acceptance criteria. This adapter
7
+ * walks the trace's tool steps, runs each through `gateToolResult`, and
8
+ * aggregates the per-step verdicts into a single EvaluationReport:
9
+ *
10
+ * - `pass` = every acceptance criterion satisfied AND no critic rejected
11
+ * a tool step
12
+ * - `score` = (criteriaPassRate * 0.7) + (toolAcceptRate * 0.3),
13
+ * efficiency-tiebroken by total step time
14
+ * - failureModes derived from critic verdicts' `failure_class` (RF-NN-*)
15
+ *
16
+ * Acceptance criteria are matched via case-insensitive substring against
17
+ * the trace's flattened `output | error | finalState | action` text.
18
+ * That keeps the adapter dependency-free; richer matchers can subclass.
19
+ *
20
+ * critic-gate.ts is NOT modified — this is a pure consumer.
21
+ */
22
+ import { gateToolResult } from '../../critic-gate.js';
23
/**
 * Lookup table from critic RF taxonomy classes to harness FailureMode kinds.
 * Any class not listed here is reported as 'other'.
 */
const RF_FAILURE_KINDS = new Map([
    ['RF-01-fabricated-evidence', 'hallucinated-state'],
    ['RF-10-simulation-role-confusion', 'hallucinated-state'],
    ['RF-02-metric-interpretation', 'misinterpreted-state'],
    ['RF-03-confused-provenance', 'misinterpreted-state'],
    ['RF-04-temporal-misordering', 'misinterpreted-state'],
    ['RF-12-repetition-failure-to-resume', 'reasoning-loop'],
    ['RF-08-evidential-insufficiency', 'missing-capability'],
    ['RF-11-excessive-speculation', 'missing-capability'],
    ['RF-16-arithmetic-error', 'incorrect-tool-usage'],
    ['RF-14-invalid-inference-pattern', 'incorrect-tool-usage'],
    ['RF-15-internal-contradiction', 'incorrect-tool-usage'],
]);
/** Translate an RF taxonomy class into the matching FailureMode kind. */
function rfToFailureKind(rf) {
    return RF_FAILURE_KINDS.get(rf) ?? 'other';
}
46
/**
 * Collapse a trace into one newline-separated string for substring
 * matching: for each step its `action`, `output` and `error` (in that order,
 * skipping falsy ones), followed by the JSON form of `trace.finalState`.
 * A final state that cannot be serialized is silently left out.
 */
function flattenTrace(trace) {
    const pieces = [];
    for (const step of trace.steps) {
        for (const field of [step.action, step.output, step.error]) {
            if (field) {
                pieces.push(field);
            }
        }
    }
    try {
        const serializedState = JSON.stringify(trace.finalState);
        pieces.push(serializedState);
    }
    catch {
        /* final state is not serializable — omit it */
    }
    return pieces.join('\n');
}
64
/**
 * Evaluator that grades a whole execution trace against a task.
 *
 * Acceptance criteria are checked by case-insensitive substring search over
 * the flattened trace; every tool-phase step is additionally judged by the
 * critic (`opts.gateFn` when provided, otherwise `gateToolResult`).
 */
export class CriticEvaluator {
    opts;
    constructor(opts = {}) {
        this.opts = opts;
    }
    /**
     * Grade `trace` against `task` and resolve to an evaluation report.
     *
     *  - `pass`  : every criterion matched and no tool step was rejected.
     *  - `score` : criteriaPassRate * 0.7 + toolAcceptRate * 0.3 plus an
     *              efficiency bonus of at most 0.05, clamped to [0, 1].
     *  - `failureModes` : one entry per rejected tool step.
     *
     * An empty criteria list or a trace without tool steps counts as a rate
     * of 1 for that component.
     */
    async evaluate(trace, task) {
        const haystack = flattenTrace(trace).toLowerCase();
        // --- acceptance criteria -------------------------------------
        const criteriaResults = [];
        let criteriaMet = 0;
        for (const criterion of task.acceptance) {
            const passed = haystack.includes(criterion.toLowerCase());
            if (passed) {
                criteriaMet += 1;
            }
            criteriaResults.push({
                criterion,
                passed,
                evidence: passed ? 'substring match in trace' : 'no match in flattened trace',
            });
        }
        const criteriaTotal = criteriaResults.length;
        const criteriaPassRate = criteriaTotal === 0 ? 1 : criteriaMet / criteriaTotal;
        // --- critic verdict per tool step ----------------------------
        // Steps are judged one after another. The critic receives the
        // step's output, falling back to its error and then to ''.
        const toolSteps = trace.steps.filter((s) => s.phase === 'tool');
        const failureModes = [];
        let toolAccepts = 0;
        for (const step of toolSteps) {
            const payload = step.output ?? step.error ?? '';
            let verdict;
            if (this.opts.gateFn) {
                verdict = await this.opts.gateFn(step.action, {}, payload);
            }
            else {
                verdict = await gateToolResult(step.action, {}, payload, this.opts.gate);
            }
            if (verdict.accept) {
                toolAccepts += 1;
                continue;
            }
            if (verdict.failure_class) {
                failureModes.push({
                    kind: rfToFailureKind(verdict.failure_class),
                    detail: `${verdict.failure_class}: ${verdict.reason || 'critic rejected'}`,
                });
                continue;
            }
            failureModes.push({
                kind: 'other',
                detail: verdict.reason || 'critic rejected without failure class',
            });
        }
        const toolAcceptRate = toolSteps.length === 0 ? 1 : toolAccepts / toolSteps.length;
        // --- score ---------------------------------------------------
        const baseScore = criteriaPassRate * 0.7 + toolAcceptRate * 0.3;
        // Efficiency bonus: the full 0.05 when no time was recorded,
        // shrinking towards 0 as total LLM + tool time grows.
        const totalMs = trace.llmTimeMs + trace.toolTimeMs;
        let efficiency = 0.05;
        if (totalMs > 0) {
            efficiency = Math.min(0.05, 1000 / (totalMs + 1000) * 0.05);
        }
        const score = Math.max(0, Math.min(1, baseScore + efficiency));
        const pass = criteriaMet === criteriaTotal && failureModes.length === 0;
        let notes = 'all criteria passed; critic accepted every tool step';
        if (!pass) {
            notes = `pass=${pass} criteria=${criteriaPassRate.toFixed(2)} tools=${toolAcceptRate.toFixed(2)}`;
        }
        return {
            taskId: task.id,
            harnessId: trace.harnessId,
            pass,
            score,
            criteriaResults,
            failureModes,
            notes,
        };
    }
}
128
/** Factory wrapper: builds a `CriticEvaluator` from the given options (default `{}`). */
export function createCriticEvaluator(opts = {}) {
    const evaluator = new CriticEvaluator(opts);
    return evaluator;
}
131
+ //# sourceMappingURL=critic-evaluator.js.map
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Harness Evolution Loop — inner loop (Algorithm 1 from Sylph).
3
+ *
4
+ * for i in 1..maxIterations:
5
+ * trace = Worker.execute(task, harness)
6
+ * report = Evaluator.evaluate(trace, task)
7
+ * record = { iteration, harness, trace, report, verdict }
8
+ * history.push(record)
9
+ * if report.score > bestScore: best = harness
10
+ * if earlyStopScore reached on consecutive iterations: stop
11
+ * if regression > revertThreshold: revert harness to best
12
+ * harness = EvolutionAgent.evolve(history, best)
13
+ *
14
+ * Pure orchestration — Worker / Evaluator / EvolutionAgent are injected,
15
+ * which makes the whole loop deterministic and testable with stub
16
+ * implementations. No LLM calls happen here directly.
17
+ */
18
+ import type { EvolutionProtocol, EvolutionRecord, EvolutionResult, Task } from './types.js';
19
+ /** Optional hooks the caller can plug in to observe / persist each step. Every field may be omitted. */
20
+ export interface RunOptions {
21
+ /** Called after each record is appended to in-memory history. May be async (can return a Promise). */
22
+ onRecord?: (record: EvolutionRecord) => void | Promise<void>;
23
+ /** When set, `appendTrace` is invoked under this state dir for every record. NOTE(review): `appendTrace` is not visible in this chunk — presumably from ./persistence; confirm. */
24
+ persistDir?: string;
25
+ /** When false, skip filesystem persistence even if `persistDir` is set. Default true. */
26
+ persist?: boolean;
27
+ /**
28
+ * How many consecutive iterations must hit `earlyStopScore` to stop.
29
+ * Defaults to 1 — first hit ends the loop.
   * NOTE(review): `earlyStopScore` is defined outside this chunk — presumably on the protocol; confirm.
30
+ */
31
+ earlyStopStreak?: number;
32
+ }
33
+ /**
34
+ * Run the inner Harness Evolution Loop against a single task.
35
+ *
36
+ * Always resolves to an `EvolutionResult` — never throws on Worker / Evaluator
37
+ * exceptions; instead, records a failure step and continues. (The
38
+ * Evaluator is supposed to score failures, not the loop itself.)
   * `options` is optional; see `RunOptions` for the observation / persistence hooks.
39
+ */
40
+ export declare function runEvolutionLoop(protocol: EvolutionProtocol, task: Task, options?: RunOptions): Promise<EvolutionResult>;
41
+ //# sourceMappingURL=evolution-loop.d.ts.map