agent-composer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +148 -0
  2. package/composer.config.schema.json +79 -0
  3. package/dist/cli/init.d.ts +20 -0
  4. package/dist/cli/init.js +122 -0
  5. package/dist/cli/init.js.map +1 -0
  6. package/dist/config/env.d.ts +13 -0
  7. package/dist/config/env.js +65 -0
  8. package/dist/config/env.js.map +1 -0
  9. package/dist/config/loader.d.ts +3 -0
  10. package/dist/config/loader.js +34 -0
  11. package/dist/config/loader.js.map +1 -0
  12. package/dist/config/schema.d.ts +93 -0
  13. package/dist/config/schema.js +44 -0
  14. package/dist/config/schema.js.map +1 -0
  15. package/dist/evolve/budget.d.ts +23 -0
  16. package/dist/evolve/budget.js +55 -0
  17. package/dist/evolve/budget.js.map +1 -0
  18. package/dist/evolve/lengthPenalty.d.ts +3 -0
  19. package/dist/evolve/lengthPenalty.js +30 -0
  20. package/dist/evolve/lengthPenalty.js.map +1 -0
  21. package/dist/evolve/operators.d.ts +24 -0
  22. package/dist/evolve/operators.js +110 -0
  23. package/dist/evolve/operators.js.map +1 -0
  24. package/dist/evolve/pareto.d.ts +24 -0
  25. package/dist/evolve/pareto.js +153 -0
  26. package/dist/evolve/pareto.js.map +1 -0
  27. package/dist/evolve/plateau.d.ts +18 -0
  28. package/dist/evolve/plateau.js +45 -0
  29. package/dist/evolve/plateau.js.map +1 -0
  30. package/dist/evolve/postflight.d.ts +12 -0
  31. package/dist/evolve/postflight.js +61 -0
  32. package/dist/evolve/postflight.js.map +1 -0
  33. package/dist/evolve/preflight.d.ts +13 -0
  34. package/dist/evolve/preflight.js +39 -0
  35. package/dist/evolve/preflight.js.map +1 -0
  36. package/dist/evolve/reflection.d.ts +12 -0
  37. package/dist/evolve/reflection.js +41 -0
  38. package/dist/evolve/reflection.js.map +1 -0
  39. package/dist/evolve/runner.d.ts +62 -0
  40. package/dist/evolve/runner.js +202 -0
  41. package/dist/evolve/runner.js.map +1 -0
  42. package/dist/evolve/s2-deny.d.ts +26 -0
  43. package/dist/evolve/s2-deny.js +75 -0
  44. package/dist/evolve/s2-deny.js.map +1 -0
  45. package/dist/index.d.ts +2 -0
  46. package/dist/index.js +36 -0
  47. package/dist/index.js.map +1 -0
  48. package/dist/providers/AnthropicCompatibleProvider.d.ts +48 -0
  49. package/dist/providers/AnthropicCompatibleProvider.js +50 -0
  50. package/dist/providers/AnthropicCompatibleProvider.js.map +1 -0
  51. package/dist/providers/CLIProvider.d.ts +30 -0
  52. package/dist/providers/CLIProvider.js +106 -0
  53. package/dist/providers/CLIProvider.js.map +1 -0
  54. package/dist/providers/IProvider.d.ts +17 -0
  55. package/dist/providers/IProvider.js +4 -0
  56. package/dist/providers/IProvider.js.map +1 -0
  57. package/dist/providers/MockProvider.d.ts +28 -0
  58. package/dist/providers/MockProvider.js +66 -0
  59. package/dist/providers/MockProvider.js.map +1 -0
  60. package/dist/registry.d.ts +21 -0
  61. package/dist/registry.js +79 -0
  62. package/dist/registry.js.map +1 -0
  63. package/dist/server.d.ts +6 -0
  64. package/dist/server.js +85 -0
  65. package/dist/server.js.map +1 -0
  66. package/dist/util/slug.d.ts +1 -0
  67. package/dist/util/slug.js +7 -0
  68. package/dist/util/slug.js.map +1 -0
  69. package/package.json +56 -0
@@ -0,0 +1,23 @@
1
+ export interface EvolveBudgetConfig { // Caps for one evolve session; the EvolveBudgetGuard constructor rejects values <= 0.
2
+ maxCalls: number; // Maximum number of recorded calls; spent() throws once the count exceeds it.
3
+ maxUsd: number; // Maximum cumulative USD cost; spent() throws once the running total exceeds it.
4
+ }
5
+ export declare const DEFAULT_EVOLVE_BUDGET: EvolveBudgetConfig; // Defaults defined in budget.js: 100 calls, 4.0 USD.
6
+ export declare class EvolveBudgetExceededError extends Error { // Thrown by EvolveBudgetGuard.spent() when a cap is exceeded.
7
+ constructor(reason: string); // Message is "Evolve budget exceeded: <reason>"; name is "EvolveBudgetExceededError".
8
+ }
9
+ export interface EvolveBudgetStats { // Snapshot shape shared by the `stats` and `remaining` getters.
10
+ calls: number; // Call count (used or remaining, depending on the getter).
11
+ usd: number; // USD amount (spent or remaining, depending on the getter).
12
+ }
13
+ export declare class EvolveBudgetGuard { // Accumulates calls and USD spend for one evolve session and enforces both caps.
14
+ private readonly config;
15
+ private _calls; // Number of spent() invocations recorded so far (starts at 0).
16
+ private _usd; // Running USD total (starts at 0).
17
+ constructor(config: EvolveBudgetConfig); // Throws Error when maxCalls or maxUsd is <= 0.
18
+ /** Record a completed call and its measured USD cost. Throws on cap. */
19
+ spent(usdCost: number): void; // Throws Error for a negative cost, EvolveBudgetExceededError when a cap is exceeded.
20
+ get stats(): EvolveBudgetStats; // Usage so far: { calls, usd }.
21
+ get remaining(): EvolveBudgetStats; // Cap minus usage for each dimension; goes negative once a cap has been exceeded.
22
+ exhausted(): boolean; // True when either usage has reached (>=) its cap.
23
+ }
@@ -0,0 +1,55 @@
1
+ // Wave 3 Step 2 — per-/evolve-session BudgetGuard.
2
+ //
3
+ // Distinct from the Wave 2 per-eval BudgetGuard (tests/eval/budget.ts):
4
+ // that one caps a single eval run; this one caps the whole evolve loop
5
+ // (~100 eval calls + ~30 reflection calls ≈ $3.50, capped at $4.00).
6
+ //
7
+ // Reflection LM is GLM 5.1: $0.98 in / $3.08 out per MTok.
8
+ export const DEFAULT_EVOLVE_BUDGET = { // Default caps for a whole evolve session (see EvolveBudgetGuard).
9
+ maxCalls: 100, // NOTE(review): the header comment estimates ~100 eval + ~30 reflection calls — confirm which of those the runner counts against this cap.
10
+ maxUsd: 4.0, // USD ceiling; the header comment estimates roughly $3.50 of spend per session.
11
+ };
12
/** Error raised when an evolve session goes over its call cap or its USD cap. */
export class EvolveBudgetExceededError extends Error {
    /**
     * @param reason Human-readable description of the cap that was hit;
     *               appended to the fixed "Evolve budget exceeded: " prefix.
     */
    constructor(reason) {
        super(`Evolve budget exceeded: ${reason}`);
        this.name = "EvolveBudgetExceededError";
    }
}
/**
 * Tracks the number of calls and the cumulative USD cost of one evolve
 * session and throws as soon as either configured cap is exceeded.
 */
export class EvolveBudgetGuard {
    config;
    _calls = 0;
    _usd = 0;
    /**
     * @param config Caps to enforce (`maxCalls`, `maxUsd`).
     * @throws Error when either cap is not a positive number (NaN included).
     */
    constructor(config) {
        this.config = config;
        // `!(x > 0)` instead of `x <= 0`: NaN fails every comparison, so the
        // original check let a NaN cap through and the cap then never tripped.
        if (!(config.maxCalls > 0))
            throw new Error("EvolveBudgetGuard: maxCalls must be positive");
        if (!(config.maxUsd > 0))
            throw new Error("EvolveBudgetGuard: maxUsd must be positive");
    }
    /**
     * Record a completed call and its measured USD cost. Throws on cap.
     *
     * The call counter is incremented before the call cap is checked, and the
     * cost is added before the USD cap is checked, so `stats` includes the
     * call that tripped the cap.
     *
     * @param usdCost Measured cost of the call in USD; must be a number >= 0.
     * @throws Error when `usdCost` is negative, NaN or not a number.
     * @throws EvolveBudgetExceededError when the call or USD cap is exceeded.
     */
    spent(usdCost) {
        // A NaN (or undefined / string) cost would poison the running total:
        // `NaN > maxUsd` is always false, silently disabling the USD cap.
        if (typeof usdCost !== "number" || !(usdCost >= 0))
            throw new Error("EvolveBudgetGuard.spent: usdCost must be >= 0");
        this._calls += 1;
        if (this._calls > this.config.maxCalls) {
            throw new EvolveBudgetExceededError(`call cap ${this.config.maxCalls} (this would be call #${this._calls})`);
        }
        this._usd += usdCost;
        if (this._usd > this.config.maxUsd) {
            throw new EvolveBudgetExceededError(`USD cap $${this.config.maxUsd.toFixed(2)} (now $${this._usd.toFixed(4)})`);
        }
    }
    /** Usage recorded so far. */
    get stats() {
        return { calls: this._calls, usd: this._usd };
    }
    /** Cap minus usage for each dimension (negative once a cap has been exceeded). */
    get remaining() {
        return {
            calls: this.config.maxCalls - this._calls,
            usd: this.config.maxUsd - this._usd,
        };
    }
    /** True when either the call count or the USD total has reached its cap. */
    exhausted() {
        return this._calls >= this.config.maxCalls || this._usd >= this.config.maxUsd;
    }
}
55
+ //# sourceMappingURL=budget.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"budget.js","sourceRoot":"","sources":["../../src/evolve/budget.ts"],"names":[],"mappings":"AAAA,mDAAmD;AACnD,EAAE;AACF,wEAAwE;AACxE,uEAAuE;AACvE,qEAAqE;AACrE,EAAE;AACF,2DAA2D;AAO3D,MAAM,CAAC,MAAM,qBAAqB,GAAuB;IACvD,QAAQ,EAAE,GAAG;IACb,MAAM,EAAE,GAAG;CACZ,CAAC;AAEF,MAAM,OAAO,yBAA0B,SAAQ,KAAK;IAClD,YAAY,MAAc;QACxB,KAAK,CAAC,2BAA2B,MAAM,EAAE,CAAC,CAAC;QAC3C,IAAI,CAAC,IAAI,GAAG,2BAA2B,CAAC;IAC1C,CAAC;CACF;AAOD,MAAM,OAAO,iBAAiB;IAIC;IAHrB,MAAM,GAAG,CAAC,CAAC;IACX,IAAI,GAAG,CAAC,CAAC;IAEjB,YAA6B,MAA0B;QAA1B,WAAM,GAAN,MAAM,CAAoB;QACrD,IAAI,MAAM,CAAC,QAAQ,IAAI,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAC1F,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;IACxF,CAAC;IAED,wEAAwE;IACxE,KAAK,CAAC,OAAe;QACnB,IAAI,OAAO,GAAG,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAClF,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;QACjB,IAAI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;YACvC,MAAM,IAAI,yBAAyB,CACjC,YAAY,IAAI,CAAC,MAAM,CAAC,QAAQ,yBAAyB,IAAI,CAAC,MAAM,GAAG,CACxE,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,IAAI,IAAI,OAAO,CAAC;QACrB,IAAI,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACnC,MAAM,IAAI,yBAAyB,CACjC,YAAY,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAC3E,CAAC;QACJ,CAAC;IACH,CAAC;IAED,IAAI,KAAK;QACP,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;IAChD,CAAC;IAED,IAAI,SAAS;QACX,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM;YACzC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI;SACpC,CAAC;IACJ,CAAC;IAED,SAAS;QACP,OAAO,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAChF,CAAC;CACF"}
@@ -0,0 +1,3 @@
1
+ export declare const DEFAULT_LAMBDA = 0.001; // Default penalty weight applied per estimated token.
2
+ export declare function estimateTokens(text: string): number; // Counts non-empty pieces after splitting on whitespace runs and "-"/"*" bullets; 0 for empty text.
3
+ export declare function lengthPenalty(skill: string, lambda?: number): number; // Returns -lambda * estimateTokens(skill) (0 for empty skill); throws Error when lambda < 0.
@@ -0,0 +1,30 @@
1
+ // Wave 3 Step 2 — length penalty for bloat-drift control.
2
+ //
3
+ // score_adjusted = score_raw − λ · tokens(skill)
4
+ //
5
+ // Replaces the `remove_bloat` operator (excluded — 0% keep-rate per
6
+ // Karpathy data). λ default 0.001 means a 1000-token skill loses 1
7
+ // score point versus a 0-token skill — large enough to overcome
8
+ // noise, small enough to keep useful additions.
9
+ //
10
+ // Token estimator is intentionally crude (whitespace + bullets);
11
+ // the metric only needs to be monotonic in skill length to gate
12
+ // bloat — exact tiktoken counts add a dep for no gain here.
13
/** Default penalty weight: score points deducted per estimated token. */
export const DEFAULT_LAMBDA = 0.001;
/**
 * Crude token estimate: the number of non-empty pieces left after splitting
 * on whitespace runs and on "-" / "*" bullet markers followed by whitespace.
 *
 * @param text Text to measure.
 * @returns Estimated token count; 0 for empty/missing text.
 */
export function estimateTokens(text) {
    if (!text)
        return 0;
    // Pieces produced by this split can never contain whitespace (every
    // whitespace run is itself a separator), so no trimming is needed.
    return text
        .split(/\s+|[-*]\s+/)
        .filter((piece) => piece.length > 0).length;
}
/**
 * Length penalty added to a raw score: -lambda * estimateTokens(skill).
 *
 * @param skill  Skill text being scored.
 * @param lambda Penalty weight per estimated token; must be >= 0.
 * @returns A value <= 0; exactly 0 (never -0) when nothing is deducted.
 * @throws Error when lambda is negative or NaN.
 */
export function lengthPenalty(skill, lambda = DEFAULT_LAMBDA) {
    // `!(lambda >= 0)` also rejects NaN, which `lambda < 0` let through and
    // which would turn every adjusted score into NaN.
    if (!(lambda >= 0)) {
        throw new Error(`lengthPenalty: lambda must be >= 0, got ${lambda}`);
    }
    if (!skill)
        return 0;
    const deduction = lambda * estimateTokens(skill);
    // Avoid returning -0 so a zero penalty is the same value on every path.
    return deduction === 0 ? 0 : -deduction;
}
30
+ //# sourceMappingURL=lengthPenalty.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"lengthPenalty.js","sourceRoot":"","sources":["../../src/evolve/lengthPenalty.ts"],"names":[],"mappings":"AAAA,0DAA0D;AAC1D,EAAE;AACF,iDAAiD;AACjD,EAAE;AACF,oEAAoE;AACpE,mEAAmE;AACnE,gEAAgE;AAChE,gDAAgD;AAChD,EAAE;AACF,iEAAiE;AACjE,gEAAgE;AAChE,4DAA4D;AAE5D,MAAM,CAAC,MAAM,cAAc,GAAG,KAAK,CAAC;AAEpC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IACpB,OAAO,IAAI;SACR,KAAK,CAAC,aAAa,CAAC;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAa,EAAE,SAAiB,cAAc;IAC1E,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,2CAA2C,MAAM,EAAE,CAAC,CAAC;IACvE,CAAC;IACD,IAAI,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC;IACrB,OAAO,CAAC,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;AACzC,CAAC"}
@@ -0,0 +1,24 @@
1
+ export interface OperatorContext { // Optional inputs consumed by the mutation operators; each operator returns the skill unchanged when its input is missing.
2
+ counterexample?: string; // Text added as a bullet under "## Counterexamples" by addCounterexample.
3
+ constraint?: string; // Text added as a bullet under "## Constraints" by addConstraint.
4
+ negativeExample?: string; // Text appended as a "DO NOT <text>" line by addNegativeExample.
5
+ /** GEPA reflection LM callback. Returns rewritten skill text. */
6
+ reflect?: (text: string, ctx: OperatorContext) => Promise<string>;
7
+ /** Preflight ecosystem snapshot, surfaced into reflect prompt by runner. */
8
+ currentEcosystem?: string;
9
+ }
10
+ export interface OperatorMeta { // One entry of the OPERATORS table.
11
+ name: string; // snake_case operator name, e.g. "tighten_language".
12
+ /** Empirical keep-rate from Karpathy autoresearch dataset. */
13
+ keepRate: number;
14
+ apply: (skill: string, ctx: OperatorContext) => Promise<string>; // Async wrapper around the operator function.
15
+ }
16
+ export declare function addCounterexample(skill: string, ctx: OperatorContext): string; // Inserts "- <counterexample>" under the "## Counterexamples" header, creating the section if absent.
17
+ export declare function tightenLanguage(skill: string, _ctx: OperatorContext): string; // Strips hedging words and soft "you should/could/might/can" lead-ins; the context argument is unused.
18
+ export declare function addConstraint(skill: string, ctx: OperatorContext): string; // Inserts "- <constraint>" under "## Constraints"; no-op when that header and line are both already present.
19
+ export declare function addNegativeExample(skill: string, ctx: OperatorContext): string; // Appends "DO NOT <negativeExample>" unless that line is already present.
20
+ export declare function reflectAndRewrite(skill: string, ctx: OperatorContext): Promise<string>; // Delegates to ctx.reflect; resolves to the unchanged skill when no callback is set.
21
+ export declare const OPERATORS: ReadonlyArray<OperatorMeta>; // The five operators, in the order used by pickOperator.
22
+ export declare function pickOperator(index: number): OperatorMeta; // Round-robin lookup (index modulo OPERATORS.length); throws on negative or non-integer index.
23
+ export declare const OPERATOR_BY_CLI_NAME: Readonly<Record<string, OperatorMeta>>; // Keyed by the camelCase form of each operator name, e.g. "tightenLanguage".
24
+ export declare const VALID_OPERATOR_CLI_NAMES: ReadonlyArray<string>; // Object.keys(OPERATOR_BY_CLI_NAME).
@@ -0,0 +1,110 @@
1
+ // Wave 3 Step 2 — mutation operators for self-evolve loop.
2
+ //
3
+ // 5 operators kept (Karpathy autoresearch empirical keep-rates):
4
+ // add_counterexample 100%
5
+ // tighten_language 67%
6
+ // add_constraint 50%
7
+ // add_negative_example 50%
8
+ // reflect_and_rewrite GEPA core
9
+ //
10
+ // Skipped: restructure + remove_bloat (both 0% keep-rate). Bloat drift
11
+ // is countered by lengthPenalty.ts, not by a remove_bloat operator.
12
+ const HEDGES = /\b(probably|maybe|perhaps|might|consider(?:ing)?|kind of|sort of)\b\s*/gi; // Hedging words/phrases plus trailing whitespace, removed by tightenLanguage. Global flag: lastIndex is stateful across .test() calls.
13
+ const SOFT_SUBJECT = /\b(you should|you could|you might|you can)\s+/gi; // Soft second-person lead-ins removed by tightenLanguage. Global flag: lastIndex is stateful across .test() calls.
14
/**
 * Add a counterexample bullet to the skill text.
 *
 * When the text already contains a "## Counterexamples" header, the bullet
 * is inserted directly under the first occurrence of that header; otherwise
 * a new section is appended at the end of the text.
 *
 * @param skill Current skill text.
 * @param ctx   Operator context; only `ctx.counterexample` is read.
 * @returns The updated skill, or `skill` unchanged when the counterexample
 *          is missing or blank.
 */
export function addCounterexample(skill, ctx) {
    const ex = ctx.counterexample?.trim();
    if (!ex)
        return skill;
    const header = "## Counterexamples";
    const block = `${header}\n\n- ${ex}\n`;
    const at = skill.indexOf(header);
    if (at === -1) {
        const sep = skill.endsWith("\n") ? "\n" : "\n\n";
        return `${skill}${sep}${block}`;
    }
    // Splice with slices rather than String.replace: the previous version
    // (a) passed `ex` as a replacement string, so "$&", "$'" etc. inside the
    // counterexample were expanded, and (b) tried to collapse the resulting
    // extra blank line with the regex literal /\n\n- ${ex}\n\n/, which does
    // not interpolate `ex` and therefore never matched.
    const before = skill.slice(0, at);
    const after = skill.slice(at + header.length);
    // The inserted block already ends with a newline; drop the one that
    // used to follow the header so no extra blank line is introduced.
    const tail = after.startsWith("\n") ? after.slice(1) : after;
    return `${before}${block}${tail}`;
}
25
/**
 * Remove hedging words and soft second-person lead-ins from the skill text.
 *
 * When neither pattern occurs the input is returned untouched. Otherwise the
 * matches are removed, runs of spaces/tabs are collapsed to one space, a
 * space before `.`, `,`, `;` or `:` is dropped, and the result is trimmed.
 *
 * @param skill Current skill text.
 * @param _ctx  Unused; present so all operators share one signature.
 * @returns The tightened text, or `skill` itself when nothing matched.
 */
export function tightenLanguage(skill, _ctx) {
    // Both module-level patterns are global, so `.test()` advances lastIndex;
    // rewind before probing and again afterwards so no state leaks out.
    const rewind = () => {
        HEDGES.lastIndex = 0;
        SOFT_SUBJECT.lastIndex = 0;
    };
    rewind();
    const needsTightening = HEDGES.test(skill) || SOFT_SUBJECT.test(skill);
    rewind();
    if (!needsTightening) {
        return skill;
    }
    const withoutLeadIns = skill.replace(SOFT_SUBJECT, "");
    const withoutHedges = withoutLeadIns.replace(HEDGES, "");
    const singleSpaced = withoutHedges.replace(/[ \t]+/g, " ");
    const punctuationFixed = singleSpaced.replace(/ ([.,;:])/g, "$1");
    return punctuationFixed.trim();
}
42
/**
 * Add a constraint bullet to the skill text.
 *
 * When a "## Constraints" header exists, the bullet is inserted directly
 * under its first occurrence (unless the exact bullet text is already
 * present anywhere in the skill); otherwise a new section is appended.
 *
 * @param skill Current skill text.
 * @param ctx   Operator context; only `ctx.constraint` is read.
 * @returns The updated skill, or `skill` unchanged when the constraint is
 *          missing/blank or already present under an existing header.
 */
export function addConstraint(skill, ctx) {
    const c = ctx.constraint?.trim();
    if (!c)
        return skill;
    const header = "## Constraints";
    const line = `- ${c}`;
    if (skill.includes(header)) {
        if (skill.includes(line))
            return skill;
        // Use a replacer function: a replacement *string* would expand
        // "$$", "$&", "$`" and "$'" occurring inside the constraint text.
        return skill.replace(header, () => `${header}\n\n${line}`);
    }
    const sep = skill.endsWith("\n") ? "\n" : "\n\n";
    return `${skill}${sep}${header}\n\n${line}\n`;
}
56
/**
 * Append a "DO NOT ..." directive built from the negative example.
 *
 * @param skill Current skill text.
 * @param ctx   Operator context; only `ctx.negativeExample` is read.
 * @returns `skill` followed by a separator and the directive line, or
 *          `skill` unchanged when the example is missing/blank or the
 *          directive text already occurs in the skill.
 */
export function addNegativeExample(skill, ctx) {
    const example = ctx.negativeExample?.trim();
    const directive = example ? `DO NOT ${example}` : "";
    if (directive === "" || skill.includes(directive)) {
        return skill;
    }
    // One newline is enough when the skill already ends with one; otherwise
    // leave a blank line between the existing text and the directive.
    const gap = skill.endsWith("\n") ? "\n" : "\n\n";
    return [skill, gap, directive, "\n"].join("");
}
66
+ export async function reflectAndRewrite(skill, ctx) {
67
+ if (!ctx.reflect)
68
+ return skill;
69
+ return ctx.reflect(skill, ctx);
70
+ }
71
/**
 * Table of the mutation operators, in round-robin order. Each entry carries
 * the operator's snake_case name, its pre-seeded keep-rate and an async
 * `apply(skill, ctx)` function.
 */
export const OPERATORS = (() => {
    // Wrap a synchronous operator so every entry exposes an async `apply`.
    const syncOperator = (name, keepRate, operator) => ({
        name,
        keepRate,
        apply: async (skill, ctx) => operator(skill, ctx),
    });
    return [
        syncOperator("add_counterexample", 1.0, addCounterexample),
        syncOperator("tighten_language", 0.67, tightenLanguage),
        syncOperator("add_constraint", 0.5, addConstraint),
        syncOperator("add_negative_example", 0.5, addNegativeExample),
        {
            name: "reflect_and_rewrite",
            keepRate: 0.0, // GEPA core; keep-rate measured live, not pre-seeded.
            apply: reflectAndRewrite,
        },
    ];
})();
98
/**
 * Select an operator round-robin style.
 *
 * @param index Non-negative integer; wrapped modulo the number of operators.
 * @returns The OPERATORS entry at `index % OPERATORS.length`.
 * @throws Error when `index` is negative or not an integer.
 */
export function pickOperator(index) {
    const usable = Number.isInteger(index) && index >= 0;
    if (!usable) {
        throw new Error(`pickOperator: index must be non-negative integer, got ${index}`);
    }
    const slot = index % OPERATORS.length;
    return OPERATORS[slot];
}
104
// CLI-facing lookup table. Each operator is keyed by the camelCase form of
// its snake_case name (e.g. "tighten_language" → "tightenLanguage").
export const OPERATOR_BY_CLI_NAME = (() => {
    const toCamelCase = (snake) => snake.replace(/_([a-z])/g, (_match, letter) => letter.toUpperCase());
    const entries = [];
    for (const operator of OPERATORS) {
        entries.push([toCamelCase(operator.name), operator]);
    }
    return Object.fromEntries(entries);
})();
// The camelCase names accepted on the CLI, in OPERATORS order.
export const VALID_OPERATOR_CLI_NAMES = Object.keys(OPERATOR_BY_CLI_NAME);
110
+ //# sourceMappingURL=operators.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"operators.js","sourceRoot":"","sources":["../../src/evolve/operators.ts"],"names":[],"mappings":"AAAA,2DAA2D;AAC3D,EAAE;AACF,iEAAiE;AACjE,8BAA8B;AAC9B,8BAA8B;AAC9B,8BAA8B;AAC9B,8BAA8B;AAC9B,oCAAoC;AACpC,EAAE;AACF,uEAAuE;AACvE,oEAAoE;AAmBpE,MAAM,MAAM,GAAG,0EAA0E,CAAC;AAC1F,MAAM,YAAY,GAAG,iDAAiD,CAAC;AAEvE,MAAM,UAAU,iBAAiB,CAAC,KAAa,EAAE,GAAoB;IACnE,MAAM,EAAE,GAAG,GAAG,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC;IACtC,IAAI,CAAC,EAAE;QAAE,OAAO,KAAK,CAAC;IACtB,MAAM,MAAM,GAAG,oBAAoB,CAAC;IACpC,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,MAAM,SAAS,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,iBAAiB,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;IACrG,CAAC;IACD,MAAM,GAAG,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;IACjD,OAAO,GAAG,KAAK,GAAG,GAAG,GAAG,MAAM,SAAS,EAAE,IAAI,CAAC;AAChD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,KAAa,EAAE,IAAqB;IAClE,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,YAAY,CAAC,SAAS,GAAG,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACrD,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;QACrB,YAAY,CAAC,SAAS,GAAG,CAAC,CAAC;QAC3B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,YAAY,CAAC,SAAS,GAAG,CAAC,CAAC;IAC3B,OAAO,KAAK;SACT,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;SACzB,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;SACnB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC;SAC3B,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,KAAa,EAAE,GAAoB;IAC/D,MAAM,CAAC,GAAG,GAAG,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC;IACjC,IAAI,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IACrB,MAAM,MAAM,GAAG,gBAAgB,CAAC;IAChC,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;IACtB,IAAI,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;QACvC,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,MAAM,OAAO,IAAI,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,GAAG,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;IACjD,OAAO,GAAG,KAAK,GAAG,GAAG,GAAG,MAAM,OAAO,IAAI,IAAI,CAAC;AAChD,CAAC;
AAED,MAAM,UAAU,kBAAkB,CAAC,KAAa,EAAE,GAAoB;IACpE,MAAM,EAAE,GAAG,GAAG,CAAC,eAAe,EAAE,IAAI,EAAE,CAAC;IACvC,IAAI,CAAC,EAAE;QAAE,OAAO,KAAK,CAAC;IACtB,MAAM,IAAI,GAAG,UAAU,EAAE,EAAE,CAAC;IAC5B,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IACvC,MAAM,GAAG,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;IACjD,OAAO,GAAG,KAAK,GAAG,GAAG,GAAG,IAAI,IAAI,CAAC;AACnC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,KAAa,EACb,GAAoB;IAEpB,IAAI,CAAC,GAAG,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAC/B,OAAO,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,MAAM,SAAS,GAAgC;IACpD;QACE,IAAI,EAAE,oBAAoB;QAC1B,QAAQ,EAAE,GAAG;QACb,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC;KAC/C;IACD;QACE,IAAI,EAAE,kBAAkB;QACxB,QAAQ,EAAE,IAAI;QACd,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,EAAE,CAAC,CAAC;KAC7C;IACD;QACE,IAAI,EAAE,gBAAgB;QACtB,QAAQ,EAAE,GAAG;QACb,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,CAAC,CAAC;KAC3C;IACD;QACE,IAAI,EAAE,sBAAsB;QAC5B,QAAQ,EAAE,GAAG;QACb,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC;KAChD;IACD;QACE,IAAI,EAAE,qBAAqB;QAC3B,QAAQ,EAAE,GAAG,EAAE,sDAAsD;QACrE,KAAK,EAAE,iBAAiB;KACzB;CACF,CAAC;AAEF,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,yDAAyD,KAAK,EAAE,CAAC,CAAC;IACpF,CAAC;IACD,OAAO,SAAS,CAAC,KAAK,GAAG,SAAS,CAAC,MAAM,CAAE,CAAC;AAC9C,CAAC;AAED,uGAAuG;AACvG,MAAM,CAAC,MAAM,oBAAoB,GAA2C,MAAM,CAAC,WAAW,CAC5F,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE;IACnB,MAAM,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IAC9E,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AACrB,CAAC,CAAC,CACH,CAAC;AAEF,MAAM,CAAC,MAAM,wBAAwB,GAA0B,MAAM,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC"}
@@ -0,0 +1,24 @@
1
+ export interface CI { // Confidence interval bounds as returned by ci95.
2
+ lower: number; // Lower bound (mean minus half-width).
3
+ upper: number; // Upper bound (mean plus half-width).
4
+ }
5
+ export declare function mean(xs: ReadonlyArray<number>): number; // Arithmetic mean; 0 for an empty array.
6
+ export declare function stdDev(xs: ReadonlyArray<number>): number; // Sample standard deviation (n - 1 denominator); 0 for fewer than two values.
7
+ export declare function ci95(xs: ReadonlyArray<number>): CI; // mean ± 1.96 standard errors; {0, 0} when empty, a point interval for a single value.
8
+ export declare function wilcoxonSignedRankP(parent: ReadonlyArray<number>, candidate: ReadonlyArray<number>): number; // Two-sided p-value via normal approximation; throws on unequal lengths; 1 when all paired differences are zero.
9
+ export interface BeatsResult { // Verdict returned by candidateBeatsParent.
10
+ beats: boolean; // True when the candidate should replace the parent.
11
+ reason: string; // Human-readable explanation of which gate decided.
12
+ parentMean: number;
13
+ candidateMean: number;
14
+ wilcoxonP?: number; // Present only when the Wilcoxon gate accepted the candidate.
15
+ parentCI?: CI; // Present on the CI-based acceptance and on the final rejection.
16
+ candidateCI?: CI; // Present on the CI-based acceptance and on the final rejection.
17
+ }
18
+ export interface BeatsOptions {
19
+ parentTokens?: number; // Parent prompt length; used only for the equal-mean Occam tiebreak.
20
+ candidateTokens?: number; // Candidate prompt length; used only for the equal-mean Occam tiebreak.
21
+ /** Wilcoxon p-value threshold. Locked: 0.1 per handoff. */
22
+ pThreshold?: number;
23
+ }
24
+ export declare function candidateBeatsParent(parentScores: ReadonlyArray<number>, candidateScores: ReadonlyArray<number>, opts?: BeatsOptions): BeatsResult; // Gates in order: empty-array refusal, disjoint CIs, Wilcoxon, Occam tiebreak, otherwise reject.
@@ -0,0 +1,153 @@
1
+ // Wave 3 Step 2 — variance handling for candidate selection.
2
+ //
3
+ // Per locked design:
4
+ // - temperature=0, N=3-5 re-runs on candidates that beat parent
5
+ // - accept iff non-overlapping 95% CIs OR paired Wilcoxon p<0.1
6
+ // - Occam tiebreak: on numerical tie, shorter prompt wins.
7
+ //
8
+ // Wilcoxon is the small-sample non-parametric paired test. We compute
9
+ // the two-sided p-value via a normal-approx of the signed-rank
10
+ // statistic with continuity correction — good enough for N=3..20.
11
+ // At very small N (≤5) the normal approx is loose; runner falls back
12
+ // on the CI check, so loose p is OK here.
13
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param xs Values to average.
 * @returns The mean, or 0 when `xs` is empty.
 */
export function mean(xs) {
    const count = xs.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    xs.forEach((value) => {
        total += value;
    });
    return total / count;
}
18
/**
 * Sample standard deviation (n - 1 denominator).
 *
 * @param xs Values to measure.
 * @returns The standard deviation, or 0 when fewer than two values are given.
 */
export function stdDev(xs) {
    const count = xs.length;
    if (count < 2) {
        return 0;
    }
    const center = mean(xs);
    let sumOfSquares = 0;
    xs.forEach((value) => {
        sumOfSquares += (value - center) ** 2;
    });
    return Math.sqrt(sumOfSquares / (count - 1));
}
25
/**
 * Approximate 95% confidence interval for the mean of `xs`.
 *
 * Uses the fixed z-score 1.96 rather than a t-table value, regardless of
 * sample size.
 *
 * @param xs Sample values.
 * @returns `{ lower, upper }`: `{0, 0}` for an empty sample, a zero-width
 *          interval at the value for a single sample, otherwise
 *          mean ± 1.96 standard errors.
 */
export function ci95(xs) {
    switch (xs.length) {
        case 0:
            return { lower: 0, upper: 0 };
        case 1:
            return { lower: xs[0], upper: xs[0] };
    }
    const center = mean(xs);
    const standardError = stdDev(xs) / Math.sqrt(xs.length);
    const halfWidth = 1.96 * standardError;
    return { lower: center - halfWidth, upper: center + halfWidth };
}
36
/**
 * Assign 1-based ranks to the given values, smallest first. Equal values
 * share the average of the ranks they jointly occupy.
 *
 * @param absDiffs Values to rank.
 * @returns Ranks aligned with the input order (`result[i]` ranks `absDiffs[i]`).
 */
function rankWithTies(absDiffs) {
    const ordered = absDiffs.map((value, position) => ({ value, position }));
    ordered.sort((left, right) => left.value - right.value);
    const ranks = new Array(absDiffs.length).fill(0);
    let groupStart = 0;
    while (groupStart < ordered.length) {
        // Extend the group over every following entry with the same value.
        let groupEnd = groupStart;
        while (groupEnd + 1 < ordered.length &&
            ordered[groupEnd + 1].value === ordered[groupStart].value) {
            groupEnd += 1;
        }
        // Mean of the 1-based ranks groupStart+1 .. groupEnd+1.
        const sharedRank = (groupStart + groupEnd) / 2 + 1;
        for (const tied of ordered.slice(groupStart, groupEnd + 1)) {
            ranks[tied.position] = sharedRank;
        }
        groupStart = groupEnd + 1;
    }
    return ranks;
}
52
/**
 * Standard normal cumulative distribution function, computed with the
 * five-term polynomial approximation of Abramowitz & Stegun 26.2.17.
 *
 * @param z Standard score.
 * @returns Approximate probability that a standard normal variable is <= z.
 */
function normalCdf(z) {
    const t = 1 / (1 + 0.2316419 * Math.abs(z));
    const density = 0.39894228 * Math.exp(-(z * z) / 2);
    // Horner evaluation of b1 + t*(b2 + t*(b3 + t*(b4 + t*b5))), starting
    // from the innermost coefficient b5.
    const outerCoefficients = [-1.821255978, 1.781477937, -0.356563782, 0.31938153];
    let polynomial = 1.330274429;
    for (const coefficient of outerCoefficients) {
        polynomial = coefficient + t * polynomial;
    }
    // Upper-tail probability for |z|; mirror it for positive z.
    const tail = density * t * polynomial;
    if (z > 0) {
        return 1 - tail;
    }
    return tail;
}
64
/**
 * Two-sided p-value of the paired Wilcoxon signed-rank test, using the
 * normal approximation of the rank statistic with a 0.5 continuity
 * correction. Zero differences are discarded before ranking.
 *
 * @param parent    Parent scores.
 * @param candidate Candidate scores, paired index-by-index with `parent`.
 * @returns A p-value in [0, 1]; 1 when every paired difference is zero.
 * @throws Error when the two arrays differ in length.
 */
export function wilcoxonSignedRankP(parent, candidate) {
    if (parent.length !== candidate.length) {
        throw new Error("wilcoxonSignedRankP: paired samples must have equal length");
    }
    const diffs = [];
    for (let i = 0; i < parent.length; i++) {
        const d = candidate[i] - parent[i];
        if (d !== 0)
            diffs.push(d);
    }
    if (diffs.length === 0)
        return 1;
    const ranks = rankWithTies(diffs.map(Math.abs));
    const wPlus = ranks.reduce((acc, r, i) => acc + (diffs[i] > 0 ? r : 0), 0);
    const wMinus = ranks.reduce((acc, r, i) => acc + (diffs[i] < 0 ? r : 0), 0);
    // W is the smaller rank sum, so it never exceeds its expectation mu.
    const w = Math.min(wPlus, wMinus);
    const n = diffs.length;
    const mu = (n * (n + 1)) / 4;
    const sigma = Math.sqrt((n * (n + 1) * (2 * n + 1)) / 24);
    if (sigma === 0)
        return 1;
    // Continuity correction moves W half a unit toward mu but must not carry
    // it past mu: when W is within 0.5 of mu (perfectly balanced signs) the
    // uncapped value became positive and produced p < 1 for data that carry
    // no evidence at all. Cap the numerator at 0 so that case yields p = 1.
    const z = Math.min(0, w - mu + 0.5) / sigma;
    // z <= 0 here, so normalCdf(z) is already the lower tail; the polynomial
    // approximation can overshoot 0.5 by rounding, hence the clamp to 1.
    return Math.min(1, 2 * normalCdf(z));
}
88
/**
 * Decide whether a candidate's scores justify replacing its parent.
 *
 * Gates, checked in order:
 *   1. Either score array is empty -> refuse ("inconclusive").
 *   2. The candidate's 95% CI lies entirely above the parent's -> accept.
 *   3. Candidate mean strictly higher, arrays of equal length (>= 2), and
 *      the paired Wilcoxon p-value is below the threshold -> accept.
 *   4. Means exactly equal, both token counts supplied, and the candidate
 *      is shorter -> accept (Occam tiebreak).
 *   5. Otherwise -> reject.
 *
 * @param parentScores    Scores of the current parent.
 * @param candidateScores Scores of the candidate.
 * @param opts            Optional `parentTokens`, `candidateTokens` and
 *                        `pThreshold` (defaults to 0.1).
 * @returns A verdict with `beats`, `reason`, both means, and — depending on
 *          the deciding gate — the Wilcoxon p-value or both CIs.
 */
export function candidateBeatsParent(parentScores, candidateScores, opts = {}) {
    const parentCount = parentScores.length;
    const candidateCount = candidateScores.length;
    const parentMean = parentCount ? mean(parentScores) : 0;
    const candidateMean = candidateCount ? mean(candidateScores) : 0;
    // Every result shares the same leading fields; gate-specific fields follow.
    const verdict = (beats, reason, extras = {}) => ({
        beats,
        reason,
        parentMean,
        candidateMean,
        ...extras,
    });
    // A side with no evaluable scores cannot be compared; refuse instead of
    // feeding empty arrays into the statistics below.
    if (parentCount === 0 || candidateCount === 0) {
        return verdict(false, `inconclusive — empty score array (parent=${parentCount}, candidate=${candidateCount})`);
    }
    const parentCI = ci95(parentScores);
    const candidateCI = ci95(candidateScores);
    const threshold = opts.pThreshold ?? 0.1;
    const intervals = { parentCI, candidateCI };
    // Gate: disjoint intervals with the candidate on the higher side.
    if (candidateCI.lower > parentCI.upper) {
        return verdict(true, "non-overlapping 95% CIs (candidate higher)", intervals);
    }
    // Gate: paired Wilcoxon. Only attempted when the candidate mean is
    // strictly higher and the arrays can be paired (equal length, >= 2);
    // otherwise fall through to the remaining gates.
    const pairable = parentCount === candidateCount && parentCount >= 2;
    if (candidateMean > parentMean && pairable) {
        const wilcoxonP = wilcoxonSignedRankP(parentScores, candidateScores);
        if (wilcoxonP < threshold) {
            return verdict(true, `Wilcoxon p=${wilcoxonP.toFixed(3)} < ${threshold}`, { wilcoxonP });
        }
    }
    // Gate: Occam tiebreak — exactly equal means and a strictly shorter prompt.
    const shorterAtEqualScore = candidateMean === parentMean &&
        opts.parentTokens !== undefined &&
        opts.candidateTokens !== undefined &&
        opts.candidateTokens < opts.parentTokens;
    if (shorterAtEqualScore) {
        return verdict(true, "Occam tiebreak: shorter prompt at equal score");
    }
    return verdict(false, "no significant improvement", intervals);
}
153
+ //# sourceMappingURL=pareto.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pareto.js","sourceRoot":"","sources":["../../src/evolve/pareto.ts"],"names":[],"mappings":"AAAA,6DAA6D;AAC7D,EAAE;AACF,qBAAqB;AACrB,kEAAkE;AAClE,kEAAkE;AAClE,6DAA6D;AAC7D,EAAE;AACF,sEAAsE;AACtE,+DAA+D;AAC/D,kEAAkE;AAClE,qEAAqE;AACrE,0CAA0C;AAO1C,MAAM,UAAU,IAAI,CAAC,EAAyB;IAC5C,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC9B,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,EAAyB;IAC9C,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAC5B,MAAM,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC;IACnB,MAAM,GAAG,GAAG,EAAE,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IACzD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,IAAI,CAAC,EAAyB;IAC5C,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IACnD,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAE,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,CAAE,EAAE,CAAC;IAC7D,MAAM,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC;IACnB,MAAM,EAAE,GAAG,MAAM,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;IAC7C,oDAAoD;IACpD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAE,CAAC;IACvB,OAAO,EAAE,KAAK,EAAE,CAAC,GAAG,IAAI,EAAE,KAAK,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC;AAC9C,CAAC;AAED,SAAS,YAAY,CAAC,QAA+B;IACnD,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAClC,MAAM,KAAK,GAAG,IAAI,KAAK,CAAS,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzD,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC1B,IAAI,CAAC,GAAG,CAAC,CAAC;QACV,OAAO,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC,KAAK,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC;YAAE,CAAC,EAAE,CAAC;QAC1E,MAAM,GAAG,GAA
G,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,yBAAyB;QACtD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;YAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QACxD,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACZ,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,SAAS,CAAC,CAAS;IAC1B,2CAA2C;IAC3C,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5C,MAAM,CAAC,GAAG,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,CAAC,GACL,CAAC;QACD,CAAC;QACD,CAAC,UAAU;YACT,CAAC;gBACC,CAAC,CAAC,WAAW;oBACX,CAAC,GAAG,CAAC,WAAW,GAAG,CAAC,GAAG,CAAC,CAAC,WAAW,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC3B,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,MAA6B,EAC7B,SAAgC;IAEhC,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,CAAC,MAAM,EAAE,CAAC;QACvC,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;IAChF,CAAC;IACD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,CAAC,GAAG,SAAS,CAAC,CAAC,CAAE,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QACrC,IAAI,CAAC,KAAK,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IACD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACjC,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC5E,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7E,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IAClC,MAAM,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAC7B,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GA
AG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;IAC1D,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC1B,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,wBAAwB;IAC1D,OAAO,CAAC,GAAG,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACrC,CAAC;AAmBD,MAAM,UAAU,oBAAoB,CAClC,YAAmC,EACnC,eAAsC,EACtC,OAAqB,EAAE;IAEvB,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACjE,4EAA4E;IAC5E,sEAAsE;IACtE,2DAA2D;IAC3D,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9D,OAAO;YACL,KAAK,EAAE,KAAK;YACZ,MAAM,EAAE,4CAA4C,YAAY,CAAC,MAAM,eAAe,eAAe,CAAC,MAAM,GAAG;YAC/G,UAAU,EAAE,KAAK;YACjB,aAAa,EAAE,KAAK;SACrB,CAAC;IACJ,CAAC;IACD,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,CAAC,CAAC;IAC/B,MAAM,GAAG,GAAG,IAAI,CAAC,eAAe,CAAC,CAAC;IAClC,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,IAAI,GAAG,CAAC;IAEvC,0CAA0C;IAC1C,IAAI,GAAG,CAAC,KAAK,GAAG,GAAG,CAAC,KAAK,EAAE,CAAC;QAC1B,OAAO;YACL,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,4CAA4C;YACpD,UAAU,EAAE,KAAK;YACjB,aAAa,EAAE,KAAK;YACpB,QAAQ,EAAE,GAAG;YACb,WAAW,EAAE,GAAG;SACjB,CAAC;IACJ,CAAC;IACD,uEAAuE;IACvE,2EAA2E;IAC3E,6EAA6E;IAC7E,8EAA8E;IAC9E,IAAI,KAAK,GAAG,KAAK,IAAI,YAAY,CAAC,MAAM,KAAK,eAAe,CAAC,MAAM,IAAI,YAAY,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAChG,MAAM,CAAC,GAAG,mBAAmB,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;QAC7D,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC;YAChB,OAAO;gBACL,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,cAAc,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,OAAO,EAAE;gBACjD,UAAU,EAAE,KAAK;gBACjB,aAAa,EAAE,KAAK;gBACpB,SAAS,EAAE,CAAC;aACb,CAAC;QACJ,CAAC;IACH,CAAC;IACD,+DAA+D;IAC/D,IACE,KAAK,KAAK,KAAK;QACf,IAAI,CAAC,YAAY,KAAK,SAAS;QAC/B,IAAI,CAAC,eAAe,KAAK,SAAS;QAClC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,YAAY,EACxC,CAAC;QACD,OAAO;YACL,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,+CAA+C;YACvD,UAAU,EAAE,KAAK;YACjB,aAAa,EAAE,KAAK;SACrB,CAAC;IACJ,CAAC;IACD,OAAO;QACL,KAAK,EAAE,KAAK;QACZ,MAAM,EAAE,4BAA4B;QACpC,UAAU,EAAE,KAAK;QACjB,aAAa,EAAE,KAAK;QACpB,QAAQ,EAAE,GAAG;
QACb,WAAW,EAAE,GAAG;KACjB,CAAC;AACJ,CAAC"}
@@ -0,0 +1,18 @@
1
+ export declare const DEFAULT_PLATEAU_ROUNDS = 5; // fallback used when PlateauConfig.rounds is omitted
2
+ export declare const DEFAULT_MIN_DELTA = 0.01; // fallback used when PlateauConfig.minDelta is omitted
3
+ export interface PlateauConfig { // optional tuning for PlateauDetector
4
+ rounds?: number; // consecutive flat observations required before shouldStop() returns true
5
+ minDelta?: number; // smallest gain over the previous score that resets the flat counter
6
+ }
7
+ export declare class PlateauDetector { // plateau-stop detector fed one score per observe() call
8
+ private readonly rounds;
9
+ private readonly minDelta;
10
+ private last; // previously observed score; null until the first observe() call
11
+ private flat; // current run of consecutive non-improving observations
12
+ constructor(cfg?: PlateauConfig);
13
+ observe(score: number): void; // records a score and updates the flat-run counter
14
+ get consecutiveFlatRounds(): number; // current value of the flat-run counter
15
+ shouldStop(): boolean; // true once the flat run has reached `rounds`
16
+ /** Final gate — runner calls this with the N=3 re-run survival flag. */
17
+ terminate(reSurvived: boolean): boolean; // shouldStop() combined with the caller-supplied flag
18
+ }
@@ -0,0 +1,45 @@
1
+ // Wave 3 Step 2 — plateau-stop detector for the evolve loop.
2
+ //
3
+ // Locked rule: stop after `rounds` consecutive holdout observations
4
+ // that fail to improve by `minDelta`. Defaults: 5 rounds × 0.01 delta.
5
+ // Termination additionally requires `reSurvived` — an N=3 re-run on
6
+ // the best candidate must survive — so the runner gets a second
7
+ // sanity check before declaring done.
8
+ export const DEFAULT_PLATEAU_ROUNDS = 5; // flat-observation threshold used when cfg.rounds is null/undefined
9
+ export const DEFAULT_MIN_DELTA = 0.01; // minimum per-observation gain used when cfg.minDelta is null/undefined
10
+ export class PlateauDetector { // Counts consecutive observations whose gain over the previous score is below minDelta.
11
+ rounds; // flat-run length at which shouldStop() becomes true
12
+ minDelta; // minimum increase over the previous score that counts as an improvement
13
+ last = null; // most recently observed score; null until the first observe() call
14
+ flat = 0; // current run of consecutive non-improving observations
15
+ constructor(cfg = {}) { // cfg fields are optional; null/undefined fall back to the module defaults
16
+ this.rounds = cfg.rounds ?? DEFAULT_PLATEAU_ROUNDS; // `??` keeps an explicit 0 instead of replacing it
17
+ this.minDelta = cfg.minDelta ?? DEFAULT_MIN_DELTA;
18
+ }
19
+ observe(score) { // Record one score and update the flat-run counter.
20
+ if (this.last === null) { // first observation only seeds the baseline and is never counted as flat
21
+ this.last = score;
22
+ this.flat = 0;
23
+ return;
24
+ }
25
+ const delta = score - this.last; // measured against the previous score, not the best score seen so far
26
+ if (delta >= this.minDelta) {
27
+ this.flat = 0; // sufficient gain: the flat run starts over
28
+ }
29
+ else {
30
+ this.flat += 1; // small gains, no change and regressions all count as flat (a NaN delta also lands here)
31
+ }
32
+ this.last = score; // the next observation is compared against this one
33
+ }
34
+ get consecutiveFlatRounds() { // read-only view of the flat-run counter
35
+ return this.flat;
36
+ }
37
+ shouldStop() { // true once the flat run has reached the configured number of rounds
38
+ return this.flat >= this.rounds;
39
+ }
40
+ /** Final gate — runner calls this with the N=3 re-run survival flag. */
41
+ terminate(reSurvived) {
42
+ return this.shouldStop() && reSurvived; // both the plateau condition and the caller's flag must hold
43
+ }
44
+ }
45
+ //# sourceMappingURL=plateau.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"plateau.js","sourceRoot":"","sources":["../../src/evolve/plateau.ts"],"names":[],"mappings":"AAAA,6DAA6D;AAC7D,EAAE;AACF,oEAAoE;AACpE,uEAAuE;AACvE,oEAAoE;AACpE,gEAAgE;AAChE,sCAAsC;AAEtC,MAAM,CAAC,MAAM,sBAAsB,GAAG,CAAC,CAAC;AACxC,MAAM,CAAC,MAAM,iBAAiB,GAAG,IAAI,CAAC;AAOtC,MAAM,OAAO,eAAe;IACT,MAAM,CAAS;IACf,QAAQ,CAAS;IAC1B,IAAI,GAAkB,IAAI,CAAC;IAC3B,IAAI,GAAG,CAAC,CAAC;IAEjB,YAAY,MAAqB,EAAE;QACjC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,sBAAsB,CAAC;QACnD,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,IAAI,iBAAiB,CAAC;IACpD,CAAC;IAED,OAAO,CAAC,KAAa;QACnB,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;YACvB,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;YAClB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;YACd,OAAO;QACT,CAAC;QACD,MAAM,KAAK,GAAG,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC;QAChC,IAAI,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC3B,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QAChB,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,IAAI,IAAI,CAAC,CAAC;QACjB,CAAC;QACD,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC;IACpB,CAAC;IAED,IAAI,qBAAqB;QACvB,OAAO,IAAI,CAAC,IAAI,CAAC;IACnB,CAAC;IAED,UAAU;QACR,OAAO,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,CAAC;IAClC,CAAC;IAED,wEAAwE;IACxE,SAAS,CAAC,UAAmB;QAC3B,OAAO,IAAI,CAAC,UAAU,EAAE,IAAI,UAAU,CAAC;IACzC,CAAC;CACF"}
@@ -0,0 +1,12 @@
1
+ import type { IProvider } from "../providers/IProvider.js";
2
+ export interface PostflightInput { // input to the postflight validation step
3
+ ecosystem: string; // ecosystem snapshot text placed in the prompt; an empty string is rendered as a placeholder
4
+ candidate: string; // candidate skill text; screened by the S2 deny prefilter and then placed in the prompt
5
+ }
6
+ export interface Verdict { // outcome of postflight validation
7
+ accept: boolean; // false for a REJECT marker, a missing marker, an S2 denial, or a provider error
8
+ reason: string; // reply text with the verdict marker removed, or a fixed fallback / error message
9
+ }
10
+ export declare function buildPostflightPrompt(input: PostflightInput): string; // builds the newline-joined validator prompt
11
+ export declare function parseVerdict(reply: string): Verdict; // extracts the verdict marker from a model reply
12
+ export declare function runPostflight(provider: IProvider, input: PostflightInput): Promise<Verdict>; // prefilter, then provider call, then parse; resolves with a REJECT verdict on provider error
@@ -0,0 +1,61 @@
1
+ // Wave 3 Step 2 — postflight candidate validation via `agy` CLI.
2
+ //
3
+ // Runs AFTER the evolve loop picks a winning candidate. Asks the
4
+ // research LM (Gemini 3.1 via agy) to check the candidate against
5
+ // the preflight ecosystem snapshot, looking for references to
6
+ // deprecated APIs or now-stale best practices.
7
+ //
8
+ // Fail-safe: any non-ACCEPT (ambiguous reply, provider error, missing
9
+ // verdict) is treated as REJECT. The /evolve command keeps the prior
10
+ // `*.candidate.md` for manual review rather than promoting a maybe-bad
11
+ // candidate.
12
+ import { s2DenyPrefilter } from "./s2-deny.js";
13
+ export function buildPostflightPrompt(input) { // Builds the validator prompt: instructions, ecosystem snapshot, candidate text, reply format.
14
+ return [
15
+ "You are a postflight validator. Decide whether a self-evolved skill",
16
+ "candidate is safe to promote, given the current ecosystem snapshot.",
17
+ "",
18
+ "## Ecosystem snapshot",
19
+ input.ecosystem || "(none — preflight failed)", // any falsy snapshot (e.g. empty string) is replaced by the placeholder
20
+ "",
21
+ "## Candidate skill",
22
+ input.candidate, // inserted verbatim, with no escaping or truncation
23
+ "",
24
+ "Reject if the candidate references APIs / patterns the snapshot",
25
+ "lists as deprecated or removed. Otherwise accept.",
26
+ "",
27
+ "Reply with the FIRST line being exactly `VERDICT: ACCEPT` or", // reply format that parseVerdict's marker regex looks for
28
+ "`VERDICT: REJECT`. Next line(s): one-sentence reason.",
29
+ ].join("\n"); // one array entry per prompt line
30
+ }
31
// Matches a verdict marker at the start of any line (case-insensitive).
const VERDICT_RE = /^\s*VERDICT:\s*(ACCEPT|REJECT)\b/im;
// Same pattern with the global flag so every marker in a reply can be enumerated.
const VERDICT_RE_ALL = new RegExp(VERDICT_RE.source, "gim");
/**
 * Extract the validator's verdict from a raw model reply.
 *
 * Fail-safe rules:
 *  - no marker at the start of any line            -> REJECT
 *  - both ACCEPT and REJECT markers are present    -> REJECT (ambiguous reply)
 *  - otherwise the single verdict kind found wins
 *
 * @param reply Raw text returned by the provider.
 * @returns `{ accept, reason }`. `reason` is the reply with the first marker
 *          removed and trimmed, or a fixed fallback string when nothing is left.
 */
export function parseVerdict(reply) {
    // Collect the distinct verdict kinds; matchAll works on a clone of the
    // regex, so the shared global pattern carries no lastIndex state over.
    const kinds = new Set();
    for (const hit of reply.matchAll(VERDICT_RE_ALL)) {
        kinds.add(hit[1].toUpperCase());
    }
    if (kinds.size === 0) {
        return { accept: false, reason: "no verdict marker in reply (fail-safe REJECT)" };
    }
    if (kinds.size > 1) {
        // Taking only the first marker would let "ACCEPT ... REJECT" through;
        // a self-contradicting reply must never promote a candidate.
        return { accept: false, reason: "conflicting verdict markers in reply (fail-safe REJECT)" };
    }
    const accept = kinds.has("ACCEPT");
    const reason = reply.replace(VERDICT_RE, "").trim() || (accept ? "no reason given" : "rejected");
    return { accept, reason };
}
40
+ export async function runPostflight(provider, input) { // Validates a candidate: deny prefilter first, then one provider call whose reply is parsed into a verdict.
41
+ // ADR 0003 S2: deterministic deny-pattern prefilter runs BEFORE the LLM
42
+ // researcher. Short-circuits on any escalation primitive or boundary-
43
+ // bypass directive — the LLM cannot be prompt-injected to accept these
44
+ // because the LLM is never called on these candidates.
45
+ const s2 = s2DenyPrefilter(input.candidate);
46
+ if (!s2.allowed) {
47
+ return { accept: false, reason: s2.reason ?? "S2 deny-pattern matched" }; // generic reason when the prefilter gives none
48
+ }
49
+ const prompt = buildPostflightPrompt(input);
50
+ try {
51
+ const out = await provider.execute({ prompt });
52
+ return parseVerdict(out.text); // a throw from parsing (e.g. non-string text) is also caught below
53
+ }
54
+ catch (err) { // fail-safe: any thrown error becomes a REJECT verdict instead of propagating
55
+ return {
56
+ accept: false,
57
+ reason: err instanceof Error ? err.message : String(err), // non-Error throwables are stringified
58
+ };
59
+ }
60
+ }
61
+ //# sourceMappingURL=postflight.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postflight.js","sourceRoot":"","sources":["../../src/evolve/postflight.ts"],"names":[],"mappings":"AAAA,iEAAiE;AACjE,EAAE;AACF,iEAAiE;AACjE,kEAAkE;AAClE,8DAA8D;AAC9D,+CAA+C;AAC/C,EAAE;AACF,sEAAsE;AACtE,qEAAqE;AACrE,uEAAuE;AACvE,aAAa;AAGb,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAY/C,MAAM,UAAU,qBAAqB,CAAC,KAAsB;IAC1D,OAAO;QACL,qEAAqE;QACrE,qEAAqE;QACrE,EAAE;QACF,uBAAuB;QACvB,KAAK,CAAC,SAAS,IAAI,2BAA2B;QAC9C,EAAE;QACF,oBAAoB;QACpB,KAAK,CAAC,SAAS;QACf,EAAE;QACF,iEAAiE;QACjE,mDAAmD;QACnD,EAAE;QACF,8DAA8D;QAC9D,uDAAuD;KACxD,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,MAAM,UAAU,GAAG,oCAAoC,CAAC;AAExD,MAAM,UAAU,YAAY,CAAC,KAAa;IACxC,MAAM,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IAClC,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,+CAA+C,EAAE,CAAC;IAC1F,MAAM,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,KAAK,QAAQ,CAAC;IAChD,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IACjG,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;AAC5B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAmB,EACnB,KAAsB;IAEtB,wEAAwE;IACxE,sEAAsE;IACtE,uEAAuE;IACvE,uDAAuD;IACvD,MAAM,EAAE,GAAG,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAC5C,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC;QAChB,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,MAAM,IAAI,yBAAyB,EAAE,CAAC;IAC3E,CAAC;IACD,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAC5C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/C,OAAO,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;SACzD,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { IProvider } from "../providers/IProvider.js";
2
+ export interface PreflightInput { // input to the preflight step (implementation not visible in this excerpt)
3
+ skillDomain: string;
4
+ lastEvolveDate?: string; // NOTE(review): date format is not shown here — confirm against the implementation
5
+ }
6
+ export interface PreflightSnapshot { // result resolved by runPreflight
7
+ text: string; // NOTE(review): presumably the snapshot text later passed to postflight as `ecosystem` — confirm
8
+ prompt: string; // NOTE(review): presumably the prompt produced by buildPreflightPrompt — confirm
9
+ fetchedAt: string; // NOTE(review): timestamp format is not shown here — confirm
10
+ error?: string; // NOTE(review): presumably set when the provider call failed — confirm
11
+ }
12
+ export declare function buildPreflightPrompt(input: PreflightInput): string;
13
+ export declare function runPreflight(provider: IProvider, input: PreflightInput): Promise<PreflightSnapshot>;