darwin-agents 0.6.0-alpha.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/CHANGELOG.md +97 -0
  2. package/README.md +43 -1
  3. package/dist/src/cli/run.js +1 -1
  4. package/dist/src/cli/run.js.map +1 -1
  5. package/dist/src/evolution/alignment.d.ts +44 -0
  6. package/dist/src/evolution/alignment.d.ts.map +1 -1
  7. package/dist/src/evolution/alignment.js +124 -0
  8. package/dist/src/evolution/alignment.js.map +1 -1
  9. package/dist/src/evolution/loop.d.ts +56 -0
  10. package/dist/src/evolution/loop.d.ts.map +1 -1
  11. package/dist/src/evolution/loop.js +168 -16
  12. package/dist/src/evolution/loop.js.map +1 -1
  13. package/dist/src/evolution/multi-critic.d.ts +30 -1
  14. package/dist/src/evolution/multi-critic.d.ts.map +1 -1
  15. package/dist/src/evolution/multi-critic.js +57 -2
  16. package/dist/src/evolution/multi-critic.js.map +1 -1
  17. package/dist/src/evolution/optimizer-gepa.d.ts +47 -4
  18. package/dist/src/evolution/optimizer-gepa.d.ts.map +1 -1
  19. package/dist/src/evolution/optimizer-gepa.js +44 -5
  20. package/dist/src/evolution/optimizer-gepa.js.map +1 -1
  21. package/dist/src/evolution/pareto.d.ts +98 -3
  22. package/dist/src/evolution/pareto.d.ts.map +1 -1
  23. package/dist/src/evolution/pareto.js +193 -30
  24. package/dist/src/evolution/pareto.js.map +1 -1
  25. package/dist/src/evolution/safety.d.ts +35 -1
  26. package/dist/src/evolution/safety.d.ts.map +1 -1
  27. package/dist/src/evolution/safety.js +56 -2
  28. package/dist/src/evolution/safety.js.map +1 -1
  29. package/dist/src/evolution/sequential.d.ts +149 -0
  30. package/dist/src/evolution/sequential.d.ts.map +1 -0
  31. package/dist/src/evolution/sequential.js +239 -0
  32. package/dist/src/evolution/sequential.js.map +1 -0
  33. package/dist/src/evolution/tracker.d.ts +12 -0
  34. package/dist/src/evolution/tracker.d.ts.map +1 -1
  35. package/dist/src/evolution/tracker.js +24 -0
  36. package/dist/src/evolution/tracker.js.map +1 -1
  37. package/dist/src/index.d.ts +5 -3
  38. package/dist/src/index.d.ts.map +1 -1
  39. package/dist/src/index.js +10 -3
  40. package/dist/src/index.js.map +1 -1
  41. package/dist/src/types.d.ts +84 -0
  42. package/dist/src/types.d.ts.map +1 -1
  43. package/dist/src/types.js.map +1 -1
  44. package/package.json +1 -1
@@ -6,6 +6,7 @@
6
6
  * rollback triggers, and A/B test evaluation rules.
7
7
  */
8
8
  import { DEFAULT_SAFETY } from '../types.js';
9
+ import { msprtTwoSample, hoeffdingTwoSample } from './sequential.js';
9
10
  /** Default minRuns range for dynamic sizing */
10
11
  const DYNAMIC_MIN_RUNS_FLOOR = 10;
11
12
  const DYNAMIC_MIN_RUNS_CEIL = 30;
@@ -21,6 +22,17 @@ export class SafetyGate {
21
22
  canEvolve(_agentName, stats) {
22
23
  return stats.totalRuns >= this.thresholds.minDataPoints;
23
24
  }
25
+ /**
26
+ * v0.7.0 — True iff the peeking guard is configured to use a sequential
27
+ * method (mSPRT / Hoeffding), which needs the per-arm composite samples.
28
+ * The loop calls this to decide whether to load that (slightly more
29
+ * expensive) per-sample data before calling {@link evaluateABTest}.
30
+ */
31
+ usesSequentialConfidence() {
32
+ return (this.thresholds.requireConfidence === true &&
33
+ (this.thresholds.confidenceMethod === 'msprt' ||
34
+ this.thresholds.confidenceMethod === 'hoeffding'));
35
+ }
24
36
  /**
25
37
  * Check whether score B is NOT a regression beyond the allowed threshold.
26
38
  *
@@ -58,7 +70,7 @@ export class SafetyGate {
58
70
  * @param overrideMinRuns — Per-test minimum runs (from ABTest.minRuns).
59
71
  * Falls back to SafetyThresholds.minDataPoints if not provided.
60
72
  */
61
- evaluateABTest(compositeA, compositeB, runsA, runsB, failsA = 0, failsB = 0, overrideMinRuns) {
73
+ evaluateABTest(compositeA, compositeB, runsA, runsB, failsA = 0, failsB = 0, overrideMinRuns, samples) {
62
74
  const minRuns = overrideMinRuns ?? this.thresholds.minDataPoints;
63
75
  const totalA = runsA + failsA;
64
76
  const totalB = runsB + failsB;
@@ -113,7 +125,7 @@ export class SafetyGate {
113
125
  // tie-break below so the test still terminates (an early 'continue' here
114
126
  // would loop forever on a persistent small-margin challenger).
115
127
  if (!this.thresholds.requireConfidence ||
116
- this.meetsConfidence(adjustedA, adjustedB, runsA, runsB, minRuns)) {
128
+ this.isConfident(adjustedA, adjustedB, runsA, runsB, minRuns, marginOutcome, samples)) {
117
129
  return marginOutcome;
118
130
  }
119
131
  }
@@ -165,6 +177,48 @@ export class SafetyGate {
165
177
  const effectSize = Math.abs(scoreA - scoreB) / pooled;
166
178
  return effectSize >= 0.2 && runsA + runsB >= minRuns * 2;
167
179
  }
180
+ /**
181
+ * v0.7.0 — Dispatch the peeking-resistant confidence gate to the
182
+ * configured {@link SafetyThresholds.confidenceMethod}.
183
+ *
184
+ * - `'effect-size'` (default): the v0.6.0 heuristic ({@link meetsConfidence}).
185
+ * Byte-for-byte unchanged when no method is set.
186
+ * - `'msprt'` / `'hoeffding'`: an always-valid sequential test over the
187
+ * RAW per-arm composite samples (reliability is already handled by the
188
+ * auto-loss rule upstream, so the statistical test uses the unadjusted
189
+ * scores). The verdict must be `decisive` AND point in the SAME
190
+ * direction as the score margin — a sequential test that fires for the
191
+ * opposite arm does not confirm this margin.
192
+ *
193
+ * Falls back to the effect-size heuristic when a sequential method is set
194
+ * but no per-sample data was supplied (graceful — never throws).
195
+ */
196
+ isConfident(adjustedA, adjustedB, runsA, runsB, minRuns, marginOutcome, samples) {
197
+ const method = this.thresholds.confidenceMethod ?? 'effect-size';
198
+ if (method === 'effect-size' || !samples) {
199
+ return this.meetsConfidence(adjustedA, adjustedB, runsA, runsB, minRuns);
200
+ }
201
+ const opts = {
202
+ alpha: this.thresholds.confidenceAlpha,
203
+ minSamplesPerArm: this.thresholds.confidenceMinSamples,
204
+ };
205
+ const verdict = method === 'hoeffding'
206
+ ? hoeffdingTwoSample(samples.a, samples.b, {
207
+ ...opts,
208
+ lo: this.thresholds.confidenceScoreRange?.[0],
209
+ hi: this.thresholds.confidenceScoreRange?.[1],
210
+ })
211
+ : msprtTwoSample(samples.a, samples.b, {
212
+ ...opts,
213
+ tau: this.thresholds.confidenceTau,
214
+ });
215
+ if (!verdict.decisive)
216
+ return false;
217
+ // The sequential test must confirm the SAME winner as the score margin.
218
+ // direction +1 = B>A (b_wins), −1 = A>B (a_wins).
219
+ const expected = marginOutcome === 'b_wins' ? 1 : -1;
220
+ return verdict.direction === expected;
221
+ }
168
222
  /**
169
223
  * Compute dynamic minRuns based on observed quality score variance.
170
224
  *
@@ -1 +1 @@
1
- {"version":3,"file":"safety.js","sourceRoot":"","sources":["../../../src/evolution/safety.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAW7C,+CAA+C;AAC/C,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAClC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAEjC,MAAM,OAAO,UAAU;IACb,UAAU,CAAmB;IAErC,YAAY,aAA+B,cAAc;QACvD,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED;;;OAGG;IACH,SAAS,CAAC,UAAkB,EAAE,KAAyB;QACrD,OAAO,KAAK,CAAC,SAAS,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;IAC1D,CAAC;IAED;;;;;;;;;OASG;IACH,eAAe,CAAC,MAAc,EAAE,MAAc;QAC5C,8DAA8D;QAC9D,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;YAChB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;QACxC,OAAO,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;IAC/C,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,mBAA2B;QACxC,OAAO,mBAAmB,IAAI,IAAI,CAAC,UAAU,CAAC,wBAAwB,CAAC;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,cAAc,CACZ,UAAkB,EAClB,UAAkB,EAClB,KAAa,EACb,KAAa,EACb,SAAiB,CAAC,EAClB,SAAiB,CAAC,EAClB,eAAwB;QAExB,MAAM,OAAO,GAAG,eAAe,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QACjE,MAAM,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;QAC9B,MAAM,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;QAE9B,mFAAmF;QACnF,MAAM,yBAAyB,GAAG,CAAC,CAAC;QACpC,IAAI,MAAM,IAAI,yBAAyB,IAAI,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC;YACjE,OAAO,QAAQ,CAAC,CAAC,kBAAkB;QACrC,CAAC;QACD,IAAI,MAAM,IAAI,yBAAyB,IAAI,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC;YACjE,OAAO,QAAQ,CAAC,CAAC,kBAAkB;QACrC,CAAC;QAED,2DAA2D;QAC3D,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,OAAO,EAAE,CAAC;YACvC,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,MAAM,oBAAoB,GAAG,IAAI,CAAC,CAAC,iCAAiC;QAEpE,qEAAqE;QACrE,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAAC;QAC5C,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAAC;QAE5C,yBAAyB;QACzB,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,yEAAyE;QACzE,IAAI,aAAa,GAAyB,IAAI,CAAC;QAC/C,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,MAAM,GAAG,CAAC,SAAS,GAAG,SAAS,
CAAC,GAAG,SAAS,CAAC;YACnD,IAAI,MAAM,GAAG,oBAAoB,EAAE,CAAC;gBAClC,aAAa,GAAG,QAAQ,CAAC;YAC3B,CAAC;QACH,CAAC;aAAM,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YACzB,aAAa,GAAG,QAAQ,CAAC;QAC3B,CAAC;QACD,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YAC3B,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBAClB,MAAM,MAAM,GAAG,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,SAAS,CAAC;gBACnD,IAAI,MAAM,GAAG,oBAAoB,EAAE,CAAC;oBAClC,aAAa,GAAG,QAAQ,CAAC;gBAC3B,CAAC;YACH,CAAC;iBAAM,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBACzB,aAAa,GAAG,QAAQ,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YAC3B,uEAAuE;YACvE,wEAAwE;YACxE,uEAAuE;YACvE,yEAAyE;YACzE,+DAA+D;YAC/D,IACE,CAAC,IAAI,CAAC,UAAU,CAAC,iBAAiB;gBAClC,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,EACjE,CAAC;gBACD,OAAO,aAAa,CAAC;YACvB,CAAC;QACH,CAAC;QAED,wDAAwD;QACxD,6EAA6E;QAC7E,4EAA4E;QAC5E,6DAA6D;QAC7D,MAAM,cAAc,GAAG,OAAO,GAAG,CAAC,CAAC;QACnC,IAAI,KAAK,IAAI,cAAc,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;YACvD,OAAO,QAAQ,CAAC,CAAC,qEAAqE;QACxF,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;;OAIG;IACH,mBAAmB,CACjB,UAAkB,EAClB,UAAkB,EAClB,KAAa,EACb,KAAa;QAEb,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAE9C,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,OAAO,EAAE,CAAC;YACvC,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;QAC7C,CAAC;QAED,qDAAqD;QACrD,MAAM,MAAM,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAC7C,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;QAC7C,CAAC;QAED,6DAA6D;QAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,MAAM,CAAC;QAE9D,gEAAgE;QAChE,MAAM,YAAY,GAAG,KAAK,GAAG,KAAK,CAAC;QACnC,MAAM,SAAS,GAAG,UAAU,IAAI,GAAG,IAAI,YAAY,IAAI,OAAO,GAAG,CAAC,CAAC;QAEnE,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC;IACnC,CAAC;IAED;;;;;;;OAOG;IACK,eAAe,CACrB,MAAc,EACd,MAAc,EACd,KAAa,EACb,KAAa,EACb,OAAe;QAEf,MAAM,MAAM,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;QACtD,OAAO,UAAU,IAAI,GAAG,IAAI,KAAK,GAAG,KAAK,IA
AI,OAAO,GAAG,CAAC,CAAC;IAC3D,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,qBAAqB,CACnB,WAA+B,EAC/B,aAAsB;QAEtB,MAAM,KAAK,GAAG,aAAa,IAAI,sBAAsB,CAAC;QACtD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,qBAAqB,CAAC,CAAC;QAEpD,sDAAsD;QACtD,MAAM,aAAa,GAAG,WAAW;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;aAClC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;QAE1C,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QAED,MAAM,IAAI,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC;QAC7E,kFAAkF;QAClF,oFAAoF;QACpF,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACzG,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEhC,mEAAmE;QACnE,IAAI,GAAG,IAAI,GAAG,EAAE,CAAC;YACf,OAAO,KAAK,CAAC;QACf,CAAC;QAED,qDAAqD;QACrD,IAAI,GAAG,GAAG,GAAG,EAAE,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC;QAED,yDAAyD;QACzD,kCAAkC;QAClC,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,6BAA6B;QAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF"}
1
+ {"version":3,"file":"safety.js","sourceRoot":"","sources":["../../../src/evolution/safety.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAC;AAsBrE,+CAA+C;AAC/C,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAClC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAEjC,MAAM,OAAO,UAAU;IACb,UAAU,CAAmB;IAErC,YAAY,aAA+B,cAAc;QACvD,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED;;;OAGG;IACH,SAAS,CAAC,UAAkB,EAAE,KAAyB;QACrD,OAAO,KAAK,CAAC,SAAS,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;IAC1D,CAAC;IAED;;;;;OAKG;IACH,wBAAwB;QACtB,OAAO,CACL,IAAI,CAAC,UAAU,CAAC,iBAAiB,KAAK,IAAI;YAC1C,CAAC,IAAI,CAAC,UAAU,CAAC,gBAAgB,KAAK,OAAO;gBAC3C,IAAI,CAAC,UAAU,CAAC,gBAAgB,KAAK,WAAW,CAAC,CACpD,CAAC;IACJ,CAAC;IAED;;;;;;;;;OASG;IACH,eAAe,CAAC,MAAc,EAAE,MAAc;QAC5C,8DAA8D;QAC9D,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;YAChB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;QACxC,OAAO,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;IAC/C,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,mBAA2B;QACxC,OAAO,mBAAmB,IAAI,IAAI,CAAC,UAAU,CAAC,wBAAwB,CAAC;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,cAAc,CACZ,UAAkB,EAClB,UAAkB,EAClB,KAAa,EACb,KAAa,EACb,SAAiB,CAAC,EAClB,SAAiB,CAAC,EAClB,eAAwB,EACxB,OAAuB;QAEvB,MAAM,OAAO,GAAG,eAAe,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QACjE,MAAM,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;QAC9B,MAAM,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;QAE9B,mFAAmF;QACnF,MAAM,yBAAyB,GAAG,CAAC,CAAC;QACpC,IAAI,MAAM,IAAI,yBAAyB,IAAI,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC;YACjE,OAAO,QAAQ,CAAC,CAAC,kBAAkB;QACrC,CAAC;QACD,IAAI,MAAM,IAAI,yBAAyB,IAAI,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC;YACjE,OAAO,QAAQ,CAAC,CAAC,kBAAkB;QACrC,CAAC;QAED,2DAA2D;QAC3D,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,OAAO,EAAE,CAAC;YACvC,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,MAAM,oBAAoB,GAAG,IAAI,CAAC,CAAC,iCAAiC;QAEpE,qEAAqE;QACrE,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAA
C;QAC5C,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAAC;QAE5C,yBAAyB;QACzB,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,yEAAyE;QACzE,IAAI,aAAa,GAAyB,IAAI,CAAC;QAC/C,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,MAAM,GAAG,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,SAAS,CAAC;YACnD,IAAI,MAAM,GAAG,oBAAoB,EAAE,CAAC;gBAClC,aAAa,GAAG,QAAQ,CAAC;YAC3B,CAAC;QACH,CAAC;aAAM,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YACzB,aAAa,GAAG,QAAQ,CAAC;QAC3B,CAAC;QACD,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YAC3B,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBAClB,MAAM,MAAM,GAAG,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,SAAS,CAAC;gBACnD,IAAI,MAAM,GAAG,oBAAoB,EAAE,CAAC;oBAClC,aAAa,GAAG,QAAQ,CAAC;gBAC3B,CAAC;YACH,CAAC;iBAAM,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;gBACzB,aAAa,GAAG,QAAQ,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YAC3B,uEAAuE;YACvE,wEAAwE;YACxE,uEAAuE;YACvE,yEAAyE;YACzE,+DAA+D;YAC/D,IACE,CAAC,IAAI,CAAC,UAAU,CAAC,iBAAiB;gBAClC,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,aAAa,EAAE,OAAO,CAAC,EACrF,CAAC;gBACD,OAAO,aAAa,CAAC;YACvB,CAAC;QACH,CAAC;QAED,wDAAwD;QACxD,6EAA6E;QAC7E,4EAA4E;QAC5E,6DAA6D;QAC7D,MAAM,cAAc,GAAG,OAAO,GAAG,CAAC,CAAC;QACnC,IAAI,KAAK,IAAI,cAAc,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;YACvD,OAAO,QAAQ,CAAC,CAAC,qEAAqE;QACxF,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;;OAIG;IACH,mBAAmB,CACjB,UAAkB,EAClB,UAAkB,EAClB,KAAa,EACb,KAAa;QAEb,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAE9C,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,OAAO,EAAE,CAAC;YACvC,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;QAC7C,CAAC;QAED,qDAAqD;QACrD,MAAM,MAAM,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAC7C,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;QAC7C,CAAC;QAED,6DAA6D;QAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,MAAM,CAAC;QAE9D,gEAAgE;QAChE,MAAM,YAAY,GAAG,KAAK,GAAG,KAAK,CAAC;QACnC,MAAM,SAAS,GAAG,UAAU,IAAI,GAAG,IAAI,YAAY,IAAI,OAAO,GAAG,CAAC,CAAC;QAEnE,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC;IACnC,CAAC;IAED;;;;;;;OAOG;IACK,eAAe,CACrB,MAAc,EACd,
MAAc,EACd,KAAa,EACb,KAAa,EACb,OAAe;QAEf,MAAM,MAAM,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;QACtD,OAAO,UAAU,IAAI,GAAG,IAAI,KAAK,GAAG,KAAK,IAAI,OAAO,GAAG,CAAC,CAAC;IAC3D,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACK,WAAW,CACjB,SAAiB,EACjB,SAAiB,EACjB,KAAa,EACb,KAAa,EACb,OAAe,EACf,aAA4B,EAC5B,OAAuB;QAEvB,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,gBAAgB,IAAI,aAAa,CAAC;QAEjE,IAAI,MAAM,KAAK,aAAa,IAAI,CAAC,OAAO,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC,eAAe,CAAC,SAAS,EAAE,SAAS,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;QAC3E,CAAC;QAED,MAAM,IAAI,GAAG;YACX,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,eAAe;YACtC,gBAAgB,EAAE,IAAI,CAAC,UAAU,CAAC,oBAAoB;SACvD,CAAC;QAEF,MAAM,OAAO,GACX,MAAM,KAAK,WAAW;YACpB,CAAC,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,EAAE;gBACvC,GAAG,IAAI;gBACP,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC;gBAC7C,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC;aAC9C,CAAC;YACJ,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,EAAE;gBACnC,GAAG,IAAI;gBACP,GAAG,EAAE,IAAI,CAAC,UAAU,CAAC,aAAa;aACnC,CAAC,CAAC;QAET,IAAI,CAAC,OAAO,CAAC,QAAQ;YAAE,OAAO,KAAK,CAAC;QACpC,wEAAwE;QACxE,kDAAkD;QAClD,MAAM,QAAQ,GAAG,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,OAAO,OAAO,CAAC,SAAS,KAAK,QAAQ,CAAC;IACxC,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,qBAAqB,CACnB,WAA+B,EAC/B,aAAsB;QAEtB,MAAM,KAAK,GAAG,aAAa,IAAI,sBAAsB,CAAC;QACtD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,qBAAqB,CAAC,CAAC;QAEpD,sDAAsD;QACtD,MAAM,aAAa,GAAG,WAAW;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;aAClC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;QAE1C,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QAED,MAAM,IAAI,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC;QAC7E,kFAAkF;QAClF,oFAAoF;QACpF,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,E
AAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACzG,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEhC,mEAAmE;QACnE,IAAI,GAAG,IAAI,GAAG,EAAE,CAAC;YACf,OAAO,KAAK,CAAC;QACf,CAAC;QAED,qDAAqD;QACrD,IAAI,GAAG,GAAG,GAAG,EAAE,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC;QAED,yDAAyD;QACzD,kCAAkC;QAClC,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,6BAA6B;QAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF"}
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Darwin — Always-Valid Sequential Testing (v0.7.0)
3
+ *
4
+ * Pure statistical primitives for peeking-resistant A/B decisions during
5
+ * prompt evolution. This module exists because Darwin's safety gate calls
6
+ * `evaluateABTest` after EVERY run — continuous monitoring with a fixed
7
+ * relative-improvement threshold inflates the false-positive rate (the
8
+ * classic "peeking problem"). v0.6.0 shipped a first-step effect-size
9
+ * heuristic (`SafetyGate.calculateConfidence`, |Δ| / pooled-mean ≥ 0.2);
10
+ * this module is the rigorous upgrade promised in the v0.6 roadmap notes.
11
+ *
12
+ * Two methods, both **always-valid** (the decision stays statistically
13
+ * sound no matter how many times you peek):
14
+ *
15
+ * 1. {@link msprtTwoSample} — Mixture Sequential Probability Ratio Test
16
+ * (Johari, Pekelis & Walsh 2017, arXiv:1512.04922; the engine behind
17
+ * Optimizely/Statsig's "stats engine"). Gaussian mixture prior over
18
+ * the effect size; uses the Welch variance estimate. Most powerful
19
+ * when the per-arm sample variance is meaningful — i.e. once each arm
20
+ * has accumulated a handful of runs (see {@link MsprtOptions.minSamplesPerArm}).
21
+ *
22
+ * 2. {@link hoeffdingTwoSample} — a σ-free time-uniform confidence
23
+ * sequence for variables bounded to a known range (Darwin composite
24
+ * scores live in [0, 1]). Valid at ANY sample size with no variance
25
+ * estimate, so it is the honest choice when only a few runs exist.
26
+ * More conservative than mSPRT (wider intervals) by design.
27
+ *
28
+ * **Pure** — no LLM calls, no I/O, no `Date.now()`, no `Math.random()`.
29
+ * Fully deterministic, so tests pin exact statistic values.
30
+ *
31
+ * Caveat on warmup (documented, not hidden): mSPRT with an *estimated*
32
+ * variance is only asymptotically always-valid; with very few samples the
33
+ * variance estimate is noisy. Darwin's A/B sample sizes (minRuns 10–30) sit
34
+ * below the ~100-sample comfort zone for tight σ-estimation, so we expose
35
+ * `minSamplesPerArm` (default 5) below which mSPRT abstains (`decisive:false`)
36
+ * rather than fire on noise, and we offer Hoeffding as the σ-free fallback.
37
+ */
38
+ /** Which confidence method the safety gate uses for the peeking guard. */
39
+ export type ConfidenceMethod = "effect-size" | "msprt" | "hoeffding";
40
+ /** Verdict from a sequential test. `decisive` answers "is the gap real?". */
41
+ export interface SequentialVerdict {
42
+ /** True iff the test crossed its always-valid threshold (reject H0: equal means). */
43
+ decisive: boolean;
44
+ /** Which method produced this verdict. */
45
+ method: ConfidenceMethod;
46
+ /** Sign of the effect (mean B − mean A): +1 if B>A, −1 if A>B, 0 if tie/undecided. */
47
+ direction: -1 | 0 | 1;
48
+ /**
49
+ * The test statistic: for mSPRT the mixture likelihood ratio Λ (compare to
50
+ * `threshold = 1/alpha`); for Hoeffding the absolute mean gap |Δ| (compare
51
+ * to `threshold` = summed CS half-widths). NaN-free.
52
+ */
53
+ statistic: number;
54
+ /** The threshold `statistic` must exceed for `decisive` to be true. */
55
+ threshold: number;
56
+ /** Effective per-arm sample counts after NaN filtering. */
57
+ nA: number;
58
+ nB: number;
59
+ /** Human-readable reason, e.g. "warmup: 3<5 samples on arm A". */
60
+ reason: string;
61
+ }
62
+ export interface MeanVar {
63
+ mean: number;
64
+ /** Sample variance with Bessel's correction (n−1). 0 when n<2. */
65
+ variance: number;
66
+ n: number;
67
+ }
68
+ /**
69
+ * Mean + Bessel-corrected sample variance over finite values. Non-finite
70
+ * entries (NaN/Infinity) are dropped — a single bad score never poisons the
71
+ * estimate. Returns `{mean:0, variance:0, n:0}` for an all-invalid/empty input.
72
+ */
73
+ export declare function meanVar(samples: ReadonlyArray<number>): MeanVar;
74
+ export interface MsprtOptions {
75
+ /** Significance level. Reject H0 when Λ ≥ 1/alpha. Default 0.05. */
76
+ alpha?: number;
77
+ /**
78
+ * Mixing-prior standard deviation over the true mean DIFFERENCE δ (in raw
79
+ * score units, since the test runs in estimator coordinates). Larger τ ⇒
80
+ * optimised for bigger effects (fires faster on large gaps, slower on small
81
+ * ones). Default 0.1 — tuned for composite scores in [0,1] where a
82
+ * "meaningful" lift in the mean difference is on the order of ~0.1.
83
+ */
84
+ tau?: number;
85
+ /**
86
+ * Per-arm warmup floor. Below this many valid samples on EITHER arm the
87
+ * test abstains (`decisive:false`) instead of firing on a noisy variance
88
+ * estimate. Default 5.
89
+ */
90
+ minSamplesPerArm?: number;
91
+ }
92
+ /**
93
+ * Two-sample mixture SPRT for a difference in means with always-valid
94
+ * inference. Models H0: μ_A = μ_B against a Gaussian mixture alternative on
95
+ * the effect (prior δ ~ N(0, τ²) on the true mean difference). Returns
96
+ * `decisive:true` when the mixture likelihood ratio Λ crosses 1/alpha — a
97
+ * threshold valid at every n (no peeking penalty).
98
+ *
99
+ * Closed form in ESTIMATOR coordinates. Let δ̂ = x̄_B − x̄_A be the observed
100
+ * mean difference and v = Var(δ̂) its variance. Integrating the per-δ Gaussian
101
+ * likelihood ratio against the N(0, τ²) mixture prior (Johari, Pekelis &
102
+ * Walsh 2017) gives:
103
+ *
104
+ * Λ = sqrt( v / (v + τ²) ) · exp( τ²·δ̂² / (2·v·(v + τ²)) ), Λ ≥ 1/α ⇒ reject H0
105
+ *
106
+ * We estimate v with the WELCH variance of the difference of means,
107
+ * v = s²_A/n_A + s²_B/n_B (Bessel-corrected per-arm sample variances). Welch
108
+ * (rather than a pooled within-arm variance) keeps the form unambiguous and
109
+ * robust to unequal arm variances — it does not assume homoscedasticity. In
110
+ * estimator coordinates no `nEff` factor appears: the sample sizes enter only
111
+ * through v (a larger n shrinks v, which grows Λ), so the historical
112
+ * "n² vs n" ambiguity of the sample-mean form is avoided entirely.
113
+ *
114
+ * Defensive: empty/below-warmup arms ⇒ abstain; zero variance on either arm
115
+ * with a non-zero gap AND ≥2 samples per arm ⇒ decisive (deterministic arms
116
+ * differ); <2 samples ⇒ abstain (cannot estimate variance); NaN-free.
117
+ */
118
+ export declare function msprtTwoSample(samplesA: ReadonlyArray<number>, samplesB: ReadonlyArray<number>, opts?: MsprtOptions): SequentialVerdict;
119
+ export interface HoeffdingOptions {
120
+ /** Significance level for the confidence sequence. Default 0.05. */
121
+ alpha?: number;
122
+ /** Lower bound of the score range. Default 0 (Darwin composite scores). */
123
+ lo?: number;
124
+ /** Upper bound of the score range. Default 1 (Darwin composite scores). */
125
+ hi?: number;
126
+ /** Per-arm warmup floor (≥1). Default 2 — Hoeffding is valid at any n≥1
127
+ * but a 1-sample arm gives a useless [lo,hi]-wide interval. */
128
+ minSamplesPerArm?: number;
129
+ }
130
+ /**
131
+ * Two-sample, variance-free, always-valid decision via per-arm time-uniform
132
+ * Hoeffding confidence sequences for bounded variables.
133
+ *
134
+ * Each arm's mean is bracketed by a half-width that shrinks with n while
135
+ * staying valid under continuous monitoring:
136
+ *
137
+ * w(n) = (hi − lo) · sqrt( ln( (n+1)/alpha ) / (2n) )
138
+ *
139
+ * (a standard union-bound / Cramér–Chernoff time-uniform Hoeffding bound).
140
+ * The arms are declared decisively different when their mean gap exceeds the
141
+ * sum of the two half-widths — i.e. the confidence intervals no longer
142
+ * overlap. No variance estimate needed, so this is the honest method when
143
+ * only a handful of runs exist or the score distribution is skewed/bounded.
144
+ *
145
+ * Conservative by construction (wider than mSPRT) — prefer mSPRT once both
146
+ * arms have enough runs for a stable variance estimate.
147
+ */
148
+ export declare function hoeffdingTwoSample(samplesA: ReadonlyArray<number>, samplesB: ReadonlyArray<number>, opts?: HoeffdingOptions): SequentialVerdict;
149
+ //# sourceMappingURL=sequential.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sequential.d.ts","sourceRoot":"","sources":["../../../src/evolution/sequential.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAEH,0EAA0E;AAC1E,MAAM,MAAM,gBAAgB,GAAG,aAAa,GAAG,OAAO,GAAG,WAAW,CAAC;AAErE,6EAA6E;AAC7E,MAAM,WAAW,iBAAiB;IAChC,qFAAqF;IACrF,QAAQ,EAAE,OAAO,CAAC;IAClB,0CAA0C;IAC1C,MAAM,EAAE,gBAAgB,CAAC;IACzB,sFAAsF;IACtF,SAAS,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACtB;;;;OAIG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB,uEAAuE;IACvE,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,EAAE,EAAE,MAAM,CAAC;IACX,EAAE,EAAE,MAAM,CAAC;IACX,kEAAkE;IAClE,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,kEAAkE;IAClE,QAAQ,EAAE,MAAM,CAAC;IACjB,CAAC,EAAE,MAAM,CAAC;CACX;AAED;;;;GAIG;AACH,wBAAgB,OAAO,CAAC,OAAO,EAAE,aAAa,CAAC,MAAM,CAAC,GAAG,OAAO,CAmB/D;AAED,MAAM,WAAW,YAAY;IAC3B,oEAAoE;IACpE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;;;;OAMG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAMD;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,aAAa,CAAC,MAAM,CAAC,EAC/B,QAAQ,EAAE,aAAa,CAAC,MAAM,CAAC,EAC/B,IAAI,GAAE,YAAiB,GACtB,iBAAiB,CAmFnB;AAED,MAAM,WAAW,gBAAgB;IAC/B,oEAAoE;IACpE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,2EAA2E;IAC3E,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,2EAA2E;IAC3E,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ;oEACgE;IAChE,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,kBAAkB,CAChC,QAAQ,EAAE,aAAa,CAAC,MAAM,CAAC,EAC/B,QAAQ,EAAE,aAAa,CAAC,MAAM,CAAC,EAC/B,IAAI,GAAE,gBAAqB,GAC1B,iBAAiB,CA8CnB"}
@@ -0,0 +1,239 @@
1
+ /**
2
+ * Darwin — Always-Valid Sequential Testing (v0.7.0)
3
+ *
4
+ * Pure statistical primitives for peeking-resistant A/B decisions during
5
+ * prompt evolution. This module exists because Darwin's safety gate calls
6
+ * `evaluateABTest` after EVERY run — continuous monitoring with a fixed
7
+ * relative-improvement threshold inflates the false-positive rate (the
8
+ * classic "peeking problem"). v0.6.0 shipped a first-step effect-size
9
+ * heuristic (`SafetyGate.calculateConfidence`, |Δ| / pooled-mean ≥ 0.2);
10
+ * this module is the rigorous upgrade promised in the v0.6 roadmap notes.
11
+ *
12
+ * Two methods, both **always-valid** (the decision stays statistically
13
+ * sound no matter how many times you peek):
14
+ *
15
+ * 1. {@link msprtTwoSample} — Mixture Sequential Probability Ratio Test
16
+ * (Johari, Pekelis & Walsh 2017, arXiv:1512.04922; the engine behind
17
+ * Optimizely/Statsig's "stats engine"). Gaussian mixture prior over
18
+ * the effect size; uses the Welch variance estimate. Most powerful
19
+ * when the per-arm sample variance is meaningful — i.e. once each arm
20
+ * has accumulated a handful of runs (see {@link MsprtOptions.minSamplesPerArm}).
21
+ *
22
+ * 2. {@link hoeffdingTwoSample} — a σ-free time-uniform confidence
23
+ * sequence for variables bounded to a known range (Darwin composite
24
+ * scores live in [0, 1]). Valid at ANY sample size with no variance
25
+ * estimate, so it is the honest choice when only a few runs exist.
26
+ * More conservative than mSPRT (wider intervals) by design.
27
+ *
28
+ * **Pure** — no LLM calls, no I/O, no `Date.now()`, no `Math.random()`.
29
+ * Fully deterministic, so tests pin exact statistic values.
30
+ *
31
+ * Caveat on warmup (documented, not hidden): mSPRT with an *estimated*
32
+ * variance is only asymptotically always-valid; with very few samples the
33
+ * variance estimate is noisy. Darwin's A/B sample sizes (minRuns 10–30) sit
34
+ * below the ~100-sample comfort zone for tight σ-estimation, so we expose
35
+ * `minSamplesPerArm` (default 5) below which mSPRT abstains (`decisive:false`)
36
+ * rather than fire on noise, and we offer Hoeffding as the σ-free fallback.
37
+ */
38
+ /**
39
+ * Mean + Bessel-corrected sample variance over finite values. Non-finite
40
+ * entries (NaN/Infinity) are dropped — a single bad score never poisons the
41
+ * estimate. Returns `{mean:0, variance:0, n:0}` for an all-invalid/empty input.
42
+ */
43
+ export function meanVar(samples) {
44
+ let n = 0;
45
+ let sum = 0;
46
+ for (const s of samples) {
47
+ if (typeof s === "number" && Number.isFinite(s)) {
48
+ n++;
49
+ sum += s;
50
+ }
51
+ }
52
+ if (n === 0)
53
+ return { mean: 0, variance: 0, n: 0 };
54
+ const mean = sum / n;
55
+ if (n < 2)
56
+ return { mean, variance: 0, n };
57
+ let sse = 0;
58
+ for (const s of samples) {
59
+ if (typeof s === "number" && Number.isFinite(s)) {
60
+ sse += (s - mean) ** 2;
61
+ }
62
+ }
63
+ return { mean, variance: sse / (n - 1), n };
64
+ }
65
/** Fallback significance level used when the caller supplies no usable `alpha`. */
const DEFAULT_ALPHA = 0.05;
/** Fallback std-dev τ of the N(0, τ²) mixture prior on the true mean difference. */
const DEFAULT_TAU = 0.1;
/** Fallback warmup: mSPRT abstains until each arm has this many finite samples. */
const DEFAULT_MIN_SAMPLES = 5;
/**
 * Two-sample mixture SPRT for a difference in means with always-valid
 * inference. Models H0: μ_A = μ_B against a Gaussian mixture alternative on
 * the effect (prior δ ~ N(0, τ²) on the true mean difference). Returns
 * `decisive:true` when the mixture likelihood ratio Λ crosses 1/alpha — a
 * threshold valid at every n (no peeking penalty).
 *
 * Closed form in ESTIMATOR coordinates. Let δ̂ = x̄_B − x̄_A be the observed
 * mean difference and v = Var(δ̂) its variance. Integrating the per-θ Gaussian
 * likelihood ratio against the N(0, τ²) mixture prior (Johari, Pekelis &
 * Walsh 2017) gives:
 *
 *   Λ = sqrt( v / (v + τ²) ) · exp( τ²·δ̂² / (2·v·(v + τ²)) ),  Λ ≥ 1/α ⇒ reject H0
 *
 * v is estimated with the WELCH variance of the difference of means,
 * v = s²_A/n_A + s²_B/n_B (Bessel-corrected per-arm sample variances), which
 * does not assume equal arm variances. Sample sizes enter only through v.
 *
 * Abstains (`decisive:false`, `direction:0`, `statistic:0`) when:
 *  - either arm has fewer finite samples than `minSamplesPerArm` (warmup);
 *  - the variance of the difference is zero and the means are equal, or an
 *    arm has < 2 samples (variance not estimable);
 *  - Λ is numerically indeterminate (NaN) because an intermediate overflowed,
 *    e.g. τ² = ∞ for an extreme `tau`. This keeps the result NaN-free.
 * Zero variance with a non-zero gap and ≥ 2 samples per arm is reported as
 * decisive with `statistic: Infinity`.
 *
 * @param {Iterable<number>} samplesA Scores of arm A (non-finite entries ignored).
 * @param {Iterable<number>} samplesB Scores of arm B (non-finite entries ignored).
 * @param {{alpha?: number, tau?: number, minSamplesPerArm?: number}} [opts]
 *   `alpha` must lie strictly in (0, 1); `tau` must be finite and > 0;
 *   `minSamplesPerArm` must be finite and ≥ 1 (floored). Unusable values fall
 *   back to the defaults above.
 * @returns {{method: "msprt", threshold: number, nA: number, nB: number,
 *   decisive: boolean, direction: number, statistic: number, reason: string}}
 *   `direction` is 1 when B's mean is higher, -1 when lower, 0 when not
 *   decisive; `statistic` is Λ; `threshold` is 1/alpha.
 */
export function msprtTwoSample(samplesA, samplesB, opts = {}) {
    const alpha = clampAlpha(opts.alpha);
    const tau = Number.isFinite(opts.tau) && opts.tau > 0 ? opts.tau : DEFAULT_TAU;
    const minSamples = Number.isFinite(opts.minSamplesPerArm) && opts.minSamplesPerArm >= 1
        ? Math.floor(opts.minSamplesPerArm)
        : DEFAULT_MIN_SAMPLES;
    const threshold = 1 / alpha;
    const a = meanVar(samplesA);
    const b = meanVar(samplesB);
    const base = {
        method: "msprt",
        threshold,
        nA: a.n,
        nB: b.n,
    };
    // Shared shape of every "no decision" result.
    const abstain = (reason) => ({
        ...base,
        decisive: false,
        direction: 0,
        statistic: 0,
        reason,
    });
    if (a.n < minSamples || b.n < minSamples) {
        return abstain(`warmup: need ≥${minSamples} samples/arm, have A=${a.n} B=${b.n}`);
    }
    const delta = b.mean - a.mean;
    const direction = delta > 0 ? 1 : delta < 0 ? -1 : 0;
    // Welch variance of the difference of means: v = Var(δ̂) = s²_A/n_A + s²_B/n_B.
    const varDelta = a.variance / a.n + b.variance / b.n;
    // Degenerate branch: zero (or NaN) variance of the difference. `!(v > 0)`
    // rather than `v <= 0` so a NaN variance also lands here.
    if (!(varDelta > 0)) {
        if (delta === 0 || a.n < 2 || b.n < 2) {
            return abstain(a.n < 2 || b.n < 2
                ? "insufficient samples to estimate variance"
                : "identical constant arms");
        }
        return {
            ...base,
            decisive: true,
            direction,
            statistic: Number.POSITIVE_INFINITY,
            reason: "deterministic arms differ (zero variance)",
        };
    }
    // Mixture SPRT closed form (estimator coordinates, prior δ ~ N(0, τ²)):
    //   Λ = √(v/(v+τ²)) · exp( τ²·δ̂² / (2·v·(v+τ²)) ),  v = Var(δ̂)
    const denom = varDelta + tau * tau;
    const logLambda = 0.5 * Math.log(varDelta / denom) +
        (tau * tau * delta * delta) / (2 * varDelta * denom);
    // An overflowing intermediate (e.g. τ² = ∞) makes the expression ∞/∞ = NaN.
    // Abstain explicitly instead of leaking a NaN statistic to the caller.
    if (Number.isNaN(logLambda)) {
        return abstain("indeterminate Λ (non-finite intermediate value); abstaining");
    }
    const lambda = Math.exp(logLambda);
    // Compare in log-space against log(1/alpha) so an astronomically large Λ
    // (exp overflow → Infinity) is still classified correctly.
    const decisive = logLambda >= Math.log(threshold);
    return {
        ...base,
        decisive,
        direction: decisive ? direction : 0,
        statistic: lambda,
        reason: decisive
            ? `Λ=${fmt(lambda)} ≥ 1/α=${fmt(threshold)}`
            : `Λ=${fmt(lambda)} < 1/α=${fmt(threshold)} (keep testing)`,
    };
}
167
/**
 * Variance-free two-sample decision for bounded scores, built from one
 * Hoeffding-style confidence interval per arm.
 *
 * For an arm with n finite samples the half-width is
 *
 *   w(n) = (hi − lo) · sqrt( ln( (n+1)/alpha ) / (2n) )
 *
 * and the arms are declared different when |x̄_B − x̄_A| is strictly greater
 * than w(n_A) + w(n_B), i.e. the two intervals no longer overlap. No variance
 * estimate is involved, which makes this the fallback when only a handful of
 * runs exist. It is more conservative than mSPRT.
 *
 * Samples are assumed to lie in [lo, hi]; the code does not clip or validate
 * them against that range. A non-finite or inverted/zero range falls back to
 * a width of 1.
 *
 * NOTE(review): with this w(n) the per-n Hoeffding tail is of order α/(n+1),
 * and Σ 1/(n+1) diverges, so a naive union bound over all n does not
 * obviously total ≤ α — confirm the derivation behind the "time-uniform"
 * description before relying on it for unbounded monitoring.
 *
 * @param {Iterable<number>} samplesA Scores of arm A (non-finite entries ignored).
 * @param {Iterable<number>} samplesB Scores of arm B (non-finite entries ignored).
 * @param {{alpha?: number, lo?: number, hi?: number, minSamplesPerArm?: number}} [opts]
 *   Defaults: `lo` 0, `hi` 1, `minSamplesPerArm` 2; `alpha` via `clampAlpha`.
 * @returns {{method: "hoeffding", nA: number, nB: number, decisive: boolean,
 *   direction: number, statistic: number, threshold: number, reason: string}}
 *   `statistic` is the absolute mean gap and `threshold` the summed
 *   half-widths (during warmup: 0 and the score range respectively).
 */
export function hoeffdingTwoSample(samplesA, samplesB, opts = {}) {
    const alpha = clampAlpha(opts.alpha);
    const lower = Number.isFinite(opts.lo) ? opts.lo : 0;
    const upper = Number.isFinite(opts.hi) ? opts.hi : 1;
    // Guard against an inverted or zero-width range.
    let span = 1;
    if (upper > lower) {
        span = upper - lower;
    }
    let warmup = 2;
    if (Number.isFinite(opts.minSamplesPerArm) && opts.minSamplesPerArm >= 1) {
        warmup = Math.floor(opts.minSamplesPerArm);
    }
    const armA = meanVar(samplesA);
    const armB = meanVar(samplesB);
    const counts = { method: "hoeffding", nA: armA.n, nB: armB.n };
    if (armA.n < warmup || armB.n < warmup) {
        return {
            ...counts,
            decisive: false,
            direction: 0,
            statistic: 0,
            threshold: span,
            reason: `warmup: need ≥${warmup} samples/arm, have A=${armA.n} B=${armB.n}`,
        };
    }
    // Half-width of one arm's interval after `n` samples.
    const radius = (n) => span * Math.sqrt(Math.log((n + 1) / alpha) / (2 * n));
    const threshold = radius(armA.n) + radius(armB.n);
    const diff = armB.mean - armA.mean;
    const gap = Math.abs(diff);
    const decisive = gap > threshold;
    // A decisive gap is strictly positive, so `diff` cannot be 0 here.
    let direction = 0;
    if (decisive) {
        direction = diff > 0 ? 1 : -1;
    }
    const reason = decisive
        ? `|Δ|=${fmt(gap)} > CS half-widths ${fmt(threshold)} (non-overlap)`
        : `|Δ|=${fmt(gap)} ≤ CS half-widths ${fmt(threshold)} (overlap)`;
    return {
        ...counts,
        decisive,
        direction,
        statistic: gap,
        threshold,
        reason,
    };
}
225
/**
 * Validate a caller-supplied significance level.
 *
 * @param {unknown} alpha Candidate value.
 * @returns {number} `alpha` itself when it is a finite number strictly inside
 *   (0, 1); otherwise `DEFAULT_ALPHA`.
 */
function clampAlpha(alpha) {
    // Number.isFinite is false for non-numbers, NaN and ±Infinity, so one
    // combined predicate covers both the type check and the open interval.
    const usable = Number.isFinite(alpha) && alpha > 0 && alpha < 1;
    return usable ? alpha : DEFAULT_ALPHA;
}
234
/**
 * Render a statistic for the human-readable `reason` strings.
 *
 * @param {number} x Value to format.
 * @returns {string} Three-decimal fixed notation for finite values, "∞" /
 *   "-∞" for the infinities, and "NaN" for NaN.
 */
function fmt(x) {
    // NaN must be handled before the sign test below: `NaN > 0` is false, so
    // without this guard NaN would be misreported as "-∞".
    if (Number.isNaN(x)) {
        return "NaN";
    }
    if (!Number.isFinite(x)) {
        return x > 0 ? "∞" : "-∞";
    }
    return x.toFixed(3);
}
239
+ //# sourceMappingURL=sequential.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sequential.js","sourceRoot":"","sources":["../../../src/evolution/sequential.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAmCH;;;;GAIG;AACH,MAAM,UAAU,OAAO,CAAC,OAA8B;IACpD,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,CAAC,EAAE,CAAC;YACJ,GAAG,IAAI,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IACD,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;IACnD,MAAM,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;IAC3C,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAChD,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC;AAC9C,CAAC;AAqBD,MAAM,aAAa,GAAG,IAAI,CAAC;AAC3B,MAAM,WAAW,GAAG,GAAG,CAAC;AACxB,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAE9B;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,MAAM,UAAU,cAAc,CAC5B,QAA+B,EAC/B,QAA+B,EAC/B,OAAqB,EAAE;IAEvB,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrC,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAK,IAAI,CAAC,GAAc,GAAG,CAAC,CAAC,CAAC,CAAE,IAAI,CAAC,GAAc,CAAC,CAAC,CAAC,WAAW,CAAC;IACvG,MAAM,UAAU,GACd,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAK,IAAI,CAAC,gBAA2B,IAAI,CAAC;QAC9E,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,gBAA0B,CAAC;QAC7C,CAAC,CAAC,mBAAmB,CAAC;IAC1B,MAAM,SAAS,GAAG,CAAC,GAAG,KAAK,CAAC;IAE5B,MAAM,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5B,MAAM,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG;QACX,MAAM,EAAE,OAAgB;QACxB,SAAS;QACT,EAAE,EAAE,CAAC,CAAC,CAAC;QACP,EAAE,EAAE,CAAC,CAAC,CAAC;KACR,CAAC;IAEF,IAAI,CAAC,CAAC,CAAC,GAAG,UAAU,IAAI,CAAC,CAAC,CAAC,GAAG,UAAU,EAAE,CAAC;QACzC,OAAO;YACL,GAAG,IAAI;YACP,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,CAAC;YACZ,SAAS
,EAAE,CAAC;YACZ,MAAM,EAAE,iBAAiB,UAAU,wBAAwB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;SAC1E,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC;IAC9B,MAAM,SAAS,GAAe,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAEjE,gFAAgF;IAChF,2EAA2E;IAC3E,uEAAuE;IACvE,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC;IAErD,2EAA2E;IAC3E,4EAA4E;IAC5E,0EAA0E;IAC1E,IAAI,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,EAAE,CAAC;QACpB,IAAI,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YACtC,OAAO;gBACL,GAAG,IAAI;gBACP,QAAQ,EAAE,KAAK;gBACf,SAAS,EAAE,CAAC;gBACZ,SAAS,EAAE,CAAC;gBACZ,MAAM,EACJ,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC;oBAChB,CAAC,CAAC,2CAA2C;oBAC7C,CAAC,CAAC,yBAAyB;aAChC,CAAC;QACJ,CAAC;QACD,OAAO;YACL,GAAG,IAAI;YACP,QAAQ,EAAE,IAAI;YACd,SAAS;YACT,SAAS,EAAE,MAAM,CAAC,iBAAiB;YACnC,MAAM,EAAE,2CAA2C;SACpD,CAAC;IACJ,CAAC;IAED,wEAAwE;IACxE,iEAAiE;IACjE,MAAM,KAAK,GAAG,QAAQ,GAAG,GAAG,GAAG,GAAG,CAAC;IACnC,MAAM,SAAS,GACb,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,KAAK,CAAC;QAChC,CAAC,GAAG,GAAG,GAAG,GAAG,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,GAAG,KAAK,CAAC,CAAC;IACvD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAEnC,4EAA4E;IAC5E,0EAA0E;IAC1E,MAAM,QAAQ,GAAG,SAAS,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAElD,OAAO;QACL,GAAG,IAAI;QACP,QAAQ;QACR,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QACnC,SAAS,EAAE,MAAM;QACjB,MAAM,EAAE,QAAQ;YACd,CAAC,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,UAAU,GAAG,CAAC,SAAS,CAAC,EAAE;YAC5C,CAAC,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,UAAU,GAAG,CAAC,SAAS,CAAC,iBAAiB;KAC9D,CAAC;AACJ,CAAC;AAcD;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,kBAAkB,CAChC,QAA+B,EAC/B,QAA+B,EAC/B,OAAyB,EAAE;IAE3B,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACrC,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAE,IAAI,CAAC,EAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9D,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,IA
AI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAE,IAAI,CAAC,EAAa,CAAC,CAAC,CAAC,CAAC,CAAC;IACjE,MAAM,KAAK,GAAG,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,4BAA4B;IACvE,MAAM,UAAU,GACd,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAK,IAAI,CAAC,gBAA2B,IAAI,CAAC;QAC9E,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,gBAA0B,CAAC;QAC7C,CAAC,CAAC,CAAC,CAAC;IAER,MAAM,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5B,MAAM,CAAC,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,EAAE,MAAM,EAAE,WAAoB,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAEhE,IAAI,CAAC,CAAC,CAAC,GAAG,UAAU,IAAI,CAAC,CAAC,CAAC,GAAG,UAAU,EAAE,CAAC;QACzC,OAAO;YACL,GAAG,IAAI;YACP,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,CAAC;YACZ,SAAS,EAAE,CAAC;YACZ,SAAS,EAAE,KAAK;YAChB,MAAM,EAAE,iBAAiB,UAAU,wBAAwB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;SAC1E,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,CAAC,CAAS,EAAU,EAAE,CACtC,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAEzD,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1B,MAAM,EAAE,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;IACtC,MAAM,SAAS,GAAG,EAAE,GAAG,EAAE,CAAC;IAC1B,MAAM,QAAQ,GAAG,GAAG,GAAG,SAAS,CAAC;IACjC,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC;IAC9B,MAAM,SAAS,GAAe,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAElE,OAAO;QACL,GAAG,IAAI;QACP,QAAQ;QACR,SAAS;QACT,SAAS,EAAE,GAAG;QACd,SAAS;QACT,MAAM,EAAE,QAAQ;YACd,CAAC,CAAC,OAAO,GAAG,CAAC,GAAG,CAAC,qBAAqB,GAAG,CAAC,SAAS,CAAC,gBAAgB;YACpE,CAAC,CAAC,OAAO,GAAG,CAAC,GAAG,CAAC,qBAAqB,GAAG,CAAC,SAAS,CAAC,YAAY;KACnE,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,KAAyB;IAC3C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,aAAa,CAAC;IAClD,MAAM,CAAC,GAAG,KAAe,CAAC;IAC1B,qEAAqE;IACrE,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,aAAa,CAAC;IAC3C,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,GAAG
,CAAC,CAAS;IACpB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;IACnD,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACtB,CAAC"}
@@ -44,6 +44,18 @@ export declare class ExperimentTracker {
44
44
  * (otherwise the incumbent version's historical data skews the comparison).
45
45
  */
46
46
  getAverageComposite(agentName: string, version: string, weights?: MetricWeights, since?: string): Promise<number>;
47
+ /**
48
+ * v0.7.0 — Per-experiment composite scores for a specific agent + prompt
49
+ * version, oldest run first (empty array when nothing matches). Unlike
50
+ * {@link getAverageComposite} this does NOT collapse to a scalar — it feeds
51
+ * the always-valid sequential confidence gate (mSPRT / Hoeffding), which
52
+ * needs the individual samples (and their variance), not just the mean.
53
+ *
54
+ * If `since` is provided, only experiments whose `startedAt` is at/after that
55
+ * ISO timestamp are included — pass the A/B test start so the incumbent's
56
+ * historical runs do not skew the comparison (as in {@link getAverageComposite}).
57
+ */
58
+ getCompositeScores(agentName: string, version: string, weights?: MetricWeights, since?: string): Promise<number[]>;
47
59
  /**
48
60
  * v0.6.0 — Average raw metric vector for a specific agent + prompt version,
49
61
  * keyed by the names in `DarwinMetrics` / `DARWIN_DEFAULT_OBJECTIVES`
@@ -1 +1 @@
1
- {"version":3,"file":"tracker.d.ts","sourceRoot":"","sources":["../../../src/evolution/tracker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EACV,gBAAgB,EAEhB,cAAc,EACd,aAAa,EACb,kBAAkB,EACnB,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAGpD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,MAAM,CAAiB;gBAEnB,MAAM,EAAE,cAAc;IAIlC;;;;OAIG;IACG,gBAAgB,CAAC,GAAG,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA+B5D;;;OAGG;IACG,QAAQ,CACZ,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,kBAAkB,CAAC;IA6C9B;;;;;;;;;OASG;IACH,iBAAiB,CACf,GAAG,EAAE,gBAAgB,EACrB,OAAO,GAAE,aAA+B,GACvC,MAAM;IA8BT;;;OAGG;IACG,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IA6BrE;;;;;;OAMG;IACG,mBAAmB,CACvB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,aAA+B,EACxC,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,CAAC;IAoBlB;;;;;;;;;;;;;;;;OAgBG;IACG,iBAAiB,CACrB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CA2BnC"}
1
+ {"version":3,"file":"tracker.d.ts","sourceRoot":"","sources":["../../../src/evolution/tracker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EACV,gBAAgB,EAEhB,cAAc,EACd,aAAa,EACb,kBAAkB,EACnB,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAGpD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,MAAM,CAAiB;gBAEnB,MAAM,EAAE,cAAc;IAIlC;;;;OAIG;IACG,gBAAgB,CAAC,GAAG,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA+B5D;;;OAGG;IACG,QAAQ,CACZ,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,kBAAkB,CAAC;IA6C9B;;;;;;;;;OASG;IACH,iBAAiB,CACf,GAAG,EAAE,gBAAgB,EACrB,OAAO,GAAE,aAA+B,GACvC,MAAM;IA8BT;;;OAGG;IACG,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IA6BrE;;;;;;OAMG;IACG,mBAAmB,CACvB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,aAA+B,EACxC,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,CAAC;IAoBlB;;;;;;;;;;OAUG;IACG,kBAAkB,CACtB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,aAA+B,EACxC,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,EAAE,CAAC;IAgBpB;;;;;;;;;;;;;;;;OAgBG;IACG,iBAAiB,CACrB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CA2BnC"}
@@ -159,6 +159,30 @@ export class ExperimentTracker {
159
159
  const total = filtered.reduce((sum, exp) => sum + this.getCompositeScore(exp, weights), 0);
160
160
  return total / filtered.length;
161
161
  }
162
+ /**
163
+ * v0.7.0 — Per-experiment composite scores for a specific agent + prompt
164
+ * version, in chronological order. Unlike {@link getAverageComposite} this
165
+ * does NOT collapse to a scalar — it feeds the always-valid sequential
166
+ * confidence gate (mSPRT / Hoeffding), which needs the individual samples
167
+ * (and therefore their variance), not just the mean.
168
+ *
169
+ * If `since` is provided, only experiments at/after that ISO timestamp are
170
+ * included — pass the A/B test start so the incumbent's historical runs do
171
+ * not skew the comparison (same convention as {@link getAverageComposite}).
172
+ */
173
+ async getCompositeScores(agentName, version, weights = DEFAULT_WEIGHTS, since) {
174
+ const experiments = await this.memory.loadExperiments(agentName);
175
+ let filtered = experiments.filter((e) => e.promptVersion === version);
176
+ if (since) {
177
+ filtered = filtered.filter((e) => e.startedAt >= since);
178
+ }
179
+ // loadExperiments() returns newest-first; reverse to chronological order
180
+ // so the sequence mirrors how the data actually accrued during the test.
181
+ return filtered
182
+ .slice()
183
+ .reverse()
184
+ .map((exp) => this.getCompositeScore(exp, weights));
185
+ }
162
186
  /**
163
187
  * v0.6.0 — Average raw metric vector for a specific agent + prompt version,
164
188
  * keyed by the names in `DarwinMetrics` / `DARWIN_DEFAULT_OBJECTIVES`
@@ -1 +1 @@
1
- {"version":3,"file":"tracker.js","sourceRoot":"","sources":["../../../src/evolution/tracker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAiB;IAE/B,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CAAC,GAAqB;QAC1C,gCAAgC;QAChC,MAAM,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAEtC,oDAAoD;QACpD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvE,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,GAAG,CAAC,aAAa,CAAC,CAAC;QACtE,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,aAAa,CAAC,CAAC;YAC3E,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC;YAC7B,MAAM,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAC/C,CAAC;QAED,+EAA+E;QAC/E,qEAAqE;QACrE,+EAA+E;QAC/E,MAAM,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,EAAE;YACtC,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC;gBACnC,CAAC,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YAEnD,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;gBAChB,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC;oBACtC,CAAC,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACxD,CAAC;YAED,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,SAAiB,EACjB,OAAgB;QAEhB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QAEjE,MAAM,QAAQ,GAAG,OAAO;YACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC;YACxD,CAAC,CAAC,WAAW,CAAC;QAEhB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,WAAW,EAAE,CAAC;gBACd,cAAc,EAAE,CAAC;aAClB,CAAC;QACJ,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC;QAClC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAE9D,4DAA4D;QAC5D,MAAM,WAAW,GAAG,QAAQ
,CAAC,MAAM,CACjC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CACvC,CAAC;QACF,MAAM,UAAU,GACd,WAAW,CAAC,MAAM,GAAG,CAAC;YACpB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC;gBACtE,WAAW,CAAC,MAAM;YACpB,CAAC,CAAC,CAAC,CAAC;QAER,MAAM,WAAW,GACf,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;QAEzE,MAAM,cAAc,GAClB,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;QAE1E,OAAO;YACL,SAAS;YACT,UAAU;YACV,WAAW;YACX,WAAW,EAAE,YAAY,GAAG,SAAS;YACrC,cAAc;SACf,CAAC;IACJ,CAAC;IAED;;;;;;;;;OASG;IACH,iBAAiB,CACf,GAAqB,EACrB,UAAyB,eAAe;QAExC,iFAAiF;QACjF,iEAAiE;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC;QACrD,MAAM,WAAW,GAAG,UAAU,CAAC,CAAC,CAAE,GAAG,CAAC,OAAO,CAAC,YAAuB,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAE/E,oEAAoE;QACpE,MAAM,sBAAsB,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAChE,MAAM,SAAS,GAAG,sBAAsB,GAAG,OAAO,CAAC,WAAW,GAAG,OAAO,CAAC,YAAY,GAAG,OAAO,CAAC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC;QAC3H,MAAM,KAAK,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,UAAU,GAAG;YACjB,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,GAAG,EAAE,EAAE,CAAC,CAAC;YACtD,YAAY,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,GAAG,KAAK,EAAE,CAAC,CAAC;YAC3D,QAAQ,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,GAAG,MAAM,EAAE,CAAC,CAAC;YAC1D,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;SAC7B,CAAC;QAEF,MAAM,KAAK,GAAG,CACZ,UAAU,CAAC,OAAO,GAAG,sBAAsB;YAC3C,UAAU,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW;YAC5C,UAAU,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY;YAC9C,UAAU,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ;YACtC,UAAU,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CACrC,GAAG,KAAK,CAAC;QAEV,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,kBAAkB,CAAC,SAAiB;QACxC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CA
AC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,MAAM,UAAU,GAAG,IAAI,GAAG,EAA8B,CAAC;QAEzD,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,GAAG,CAAC,QAAQ,IAAI,SAAS,CAAC;YACtC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,IAAI,EAAE,CAAC;gBACT,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,KAAK,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;YAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC,CAAC;YACxE,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC;gBACvC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM;gBACzF,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;YACzF,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;YAEvE,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;QAC7F,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IAC1D,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,mBAAmB,CACvB,SAAiB,EACjB,OAAe,EACf,UAAyB,eAAe,EACxC,KAAc;QAEd,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC,CAAC;QAEtE,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,CAAC;QACX,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAC3B,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,EACxD,CAAC,CACF,CAAC;QAEF,OAAO,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC;IACj
C,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,iBAAiB,CACrB,SAAiB,EACjB,OAAe,EACf,KAAc;QAEd,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC,CAAC;QAEtE,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;QAC1B,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC,CAAC;QAC5E,MAAM,UAAU,GACd,WAAW,CAAC,MAAM,GAAG,CAAC;YACpB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC;gBAClE,WAAW,CAAC,MAAM;YACpB,CAAC,CAAC,CAAC,CAAC;QAER,OAAO;YACL,YAAY,EAAE,UAAU;YACxB,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,CAAC;YACxE,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,GAAG,CAAC;YAC1E,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,CAAC;SACvE,CAAC;IACJ,CAAC;CACF"}
1
+ {"version":3,"file":"tracker.js","sourceRoot":"","sources":["../../../src/evolution/tracker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAiB;IAE/B,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CAAC,GAAqB;QAC1C,gCAAgC;QAChC,MAAM,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAEtC,oDAAoD;QACpD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvE,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,GAAG,CAAC,aAAa,CAAC,CAAC;QACtE,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,aAAa,CAAC,CAAC;YAC3E,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC;YAC7B,MAAM,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAC/C,CAAC;QAED,+EAA+E;QAC/E,qEAAqE;QACrE,+EAA+E;QAC/E,MAAM,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,EAAE;YACtC,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC;gBACnC,CAAC,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YAEnD,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;gBAChB,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC;oBACtC,CAAC,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACxD,CAAC;YAED,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,SAAiB,EACjB,OAAgB;QAEhB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QAEjE,MAAM,QAAQ,GAAG,OAAO;YACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC;YACxD,CAAC,CAAC,WAAW,CAAC;QAEhB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,WAAW,EAAE,CAAC;gBACd,cAAc,EAAE,CAAC;aAClB,CAAC;QACJ,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC;QAClC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAE9D,4DAA4D;QAC5D,MAAM,WAAW,GAAG,QAAQ
,CAAC,MAAM,CACjC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CACvC,CAAC;QACF,MAAM,UAAU,GACd,WAAW,CAAC,MAAM,GAAG,CAAC;YACpB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC;gBACtE,WAAW,CAAC,MAAM;YACpB,CAAC,CAAC,CAAC,CAAC;QAER,MAAM,WAAW,GACf,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;QAEzE,MAAM,cAAc,GAClB,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;QAE1E,OAAO;YACL,SAAS;YACT,UAAU;YACV,WAAW;YACX,WAAW,EAAE,YAAY,GAAG,SAAS;YACrC,cAAc;SACf,CAAC;IACJ,CAAC;IAED;;;;;;;;;OASG;IACH,iBAAiB,CACf,GAAqB,EACrB,UAAyB,eAAe;QAExC,iFAAiF;QACjF,iEAAiE;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC;QACrD,MAAM,WAAW,GAAG,UAAU,CAAC,CAAC,CAAE,GAAG,CAAC,OAAO,CAAC,YAAuB,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAE/E,oEAAoE;QACpE,MAAM,sBAAsB,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAChE,MAAM,SAAS,GAAG,sBAAsB,GAAG,OAAO,CAAC,WAAW,GAAG,OAAO,CAAC,YAAY,GAAG,OAAO,CAAC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC;QAC3H,MAAM,KAAK,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,UAAU,GAAG;YACjB,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,GAAG,EAAE,EAAE,CAAC,CAAC;YACtD,YAAY,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,GAAG,KAAK,EAAE,CAAC,CAAC;YAC3D,QAAQ,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,GAAG,MAAM,EAAE,CAAC,CAAC;YAC1D,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;SAC7B,CAAC;QAEF,MAAM,KAAK,GAAG,CACZ,UAAU,CAAC,OAAO,GAAG,sBAAsB;YAC3C,UAAU,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW;YAC5C,UAAU,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY;YAC9C,UAAU,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ;YACtC,UAAU,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CACrC,GAAG,KAAK,CAAC;QAEV,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,kBAAkB,CAAC,SAAiB;QACxC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CA
AC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,MAAM,UAAU,GAAG,IAAI,GAAG,EAA8B,CAAC;QAEzD,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,GAAG,CAAC,QAAQ,IAAI,SAAS,CAAC;YACtC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,IAAI,EAAE,CAAC;gBACT,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,KAAK,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;YAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC,CAAC;YACxE,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC;gBACvC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM;gBACzF,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;YACzF,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;YAEvE,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;QAC7F,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IAC1D,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,mBAAmB,CACvB,SAAiB,EACjB,OAAe,EACf,UAAyB,eAAe,EACxC,KAAc;QAEd,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC,CAAC;QAEtE,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,CAAC;QACX,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAC3B,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,EACxD,CAAC,CACF,CAAC;QAEF,OAAO,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC;IACj
C,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,kBAAkB,CACtB,SAAiB,EACjB,OAAe,EACf,UAAyB,eAAe,EACxC,KAAc;QAEd,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC,CAAC;QAEtE,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC,CAAC;QAC1D,CAAC;QAED,yEAAyE;QACzE,yEAAyE;QACzE,OAAO,QAAQ;aACZ,KAAK,EAAE;aACP,OAAO,EAAE;aACT,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC;IACxD,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,iBAAiB,CACrB,SAAiB,EACjB,OAAe,EACf,KAAc;QAEd,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC,CAAC;QAEtE,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;QAC1B,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC,CAAC;QAC5E,MAAM,UAAU,GACd,WAAW,CAAC,MAAM,GAAG,CAAC;YACpB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC;gBAClE,WAAW,CAAC,MAAM;YACpB,CAAC,CAAC,CAAC,CAAC;QAER,OAAO;YACL,YAAY,EAAE,UAAU;YACxB,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,CAAC;YACxE,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC,GAAG,CAAC;YAC1E,UAAU,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,CAAC;SACvE,CAAC;IACJ,CAAC;CACF"}
@@ -31,9 +31,11 @@ export { ClaudeCliProvider } from './providers/claude-cli.js';
31
31
  export { createMemory, SqliteMemoryProvider, PostgresMemoryProvider } from './memory/index.js';
32
32
  export { loadNotificationConfig } from './evolution/notifications.js';
33
33
  export type { NotificationConfig } from './evolution/notifications.js';
34
- export { dominates, nonDominatedFront, paretoSelect, scalarise, crowdingDistance, DARWIN_DEFAULT_OBJECTIVES, type ParetoObjective, type ParetoTruncationStrategy, } from './evolution/pareto.js';
34
+ export { dominates, dominatesEpsilon, nonDominatedFront, paretoSelect, scalarise, crowdingDistance, coverageFrontier, coverageWeights, selectByCoverage, sampleByCoverage, DARWIN_DEFAULT_OBJECTIVES, type ParetoObjective, type ParetoTruncationStrategy, type FrontierKey, type CoverageScores, } from './evolution/pareto.js';
35
35
  export { Reflector, type ReflectiveFeedback, type ReflectOptions, } from './evolution/reflector.js';
36
36
  export type { RunPromptFn } from './evolution/run-prompt-fn.js';
37
- export { GepaOptimizer, type ScoredVariant, type GenerateOptions as GepaGenerateOptions, type NextGenerationOptions as GepaNextGenerationOptions, type GepaOptimizerOptions, type MergeOptions as GepaMergeOptions, } from './evolution/optimizer-gepa.js';
38
- export { checkAlignmentPreservation, SAFETY_PATTERNS, } from './evolution/alignment.js';
37
+ export { GepaOptimizer, epochShuffledMinibatch, type ScoredVariant, type GenerateOptions as GepaGenerateOptions, type NextGenerationOptions as GepaNextGenerationOptions, type GepaOptimizerOptions, type MergeOptions as GepaMergeOptions, } from './evolution/optimizer-gepa.js';
38
+ export { checkAlignmentPreservation, checkAlignmentPreservationSemantic, SAFETY_PATTERNS, type EmbedFn, type SemanticAlignmentOptions, } from './evolution/alignment.js';
39
+ export { runMultiCritic, stripMarkdownForJudging, getCriticPrompts, type RunCriticFn, type RunMultiCriticOptions, type CriticPromptDef, type CriticScore, type MultiCriticResult, } from './evolution/multi-critic.js';
40
+ export { meanVar, msprtTwoSample, hoeffdingTwoSample, type ConfidenceMethod, type SequentialVerdict, type MeanVar, type MsprtOptions, type HoeffdingOptions, } from './evolution/sequential.js';
39
41
  //# sourceMappingURL=index.d.ts.map