@takk/bayesoutputgate 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/CHANGELOG.md +92 -0
  2. package/LICENSE +190 -0
  3. package/NOTICE +45 -0
  4. package/README.md +403 -0
  5. package/SECURITY.md +98 -0
  6. package/SPEC.md +467 -0
  7. package/dist/adapter/index.cjs +411 -0
  8. package/dist/adapter/index.d.cts +29 -0
  9. package/dist/adapter/index.d.ts +29 -0
  10. package/dist/adapter/index.js +404 -0
  11. package/dist/audit/index.cjs +82 -0
  12. package/dist/audit/index.d.cts +40 -0
  13. package/dist/audit/index.d.ts +40 -0
  14. package/dist/audit/index.js +77 -0
  15. package/dist/bayesfactor/index.cjs +152 -0
  16. package/dist/bayesfactor/index.d.cts +15 -0
  17. package/dist/bayesfactor/index.d.ts +15 -0
  18. package/dist/bayesfactor/index.js +149 -0
  19. package/dist/beta/index.cjs +180 -0
  20. package/dist/beta/index.d.cts +45 -0
  21. package/dist/beta/index.d.ts +45 -0
  22. package/dist/beta/index.js +178 -0
  23. package/dist/calibration/index.cjs +339 -0
  24. package/dist/calibration/index.d.cts +53 -0
  25. package/dist/calibration/index.d.ts +53 -0
  26. package/dist/calibration/index.js +333 -0
  27. package/dist/cli/index.cjs +968 -0
  28. package/dist/cli/index.d.cts +1 -0
  29. package/dist/cli/index.d.ts +1 -0
  30. package/dist/cli/index.js +966 -0
  31. package/dist/dimensions/index.cjs +106 -0
  32. package/dist/dimensions/index.d.cts +33 -0
  33. package/dist/dimensions/index.d.ts +33 -0
  34. package/dist/dimensions/index.js +104 -0
  35. package/dist/edge/index.cjs +1141 -0
  36. package/dist/edge/index.d.cts +12 -0
  37. package/dist/edge/index.d.ts +12 -0
  38. package/dist/edge/index.js +1109 -0
  39. package/dist/gate/index.cjs +803 -0
  40. package/dist/gate/index.d.cts +77 -0
  41. package/dist/gate/index.d.ts +77 -0
  42. package/dist/gate/index.js +799 -0
  43. package/dist/hypothesis/index.cjs +268 -0
  44. package/dist/hypothesis/index.d.cts +38 -0
  45. package/dist/hypothesis/index.d.ts +38 -0
  46. package/dist/hypothesis/index.js +266 -0
  47. package/dist/index.cjs +1141 -0
  48. package/dist/index.d.cts +29 -0
  49. package/dist/index.d.ts +29 -0
  50. package/dist/index.js +1109 -0
  51. package/dist/likelihood/index.cjs +137 -0
  52. package/dist/likelihood/index.d.cts +23 -0
  53. package/dist/likelihood/index.d.ts +23 -0
  54. package/dist/likelihood/index.js +132 -0
  55. package/dist/node/index.cjs +1282 -0
  56. package/dist/node/index.d.cts +24 -0
  57. package/dist/node/index.d.ts +24 -0
  58. package/dist/node/index.js +1246 -0
  59. package/dist/policy/index.cjs +88 -0
  60. package/dist/policy/index.d.cts +11 -0
  61. package/dist/policy/index.d.ts +11 -0
  62. package/dist/policy/index.js +85 -0
  63. package/dist/types-bMjn1j4e.d.cts +159 -0
  64. package/dist/types-bMjn1j4e.d.ts +159 -0
  65. package/package.json +142 -0
@@ -0,0 +1,178 @@
1
+ // src/errors.ts
2
/**
 * Error type thrown by this package. Carries a machine-readable `code`
 * next to the human-readable message.
 */
var BayesOutputGateError = class _BayesOutputGateError extends Error {
  /** Machine-readable error category supplied by the thrower (for example "NUMERIC"). */
  code;
  /**
   * @param code category identifier stored on the instance
   * @param message human-readable description forwarded to `Error`
   */
  constructor(code, message) {
    super(message);
    this.name = "BayesOutputGateError";
    this.code = code;
    // Restore the prototype of the class actually being constructed. Pinning it to
    // the base class prototype would strip a subclass's prototype from its own
    // instances and make `instanceof Subclass` report false.
    Object.setPrototypeOf(this, new.target.prototype);
  }
};
11
/**
 * Guard helper: does nothing when `condition` is truthy, otherwise throws a
 * BayesOutputGateError built from `code` and `message`.
 */
function invariant(condition, code, message) {
  if (condition) {
    return;
  }
  throw new BayesOutputGateError(code, message);
}
16
+
17
+ // src/mathspecial.ts
18
/** ln(sqrt(2 * pi)), the constant term of the log-gamma formula used by lgamma. */
var LN_SQRT_2PI = 0.9189385332046728;
/** The `g` parameter of the Lanczos approximation used by lgamma. */
var LANCZOS_G = 7;
/** Series coefficients of the Lanczos approximation; index 0 is the constant term. */
var LANCZOS_COEFFICIENTS = [
  0.9999999999998099,
  676.5203681218851,
  -1259.1392167224028,
  771.3234287776531,
  -176.6150291621406,
  12.507343278686905,
  -0.13857109526572012,
  9.984369578019572e-6,
  1.5056327351493116e-7
];
31
/**
 * Natural log of the gamma function for a positive finite argument, computed
 * with the Lanczos series defined by LANCZOS_G and LANCZOS_COEFFICIENTS.
 *
 * @throws BayesOutputGateError("NUMERIC") when `x` is not a positive finite number
 */
function lgamma(x) {
  const usable = Number.isFinite(x) && x > 0;
  if (!usable) {
    throw new BayesOutputGateError(
      "NUMERIC",
      `lgamma requires a positive finite argument, got ${x}`
    );
  }
  // The series is written for gamma(shifted + 1).
  const shifted = x - 1;
  let series = LANCZOS_COEFFICIENTS[0];
  for (const [k, coefficient] of LANCZOS_COEFFICIENTS.entries()) {
    if (k === 0) {
      continue;
    }
    series += coefficient / (shifted + k);
  }
  const base = shifted + LANCZOS_G + 0.5;
  return LN_SQRT_2PI + (shifted + 0.5) * Math.log(base) - base + Math.log(series);
}
46
/** Natural log of the Beta function B(a, b), assembled from three lgamma calls. */
function lbeta(a, b) {
  const logGammaA = lgamma(a);
  const logGammaB = lgamma(b);
  const logGammaTotal = lgamma(a + b);
  return logGammaA + logGammaB - logGammaTotal;
}
49
/**
 * Log-density of Beta(a, b) at `x`.
 *
 * Returns negative infinity outside [0, 1] (including non-finite `x`). At the
 * endpoints the result depends on whether the matching shape parameter is
 * below, above, or exactly 1.
 *
 * @throws BayesOutputGateError("NUMERIC") when `a` or `b` is not positive
 */
function betaLogDensity(x, a, b) {
  if (a <= 0 || b <= 0) {
    throw new BayesOutputGateError(
      "NUMERIC",
      `Beta shape parameters must be positive, got a=${a}, b=${b}`
    );
  }
  const outsideSupport = x < 0 || x > 1 || !Number.isFinite(x);
  if (outsideSupport) {
    return Number.NEGATIVE_INFINITY;
  }
  if (x === 0) {
    // The density diverges for a < 1 and vanishes for a > 1.
    if (a < 1) {
      return Number.POSITIVE_INFINITY;
    }
    if (a > 1) {
      return Number.NEGATIVE_INFINITY;
    }
    // a === 1: only the normalizer remains; the (b - 1) * log(1) term is zero.
    return -lbeta(a, b) + (b - 1) * Math.log(1);
  }
  if (x === 1) {
    // Mirror image of the x === 0 case, driven by b.
    if (b < 1) {
      return Number.POSITIVE_INFINITY;
    }
    if (b > 1) {
      return Number.NEGATIVE_INFINITY;
    }
    return -lbeta(a, b);
  }
  return (a - 1) * Math.log(x) + (b - 1) * Math.log1p(-x) - lbeta(a, b);
}
71
/**
 * Restrict `x` to [lo, hi]. The lower bound is checked first, so it wins when
 * the bounds are inverted; a NaN input is returned unchanged.
 */
function clamp(x, lo, hi) {
  return x < lo ? lo : x > hi ? hi : x;
}
76
+
77
+ // src/beta/index.ts
78
/** Floor for the blended mean (mirrored as its ceiling) and for the fitted variance. */
var VARIANCE_FLOOR = 1e-9;
/** Floor for the fitted concentration and for each fitted shape parameter. */
var CONCENTRATION_FLOOR = 1e-3;
/**
 * Online Beta model of scores in [0, 1]. It stores the prior plus three running
 * statistics (count, sum, sum of squares) and derives the Beta shape parameters
 * on demand from those statistics.
 */
var BetaModel = class _BetaModel {
  /** Shape parameter `a` of the prior Beta. */
  priorA;
  /** Shape parameter `b` of the prior Beta. */
  priorB;
  /** Number of scores observed so far. */
  count = 0;
  /** Running sum of the observed scores. */
  sum = 0;
  /** Running sum of the squared observed scores. */
  sumSquares = 0;
  /**
   * @param options optional `{ prior: { a, b } }`; the prior defaults to Beta(1, 1)
   * @throws BayesOutputGateError("INVALID_CONFIG") when a prior parameter is not positive and finite
   */
  constructor(options = {}) {
    const prior = options.prior ?? { a: 1, b: 1 };
    invariant(
      prior.a > 0 && prior.b > 0 && Number.isFinite(prior.a) && Number.isFinite(prior.b),
      "INVALID_CONFIG",
      `prior Beta parameters must be positive and finite, got a=${prior.a}, b=${prior.b}`
    );
    this.priorA = prior.a;
    this.priorB = prior.b;
  }
  /** Build a model from labeled scores in one pass. */
  static fromSamples(samples, options = {}) {
    const model = new _BetaModel(options);
    for (const s of samples) {
      model.observe(s);
    }
    return model;
  }
  /**
   * Restore a model from a snapshot.
   *
   * @throws BayesOutputGateError("INVALID_CONFIG") when the snapshot prior is invalid
   * @throws BayesOutputGateError("INVALID_SNAPSHOT") when count, sum, or sumSquares
   *   is not a finite, non-negative number
   */
  static fromSnapshot(snapshot) {
    const model = new _BetaModel({ prior: snapshot.prior });
    // observe() only accumulates scores in [0, 1], so a genuine snapshot always has
    // finite, non-negative statistics. Check all three fields for both properties:
    // an infinite count or a negative sum would otherwise be accepted and silently
    // distort params().
    const isFiniteNonNegative = (value) => Number.isFinite(value) && value >= 0;
    invariant(
      isFiniteNonNegative(snapshot.count) && isFiniteNonNegative(snapshot.sum) && isFiniteNonNegative(snapshot.sumSquares),
      "INVALID_SNAPSHOT",
      "snapshot fields must be finite and non-negative"
    );
    model.count = snapshot.count;
    model.sum = snapshot.sum;
    model.sumSquares = snapshot.sumSquares;
    return model;
  }
  /** Number of real observations folded into this model so far. */
  get observations() {
    return this.count;
  }
  /**
   * Fold a single score in [0, 1] into the model, updating its calibration online.
   *
   * @returns this model, so calls can be chained
   * @throws BayesOutputGateError("INVALID_SCORE") when `score` is not a finite number in [0, 1]
   */
  observe(score) {
    invariant(
      Number.isFinite(score) && score >= 0 && score <= 1,
      "INVALID_SCORE",
      `score must be a finite number in [0, 1], got ${score}`
    );
    this.count += 1;
    this.sum += score;
    this.sumSquares += score * score;
    return this;
  }
  /**
   * The fitted Beta parameters. The prior is mixed in as `priorA + priorB` pseudo-observations with
   * the prior's own mean and variance, then a and b come from method of moments on the blend, so a
   * cold model returns its prior and a warm model is data-driven.
   *
   * @throws BayesOutputGateError("NUMERIC") when the fit produces a non-finite parameter
   */
  params() {
    const priorStrength = this.priorA + this.priorB;
    const priorMean = this.priorA / priorStrength;
    const priorVariance = priorMean * (1 - priorMean) / (priorStrength + 1);
    // Pool the prior's first and second moments with the observed sums.
    const totalCount = this.count + priorStrength;
    const totalSum = this.sum + priorStrength * priorMean;
    const totalSumSquares = this.sumSquares + priorStrength * (priorVariance + priorMean * priorMean);
    const mean = clamp(totalSum / totalCount, VARIANCE_FLOOR, 1 - VARIANCE_FLOOR);
    const rawVariance = totalSumSquares / totalCount - mean * mean;
    // A Beta's variance is strictly below mean * (1 - mean); stay just under that bound.
    const maxVariance = mean * (1 - mean);
    const variance = clamp(rawVariance, VARIANCE_FLOOR, maxVariance * (1 - VARIANCE_FLOOR));
    // Method of moments: a + b = mean * (1 - mean) / variance - 1.
    const concentration = Math.max(mean * (1 - mean) / variance - 1, CONCENTRATION_FLOOR);
    const a = Math.max(mean * concentration, CONCENTRATION_FLOOR);
    const b = Math.max((1 - mean) * concentration, CONCENTRATION_FLOOR);
    if (!Number.isFinite(a) || !Number.isFinite(b)) {
      throw new BayesOutputGateError("NUMERIC", "Beta fit produced non-finite parameters");
    }
    return { a, b };
  }
  /** Mean of the fitted Beta. */
  mean() {
    const { a, b } = this.params();
    return a / (a + b);
  }
  /** Log-density of an observed score under the fitted Beta. */
  logDensity(score) {
    const { a, b } = this.params();
    return betaLogDensity(score, a, b);
  }
  /** A serializable snapshot of the current state. */
  snapshot() {
    return {
      count: this.count,
      sum: this.sum,
      sumSquares: this.sumSquares,
      prior: { a: this.priorA, b: this.priorB }
    };
  }
};
177
+
178
+ export { BetaModel };
@@ -0,0 +1,339 @@
1
+ 'use strict';
2
+
3
+ // src/errors.ts
4
/**
 * Error type thrown by this package. Carries a machine-readable `code`
 * next to the human-readable message.
 */
var BayesOutputGateError = class _BayesOutputGateError extends Error {
  /** Machine-readable error category supplied by the thrower (for example "NUMERIC"). */
  code;
  /**
   * @param code category identifier stored on the instance
   * @param message human-readable description forwarded to `Error`
   */
  constructor(code, message) {
    super(message);
    this.name = "BayesOutputGateError";
    this.code = code;
    // Restore the prototype of the class actually being constructed. Pinning it to
    // the base class prototype would strip a subclass's prototype from its own
    // instances and make `instanceof Subclass` report false.
    Object.setPrototypeOf(this, new.target.prototype);
  }
};
13
/**
 * Guard helper: does nothing when `condition` is truthy, otherwise throws a
 * BayesOutputGateError built from `code` and `message`.
 */
function invariant(condition, code, message) {
  if (condition) {
    return;
  }
  throw new BayesOutputGateError(code, message);
}
18
+
19
+ // src/dimensions/index.ts
20
/**
 * Pairwise linear-dependence check across score dimensions.
 *
 * For every pair of dimension names seen in `history`, the Pearson correlation
 * is computed over the vectors that contain both dimensions. Pairs whose
 * absolute correlation reaches `options.threshold` (default 0.5) are flagged.
 *
 * @param history score vectors; each entry is an iterable of `{ dimension, value }`
 * @param options optional `{ threshold }`, which must lie in [0, 1]
 * @returns the sorted dimension names, all scored pairs ordered by descending
 *   absolute correlation, the flagged subset, the largest absolute correlation
 *   (0 when no pair was scored), and whether nothing was flagged
 * @throws BayesOutputGateError("INVALID_CONFIG") for an invalid threshold
 * @throws BayesOutputGateError("INVALID_OBSERVATION") for fewer than two vectors
 */
function dependenceDiagnostic(history, options = {}) {
  const threshold = options.threshold ?? 0.5;
  invariant(
    Number.isFinite(threshold) && threshold >= 0 && threshold <= 1,
    "INVALID_CONFIG",
    `threshold must be in [0, 1], got ${threshold}`
  );
  invariant(
    history.length >= 2,
    "INVALID_OBSERVATION",
    "dependence diagnostic needs at least two score vectors"
  );
  // Every dimension name that appears anywhere in the history.
  const seen = /* @__PURE__ */ new Set();
  for (const vector of history) {
    for (const entry of vector) {
      seen.add(entry.dimension);
    }
  }
  const dimensions = [...seen].sort();
  // One dimension -> value lookup per vector; a later duplicate overwrites an earlier one.
  const lookups = history.map((vector) => {
    const byDimension = /* @__PURE__ */ new Map();
    for (const entry of vector) {
      byDimension.set(entry.dimension, entry.value);
    }
    return byDimension;
  });
  const pairs = [];
  dimensions.forEach((first, i) => {
    for (let j = i + 1; j < dimensions.length; j++) {
      const second = dimensions[j];
      const xs = [];
      const ys = [];
      for (const lookup of lookups) {
        const x = lookup.get(first);
        const y = lookup.get(second);
        // Only vectors carrying both dimensions contribute to the pair.
        if (x === void 0 || y === void 0) {
          continue;
        }
        xs.push(x);
        ys.push(y);
      }
      if (xs.length < 2) {
        continue;
      }
      const correlation = pearson(xs, ys);
      if (!Number.isFinite(correlation)) {
        continue;
      }
      pairs.push({ a: first, b: second, correlation, samples: xs.length });
    }
  });
  const magnitude = (pair) => Math.abs(pair.correlation);
  pairs.sort((left, right) => magnitude(right) - magnitude(left));
  const flagged = pairs.filter((pair) => magnitude(pair) >= threshold);
  const maxAbsCorrelation = pairs.length > 0 ? magnitude(pairs[0]) : 0;
  return {
    dimensions,
    pairs,
    flagged,
    maxAbsCorrelation,
    independenceAssumptionSafe: flagged.length === 0
  };
}
80
/**
 * Pearson correlation coefficient of two equally indexed samples. Returns 0
 * when either sample has zero variance (which also covers empty input).
 */
function pearson(xs, ys) {
  const size = xs.length;
  let totalX = 0;
  let totalY = 0;
  for (let k = 0; k < size; k += 1) {
    totalX += xs[k];
    totalY += ys[k];
  }
  const centerX = totalX / size;
  const centerY = totalY / size;
  // Second pass: accumulate centered cross-products and squares.
  let crossProducts = 0;
  let squaresX = 0;
  let squaresY = 0;
  for (let k = 0; k < size; k += 1) {
    const offsetX = xs[k] - centerX;
    const offsetY = ys[k] - centerY;
    crossProducts += offsetX * offsetY;
    squaresX += offsetX * offsetX;
    squaresY += offsetY * offsetY;
  }
  const degenerate = squaresX === 0 || squaresY === 0;
  return degenerate ? 0 : crossProducts / Math.sqrt(squaresX * squaresY);
}
105
+
106
+ // src/mathspecial.ts
107
/** ln(sqrt(2 * pi)), the constant term of the log-gamma formula used by lgamma. */
var LN_SQRT_2PI = 0.9189385332046728;
/** The `g` parameter of the Lanczos approximation used by lgamma. */
var LANCZOS_G = 7;
/** Series coefficients of the Lanczos approximation; index 0 is the constant term. */
var LANCZOS_COEFFICIENTS = [
  0.9999999999998099,
  676.5203681218851,
  -1259.1392167224028,
  771.3234287776531,
  -176.6150291621406,
  12.507343278686905,
  -0.13857109526572012,
  9.984369578019572e-6,
  1.5056327351493116e-7
];
120
/**
 * Natural log of the gamma function for a positive finite argument, computed
 * with the Lanczos series defined by LANCZOS_G and LANCZOS_COEFFICIENTS.
 *
 * @throws BayesOutputGateError("NUMERIC") when `x` is not a positive finite number
 */
function lgamma(x) {
  const usable = Number.isFinite(x) && x > 0;
  if (!usable) {
    throw new BayesOutputGateError(
      "NUMERIC",
      `lgamma requires a positive finite argument, got ${x}`
    );
  }
  // The series is written for gamma(shifted + 1).
  const shifted = x - 1;
  let series = LANCZOS_COEFFICIENTS[0];
  for (const [k, coefficient] of LANCZOS_COEFFICIENTS.entries()) {
    if (k === 0) {
      continue;
    }
    series += coefficient / (shifted + k);
  }
  const base = shifted + LANCZOS_G + 0.5;
  return LN_SQRT_2PI + (shifted + 0.5) * Math.log(base) - base + Math.log(series);
}
135
/**
 * Regularized incomplete beta function I_x(a, b), evaluated through
 * betaContinuedFraction. Returns 0 for x <= 0 and 1 for x >= 1.
 *
 * @throws BayesOutputGateError("NUMERIC") when `a` or `b` is not positive
 */
function regularizedIncompleteBeta(x, a, b) {
  if (a <= 0 || b <= 0) {
    throw new BayesOutputGateError(
      "NUMERIC",
      `incomplete beta requires positive shapes, got a=${a}, b=${b}`
    );
  }
  if (x <= 0) {
    return 0;
  }
  if (x >= 1) {
    return 1;
  }
  // Prefactor x^a * (1 - x)^b / B(a, b), assembled in log space.
  const logFront = lgamma(a + b) - lgamma(a) - lgamma(b) + a * Math.log(x) + b * Math.log1p(-x);
  const front = Math.exp(logFront);
  // Below the switch point the fraction is evaluated directly; otherwise the
  // mirrored problem (1 - x with a and b swapped) is evaluated and complemented.
  const switchPoint = (a + 1) / (a + b + 2);
  return x < switchPoint
    ? front * betaContinuedFraction(x, a, b) / a
    : 1 - front * betaContinuedFraction(1 - x, b, a) / b;
}
151
/** Cumulative distribution function of Beta(a, b) at `x`; delegates to regularizedIncompleteBeta. */
function betaCdf(x, a, b) {
  const cumulative = regularizedIncompleteBeta(x, a, b);
  return cumulative;
}
154
/**
 * Continued-fraction factor used by regularizedIncompleteBeta. It runs at most
 * 200 iterations of two half-steps each and stops early once the last factor
 * is within 3e-12 of 1. Whatever has accumulated is returned even if the
 * iteration cap is reached first.
 */
function betaContinuedFraction(x, a, b) {
  const iterationCap = 200;
  const tolerance = 3e-12;
  const floor = 1e-300;
  // Keep a denominator away from zero without changing its sign handling.
  const guard = (value) => (Math.abs(value) < floor ? floor : value);
  const shapeSum = a + b;
  const aPlusOne = a + 1;
  const aMinusOne = a - 1;
  let c = 1;
  let d = 1 / guard(1 - shapeSum * x / aPlusOne);
  let h = d;
  // One half-step of the recurrence: updates c and d in place, returns the factor for h.
  const advance = (term) => {
    d = guard(1 + term * d);
    c = guard(1 + term / c);
    d = 1 / d;
    return d * c;
  };
  for (let m = 1; m <= iterationCap; m++) {
    const twoM = 2 * m;
    const evenTerm = m * (b - m) * x / ((aMinusOne + twoM) * (a + twoM));
    h *= advance(evenTerm);
    const oddTerm = -(a + m) * (shapeSum + m) * x / ((a + twoM) * (aPlusOne + twoM));
    const delta = advance(oddTerm);
    h *= delta;
    if (Math.abs(delta - 1) < tolerance) {
      break;
    }
  }
  return h;
}
189
+
190
+ // src/calibration/index.ts
191
/**
 * One-sample Kolmogorov-Smirnov check of `samples` against Beta(params.a, params.b).
 *
 * `alpha` is not used continuously: it selects one of three tabulated
 * coefficients (<= 0.01, <= 0.05, anything else), and the critical value is
 * that coefficient divided by sqrt(n).
 *
 * @returns `{ ksStatistic, criticalValue, adequate, samples }`, where `adequate`
 *   is `ksStatistic <= criticalValue`
 * @throws BayesOutputGateError("INVALID_OBSERVATION") for fewer than five samples
 * @throws BayesOutputGateError("INVALID_CONFIG") for a non-positive Beta parameter
 * @throws BayesOutputGateError("INVALID_SCORE") for a sample that is not a finite number in [0, 1]
 */
function goodnessOfFit(samples, params, alpha = 0.05) {
  invariant(
    samples.length >= 5,
    "INVALID_OBSERVATION",
    "goodnessOfFit needs at least five samples"
  );
  invariant(params.a > 0 && params.b > 0, "INVALID_CONFIG", "Beta parameters must be positive");
  // Sort a copy so the caller's array is left untouched.
  const ordered = [...samples].sort((left, right) => left - right);
  const n = ordered.length;
  let ks = 0;
  ordered.forEach((value, rank) => {
    invariant(
      Number.isFinite(value) && value >= 0 && value <= 1,
      "INVALID_SCORE",
      `samples must be finite and in [0, 1], got ${value}`
    );
    const cdf = betaCdf(value, params.a, params.b);
    // The empirical CDF steps from rank / n to (rank + 1) / n at this sample.
    const gapAbove = (rank + 1) / n - cdf;
    const gapBelow = cdf - rank / n;
    ks = Math.max(ks, gapAbove, gapBelow);
  });
  let coefficient;
  if (alpha <= 0.01) {
    coefficient = 1.628;
  } else if (alpha <= 0.05) {
    coefficient = 1.358;
  } else {
    coefficient = 1.224;
  }
  const criticalValue = coefficient / Math.sqrt(n);
  return { ksStatistic: ks, criticalValue, adequate: ks <= criticalValue, samples: n };
}
217
/**
 * Mean squared difference between each predicted probability and its binary outcome.
 *
 * @throws BayesOutputGateError("INVALID_OBSERVATION") when `predictions` is empty
 * @throws whatever validatePrediction throws for an invalid entry
 */
function brierScore(predictions) {
  invariant(
    predictions.length > 0,
    "INVALID_OBSERVATION",
    "brierScore needs at least one prediction"
  );
  let squaredErrorTotal = 0;
  for (const entry of predictions) {
    validatePrediction(entry);
    const residual = entry.probability - entry.outcome;
    squaredErrorTotal += residual ** 2;
  }
  return squaredErrorTotal / predictions.length;
}
230
/**
 * Reliability diagram: sorts predictions into `bins` equal-width probability
 * bins and reports, per bin, the count, the mean predicted probability, and
 * the empirical outcome rate (both 0 for an empty bin).
 *
 * @throws BayesOutputGateError("INVALID_CONFIG") when `bins` is not a positive integer
 * @throws BayesOutputGateError("INVALID_OBSERVATION") when `predictions` is empty
 */
function reliability(predictions, bins = 10) {
  invariant(
    Number.isInteger(bins) && bins >= 1,
    "INVALID_CONFIG",
    `bins must be a positive integer, got ${bins}`
  );
  invariant(
    predictions.length > 0,
    "INVALID_OBSERVATION",
    "reliability needs at least one prediction"
  );
  const tallies = Array.from({ length: bins }, () => ({ predicted: 0, hits: 0, count: 0 }));
  for (const entry of predictions) {
    validatePrediction(entry);
    // A probability of exactly 1 would index one past the end; fold it into the last bin.
    const slot = tallies[Math.min(bins - 1, Math.floor(entry.probability * bins))];
    slot.predicted = slot.predicted + entry.probability;
    slot.hits = slot.hits + entry.outcome;
    slot.count = slot.count + 1;
  }
  return tallies.map(({ predicted, hits, count }, b) => {
    const occupied = count > 0;
    return {
      lower: b / bins,
      upper: (b + 1) / bins,
      count,
      meanPredicted: occupied ? predicted / count : 0,
      empiricalRate: occupied ? hits / count : 0
    };
  });
}
264
/**
 * Expected calibration error: for each non-empty reliability bin, the gap
 * between empirical rate and mean prediction, weighted by the bin's share of
 * all predictions.
 */
function expectedCalibrationError(predictions, bins = 10) {
  const diagram = reliability(predictions, bins);
  const total = predictions.length;
  let weightedGap = 0;
  for (const { count, empiricalRate, meanPredicted } of diagram) {
    if (count <= 0) {
      continue;
    }
    weightedGap += count / total * Math.abs(empiricalRate - meanPredicted);
  }
  return weightedGap;
}
275
/**
 * Collect, in history order, the values of every score for `dimension` that
 * belongs to an observation carrying `label`.
 */
function scoresForDimension(history, dimension, label) {
  const values = [];
  for (const { label: observedLabel, scores } of history) {
    if (observedLabel === label) {
      for (const entry of scores) {
        if (entry.dimension === dimension) {
          values.push(entry.value);
        }
      }
    }
  }
  return values;
}
289
/**
 * Check the modeling assumptions for a labeled history.
 *
 * For each model, the "high" and "low" scores of its dimension are tested
 * against the model's fitted Beta with goodnessOfFit, provided enough samples
 * exist. If the history has at least two observations, dependenceDiagnostic is
 * then run over every observation's score vector (all labels pooled).
 *
 * @param history labeled observations, each with `label` and `scores`
 * @param models per-dimension models, each with `dimension`, `high` and `low` Beta parameters
 * @param options optional `{ alpha, minSamples, dependenceThreshold }`
 * @returns `{ goodnessOfFitAdequate, independenceAssumptionSafe, inadequateDimensions, dependentPairs }`
 */
function assessAssumptions(history, models, options = {}) {
  // goodnessOfFit throws for fewer than five samples. Clamp the caller's threshold
  // so a smaller `minSamples` skips the test instead of crashing the assessment.
  const goodnessOfFitMinimum = 5;
  const alpha = options.alpha ?? 0.05;
  const minSamples = Math.max(options.minSamples ?? goodnessOfFitMinimum, goodnessOfFitMinimum);
  const inadequateDimensions = [];
  for (const model of models) {
    const high = scoresForDimension(history, model.dimension, "high");
    const low = scoresForDimension(history, model.dimension, "low");
    const highBad = high.length >= minSamples && !goodnessOfFit(high, model.high, alpha).adequate;
    const lowBad = low.length >= minSamples && !goodnessOfFit(low, model.low, alpha).adequate;
    // A dimension is reported once even if both hypotheses fit poorly.
    if (highBad || lowBad) {
      inadequateDimensions.push(model.dimension);
    }
  }
  const vectors = history.map((observation) => observation.scores);
  let independenceAssumptionSafe = true;
  const dependentPairs = [];
  // dependenceDiagnostic requires at least two vectors; with fewer, independence is left as safe.
  if (vectors.length >= 2) {
    const diagnostic = dependenceDiagnostic(
      vectors,
      options.dependenceThreshold !== void 0 ? { threshold: options.dependenceThreshold } : {}
    );
    independenceAssumptionSafe = diagnostic.independenceAssumptionSafe;
    for (const pair of diagnostic.flagged) {
      dependentPairs.push(`${pair.a}~${pair.b}`);
    }
  }
  return {
    goodnessOfFitAdequate: inadequateDimensions.length === 0,
    independenceAssumptionSafe,
    inadequateDimensions,
    dependentPairs
  };
}
322
/**
 * Validate one prediction: the probability must be a finite number in [0, 1]
 * and the outcome must be exactly 0 or 1.
 *
 * @throws BayesOutputGateError("INVALID_SCORE") for a bad probability
 * @throws BayesOutputGateError("INVALID_OBSERVATION") for a bad outcome
 */
function validatePrediction(prediction) {
  const probability = prediction.probability;
  const probabilityOk = Number.isFinite(probability) && probability >= 0 && probability <= 1;
  invariant(
    probabilityOk,
    "INVALID_SCORE",
    `prediction probability must be in [0, 1], got ${prediction.probability}`
  );
  const outcome = prediction.outcome;
  const outcomeOk = outcome === 0 || outcome === 1;
  invariant(
    outcomeOk,
    "INVALID_OBSERVATION",
    `prediction outcome must be 0 or 1, got ${prediction.outcome}`
  );
}
334
+
335
+ exports.assessAssumptions = assessAssumptions;
336
+ exports.brierScore = brierScore;
337
+ exports.expectedCalibrationError = expectedCalibrationError;
338
+ exports.goodnessOfFit = goodnessOfFit;
339
+ exports.reliability = reliability;
@@ -0,0 +1,53 @@
1
+ import { L as LabeledObservation, e as DimensionModel, A as AssumptionReport, b as BetaParams } from '../types-bMjn1j4e.cjs';
2
+
3
+ /** A posterior probability of high quality paired with the realized binary outcome (1 high, 0 low). */
4
+ interface Prediction {
5
+ readonly probability: number; // brierScore and reliability reject values that are not finite or fall outside [0, 1]
6
+ readonly outcome: number; // brierScore and reliability reject anything other than exactly 0 or 1
7
+ }
8
+ /** The Kolmogorov-Smirnov goodness-of-fit of scores against a fitted Beta. */
9
+ interface GoodnessOfFit {
10
+ readonly ksStatistic: number; // largest gap found between the empirical CDF and the Beta CDF
11
+ readonly criticalValue: number; // tabulated coefficient for the chosen alpha, divided by sqrt(samples)
12
+ /** True when the KS statistic does not exceed the critical value, so the Beta assumption is not rejected. */
13
+ readonly adequate: boolean;
14
+ readonly samples: number; // number of samples the statistic was computed from
15
+ }
16
+ /**
17
+ * Goodness-of-fit of observed scores against a fitted Beta, by the Kolmogorov-Smirnov statistic
18
+ * (the largest gap between the empirical CDF and the Beta CDF). When `adequate` is false, the test
19
+ * rejects the fitted Beta for these scores and the density-ratio likelihood should not be trusted for them.
+ * `alpha` (default 0.05) selects one of three tabulated levels: <= 0.01, <= 0.05, or anything larger.
+ * Throws when fewer than five samples are given, when a Beta parameter is not positive, or when a
+ * sample is not a finite number in [0, 1].
20
+ */
21
+ declare function goodnessOfFit(samples: readonly number[], params: BetaParams, alpha?: number): GoodnessOfFit;
22
+ /** Mean squared error between predicted probabilities and realized binary outcomes. Lower is better. Throws when `predictions` is empty or an entry has an invalid probability or outcome. */
23
+ declare function brierScore(predictions: readonly Prediction[]): number;
24
+ /** One bin of a reliability diagram. */
25
+ interface ReliabilityBin {
26
+ readonly lower: number; // lower edge of the bin: index / bins
27
+ readonly upper: number; // upper edge of the bin: (index + 1) / bins
28
+ readonly count: number; // number of predictions that fell into this bin
29
+ readonly meanPredicted: number; // mean predicted probability in the bin; 0 when the bin is empty
30
+ readonly empiricalRate: number; // mean realized outcome in the bin; 0 when the bin is empty
31
+ }
32
+ /** Bin predictions by predicted probability into a reliability diagram of `bins` equal-width bins (default 10); a probability of exactly 1 goes into the last bin. Throws when `bins` is not a positive integer or `predictions` is empty. */
33
+ declare function reliability(predictions: readonly Prediction[], bins?: number): ReliabilityBin[];
34
+ /** Expected calibration error: the count-weighted mean gap between predicted and empirical rates, taken over the non-empty bins of `reliability(predictions, bins)`. */
35
+ declare function expectedCalibrationError(predictions: readonly Prediction[], bins?: number): number;
36
+ /** Options for {@link assessAssumptions}. */
37
+ interface AssessOptions {
38
+ /** Significance level for the per-dimension goodness-of-fit test. Defaults to 0.05. */
39
+ readonly alpha?: number;
40
+ /** Absolute-correlation threshold at or above which a dimension pair is flagged as dependent. Must be in [0, 1]. Defaults to 0.5. */
41
+ readonly dependenceThreshold?: number;
42
+ /** Minimum samples per hypothesis before a dimension's fit is tested. Defaults to 5. */
43
+ readonly minSamples?: number;
44
+ }
45
+ /**
46
+ * Assess whether the gate's modeling assumptions hold for a labeled history under a set of dimension
47
+ * models: the per-dimension Beta goodness-of-fit for both hypotheses, and the conditional-independence
48
+ * of the dimensions. Produces an {@link AssumptionReport} a gate can consult to escalate instead of
49
+ * trusting a Bayes Factor computed from a misspecified model.
+ * The dependence check uses pairwise Pearson correlation over every observation's scores, pooled
+ * across labels, and is skipped (reported as safe) when the history has fewer than two observations.
50
+ */
51
+ declare function assessAssumptions(history: readonly LabeledObservation[], models: readonly DimensionModel[], options?: AssessOptions): AssumptionReport;
52
+
53
+ export { type AssessOptions, type GoodnessOfFit, type Prediction, type ReliabilityBin, assessAssumptions, brierScore, expectedCalibrationError, goodnessOfFit, reliability };
@@ -0,0 +1,53 @@
1
+ import { L as LabeledObservation, e as DimensionModel, A as AssumptionReport, b as BetaParams } from '../types-bMjn1j4e.js';
2
+
3
+ /** A posterior probability of high quality paired with the realized binary outcome (1 high, 0 low). */
4
+ interface Prediction {
5
+ readonly probability: number; // brierScore and reliability reject values that are not finite or fall outside [0, 1]
6
+ readonly outcome: number; // brierScore and reliability reject anything other than exactly 0 or 1
7
+ }
8
+ /** The Kolmogorov-Smirnov goodness-of-fit of scores against a fitted Beta. */
9
+ interface GoodnessOfFit {
10
+ readonly ksStatistic: number; // largest gap found between the empirical CDF and the Beta CDF
11
+ readonly criticalValue: number; // tabulated coefficient for the chosen alpha, divided by sqrt(samples)
12
+ /** True when the KS statistic does not exceed the critical value, so the Beta assumption is not rejected. */
13
+ readonly adequate: boolean;
14
+ readonly samples: number; // number of samples the statistic was computed from
15
+ }
16
+ /**
17
+ * Goodness-of-fit of observed scores against a fitted Beta, by the Kolmogorov-Smirnov statistic
18
+ * (the largest gap between the empirical CDF and the Beta CDF). When `adequate` is false, the test
19
+ * rejects the fitted Beta for these scores and the density-ratio likelihood should not be trusted for them.
+ * `alpha` (default 0.05) selects one of three tabulated levels: <= 0.01, <= 0.05, or anything larger.
+ * Throws when fewer than five samples are given, when a Beta parameter is not positive, or when a
+ * sample is not a finite number in [0, 1].
20
+ */
21
+ declare function goodnessOfFit(samples: readonly number[], params: BetaParams, alpha?: number): GoodnessOfFit;
22
+ /** Mean squared error between predicted probabilities and realized binary outcomes. Lower is better. Throws when `predictions` is empty or an entry has an invalid probability or outcome. */
23
+ declare function brierScore(predictions: readonly Prediction[]): number;
24
+ /** One bin of a reliability diagram. */
25
+ interface ReliabilityBin {
26
+ readonly lower: number; // lower edge of the bin: index / bins
27
+ readonly upper: number; // upper edge of the bin: (index + 1) / bins
28
+ readonly count: number; // number of predictions that fell into this bin
29
+ readonly meanPredicted: number; // mean predicted probability in the bin; 0 when the bin is empty
30
+ readonly empiricalRate: number; // mean realized outcome in the bin; 0 when the bin is empty
31
+ }
32
+ /** Bin predictions by predicted probability into a reliability diagram of `bins` equal-width bins (default 10); a probability of exactly 1 goes into the last bin. Throws when `bins` is not a positive integer or `predictions` is empty. */
33
+ declare function reliability(predictions: readonly Prediction[], bins?: number): ReliabilityBin[];
34
+ /** Expected calibration error: the count-weighted mean gap between predicted and empirical rates, taken over the non-empty bins of `reliability(predictions, bins)`. */
35
+ declare function expectedCalibrationError(predictions: readonly Prediction[], bins?: number): number;
36
+ /** Options for {@link assessAssumptions}. */
37
+ interface AssessOptions {
38
+ /** Significance level for the per-dimension goodness-of-fit test. Defaults to 0.05. */
39
+ readonly alpha?: number;
40
+ /** Absolute-correlation threshold at or above which a dimension pair is flagged as dependent. Must be in [0, 1]. Defaults to 0.5. */
41
+ readonly dependenceThreshold?: number;
42
+ /** Minimum samples per hypothesis before a dimension's fit is tested. Defaults to 5. */
43
+ readonly minSamples?: number;
44
+ }
45
+ /**
46
+ * Assess whether the gate's modeling assumptions hold for a labeled history under a set of dimension
47
+ * models: the per-dimension Beta goodness-of-fit for both hypotheses, and the conditional-independence
48
+ * of the dimensions. Produces an {@link AssumptionReport} a gate can consult to escalate instead of
49
+ * trusting a Bayes Factor computed from a misspecified model.
+ * The dependence check uses pairwise Pearson correlation over every observation's scores, pooled
+ * across labels, and is skipped (reported as safe) when the history has fewer than two observations.
50
+ */
51
+ declare function assessAssumptions(history: readonly LabeledObservation[], models: readonly DimensionModel[], options?: AssessOptions): AssumptionReport;
52
+
53
+ export { type AssessOptions, type GoodnessOfFit, type Prediction, type ReliabilityBin, assessAssumptions, brierScore, expectedCalibrationError, goodnessOfFit, reliability };