@qmilab/lodestar-core 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +12 -0
- package/dist/index.js.map +1 -1
- package/dist/schemas/action.d.ts +31 -13
- package/dist/schemas/action.d.ts.map +1 -1
- package/dist/schemas/action.js +20 -1
- package/dist/schemas/action.js.map +1 -1
- package/dist/schemas/approval.d.ts +271 -0
- package/dist/schemas/approval.d.ts.map +1 -0
- package/dist/schemas/approval.js +119 -0
- package/dist/schemas/approval.js.map +1 -0
- package/dist/schemas/belief.d.ts.map +1 -1
- package/dist/schemas/belief.js +7 -1
- package/dist/schemas/belief.js.map +1 -1
- package/dist/schemas/calibration.d.ts +977 -0
- package/dist/schemas/calibration.d.ts.map +1 -0
- package/dist/schemas/calibration.js +187 -0
- package/dist/schemas/calibration.js.map +1 -0
- package/dist/schemas/claim.d.ts.map +1 -1
- package/dist/schemas/claim.js +4 -2
- package/dist/schemas/claim.js.map +1 -1
- package/dist/schemas/common.d.ts.map +1 -1
- package/dist/schemas/common.js +11 -5
- package/dist/schemas/common.js.map +1 -1
- package/dist/schemas/policy.d.ts +768 -0
- package/dist/schemas/policy.d.ts.map +1 -0
- package/dist/schemas/policy.js +200 -0
- package/dist/schemas/policy.js.map +1 -0
- package/dist/schemas/probe-pack.d.ts +152 -0
- package/dist/schemas/probe-pack.d.ts.map +1 -0
- package/dist/schemas/probe-pack.js +140 -0
- package/dist/schemas/probe-pack.js.map +1 -0
- package/dist/schemas/reflection.d.ts +405 -0
- package/dist/schemas/reflection.d.ts.map +1 -0
- package/dist/schemas/reflection.js +154 -0
- package/dist/schemas/reflection.js.map +1 -0
- package/dist/schemas/revision.d.ts.map +1 -1
- package/dist/schemas/revision.js.map +1 -1
- package/dist/schemas/sentinel.d.ts +134 -0
- package/dist/schemas/sentinel.d.ts.map +1 -0
- package/dist/schemas/sentinel.js +97 -0
- package/dist/schemas/sentinel.js.map +1 -0
- package/package.json +2 -7
- package/src/index.ts +18 -0
- package/src/schemas/action.ts +20 -1
- package/src/schemas/approval.ts +136 -0
- package/src/schemas/belief.ts +7 -1
- package/src/schemas/calibration.ts +212 -0
- package/src/schemas/claim.ts +15 -8
- package/src/schemas/common.ts +16 -10
- package/src/schemas/policy.ts +231 -0
- package/src/schemas/probe-pack.ts +169 -0
- package/src/schemas/reflection.ts +166 -0
- package/src/schemas/revision.ts +7 -5
- package/src/schemas/sentinel.ts +104 -0
|
@@ -0,0 +1,977 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/**
 * Calibration wire format.
 *
 * These schemas describe what the harness `Calibrator` measures —
 * per-class ECE / Brier / calibration-gap tables and the flagged classes.
 * They lived in `@qmilab/lodestar-harness` while the calibrator was a
 * return-value-only surface; they **graduated to `@qmilab/lodestar-core`**
 * when the durable `calibration.computed@1` event landed (ADR-0011), so
 * the event payload can embed the report (core is the dependency root and
 * cannot import the harness). The harness re-exports them unchanged, so
 * harness consumers are unaffected.
 *
 * The Calibrator stays measure-only: it returns a {@link CalibrationReport}
 * and never writes. Recording a report as a `calibration.computed@1` event
 * is a separate publish step (`lodestar harness calibrate` / the harness
 * `eventLogCalibrationSink`), the same measure/record split the sentinels
 * follow (a `Sentinel` returns findings; `eventLogAlertSink` writes them).
 *
 * Everything here is validated at the calibrator and event-sink boundaries,
 * the same discipline that the probe-run observation and sentinel-alert
 * builders follow.
 */
|
|
24
|
+
/**
 * Origin of a calibration sample within the event log.
 *
 * - `action_outcome`: the label comes from a belief → decision → action
 *   chain, using the action's realised result (its terminal phase or an
 *   explicit Outcome).
 * - `truth_status`: the label comes from the firewall moving the belief's
 *   `truth_status` to `supported` / `contradicted`, i.e. the world
 *   adjudicating the belief.
 */
export declare const SampleSourceSchema: z.ZodEnum<
  ["action_outcome", "truth_status"]
>;
/** Literal union of the source names accepted by {@link SampleSourceSchema}. */
export type SampleSource = z.infer<typeof SampleSourceSchema>;
|
|
34
|
+
/**
 * Scored metrics for one set of samples — either a single calibration
 * class or the pooled population.
 *
 * The parsed (output) and accepted (input) shapes are identical, and
 * unknown keys are stripped on parse (`"strip"`).
 */
export declare const CalibrationMetricsSchema: z.ZodObject<
  {
    n: z.ZodNumber;
    /** mean of stated confidence */
    mean_confidence: z.ZodNumber;
    /** realised positive rate, mean(correct) */
    empirical_accuracy: z.ZodNumber;
    /** mean((p - y)^2); 0 is perfect, lower is better */
    brier_score: z.ZodNumber;
    /** expected calibration error over equal-width confidence bins */
    ece: z.ZodNumber;
    /** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
    calibration_gap: z.ZodNumber;
    // NOTE(review): presumably mirrors `calibration_gap > 0`; confirm against the calibrator.
    overconfident: z.ZodBoolean;
  },
  "strip",
  z.ZodTypeAny,
  {
    n: number;
    mean_confidence: number;
    empirical_accuracy: number;
    brier_score: number;
    ece: number;
    calibration_gap: number;
    overconfident: boolean;
  },
  {
    n: number;
    mean_confidence: number;
    empirical_accuracy: number;
    brier_score: number;
    ece: number;
    calibration_gap: number;
    overconfident: boolean;
  }
>;
/** Parsed shape of {@link CalibrationMetricsSchema}. */
export type CalibrationMetrics = z.infer<typeof CalibrationMetricsSchema>;
|
|
66
|
+
/**
 * A single non-empty bin of a reliability diagram: the bin's `lower` and
 * `upper` bounds, the number of samples `n` in it, and the bin's
 * `mean_confidence` and `empirical_accuracy`.
 */
export declare const ReliabilityBinSchema: z.ZodObject<
  {
    lower: z.ZodNumber;
    upper: z.ZodNumber;
    n: z.ZodNumber;
    mean_confidence: z.ZodNumber;
    empirical_accuracy: z.ZodNumber;
  },
  "strip",
  z.ZodTypeAny,
  {
    lower: number;
    upper: number;
    n: number;
    mean_confidence: number;
    empirical_accuracy: number;
  },
  {
    lower: number;
    upper: number;
    n: number;
    mean_confidence: number;
    empirical_accuracy: number;
  }
>;
/** Parsed shape of {@link ReliabilityBinSchema}. */
export type ReliabilityBin = z.infer<typeof ReliabilityBinSchema>;
|
|
87
|
+
/**
 * Per-class result: the class's metrics, its reliability bins, and the
 * flag verdict.
 *
 * `metrics` and `reliability_bins` refer to `CalibrationMetricsSchema` and
 * `ReliabilityBinSchema` by `typeof` instead of repeating their expanded
 * shapes, so this declaration cannot drift from the schemas it embeds. The
 * resulting type is structurally identical to the fully expanded form.
 */
export declare const CalibrationClassResultSchema: z.ZodObject<
  {
    calibration_class: z.ZodString;
    metrics: typeof CalibrationMetricsSchema;
    /** non-empty bins only, ascending by `lower` */
    reliability_bins: z.ZodArray<typeof ReliabilityBinSchema, "many">;
    flagged: z.ZodBoolean;
    /** human-legible reason when flagged; `null` when not */
    flag_reason: z.ZodNullable<z.ZodString>;
  },
  "strip",
  z.ZodTypeAny,
  {
    calibration_class: string;
    metrics: z.output<typeof CalibrationMetricsSchema>;
    reliability_bins: z.output<typeof ReliabilityBinSchema>[];
    flagged: boolean;
    flag_reason: string | null;
  },
  {
    calibration_class: string;
    metrics: z.input<typeof CalibrationMetricsSchema>;
    reliability_bins: z.input<typeof ReliabilityBinSchema>[];
    flagged: boolean;
    flag_reason: string | null;
  }
>;
/** Parsed shape of {@link CalibrationClassResultSchema}. */
export type CalibrationClassResult = z.infer<typeof CalibrationClassResultSchema>;
|
|
185
|
+
/**
 * The calibrator configuration as actually applied — thresholds and
 * toggles — echoed back in the report so a run can be reproduced.
 */
export declare const ResolvedCalibratorConfigSchema: z.ZodObject<
  {
    bins: z.ZodNumber;
    min_samples: z.ZodNumber;
    ece_threshold: z.ZodNumber;
    gap_threshold: z.ZodNumber;
    outcome_sources: z.ZodArray<
      z.ZodEnum<["action_outcome", "truth_status"]>,
      "many"
    >;
    include_synthetic_authority: z.ZodBoolean;
  },
  "strip",
  z.ZodTypeAny,
  {
    bins: number;
    min_samples: number;
    ece_threshold: number;
    gap_threshold: number;
    outcome_sources: ("truth_status" | "action_outcome")[];
    include_synthetic_authority: boolean;
  },
  {
    bins: number;
    min_samples: number;
    ece_threshold: number;
    gap_threshold: number;
    outcome_sources: ("truth_status" | "action_outcome")[];
    include_synthetic_authority: boolean;
  }
>;
/** Parsed shape of {@link ResolvedCalibratorConfigSchema}. */
export type ResolvedCalibratorConfig = z.infer<typeof ResolvedCalibratorConfigSchema>;
|
|
209
|
+
/**
 * The calibrator's output: per-class tables, a pooled `overall` block,
 * the flagged class names, and the config that produced it. A pure
 * function of `(events, config)` — no clock, no scope inference — so it
 * is deterministic and testable, and re-running it over the same event
 * window reproduces the report (the property the `cursor` on
 * {@link CalibrationComputedPayloadSchema} makes auditable).
 *
 * The nested class-result, metrics and config shapes are referenced from
 * their own schema declarations (`typeof …`) rather than re-inlined, so
 * this type stays in lockstep with them. It is structurally identical to
 * the fully expanded form.
 */
export declare const CalibrationReportSchema: z.ZodObject<
  {
    /** total samples resolved and included (after exclusions) */
    sample_count: z.ZodNumber;
    classes: z.ZodArray<typeof CalibrationClassResultSchema, "many">;
    overall: typeof CalibrationMetricsSchema;
    flagged_classes: z.ZodArray<z.ZodString, "many">;
    config: typeof ResolvedCalibratorConfigSchema;
  },
  "strip",
  z.ZodTypeAny,
  {
    sample_count: number;
    classes: z.output<typeof CalibrationClassResultSchema>[];
    overall: z.output<typeof CalibrationMetricsSchema>;
    flagged_classes: string[];
    config: z.output<typeof ResolvedCalibratorConfigSchema>;
  },
  {
    sample_count: number;
    classes: z.input<typeof CalibrationClassResultSchema>[];
    overall: z.input<typeof CalibrationMetricsSchema>;
    flagged_classes: string[];
    config: z.input<typeof ResolvedCalibratorConfigSchema>;
  }
>;
/** Parsed shape of {@link CalibrationReportSchema}. */
export type CalibrationReport = z.infer<typeof CalibrationReportSchema>;
|
|
453
|
+
/**
 * Who or what started a calibration pass.
 *
 * - `cli`: a human ran `lodestar harness calibrate --session <id>`.
 * - `programmatic`: a host computed and recorded calibration from its own
 *   code (e.g. a guarded loop at a deliberate checkpoint).
 */
export declare const CalibrationTriggerSchema: z.ZodEnum<
  ["cli", "programmatic"]
>;
/** Literal union of the trigger names accepted by {@link CalibrationTriggerSchema}. */
export type CalibrationTrigger = z.infer<typeof CalibrationTriggerSchema>;
|
|
462
|
+
/**
 * The `seq` window that one calibration pass measured.
 *
 * The window is half-open: it covers events whose `seq` is strictly
 * greater than `from_seq` and less than or equal to `to_seq`, within the
 * event's own session slice. Re-running `calibrate` over exactly those
 * events reproduces the embedded `report`, because the calibrator is a
 * pure function of `(events, config)`.
 *
 * That replay-by-cursor property is what makes calibration drift
 * auditable over time: two `calibration.computed@1` events with
 * overlapping windows can be diffed, and either one can be recomputed
 * from the log to verify it was not tampered with. (`seq` is per-project,
 * so the session slice is the natural replay scope; a v0 calibration pass
 * reads one session.)
 *
 * NOTE(review): the `ZodEffects` wrapper means the runtime schema attaches
 * a refinement or transform that this declaration does not describe —
 * check `calibration.ts` for the exact rule.
 */
export declare const CalibrationCursorSchema: z.ZodEffects<
  z.ZodObject<
    {
      from_seq: z.ZodNumber;
      to_seq: z.ZodNumber;
    },
    "strip",
    z.ZodTypeAny,
    { from_seq: number; to_seq: number },
    { from_seq: number; to_seq: number }
  >,
  { from_seq: number; to_seq: number },
  { from_seq: number; to_seq: number }
>;
/** Parsed shape of {@link CalibrationCursorSchema}. */
export type CalibrationCursor = z.infer<typeof CalibrationCursorSchema>;
|
|
493
|
+
/**
|
|
494
|
+
* The payload of a `calibration.computed@1` event.
|
|
495
|
+
*
|
|
496
|
+
* The durable record of one calibration pass: the verdict (`report`), the
|
|
497
|
+
* window it measured (`cursor`, for replay), and provenance (`computed_at`,
|
|
498
|
+
* `triggered_by`, `computation_id`). It does NOT enforce anything — the
|
|
499
|
+
* Policy Kernel's arbitrate hook reads an in-process `CalibrationReport`
|
|
500
|
+
* snapshot, not this event (see `docs/architecture/calibrator.md` and
|
|
501
|
+
* ADR-0011). This event exists so calibration drift is auditable and
|
|
502
|
+
* replayable, the way a probe run or a sentinel finding already is.
|
|
503
|
+
*
|
|
504
|
+
* Not signed in v0: the event inherits the log's canonical-hash
|
|
505
|
+
* tamper-evidence, and nothing un-parks a held action on the strength of a
|
|
506
|
+
* calibration *event* (the gate only ever escalates — the conservative
|
|
507
|
+
* direction). If a future slice makes the gate consume persisted
|
|
508
|
+
* calibration events as an authority, signing graduates then, the same
|
|
509
|
+
* staged path the approval resolution followed (ADR-0010).
|
|
510
|
+
*/
|
|
511
|
+
export declare const CalibrationComputedPayloadSchema: z.ZodEffects<z.ZodObject<{
|
|
512
|
+
/** Stable id for this pass, so the audit chain can reference it. */
|
|
513
|
+
computation_id: z.ZodString;
|
|
514
|
+
triggered_by: z.ZodEnum<["cli", "programmatic"]>;
|
|
515
|
+
/** The seq window measured — re-running `calibrate` over it reproduces `report`. */
|
|
516
|
+
cursor: z.ZodEffects<z.ZodObject<{
|
|
517
|
+
from_seq: z.ZodNumber;
|
|
518
|
+
to_seq: z.ZodNumber;
|
|
519
|
+
}, "strip", z.ZodTypeAny, {
|
|
520
|
+
from_seq: number;
|
|
521
|
+
to_seq: number;
|
|
522
|
+
}, {
|
|
523
|
+
from_seq: number;
|
|
524
|
+
to_seq: number;
|
|
525
|
+
}>, {
|
|
526
|
+
from_seq: number;
|
|
527
|
+
to_seq: number;
|
|
528
|
+
}, {
|
|
529
|
+
from_seq: number;
|
|
530
|
+
to_seq: number;
|
|
531
|
+
}>;
|
|
532
|
+
/** The verdict: the full report this pass produced. */
|
|
533
|
+
report: z.ZodObject<{
|
|
534
|
+
/** total samples resolved and included (after exclusions) */
|
|
535
|
+
sample_count: z.ZodNumber;
|
|
536
|
+
classes: z.ZodArray<z.ZodObject<{
|
|
537
|
+
calibration_class: z.ZodString;
|
|
538
|
+
metrics: z.ZodObject<{
|
|
539
|
+
n: z.ZodNumber;
|
|
540
|
+
/** mean of stated confidence */
|
|
541
|
+
mean_confidence: z.ZodNumber;
|
|
542
|
+
/** realised positive rate, mean(correct) */
|
|
543
|
+
empirical_accuracy: z.ZodNumber;
|
|
544
|
+
/** mean((p - y)^2); 0 is perfect, lower is better */
|
|
545
|
+
brier_score: z.ZodNumber;
|
|
546
|
+
/** expected calibration error over equal-width confidence bins */
|
|
547
|
+
ece: z.ZodNumber;
|
|
548
|
+
/** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
|
|
549
|
+
calibration_gap: z.ZodNumber;
|
|
550
|
+
overconfident: z.ZodBoolean;
|
|
551
|
+
}, "strip", z.ZodTypeAny, {
|
|
552
|
+
n: number;
|
|
553
|
+
mean_confidence: number;
|
|
554
|
+
empirical_accuracy: number;
|
|
555
|
+
brier_score: number;
|
|
556
|
+
ece: number;
|
|
557
|
+
calibration_gap: number;
|
|
558
|
+
overconfident: boolean;
|
|
559
|
+
}, {
|
|
560
|
+
n: number;
|
|
561
|
+
mean_confidence: number;
|
|
562
|
+
empirical_accuracy: number;
|
|
563
|
+
brier_score: number;
|
|
564
|
+
ece: number;
|
|
565
|
+
calibration_gap: number;
|
|
566
|
+
overconfident: boolean;
|
|
567
|
+
}>;
|
|
568
|
+
/** non-empty bins only, ascending by `lower` */
|
|
569
|
+
reliability_bins: z.ZodArray<z.ZodObject<{
|
|
570
|
+
lower: z.ZodNumber;
|
|
571
|
+
upper: z.ZodNumber;
|
|
572
|
+
n: z.ZodNumber;
|
|
573
|
+
mean_confidence: z.ZodNumber;
|
|
574
|
+
empirical_accuracy: z.ZodNumber;
|
|
575
|
+
}, "strip", z.ZodTypeAny, {
|
|
576
|
+
n: number;
|
|
577
|
+
mean_confidence: number;
|
|
578
|
+
empirical_accuracy: number;
|
|
579
|
+
lower: number;
|
|
580
|
+
upper: number;
|
|
581
|
+
}, {
|
|
582
|
+
n: number;
|
|
583
|
+
mean_confidence: number;
|
|
584
|
+
empirical_accuracy: number;
|
|
585
|
+
lower: number;
|
|
586
|
+
upper: number;
|
|
587
|
+
}>, "many">;
|
|
588
|
+
flagged: z.ZodBoolean;
|
|
589
|
+
/** human-legible reason when flagged; `null` when not */
|
|
590
|
+
flag_reason: z.ZodNullable<z.ZodString>;
|
|
591
|
+
}, "strip", z.ZodTypeAny, {
|
|
592
|
+
calibration_class: string;
|
|
593
|
+
metrics: {
|
|
594
|
+
n: number;
|
|
595
|
+
mean_confidence: number;
|
|
596
|
+
empirical_accuracy: number;
|
|
597
|
+
brier_score: number;
|
|
598
|
+
ece: number;
|
|
599
|
+
calibration_gap: number;
|
|
600
|
+
overconfident: boolean;
|
|
601
|
+
};
|
|
602
|
+
reliability_bins: {
|
|
603
|
+
n: number;
|
|
604
|
+
mean_confidence: number;
|
|
605
|
+
empirical_accuracy: number;
|
|
606
|
+
lower: number;
|
|
607
|
+
upper: number;
|
|
608
|
+
}[];
|
|
609
|
+
flagged: boolean;
|
|
610
|
+
flag_reason: string | null;
|
|
611
|
+
}, {
|
|
612
|
+
calibration_class: string;
|
|
613
|
+
metrics: {
|
|
614
|
+
n: number;
|
|
615
|
+
mean_confidence: number;
|
|
616
|
+
empirical_accuracy: number;
|
|
617
|
+
brier_score: number;
|
|
618
|
+
ece: number;
|
|
619
|
+
calibration_gap: number;
|
|
620
|
+
overconfident: boolean;
|
|
621
|
+
};
|
|
622
|
+
reliability_bins: {
|
|
623
|
+
n: number;
|
|
624
|
+
mean_confidence: number;
|
|
625
|
+
empirical_accuracy: number;
|
|
626
|
+
lower: number;
|
|
627
|
+
upper: number;
|
|
628
|
+
}[];
|
|
629
|
+
flagged: boolean;
|
|
630
|
+
flag_reason: string | null;
|
|
631
|
+
}>, "many">;
|
|
632
|
+
overall: z.ZodObject<{
|
|
633
|
+
n: z.ZodNumber;
|
|
634
|
+
/** mean of stated confidence */
|
|
635
|
+
mean_confidence: z.ZodNumber;
|
|
636
|
+
/** realised positive rate, mean(correct) */
|
|
637
|
+
empirical_accuracy: z.ZodNumber;
|
|
638
|
+
/** mean((p - y)^2); 0 is perfect, lower is better */
|
|
639
|
+
brier_score: z.ZodNumber;
|
|
640
|
+
/** expected calibration error over equal-width confidence bins */
|
|
641
|
+
ece: z.ZodNumber;
|
|
642
|
+
/** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
|
|
643
|
+
calibration_gap: z.ZodNumber;
|
|
644
|
+
overconfident: z.ZodBoolean;
|
|
645
|
+
}, "strip", z.ZodTypeAny, {
|
|
646
|
+
n: number;
|
|
647
|
+
mean_confidence: number;
|
|
648
|
+
empirical_accuracy: number;
|
|
649
|
+
brier_score: number;
|
|
650
|
+
ece: number;
|
|
651
|
+
calibration_gap: number;
|
|
652
|
+
overconfident: boolean;
|
|
653
|
+
}, {
|
|
654
|
+
n: number;
|
|
655
|
+
mean_confidence: number;
|
|
656
|
+
empirical_accuracy: number;
|
|
657
|
+
brier_score: number;
|
|
658
|
+
ece: number;
|
|
659
|
+
calibration_gap: number;
|
|
660
|
+
overconfident: boolean;
|
|
661
|
+
}>;
|
|
662
|
+
flagged_classes: z.ZodArray<z.ZodString, "many">;
|
|
663
|
+
config: z.ZodObject<{
|
|
664
|
+
bins: z.ZodNumber;
|
|
665
|
+
min_samples: z.ZodNumber;
|
|
666
|
+
ece_threshold: z.ZodNumber;
|
|
667
|
+
gap_threshold: z.ZodNumber;
|
|
668
|
+
outcome_sources: z.ZodArray<z.ZodEnum<["action_outcome", "truth_status"]>, "many">;
|
|
669
|
+
include_synthetic_authority: z.ZodBoolean;
|
|
670
|
+
}, "strip", z.ZodTypeAny, {
|
|
671
|
+
bins: number;
|
|
672
|
+
min_samples: number;
|
|
673
|
+
ece_threshold: number;
|
|
674
|
+
gap_threshold: number;
|
|
675
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
676
|
+
include_synthetic_authority: boolean;
|
|
677
|
+
}, {
|
|
678
|
+
bins: number;
|
|
679
|
+
min_samples: number;
|
|
680
|
+
ece_threshold: number;
|
|
681
|
+
gap_threshold: number;
|
|
682
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
683
|
+
include_synthetic_authority: boolean;
|
|
684
|
+
}>;
|
|
685
|
+
}, "strip", z.ZodTypeAny, {
|
|
686
|
+
sample_count: number;
|
|
687
|
+
classes: {
|
|
688
|
+
calibration_class: string;
|
|
689
|
+
metrics: {
|
|
690
|
+
n: number;
|
|
691
|
+
mean_confidence: number;
|
|
692
|
+
empirical_accuracy: number;
|
|
693
|
+
brier_score: number;
|
|
694
|
+
ece: number;
|
|
695
|
+
calibration_gap: number;
|
|
696
|
+
overconfident: boolean;
|
|
697
|
+
};
|
|
698
|
+
reliability_bins: {
|
|
699
|
+
n: number;
|
|
700
|
+
mean_confidence: number;
|
|
701
|
+
empirical_accuracy: number;
|
|
702
|
+
lower: number;
|
|
703
|
+
upper: number;
|
|
704
|
+
}[];
|
|
705
|
+
flagged: boolean;
|
|
706
|
+
flag_reason: string | null;
|
|
707
|
+
}[];
|
|
708
|
+
overall: {
|
|
709
|
+
n: number;
|
|
710
|
+
mean_confidence: number;
|
|
711
|
+
empirical_accuracy: number;
|
|
712
|
+
brier_score: number;
|
|
713
|
+
ece: number;
|
|
714
|
+
calibration_gap: number;
|
|
715
|
+
overconfident: boolean;
|
|
716
|
+
};
|
|
717
|
+
flagged_classes: string[];
|
|
718
|
+
config: {
|
|
719
|
+
bins: number;
|
|
720
|
+
min_samples: number;
|
|
721
|
+
ece_threshold: number;
|
|
722
|
+
gap_threshold: number;
|
|
723
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
724
|
+
include_synthetic_authority: boolean;
|
|
725
|
+
};
|
|
726
|
+
}, {
|
|
727
|
+
sample_count: number;
|
|
728
|
+
classes: {
|
|
729
|
+
calibration_class: string;
|
|
730
|
+
metrics: {
|
|
731
|
+
n: number;
|
|
732
|
+
mean_confidence: number;
|
|
733
|
+
empirical_accuracy: number;
|
|
734
|
+
brier_score: number;
|
|
735
|
+
ece: number;
|
|
736
|
+
calibration_gap: number;
|
|
737
|
+
overconfident: boolean;
|
|
738
|
+
};
|
|
739
|
+
reliability_bins: {
|
|
740
|
+
n: number;
|
|
741
|
+
mean_confidence: number;
|
|
742
|
+
empirical_accuracy: number;
|
|
743
|
+
lower: number;
|
|
744
|
+
upper: number;
|
|
745
|
+
}[];
|
|
746
|
+
flagged: boolean;
|
|
747
|
+
flag_reason: string | null;
|
|
748
|
+
}[];
|
|
749
|
+
overall: {
|
|
750
|
+
n: number;
|
|
751
|
+
mean_confidence: number;
|
|
752
|
+
empirical_accuracy: number;
|
|
753
|
+
brier_score: number;
|
|
754
|
+
ece: number;
|
|
755
|
+
calibration_gap: number;
|
|
756
|
+
overconfident: boolean;
|
|
757
|
+
};
|
|
758
|
+
flagged_classes: string[];
|
|
759
|
+
config: {
|
|
760
|
+
bins: number;
|
|
761
|
+
min_samples: number;
|
|
762
|
+
ece_threshold: number;
|
|
763
|
+
gap_threshold: number;
|
|
764
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
765
|
+
include_synthetic_authority: boolean;
|
|
766
|
+
};
|
|
767
|
+
}>;
|
|
768
|
+
computed_at: z.ZodString;
|
|
769
|
+
}, "strip", z.ZodTypeAny, {
|
|
770
|
+
triggered_by: "cli" | "programmatic";
|
|
771
|
+
cursor: {
|
|
772
|
+
from_seq: number;
|
|
773
|
+
to_seq: number;
|
|
774
|
+
};
|
|
775
|
+
computation_id: string;
|
|
776
|
+
report: {
|
|
777
|
+
sample_count: number;
|
|
778
|
+
classes: {
|
|
779
|
+
calibration_class: string;
|
|
780
|
+
metrics: {
|
|
781
|
+
n: number;
|
|
782
|
+
mean_confidence: number;
|
|
783
|
+
empirical_accuracy: number;
|
|
784
|
+
brier_score: number;
|
|
785
|
+
ece: number;
|
|
786
|
+
calibration_gap: number;
|
|
787
|
+
overconfident: boolean;
|
|
788
|
+
};
|
|
789
|
+
reliability_bins: {
|
|
790
|
+
n: number;
|
|
791
|
+
mean_confidence: number;
|
|
792
|
+
empirical_accuracy: number;
|
|
793
|
+
lower: number;
|
|
794
|
+
upper: number;
|
|
795
|
+
}[];
|
|
796
|
+
flagged: boolean;
|
|
797
|
+
flag_reason: string | null;
|
|
798
|
+
}[];
|
|
799
|
+
overall: {
|
|
800
|
+
n: number;
|
|
801
|
+
mean_confidence: number;
|
|
802
|
+
empirical_accuracy: number;
|
|
803
|
+
brier_score: number;
|
|
804
|
+
ece: number;
|
|
805
|
+
calibration_gap: number;
|
|
806
|
+
overconfident: boolean;
|
|
807
|
+
};
|
|
808
|
+
flagged_classes: string[];
|
|
809
|
+
config: {
|
|
810
|
+
bins: number;
|
|
811
|
+
min_samples: number;
|
|
812
|
+
ece_threshold: number;
|
|
813
|
+
gap_threshold: number;
|
|
814
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
815
|
+
include_synthetic_authority: boolean;
|
|
816
|
+
};
|
|
817
|
+
};
|
|
818
|
+
computed_at: string;
|
|
819
|
+
}, {
|
|
820
|
+
triggered_by: "cli" | "programmatic";
|
|
821
|
+
cursor: {
|
|
822
|
+
from_seq: number;
|
|
823
|
+
to_seq: number;
|
|
824
|
+
};
|
|
825
|
+
computation_id: string;
|
|
826
|
+
report: {
|
|
827
|
+
sample_count: number;
|
|
828
|
+
classes: {
|
|
829
|
+
calibration_class: string;
|
|
830
|
+
metrics: {
|
|
831
|
+
n: number;
|
|
832
|
+
mean_confidence: number;
|
|
833
|
+
empirical_accuracy: number;
|
|
834
|
+
brier_score: number;
|
|
835
|
+
ece: number;
|
|
836
|
+
calibration_gap: number;
|
|
837
|
+
overconfident: boolean;
|
|
838
|
+
};
|
|
839
|
+
reliability_bins: {
|
|
840
|
+
n: number;
|
|
841
|
+
mean_confidence: number;
|
|
842
|
+
empirical_accuracy: number;
|
|
843
|
+
lower: number;
|
|
844
|
+
upper: number;
|
|
845
|
+
}[];
|
|
846
|
+
flagged: boolean;
|
|
847
|
+
flag_reason: string | null;
|
|
848
|
+
}[];
|
|
849
|
+
overall: {
|
|
850
|
+
n: number;
|
|
851
|
+
mean_confidence: number;
|
|
852
|
+
empirical_accuracy: number;
|
|
853
|
+
brier_score: number;
|
|
854
|
+
ece: number;
|
|
855
|
+
calibration_gap: number;
|
|
856
|
+
overconfident: boolean;
|
|
857
|
+
};
|
|
858
|
+
flagged_classes: string[];
|
|
859
|
+
config: {
|
|
860
|
+
bins: number;
|
|
861
|
+
min_samples: number;
|
|
862
|
+
ece_threshold: number;
|
|
863
|
+
gap_threshold: number;
|
|
864
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
865
|
+
include_synthetic_authority: boolean;
|
|
866
|
+
};
|
|
867
|
+
};
|
|
868
|
+
computed_at: string;
|
|
869
|
+
}>, {
|
|
870
|
+
triggered_by: "cli" | "programmatic";
|
|
871
|
+
cursor: {
|
|
872
|
+
from_seq: number;
|
|
873
|
+
to_seq: number;
|
|
874
|
+
};
|
|
875
|
+
computation_id: string;
|
|
876
|
+
report: {
|
|
877
|
+
sample_count: number;
|
|
878
|
+
classes: {
|
|
879
|
+
calibration_class: string;
|
|
880
|
+
metrics: {
|
|
881
|
+
n: number;
|
|
882
|
+
mean_confidence: number;
|
|
883
|
+
empirical_accuracy: number;
|
|
884
|
+
brier_score: number;
|
|
885
|
+
ece: number;
|
|
886
|
+
calibration_gap: number;
|
|
887
|
+
overconfident: boolean;
|
|
888
|
+
};
|
|
889
|
+
reliability_bins: {
|
|
890
|
+
n: number;
|
|
891
|
+
mean_confidence: number;
|
|
892
|
+
empirical_accuracy: number;
|
|
893
|
+
lower: number;
|
|
894
|
+
upper: number;
|
|
895
|
+
}[];
|
|
896
|
+
flagged: boolean;
|
|
897
|
+
flag_reason: string | null;
|
|
898
|
+
}[];
|
|
899
|
+
overall: {
|
|
900
|
+
n: number;
|
|
901
|
+
mean_confidence: number;
|
|
902
|
+
empirical_accuracy: number;
|
|
903
|
+
brier_score: number;
|
|
904
|
+
ece: number;
|
|
905
|
+
calibration_gap: number;
|
|
906
|
+
overconfident: boolean;
|
|
907
|
+
};
|
|
908
|
+
flagged_classes: string[];
|
|
909
|
+
config: {
|
|
910
|
+
bins: number;
|
|
911
|
+
min_samples: number;
|
|
912
|
+
ece_threshold: number;
|
|
913
|
+
gap_threshold: number;
|
|
914
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
915
|
+
include_synthetic_authority: boolean;
|
|
916
|
+
};
|
|
917
|
+
};
|
|
918
|
+
computed_at: string;
|
|
919
|
+
}, {
|
|
920
|
+
triggered_by: "cli" | "programmatic";
|
|
921
|
+
cursor: {
|
|
922
|
+
from_seq: number;
|
|
923
|
+
to_seq: number;
|
|
924
|
+
};
|
|
925
|
+
computation_id: string;
|
|
926
|
+
report: {
|
|
927
|
+
sample_count: number;
|
|
928
|
+
classes: {
|
|
929
|
+
calibration_class: string;
|
|
930
|
+
metrics: {
|
|
931
|
+
n: number;
|
|
932
|
+
mean_confidence: number;
|
|
933
|
+
empirical_accuracy: number;
|
|
934
|
+
brier_score: number;
|
|
935
|
+
ece: number;
|
|
936
|
+
calibration_gap: number;
|
|
937
|
+
overconfident: boolean;
|
|
938
|
+
};
|
|
939
|
+
reliability_bins: {
|
|
940
|
+
n: number;
|
|
941
|
+
mean_confidence: number;
|
|
942
|
+
empirical_accuracy: number;
|
|
943
|
+
lower: number;
|
|
944
|
+
upper: number;
|
|
945
|
+
}[];
|
|
946
|
+
flagged: boolean;
|
|
947
|
+
flag_reason: string | null;
|
|
948
|
+
}[];
|
|
949
|
+
overall: {
|
|
950
|
+
n: number;
|
|
951
|
+
mean_confidence: number;
|
|
952
|
+
empirical_accuracy: number;
|
|
953
|
+
brier_score: number;
|
|
954
|
+
ece: number;
|
|
955
|
+
calibration_gap: number;
|
|
956
|
+
overconfident: boolean;
|
|
957
|
+
};
|
|
958
|
+
flagged_classes: string[];
|
|
959
|
+
config: {
|
|
960
|
+
bins: number;
|
|
961
|
+
min_samples: number;
|
|
962
|
+
ece_threshold: number;
|
|
963
|
+
gap_threshold: number;
|
|
964
|
+
outcome_sources: ("truth_status" | "action_outcome")[];
|
|
965
|
+
include_synthetic_authority: boolean;
|
|
966
|
+
};
|
|
967
|
+
};
|
|
968
|
+
computed_at: string;
|
|
969
|
+
}>;
|
|
970
|
+
/**
 * Static TypeScript type derived from `CalibrationComputedPayloadSchema` via
 * `z.infer`, so the type is always computed from the schema rather than
 * declared by hand.
 */
export type CalibrationComputedPayload = z.infer<typeof CalibrationComputedPayloadSchema>;
|
|
971
|
+
/**
|
|
972
|
+
* Event-type literal. Use this constant rather than the bare string so a
|
|
973
|
+
* future rename is grep-safe. Mirrors `reflection.completed@1`.
|
|
974
|
+
*/
|
|
975
|
+
export declare const CALIBRATION_COMPUTED_EVENT_TYPE: "calibration.computed"; // typed as the exact string literal, not `string`
|
|
976
|
+
/**
 * Schema-version literal declared alongside `CALIBRATION_COMPUTED_EVENT_TYPE`.
 * Typed as the string literal `"1"`, not a number.
 * NOTE(review): presumably the `1` of a `calibration.computed@1` identifier —
 * confirm against the code that emits the event.
 */
export declare const CALIBRATION_COMPUTED_SCHEMA_VERSION: "1";
|
|
977
|
+
//# sourceMappingURL=calibration.d.ts.map
|