@qmilab/lodestar-core 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/dist/index.d.ts +6 -0
  2. package/dist/index.d.ts.map +1 -1
  3. package/dist/index.js +12 -0
  4. package/dist/index.js.map +1 -1
  5. package/dist/schemas/action.d.ts +31 -13
  6. package/dist/schemas/action.d.ts.map +1 -1
  7. package/dist/schemas/action.js +20 -1
  8. package/dist/schemas/action.js.map +1 -1
  9. package/dist/schemas/approval.d.ts +271 -0
  10. package/dist/schemas/approval.d.ts.map +1 -0
  11. package/dist/schemas/approval.js +119 -0
  12. package/dist/schemas/approval.js.map +1 -0
  13. package/dist/schemas/belief.d.ts.map +1 -1
  14. package/dist/schemas/belief.js +7 -1
  15. package/dist/schemas/belief.js.map +1 -1
  16. package/dist/schemas/calibration.d.ts +977 -0
  17. package/dist/schemas/calibration.d.ts.map +1 -0
  18. package/dist/schemas/calibration.js +187 -0
  19. package/dist/schemas/calibration.js.map +1 -0
  20. package/dist/schemas/claim.d.ts.map +1 -1
  21. package/dist/schemas/claim.js +4 -2
  22. package/dist/schemas/claim.js.map +1 -1
  23. package/dist/schemas/common.d.ts.map +1 -1
  24. package/dist/schemas/common.js +11 -5
  25. package/dist/schemas/common.js.map +1 -1
  26. package/dist/schemas/policy.d.ts +768 -0
  27. package/dist/schemas/policy.d.ts.map +1 -0
  28. package/dist/schemas/policy.js +200 -0
  29. package/dist/schemas/policy.js.map +1 -0
  30. package/dist/schemas/probe-pack.d.ts +152 -0
  31. package/dist/schemas/probe-pack.d.ts.map +1 -0
  32. package/dist/schemas/probe-pack.js +140 -0
  33. package/dist/schemas/probe-pack.js.map +1 -0
  34. package/dist/schemas/reflection.d.ts +405 -0
  35. package/dist/schemas/reflection.d.ts.map +1 -0
  36. package/dist/schemas/reflection.js +154 -0
  37. package/dist/schemas/reflection.js.map +1 -0
  38. package/dist/schemas/revision.d.ts.map +1 -1
  39. package/dist/schemas/revision.js.map +1 -1
  40. package/dist/schemas/sentinel.d.ts +134 -0
  41. package/dist/schemas/sentinel.d.ts.map +1 -0
  42. package/dist/schemas/sentinel.js +97 -0
  43. package/dist/schemas/sentinel.js.map +1 -0
  44. package/package.json +2 -7
  45. package/src/index.ts +18 -0
  46. package/src/schemas/action.ts +20 -1
  47. package/src/schemas/approval.ts +136 -0
  48. package/src/schemas/belief.ts +7 -1
  49. package/src/schemas/calibration.ts +212 -0
  50. package/src/schemas/claim.ts +15 -8
  51. package/src/schemas/common.ts +16 -10
  52. package/src/schemas/policy.ts +231 -0
  53. package/src/schemas/probe-pack.ts +169 -0
  54. package/src/schemas/reflection.ts +166 -0
  55. package/src/schemas/revision.ts +7 -5
  56. package/src/schemas/sentinel.ts +104 -0
@@ -0,0 +1,977 @@
1
+ import { z } from "zod";
2
+ /**
3
+ * Calibration wire format.
4
+ *
5
+ * These schemas describe what the harness `Calibrator` measures —
6
+ * per-class ECE / Brier / calibration-gap tables and the flagged classes.
7
+ * They lived in `@qmilab/lodestar-harness` while the calibrator was a
8
+ * return-value-only surface; they **graduated to `@qmilab/lodestar-core`**
9
+ * when the durable `calibration.computed@1` event landed (ADR-0011), so
10
+ * the event payload can embed the report (core is the dependency root and
11
+ * cannot import the harness). The harness re-exports them unchanged, so
12
+ * harness consumers are unaffected.
13
+ *
14
+ * The Calibrator stays measure-only: it returns a {@link CalibrationReport}
15
+ * and never writes. Recording a report as a `calibration.computed@1` event
16
+ * is a separate publish step (`lodestar harness calibrate` / the harness
17
+ * `eventLogCalibrationSink`), the same measure/record split the sentinels
18
+ * follow (a `Sentinel` returns findings; `eventLogAlertSink` writes them).
19
+ *
20
+ * Everything here is validated at the calibrator and event-sink boundaries,
21
+ * the same discipline the probe-run observation and sentinel-alert builders
22
+ * hold.
23
+ */
24
+ /**
25
+ * Enum schema naming which signal in the event log produced a calibration sample.
26
+ * - `action_outcome`: a belief → decision → action chain where the
27
+ * action's realised result (terminal phase or an explicit Outcome) is
28
+ * the label.
29
+ * - `truth_status`: the firewall transitioned the belief's `truth_status`
30
+ * to `supported` / `contradicted` — the world adjudicating the belief.
31
+ */
32
+ export declare const SampleSourceSchema: z.ZodEnum<["action_outcome", "truth_status"]>;
33
+ export type SampleSource = z.infer<typeof SampleSourceSchema>; // inferred union: "action_outcome" | "truth_status"
34
+ /** The scored metrics for a set of samples (one class, or the pool). Only field types are declared here; any numeric range checks are not visible in this declaration. */
35
+ export declare const CalibrationMetricsSchema: z.ZodObject<{
36
+ n: z.ZodNumber; // presumably the number of samples scored — TODO confirm
37
+ /** mean of stated confidence */
38
+ mean_confidence: z.ZodNumber;
39
+ /** realised positive rate, mean(correct) */
40
+ empirical_accuracy: z.ZodNumber;
41
+ /** mean((p - y)^2); 0 is perfect, lower is better */
42
+ brier_score: z.ZodNumber;
43
+ /** expected calibration error over equal-width confidence bins */
44
+ ece: z.ZodNumber;
45
+ /** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
46
+ calibration_gap: z.ZodNumber;
47
+ overconfident: z.ZodBoolean; // presumably true when calibration_gap > 0 — TODO confirm against the calibrator
48
+ }, "strip", z.ZodTypeAny, { // "strip": unknown keys are dropped on parse; this literal is the parsed (output) type
49
+ n: number;
50
+ mean_confidence: number;
51
+ empirical_accuracy: number;
52
+ brier_score: number;
53
+ ece: number;
54
+ calibration_gap: number;
55
+ overconfident: boolean;
56
+ }, { // the accepted input type (same fields as the output type)
57
+ n: number;
58
+ mean_confidence: number;
59
+ empirical_accuracy: number;
60
+ brier_score: number;
61
+ ece: number;
62
+ calibration_gap: number;
63
+ overconfident: boolean;
64
+ }>;
65
+ export type CalibrationMetrics = z.infer<typeof CalibrationMetricsSchema>; // the parsed (output) object type
66
+ /** One non-empty bin of a reliability diagram: its bounds, its sample count and its per-bin averages. */
67
+ export declare const ReliabilityBinSchema: z.ZodObject<{
68
+ lower: z.ZodNumber; // presumably the bin's lower confidence bound — inclusivity not visible here, TODO confirm
69
+ upper: z.ZodNumber; // presumably the bin's upper confidence bound — inclusivity not visible here, TODO confirm
70
+ n: z.ZodNumber; // presumably the number of samples that fell in this bin — TODO confirm
71
+ mean_confidence: z.ZodNumber; // presumably the mean stated confidence within the bin
72
+ empirical_accuracy: z.ZodNumber; // presumably the realised positive rate within the bin
73
+ }, "strip", z.ZodTypeAny, { // "strip": unknown keys are dropped on parse; this literal is the parsed (output) type
74
+ n: number;
75
+ mean_confidence: number;
76
+ empirical_accuracy: number;
77
+ lower: number;
78
+ upper: number;
79
+ }, { // the accepted input type (same fields as the output type)
80
+ n: number;
81
+ mean_confidence: number;
82
+ empirical_accuracy: number;
83
+ lower: number;
84
+ upper: number;
85
+ }>;
86
+ export type ReliabilityBin = z.infer<typeof ReliabilityBinSchema>; // the parsed (output) object type
87
+ /** Per-class result: the class name, its metrics, its reliability bins, and the verdict (`flagged` / `flag_reason`). */
88
+ export declare const CalibrationClassResultSchema: z.ZodObject<{
89
+ calibration_class: z.ZodString; // name of the class this result describes
90
+ metrics: z.ZodObject<{
91
+ n: z.ZodNumber;
92
+ /** mean of stated confidence */
93
+ mean_confidence: z.ZodNumber;
94
+ /** realised positive rate, mean(correct) */
95
+ empirical_accuracy: z.ZodNumber;
96
+ /** mean((p - y)^2); 0 is perfect, lower is better */
97
+ brier_score: z.ZodNumber;
98
+ /** expected calibration error over equal-width confidence bins */
99
+ ece: z.ZodNumber;
100
+ /** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
101
+ calibration_gap: z.ZodNumber;
102
+ overconfident: z.ZodBoolean;
103
+ }, "strip", z.ZodTypeAny, {
104
+ n: number;
105
+ mean_confidence: number;
106
+ empirical_accuracy: number;
107
+ brier_score: number;
108
+ ece: number;
109
+ calibration_gap: number;
110
+ overconfident: boolean;
111
+ }, {
112
+ n: number;
113
+ mean_confidence: number;
114
+ empirical_accuracy: number;
115
+ brier_score: number;
116
+ ece: number;
117
+ calibration_gap: number;
118
+ overconfident: boolean;
119
+ }>;
120
+ /** non-empty bins only, ascending by `lower` */
121
+ reliability_bins: z.ZodArray<z.ZodObject<{
122
+ lower: z.ZodNumber;
123
+ upper: z.ZodNumber;
124
+ n: z.ZodNumber;
125
+ mean_confidence: z.ZodNumber;
126
+ empirical_accuracy: z.ZodNumber;
127
+ }, "strip", z.ZodTypeAny, {
128
+ n: number;
129
+ mean_confidence: number;
130
+ empirical_accuracy: number;
131
+ lower: number;
132
+ upper: number;
133
+ }, {
134
+ n: number;
135
+ mean_confidence: number;
136
+ empirical_accuracy: number;
137
+ lower: number;
138
+ upper: number;
139
+ }>, "many">;
140
+ flagged: z.ZodBoolean; // the verdict; the flagging criteria are not visible in this declaration
141
+ /** human-legible reason when flagged; `null` when not */
142
+ flag_reason: z.ZodNullable<z.ZodString>;
143
+ }, "strip", z.ZodTypeAny, { // "strip": unknown keys are dropped on parse; this literal is the parsed (output) type
144
+ calibration_class: string;
145
+ metrics: {
146
+ n: number;
147
+ mean_confidence: number;
148
+ empirical_accuracy: number;
149
+ brier_score: number;
150
+ ece: number;
151
+ calibration_gap: number;
152
+ overconfident: boolean;
153
+ };
154
+ reliability_bins: {
155
+ n: number;
156
+ mean_confidence: number;
157
+ empirical_accuracy: number;
158
+ lower: number;
159
+ upper: number;
160
+ }[];
161
+ flagged: boolean;
162
+ flag_reason: string | null;
163
+ }, { // the accepted input type (same fields as the output type)
164
+ calibration_class: string;
165
+ metrics: {
166
+ n: number;
167
+ mean_confidence: number;
168
+ empirical_accuracy: number;
169
+ brier_score: number;
170
+ ece: number;
171
+ calibration_gap: number;
172
+ overconfident: boolean;
173
+ };
174
+ reliability_bins: {
175
+ n: number;
176
+ mean_confidence: number;
177
+ empirical_accuracy: number;
178
+ lower: number;
179
+ upper: number;
180
+ }[];
181
+ flagged: boolean;
182
+ flag_reason: string | null;
183
+ }>;
184
+ export type CalibrationClassResult = z.infer<typeof CalibrationClassResultSchema>; // the parsed (output) object type
185
+ /** The thresholds and toggles actually applied, echoed for reproducibility. Only field types are declared here; defaults and valid ranges are not visible in this declaration. */
186
+ export declare const ResolvedCalibratorConfigSchema: z.ZodObject<{
187
+ bins: z.ZodNumber; // presumably the number of equal-width confidence bins — TODO confirm
188
+ min_samples: z.ZodNumber; // presumably a minimum sample count applied by the calibrator — TODO confirm where it applies
189
+ ece_threshold: z.ZodNumber; // presumably the ECE level used when flagging — TODO confirm
190
+ gap_threshold: z.ZodNumber; // presumably the calibration-gap level used when flagging — TODO confirm
191
+ outcome_sources: z.ZodArray<z.ZodEnum<["action_outcome", "truth_status"]>, "many">; // list of sample-source enum values
192
+ include_synthetic_authority: z.ZodBoolean; // toggle; its exact effect is not visible here — TODO confirm
193
+ }, "strip", z.ZodTypeAny, { // "strip": unknown keys are dropped on parse; this literal is the parsed (output) type
194
+ bins: number;
195
+ min_samples: number;
196
+ ece_threshold: number;
197
+ gap_threshold: number;
198
+ outcome_sources: ("truth_status" | "action_outcome")[];
199
+ include_synthetic_authority: boolean;
200
+ }, { // the accepted input type (same fields as the output type)
201
+ bins: number;
202
+ min_samples: number;
203
+ ece_threshold: number;
204
+ gap_threshold: number;
205
+ outcome_sources: ("truth_status" | "action_outcome")[];
206
+ include_synthetic_authority: boolean;
207
+ }>;
208
+ export type ResolvedCalibratorConfig = z.infer<typeof ResolvedCalibratorConfigSchema>; // the parsed (output) object type
209
+ /**
210
+ * The calibrator's output: per-class tables, a pooled `overall` block,
211
+ * the flagged class names, and the config that produced it. The report is
212
+ * a pure function of `(events, config)` — no clock, no scope inference — so
213
+ * it is deterministic and testable, and re-running the calibrator over the
214
+ * same event window reproduces the report (the property the `cursor` on
215
+ * {@link CalibrationComputedPayloadSchema} makes auditable).
216
+ */
217
+ export declare const CalibrationReportSchema: z.ZodObject<{
218
+ /** total samples resolved and included (after exclusions) */
219
+ sample_count: z.ZodNumber;
220
+ classes: z.ZodArray<z.ZodObject<{ // the per-class tables
221
+ calibration_class: z.ZodString;
222
+ metrics: z.ZodObject<{
223
+ n: z.ZodNumber;
224
+ /** mean of stated confidence */
225
+ mean_confidence: z.ZodNumber;
226
+ /** realised positive rate, mean(correct) */
227
+ empirical_accuracy: z.ZodNumber;
228
+ /** mean((p - y)^2); 0 is perfect, lower is better */
229
+ brier_score: z.ZodNumber;
230
+ /** expected calibration error over equal-width confidence bins */
231
+ ece: z.ZodNumber;
232
+ /** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
233
+ calibration_gap: z.ZodNumber;
234
+ overconfident: z.ZodBoolean;
235
+ }, "strip", z.ZodTypeAny, {
236
+ n: number;
237
+ mean_confidence: number;
238
+ empirical_accuracy: number;
239
+ brier_score: number;
240
+ ece: number;
241
+ calibration_gap: number;
242
+ overconfident: boolean;
243
+ }, {
244
+ n: number;
245
+ mean_confidence: number;
246
+ empirical_accuracy: number;
247
+ brier_score: number;
248
+ ece: number;
249
+ calibration_gap: number;
250
+ overconfident: boolean;
251
+ }>;
252
+ /** non-empty bins only, ascending by `lower` */
253
+ reliability_bins: z.ZodArray<z.ZodObject<{
254
+ lower: z.ZodNumber;
255
+ upper: z.ZodNumber;
256
+ n: z.ZodNumber;
257
+ mean_confidence: z.ZodNumber;
258
+ empirical_accuracy: z.ZodNumber;
259
+ }, "strip", z.ZodTypeAny, {
260
+ n: number;
261
+ mean_confidence: number;
262
+ empirical_accuracy: number;
263
+ lower: number;
264
+ upper: number;
265
+ }, {
266
+ n: number;
267
+ mean_confidence: number;
268
+ empirical_accuracy: number;
269
+ lower: number;
270
+ upper: number;
271
+ }>, "many">;
272
+ flagged: z.ZodBoolean;
273
+ /** human-legible reason when flagged; `null` when not */
274
+ flag_reason: z.ZodNullable<z.ZodString>;
275
+ }, "strip", z.ZodTypeAny, {
276
+ calibration_class: string;
277
+ metrics: {
278
+ n: number;
279
+ mean_confidence: number;
280
+ empirical_accuracy: number;
281
+ brier_score: number;
282
+ ece: number;
283
+ calibration_gap: number;
284
+ overconfident: boolean;
285
+ };
286
+ reliability_bins: {
287
+ n: number;
288
+ mean_confidence: number;
289
+ empirical_accuracy: number;
290
+ lower: number;
291
+ upper: number;
292
+ }[];
293
+ flagged: boolean;
294
+ flag_reason: string | null;
295
+ }, {
296
+ calibration_class: string;
297
+ metrics: {
298
+ n: number;
299
+ mean_confidence: number;
300
+ empirical_accuracy: number;
301
+ brier_score: number;
302
+ ece: number;
303
+ calibration_gap: number;
304
+ overconfident: boolean;
305
+ };
306
+ reliability_bins: {
307
+ n: number;
308
+ mean_confidence: number;
309
+ empirical_accuracy: number;
310
+ lower: number;
311
+ upper: number;
312
+ }[];
313
+ flagged: boolean;
314
+ flag_reason: string | null;
315
+ }>, "many">;
316
+ overall: z.ZodObject<{ // the pooled metrics block
317
+ n: z.ZodNumber;
318
+ /** mean of stated confidence */
319
+ mean_confidence: z.ZodNumber;
320
+ /** realised positive rate, mean(correct) */
321
+ empirical_accuracy: z.ZodNumber;
322
+ /** mean((p - y)^2); 0 is perfect, lower is better */
323
+ brier_score: z.ZodNumber;
324
+ /** expected calibration error over equal-width confidence bins */
325
+ ece: z.ZodNumber;
326
+ /** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
327
+ calibration_gap: z.ZodNumber;
328
+ overconfident: z.ZodBoolean;
329
+ }, "strip", z.ZodTypeAny, {
330
+ n: number;
331
+ mean_confidence: number;
332
+ empirical_accuracy: number;
333
+ brier_score: number;
334
+ ece: number;
335
+ calibration_gap: number;
336
+ overconfident: boolean;
337
+ }, {
338
+ n: number;
339
+ mean_confidence: number;
340
+ empirical_accuracy: number;
341
+ brier_score: number;
342
+ ece: number;
343
+ calibration_gap: number;
344
+ overconfident: boolean;
345
+ }>;
346
+ flagged_classes: z.ZodArray<z.ZodString, "many">; // the flagged class names
347
+ config: z.ZodObject<{ // the config that produced this report
348
+ bins: z.ZodNumber;
349
+ min_samples: z.ZodNumber;
350
+ ece_threshold: z.ZodNumber;
351
+ gap_threshold: z.ZodNumber;
352
+ outcome_sources: z.ZodArray<z.ZodEnum<["action_outcome", "truth_status"]>, "many">;
353
+ include_synthetic_authority: z.ZodBoolean;
354
+ }, "strip", z.ZodTypeAny, {
355
+ bins: number;
356
+ min_samples: number;
357
+ ece_threshold: number;
358
+ gap_threshold: number;
359
+ outcome_sources: ("truth_status" | "action_outcome")[];
360
+ include_synthetic_authority: boolean;
361
+ }, {
362
+ bins: number;
363
+ min_samples: number;
364
+ ece_threshold: number;
365
+ gap_threshold: number;
366
+ outcome_sources: ("truth_status" | "action_outcome")[];
367
+ include_synthetic_authority: boolean;
368
+ }>;
369
+ }, "strip", z.ZodTypeAny, { // "strip": unknown keys are dropped on parse; this literal is the parsed (output) type
370
+ sample_count: number;
371
+ classes: {
372
+ calibration_class: string;
373
+ metrics: {
374
+ n: number;
375
+ mean_confidence: number;
376
+ empirical_accuracy: number;
377
+ brier_score: number;
378
+ ece: number;
379
+ calibration_gap: number;
380
+ overconfident: boolean;
381
+ };
382
+ reliability_bins: {
383
+ n: number;
384
+ mean_confidence: number;
385
+ empirical_accuracy: number;
386
+ lower: number;
387
+ upper: number;
388
+ }[];
389
+ flagged: boolean;
390
+ flag_reason: string | null;
391
+ }[];
392
+ overall: {
393
+ n: number;
394
+ mean_confidence: number;
395
+ empirical_accuracy: number;
396
+ brier_score: number;
397
+ ece: number;
398
+ calibration_gap: number;
399
+ overconfident: boolean;
400
+ };
401
+ flagged_classes: string[];
402
+ config: {
403
+ bins: number;
404
+ min_samples: number;
405
+ ece_threshold: number;
406
+ gap_threshold: number;
407
+ outcome_sources: ("truth_status" | "action_outcome")[];
408
+ include_synthetic_authority: boolean;
409
+ };
410
+ }, { // the accepted input type (same fields as the output type)
411
+ sample_count: number;
412
+ classes: {
413
+ calibration_class: string;
414
+ metrics: {
415
+ n: number;
416
+ mean_confidence: number;
417
+ empirical_accuracy: number;
418
+ brier_score: number;
419
+ ece: number;
420
+ calibration_gap: number;
421
+ overconfident: boolean;
422
+ };
423
+ reliability_bins: {
424
+ n: number;
425
+ mean_confidence: number;
426
+ empirical_accuracy: number;
427
+ lower: number;
428
+ upper: number;
429
+ }[];
430
+ flagged: boolean;
431
+ flag_reason: string | null;
432
+ }[];
433
+ overall: {
434
+ n: number;
435
+ mean_confidence: number;
436
+ empirical_accuracy: number;
437
+ brier_score: number;
438
+ ece: number;
439
+ calibration_gap: number;
440
+ overconfident: boolean;
441
+ };
442
+ flagged_classes: string[];
443
+ config: {
444
+ bins: number;
445
+ min_samples: number;
446
+ ece_threshold: number;
447
+ gap_threshold: number;
448
+ outcome_sources: ("truth_status" | "action_outcome")[];
449
+ include_synthetic_authority: boolean;
450
+ };
451
+ }>;
452
+ export type CalibrationReport = z.infer<typeof CalibrationReportSchema>; // the parsed (output) object type
453
+ /**
454
+ * Enum schema naming what invoked a calibration pass.
455
+ *
456
+ * `cli` — a human ran `lodestar harness calibrate --session <id>`.
457
+ * `programmatic` — a host computed and recorded calibration from its own
458
+ * code (e.g. a guarded loop at a deliberate checkpoint).
459
+ */
460
+ export declare const CalibrationTriggerSchema: z.ZodEnum<["cli", "programmatic"]>;
461
+ export type CalibrationTrigger = z.infer<typeof CalibrationTriggerSchema>; // inferred union: "cli" | "programmatic"
462
+ /**
463
+ * The seq window a calibration pass measured.
464
+ *
465
+ * Replayability is by cursor: re-running `calibrate` over the same events
466
+ * in this window — those with `seq` strictly greater than `from_seq` and
467
+ * less than or equal to `to_seq`, within the event's own session slice —
468
+ * reproduces the embedded `report` (the calibrator is a pure function of
469
+ * `(events, config)`). This is what makes calibration drift auditable
470
+ * across time — two `calibration.computed@1` events with overlapping
471
+ * windows can be diffed, and either can be recomputed from the log to
472
+ * verify it was not tampered with. (`seq` is per-project, so the session
473
+ * slice is the natural replay scope; a v0 calibration pass reads one
474
+ * session.)
475
+ * NOTE(review): the schema is typed as `z.ZodEffects` around the object, so the implementation attaches a refinement or transform that this declaration does not show — presumably an ordering check between `from_seq` and `to_seq`; confirm in `calibration.js`. Object helpers such as `.extend()` are not available on the wrapper; `.innerType()` should reach the underlying object schema. */
476
+ export declare const CalibrationCursorSchema: z.ZodEffects<z.ZodObject<{
477
+ from_seq: z.ZodNumber; // exclusive lower bound of the window (events with `seq` strictly greater)
478
+ to_seq: z.ZodNumber; // inclusive upper bound of the window (events with `seq` less than or equal)
479
+ }, "strip", z.ZodTypeAny, { // inner object: "strip" drops unknown keys on parse; this literal is its output type
480
+ from_seq: number;
481
+ to_seq: number;
482
+ }, { // inner object: accepted input type
483
+ from_seq: number;
484
+ to_seq: number;
485
+ }>, { // ZodEffects output type (what `z.infer` yields)
486
+ from_seq: number;
487
+ to_seq: number;
488
+ }, { // ZodEffects input type
489
+ from_seq: number;
490
+ to_seq: number;
491
+ }>;
492
+ export type CalibrationCursor = z.infer<typeof CalibrationCursorSchema>; // the parsed (output) object type
493
+ /**
494
+ * The payload of a `calibration.computed@1` event.
495
+ *
496
+ * The durable record of one calibration pass: the verdict (`report`), the
497
+ * window it measured (`cursor`, for replay), and provenance (`computed_at`,
498
+ * `triggered_by`, `computation_id`). It does NOT enforce anything — the
499
+ * Policy Kernel's arbitrate hook reads an in-process `CalibrationReport`
500
+ * snapshot, not this event (see `docs/architecture/calibrator.md` and
501
+ * ADR-0011). This event exists so calibration drift is auditable and
502
+ * replayable, the way a probe run or a sentinel finding already is.
503
+ *
504
+ * Not signed in v0: the event inherits the log's canonical-hash
505
+ * tamper-evidence, and nothing un-parks a held action on the strength of a
506
+ * calibration *event* (the gate only ever escalates — the conservative
507
+ * direction). If a future slice makes the gate consume persisted
508
+ * calibration events as an authority, signing graduates then, the same
509
+ * staged path the approval resolution followed (ADR-0010).
510
+ * NOTE(review): the schema is typed as `z.ZodEffects` around the object, so the implementation attaches a payload-level refinement or transform that this declaration does not show — confirm the rule in `calibration.js`. Object helpers such as `.extend()` are not available on the wrapper; `.innerType()` should reach the underlying object schema. */
511
+ export declare const CalibrationComputedPayloadSchema: z.ZodEffects<z.ZodObject<{
512
+ /** Stable id for this pass, so the audit chain can reference it. */
513
+ computation_id: z.ZodString;
514
+ triggered_by: z.ZodEnum<["cli", "programmatic"]>; // what invoked the pass
515
+ /** The seq window measured — re-running `calibrate` over it reproduces `report`. */
516
+ cursor: z.ZodEffects<z.ZodObject<{
517
+ from_seq: z.ZodNumber;
518
+ to_seq: z.ZodNumber;
519
+ }, "strip", z.ZodTypeAny, {
520
+ from_seq: number;
521
+ to_seq: number;
522
+ }, {
523
+ from_seq: number;
524
+ to_seq: number;
525
+ }>, {
526
+ from_seq: number;
527
+ to_seq: number;
528
+ }, {
529
+ from_seq: number;
530
+ to_seq: number;
531
+ }>;
532
+ /** The verdict: the full report this pass produced. */
533
+ report: z.ZodObject<{
534
+ /** total samples resolved and included (after exclusions) */
535
+ sample_count: z.ZodNumber;
536
+ classes: z.ZodArray<z.ZodObject<{
537
+ calibration_class: z.ZodString;
538
+ metrics: z.ZodObject<{
539
+ n: z.ZodNumber;
540
+ /** mean of stated confidence */
541
+ mean_confidence: z.ZodNumber;
542
+ /** realised positive rate, mean(correct) */
543
+ empirical_accuracy: z.ZodNumber;
544
+ /** mean((p - y)^2); 0 is perfect, lower is better */
545
+ brier_score: z.ZodNumber;
546
+ /** expected calibration error over equal-width confidence bins */
547
+ ece: z.ZodNumber;
548
+ /** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
549
+ calibration_gap: z.ZodNumber;
550
+ overconfident: z.ZodBoolean;
551
+ }, "strip", z.ZodTypeAny, {
552
+ n: number;
553
+ mean_confidence: number;
554
+ empirical_accuracy: number;
555
+ brier_score: number;
556
+ ece: number;
557
+ calibration_gap: number;
558
+ overconfident: boolean;
559
+ }, {
560
+ n: number;
561
+ mean_confidence: number;
562
+ empirical_accuracy: number;
563
+ brier_score: number;
564
+ ece: number;
565
+ calibration_gap: number;
566
+ overconfident: boolean;
567
+ }>;
568
+ /** non-empty bins only, ascending by `lower` */
569
+ reliability_bins: z.ZodArray<z.ZodObject<{
570
+ lower: z.ZodNumber;
571
+ upper: z.ZodNumber;
572
+ n: z.ZodNumber;
573
+ mean_confidence: z.ZodNumber;
574
+ empirical_accuracy: z.ZodNumber;
575
+ }, "strip", z.ZodTypeAny, {
576
+ n: number;
577
+ mean_confidence: number;
578
+ empirical_accuracy: number;
579
+ lower: number;
580
+ upper: number;
581
+ }, {
582
+ n: number;
583
+ mean_confidence: number;
584
+ empirical_accuracy: number;
585
+ lower: number;
586
+ upper: number;
587
+ }>, "many">;
588
+ flagged: z.ZodBoolean;
589
+ /** human-legible reason when flagged; `null` when not */
590
+ flag_reason: z.ZodNullable<z.ZodString>;
591
+ }, "strip", z.ZodTypeAny, {
592
+ calibration_class: string;
593
+ metrics: {
594
+ n: number;
595
+ mean_confidence: number;
596
+ empirical_accuracy: number;
597
+ brier_score: number;
598
+ ece: number;
599
+ calibration_gap: number;
600
+ overconfident: boolean;
601
+ };
602
+ reliability_bins: {
603
+ n: number;
604
+ mean_confidence: number;
605
+ empirical_accuracy: number;
606
+ lower: number;
607
+ upper: number;
608
+ }[];
609
+ flagged: boolean;
610
+ flag_reason: string | null;
611
+ }, {
612
+ calibration_class: string;
613
+ metrics: {
614
+ n: number;
615
+ mean_confidence: number;
616
+ empirical_accuracy: number;
617
+ brier_score: number;
618
+ ece: number;
619
+ calibration_gap: number;
620
+ overconfident: boolean;
621
+ };
622
+ reliability_bins: {
623
+ n: number;
624
+ mean_confidence: number;
625
+ empirical_accuracy: number;
626
+ lower: number;
627
+ upper: number;
628
+ }[];
629
+ flagged: boolean;
630
+ flag_reason: string | null;
631
+ }>, "many">;
632
+ overall: z.ZodObject<{
633
+ n: z.ZodNumber;
634
+ /** mean of stated confidence */
635
+ mean_confidence: z.ZodNumber;
636
+ /** realised positive rate, mean(correct) */
637
+ empirical_accuracy: z.ZodNumber;
638
+ /** mean((p - y)^2); 0 is perfect, lower is better */
639
+ brier_score: z.ZodNumber;
640
+ /** expected calibration error over equal-width confidence bins */
641
+ ece: z.ZodNumber;
642
+ /** signed mean_confidence - empirical_accuracy; > 0 is overconfident */
643
+ calibration_gap: z.ZodNumber;
644
+ overconfident: z.ZodBoolean;
645
+ }, "strip", z.ZodTypeAny, {
646
+ n: number;
647
+ mean_confidence: number;
648
+ empirical_accuracy: number;
649
+ brier_score: number;
650
+ ece: number;
651
+ calibration_gap: number;
652
+ overconfident: boolean;
653
+ }, {
654
+ n: number;
655
+ mean_confidence: number;
656
+ empirical_accuracy: number;
657
+ brier_score: number;
658
+ ece: number;
659
+ calibration_gap: number;
660
+ overconfident: boolean;
661
+ }>;
662
+ flagged_classes: z.ZodArray<z.ZodString, "many">;
663
+ config: z.ZodObject<{
664
+ bins: z.ZodNumber;
665
+ min_samples: z.ZodNumber;
666
+ ece_threshold: z.ZodNumber;
667
+ gap_threshold: z.ZodNumber;
668
+ outcome_sources: z.ZodArray<z.ZodEnum<["action_outcome", "truth_status"]>, "many">;
669
+ include_synthetic_authority: z.ZodBoolean;
670
+ }, "strip", z.ZodTypeAny, {
671
+ bins: number;
672
+ min_samples: number;
673
+ ece_threshold: number;
674
+ gap_threshold: number;
675
+ outcome_sources: ("truth_status" | "action_outcome")[];
676
+ include_synthetic_authority: boolean;
677
+ }, {
678
+ bins: number;
679
+ min_samples: number;
680
+ ece_threshold: number;
681
+ gap_threshold: number;
682
+ outcome_sources: ("truth_status" | "action_outcome")[];
683
+ include_synthetic_authority: boolean;
684
+ }>;
685
+ }, "strip", z.ZodTypeAny, {
686
+ sample_count: number;
687
+ classes: {
688
+ calibration_class: string;
689
+ metrics: {
690
+ n: number;
691
+ mean_confidence: number;
692
+ empirical_accuracy: number;
693
+ brier_score: number;
694
+ ece: number;
695
+ calibration_gap: number;
696
+ overconfident: boolean;
697
+ };
698
+ reliability_bins: {
699
+ n: number;
700
+ mean_confidence: number;
701
+ empirical_accuracy: number;
702
+ lower: number;
703
+ upper: number;
704
+ }[];
705
+ flagged: boolean;
706
+ flag_reason: string | null;
707
+ }[];
708
+ overall: {
709
+ n: number;
710
+ mean_confidence: number;
711
+ empirical_accuracy: number;
712
+ brier_score: number;
713
+ ece: number;
714
+ calibration_gap: number;
715
+ overconfident: boolean;
716
+ };
717
+ flagged_classes: string[];
718
+ config: {
719
+ bins: number;
720
+ min_samples: number;
721
+ ece_threshold: number;
722
+ gap_threshold: number;
723
+ outcome_sources: ("truth_status" | "action_outcome")[];
724
+ include_synthetic_authority: boolean;
725
+ };
726
+ }, {
727
+ sample_count: number;
728
+ classes: {
729
+ calibration_class: string;
730
+ metrics: {
731
+ n: number;
732
+ mean_confidence: number;
733
+ empirical_accuracy: number;
734
+ brier_score: number;
735
+ ece: number;
736
+ calibration_gap: number;
737
+ overconfident: boolean;
738
+ };
739
+ reliability_bins: {
740
+ n: number;
741
+ mean_confidence: number;
742
+ empirical_accuracy: number;
743
+ lower: number;
744
+ upper: number;
745
+ }[];
746
+ flagged: boolean;
747
+ flag_reason: string | null;
748
+ }[];
749
+ overall: {
750
+ n: number;
751
+ mean_confidence: number;
752
+ empirical_accuracy: number;
753
+ brier_score: number;
754
+ ece: number;
755
+ calibration_gap: number;
756
+ overconfident: boolean;
757
+ };
758
+ flagged_classes: string[];
759
+ config: {
760
+ bins: number;
761
+ min_samples: number;
762
+ ece_threshold: number;
763
+ gap_threshold: number;
764
+ outcome_sources: ("truth_status" | "action_outcome")[];
765
+ include_synthetic_authority: boolean;
766
+ };
767
+ }>;
768
+ computed_at: z.ZodString; // typed as a plain string; presumably a timestamp of the pass — format not visible here, TODO confirm
769
+ }, "strip", z.ZodTypeAny, { // inner object: "strip" drops unknown keys on parse; this literal is its output type
770
+ triggered_by: "cli" | "programmatic";
771
+ cursor: {
772
+ from_seq: number;
773
+ to_seq: number;
774
+ };
775
+ computation_id: string;
776
+ report: {
777
+ sample_count: number;
778
+ classes: {
779
+ calibration_class: string;
780
+ metrics: {
781
+ n: number;
782
+ mean_confidence: number;
783
+ empirical_accuracy: number;
784
+ brier_score: number;
785
+ ece: number;
786
+ calibration_gap: number;
787
+ overconfident: boolean;
788
+ };
789
+ reliability_bins: {
790
+ n: number;
791
+ mean_confidence: number;
792
+ empirical_accuracy: number;
793
+ lower: number;
794
+ upper: number;
795
+ }[];
796
+ flagged: boolean;
797
+ flag_reason: string | null;
798
+ }[];
799
+ overall: {
800
+ n: number;
801
+ mean_confidence: number;
802
+ empirical_accuracy: number;
803
+ brier_score: number;
804
+ ece: number;
805
+ calibration_gap: number;
806
+ overconfident: boolean;
807
+ };
808
+ flagged_classes: string[];
809
+ config: {
810
+ bins: number;
811
+ min_samples: number;
812
+ ece_threshold: number;
813
+ gap_threshold: number;
814
+ outcome_sources: ("truth_status" | "action_outcome")[];
815
+ include_synthetic_authority: boolean;
816
+ };
817
+ };
818
+ computed_at: string;
819
+ }, { // inner object: accepted input type
820
+ triggered_by: "cli" | "programmatic";
821
+ cursor: {
822
+ from_seq: number;
823
+ to_seq: number;
824
+ };
825
+ computation_id: string;
826
+ report: {
827
+ sample_count: number;
828
+ classes: {
829
+ calibration_class: string;
830
+ metrics: {
831
+ n: number;
832
+ mean_confidence: number;
833
+ empirical_accuracy: number;
834
+ brier_score: number;
835
+ ece: number;
836
+ calibration_gap: number;
837
+ overconfident: boolean;
838
+ };
839
+ reliability_bins: {
840
+ n: number;
841
+ mean_confidence: number;
842
+ empirical_accuracy: number;
843
+ lower: number;
844
+ upper: number;
845
+ }[];
846
+ flagged: boolean;
847
+ flag_reason: string | null;
848
+ }[];
849
+ overall: {
850
+ n: number;
851
+ mean_confidence: number;
852
+ empirical_accuracy: number;
853
+ brier_score: number;
854
+ ece: number;
855
+ calibration_gap: number;
856
+ overconfident: boolean;
857
+ };
858
+ flagged_classes: string[];
859
+ config: {
860
+ bins: number;
861
+ min_samples: number;
862
+ ece_threshold: number;
863
+ gap_threshold: number;
864
+ outcome_sources: ("truth_status" | "action_outcome")[];
865
+ include_synthetic_authority: boolean;
866
+ };
867
+ };
868
+ computed_at: string;
869
+ }>, { // ZodEffects output type (what `z.infer` yields)
870
+ triggered_by: "cli" | "programmatic";
871
+ cursor: {
872
+ from_seq: number;
873
+ to_seq: number;
874
+ };
875
+ computation_id: string;
876
+ report: {
877
+ sample_count: number;
878
+ classes: {
879
+ calibration_class: string;
880
+ metrics: {
881
+ n: number;
882
+ mean_confidence: number;
883
+ empirical_accuracy: number;
884
+ brier_score: number;
885
+ ece: number;
886
+ calibration_gap: number;
887
+ overconfident: boolean;
888
+ };
889
+ reliability_bins: {
890
+ n: number;
891
+ mean_confidence: number;
892
+ empirical_accuracy: number;
893
+ lower: number;
894
+ upper: number;
895
+ }[];
896
+ flagged: boolean;
897
+ flag_reason: string | null;
898
+ }[];
899
+ overall: {
900
+ n: number;
901
+ mean_confidence: number;
902
+ empirical_accuracy: number;
903
+ brier_score: number;
904
+ ece: number;
905
+ calibration_gap: number;
906
+ overconfident: boolean;
907
+ };
908
+ flagged_classes: string[];
909
+ config: {
910
+ bins: number;
911
+ min_samples: number;
912
+ ece_threshold: number;
913
+ gap_threshold: number;
914
+ outcome_sources: ("truth_status" | "action_outcome")[];
915
+ include_synthetic_authority: boolean;
916
+ };
917
+ };
918
+ computed_at: string;
919
+ }, { // ZodEffects input type
920
+ triggered_by: "cli" | "programmatic";
921
+ cursor: {
922
+ from_seq: number;
923
+ to_seq: number;
924
+ };
925
+ computation_id: string;
926
+ report: {
927
+ sample_count: number;
928
+ classes: {
929
+ calibration_class: string;
930
+ metrics: {
931
+ n: number;
932
+ mean_confidence: number;
933
+ empirical_accuracy: number;
934
+ brier_score: number;
935
+ ece: number;
936
+ calibration_gap: number;
937
+ overconfident: boolean;
938
+ };
939
+ reliability_bins: {
940
+ n: number;
941
+ mean_confidence: number;
942
+ empirical_accuracy: number;
943
+ lower: number;
944
+ upper: number;
945
+ }[];
946
+ flagged: boolean;
947
+ flag_reason: string | null;
948
+ }[];
949
+ overall: {
950
+ n: number;
951
+ mean_confidence: number;
952
+ empirical_accuracy: number;
953
+ brier_score: number;
954
+ ece: number;
955
+ calibration_gap: number;
956
+ overconfident: boolean;
957
+ };
958
+ flagged_classes: string[];
959
+ config: {
960
+ bins: number;
961
+ min_samples: number;
962
+ ece_threshold: number;
963
+ gap_threshold: number;
964
+ outcome_sources: ("truth_status" | "action_outcome")[];
965
+ include_synthetic_authority: boolean;
966
+ };
967
+ };
968
+ computed_at: string;
969
+ }>;
970
+ export type CalibrationComputedPayload = z.infer<typeof CalibrationComputedPayloadSchema>; // the parsed (output) object type
971
+ /**
972
+ * Event-type literal. Use this constant rather than the bare string so a
973
+ * future rename is grep-safe. Mirrors `reflection.completed@1`.
974
+ */
975
+ export declare const CALIBRATION_COMPUTED_EVENT_TYPE: "calibration.computed";
976
+ export declare const CALIBRATION_COMPUTED_SCHEMA_VERSION: "1"; // schema version as a string literal — the `1` in `calibration.computed@1`
977
+ //# sourceMappingURL=calibration.d.ts.map