@mcptoolshop/research-os 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,547 @@
1
+ import { z } from 'zod';
2
+
3
+ declare const AggregateMetricSchema: z.ZodObject<{ // Declared type of the schema for one aggregated metric: median, min, max and an array of numbers.
4
+ median: z.ZodNumber;
5
+ min: z.ZodNumber;
6
+ max: z.ZodNumber;
7
+ values: z.ZodArray<z.ZodNumber, "many">;
8
+ }, "strip", z.ZodTypeAny, { // end of shape; "strip" unknown-keys mode; parsed output type follows
9
+ median: number;
10
+ min: number;
11
+ max: number;
12
+ values: number[];
13
+ }, { // input type follows (same fields as the output type)
14
+ median: number;
15
+ min: number;
16
+ max: number;
17
+ values: number[];
18
+ }>;
19
+ declare const PerCategoryAggregateEntrySchema: z.ZodObject<{ // Declared type of the schema for one category entry: median/min/max ratio, a total, and per-run ratios.
20
+ median_ratio: z.ZodNumber;
21
+ min_ratio: z.ZodNumber;
22
+ max_ratio: z.ZodNumber;
23
+ total: z.ZodNumber;
24
+ per_run_ratios: z.ZodArray<z.ZodNumber, "many">;
25
+ }, "strip", z.ZodTypeAny, { // end of shape; parsed output type follows
26
+ median_ratio: number;
27
+ min_ratio: number;
28
+ max_ratio: number;
29
+ total: number;
30
+ per_run_ratios: number[];
31
+ }, { // input type follows (same fields as the output type)
32
+ median_ratio: number;
33
+ min_ratio: number;
34
+ max_ratio: number;
35
+ total: number;
36
+ per_run_ratios: number[];
37
+ }>;
38
+ declare const PerCategoryAggregateSchema: z.ZodRecord<z.ZodString, z.ZodObject<{ // Declared type of a string-keyed record whose values have the per-category entry shape.
39
+ median_ratio: z.ZodNumber;
40
+ min_ratio: z.ZodNumber;
41
+ max_ratio: z.ZodNumber;
42
+ total: z.ZodNumber;
43
+ per_run_ratios: z.ZodArray<z.ZodNumber, "many">;
44
+ }, "strip", z.ZodTypeAny, { // end of value shape; parsed output type of each value follows
45
+ median_ratio: number;
46
+ min_ratio: number;
47
+ max_ratio: number;
48
+ total: number;
49
+ per_run_ratios: number[];
50
+ }, { // input type of each value follows (same fields as the output type)
51
+ median_ratio: number;
52
+ min_ratio: number;
53
+ max_ratio: number;
54
+ total: number;
55
+ per_run_ratios: number[];
56
+ }>>;
57
+ declare const AggregatePassFailSchema: z.ZodObject<{ // Declared type of the pass/fail verdict schema: one enum field per bar plus an overall verdict.
58
+ fp_ceiling: z.ZodEnum<["PASS", "FAIL"]>;
59
+ any_flag_recall_floor: z.ZodEnum<["PASS", "FAIL"]>;
60
+ per_category_any_flag_floor: z.ZodEnum<["PASS", "FAIL"]>;
61
+ strict_recall_floor: z.ZodEnum<["PASS", "FAIL"]>;
62
+ decision_vocab_completeness: z.ZodEnum<["PASS", "FAIL"]>;
63
+ latency_soft: z.ZodEnum<["PASS", "WARN"]>; // the only field whose second value is "WARN" rather than "FAIL"
64
+ latency_hard: z.ZodEnum<["PASS", "FAIL"]>;
65
+ empty_or_malformed: z.ZodEnum<["PASS", "FAIL"]>;
66
+ overall: z.ZodEnum<["PASS", "FAIL"]>;
67
+ }, "strip", z.ZodTypeAny, { // end of shape; parsed output type follows
68
+ fp_ceiling: "PASS" | "FAIL";
69
+ any_flag_recall_floor: "PASS" | "FAIL";
70
+ per_category_any_flag_floor: "PASS" | "FAIL";
71
+ strict_recall_floor: "PASS" | "FAIL";
72
+ decision_vocab_completeness: "PASS" | "FAIL";
73
+ latency_soft: "PASS" | "WARN";
74
+ latency_hard: "PASS" | "FAIL";
75
+ empty_or_malformed: "PASS" | "FAIL";
76
+ overall: "PASS" | "FAIL";
77
+ }, { // input type follows (same fields as the output type)
78
+ fp_ceiling: "PASS" | "FAIL";
79
+ any_flag_recall_floor: "PASS" | "FAIL";
80
+ per_category_any_flag_floor: "PASS" | "FAIL";
81
+ strict_recall_floor: "PASS" | "FAIL";
82
+ decision_vocab_completeness: "PASS" | "FAIL";
83
+ latency_soft: "PASS" | "WARN";
84
+ latency_hard: "PASS" | "FAIL";
85
+ empty_or_malformed: "PASS" | "FAIL";
86
+ overall: "PASS" | "FAIL";
87
+ }>;
88
+ declare const AggregateDecisionVocabBarSchema: z.ZodObject<{ // Declared type of the decision-vocabulary bar schema: architecture, required count, median produced, passed flag.
89
+ architecture: z.ZodEnum<["single-pass", "two-pass"]>;
90
+ required: z.ZodNumber;
91
+ median_produced: z.ZodNumber;
92
+ passed: z.ZodBoolean;
93
+ }, "strip", z.ZodTypeAny, { // end of shape; parsed output type follows
94
+ required: number;
95
+ architecture: "single-pass" | "two-pass";
96
+ median_produced: number;
97
+ passed: boolean;
98
+ }, { // input type follows (same fields as the output type)
99
+ required: number;
100
+ architecture: "single-pass" | "two-pass";
101
+ median_produced: number;
102
+ passed: boolean;
103
+ }>;
104
+ declare const AggregateCalibrationReceiptSchema: z.ZodObject<{ // Declared type of the aggregate calibration receipt schema; the shape comes first, then the output and input types.
105
+ schema_version: z.ZodLiteral<1>;
106
+ receipt_kind: z.ZodLiteral<"aggregate">;
107
+ profile_name: z.ZodString;
108
+ status: z.ZodEnum<["trusted_baseline", "conditional_pass", "failed", "comparison_only"]>;
109
+ model: z.ZodString;
110
+ architecture: z.ZodEnum<["single-pass", "two-pass"]>;
111
+ fixture: z.ZodString;
112
+ fixture_total_claims: z.ZodNumber;
113
+ fixture_good_claims: z.ZodNumber;
114
+ fixture_bad_claims: z.ZodNumber;
115
+ runs_count: z.ZodNumber;
116
+ run_files: z.ZodArray<z.ZodString, "many">;
117
+ aggregated_at: z.ZodString;
118
+ research_os_version: z.ZodString;
119
+ good_fp_count: z.ZodObject<{ // this and the next five fields share the median/min/max/values metric shape
120
+ median: z.ZodNumber;
121
+ min: z.ZodNumber;
122
+ max: z.ZodNumber;
123
+ values: z.ZodArray<z.ZodNumber, "many">;
124
+ }, "strip", z.ZodTypeAny, {
125
+ median: number;
126
+ min: number;
127
+ max: number;
128
+ values: number[];
129
+ }, {
130
+ median: number;
131
+ min: number;
132
+ max: number;
133
+ values: number[];
134
+ }>;
135
+ any_flag_recall_ratio: z.ZodObject<{
136
+ median: z.ZodNumber;
137
+ min: z.ZodNumber;
138
+ max: z.ZodNumber;
139
+ values: z.ZodArray<z.ZodNumber, "many">;
140
+ }, "strip", z.ZodTypeAny, {
141
+ median: number;
142
+ min: number;
143
+ max: number;
144
+ values: number[];
145
+ }, {
146
+ median: number;
147
+ min: number;
148
+ max: number;
149
+ values: number[];
150
+ }>;
151
+ strict_recall_ratio: z.ZodObject<{
152
+ median: z.ZodNumber;
153
+ min: z.ZodNumber;
154
+ max: z.ZodNumber;
155
+ values: z.ZodArray<z.ZodNumber, "many">;
156
+ }, "strip", z.ZodTypeAny, {
157
+ median: number;
158
+ min: number;
159
+ max: number;
160
+ values: number[];
161
+ }, {
162
+ median: number;
163
+ min: number;
164
+ max: number;
165
+ values: number[];
166
+ }>;
167
+ decisions_produced_count: z.ZodObject<{
168
+ median: z.ZodNumber;
169
+ min: z.ZodNumber;
170
+ max: z.ZodNumber;
171
+ values: z.ZodArray<z.ZodNumber, "many">;
172
+ }, "strip", z.ZodTypeAny, {
173
+ median: number;
174
+ min: number;
175
+ max: number;
176
+ values: number[];
177
+ }, {
178
+ median: number;
179
+ min: number;
180
+ max: number;
181
+ values: number[];
182
+ }>;
183
+ runtime_ms: z.ZodObject<{
184
+ median: z.ZodNumber;
185
+ min: z.ZodNumber;
186
+ max: z.ZodNumber;
187
+ values: z.ZodArray<z.ZodNumber, "many">;
188
+ }, "strip", z.ZodTypeAny, {
189
+ median: number;
190
+ min: number;
191
+ max: number;
192
+ values: number[];
193
+ }, {
194
+ median: number;
195
+ min: number;
196
+ max: number;
197
+ values: number[];
198
+ }>;
199
+ empty_or_malformed_responses: z.ZodObject<{
200
+ median: z.ZodNumber;
201
+ min: z.ZodNumber;
202
+ max: z.ZodNumber;
203
+ values: z.ZodArray<z.ZodNumber, "many">;
204
+ }, "strip", z.ZodTypeAny, {
205
+ median: number;
206
+ min: number;
207
+ max: number;
208
+ values: number[];
209
+ }, {
210
+ median: number;
211
+ min: number;
212
+ max: number;
213
+ values: number[];
214
+ }>;
215
+ per_category_any_flag: z.ZodRecord<z.ZodString, z.ZodObject<{ // string-keyed record of per-category ratio entries
216
+ median_ratio: z.ZodNumber;
217
+ min_ratio: z.ZodNumber;
218
+ max_ratio: z.ZodNumber;
219
+ total: z.ZodNumber;
220
+ per_run_ratios: z.ZodArray<z.ZodNumber, "many">;
221
+ }, "strip", z.ZodTypeAny, {
222
+ median_ratio: number;
223
+ min_ratio: number;
224
+ max_ratio: number;
225
+ total: number;
226
+ per_run_ratios: number[];
227
+ }, {
228
+ median_ratio: number;
229
+ min_ratio: number;
230
+ max_ratio: number;
231
+ total: number;
232
+ per_run_ratios: number[];
233
+ }>>;
234
+ per_category_strict: z.ZodRecord<z.ZodString, z.ZodObject<{
235
+ median_ratio: z.ZodNumber;
236
+ min_ratio: z.ZodNumber;
237
+ max_ratio: z.ZodNumber;
238
+ total: z.ZodNumber;
239
+ per_run_ratios: z.ZodArray<z.ZodNumber, "many">;
240
+ }, "strip", z.ZodTypeAny, {
241
+ median_ratio: number;
242
+ min_ratio: number;
243
+ max_ratio: number;
244
+ total: number;
245
+ per_run_ratios: number[];
246
+ }, {
247
+ median_ratio: number;
248
+ min_ratio: number;
249
+ max_ratio: number;
250
+ total: number;
251
+ per_run_ratios: number[];
252
+ }>>;
253
+ decision_vocabulary: z.ZodRecord<z.ZodString, z.ZodObject<{ // string-keyed record of metric objects
254
+ median: z.ZodNumber;
255
+ min: z.ZodNumber;
256
+ max: z.ZodNumber;
257
+ values: z.ZodArray<z.ZodNumber, "many">;
258
+ }, "strip", z.ZodTypeAny, {
259
+ median: number;
260
+ min: number;
261
+ max: number;
262
+ values: number[];
263
+ }, {
264
+ median: number;
265
+ min: number;
266
+ max: number;
267
+ values: number[];
268
+ }>>;
269
+ decision_vocab_bar: z.ZodObject<{
270
+ architecture: z.ZodEnum<["single-pass", "two-pass"]>;
271
+ required: z.ZodNumber;
272
+ median_produced: z.ZodNumber;
273
+ passed: z.ZodBoolean;
274
+ }, "strip", z.ZodTypeAny, {
275
+ required: number;
276
+ architecture: "single-pass" | "two-pass";
277
+ median_produced: number;
278
+ passed: boolean;
279
+ }, {
280
+ required: number;
281
+ architecture: "single-pass" | "two-pass";
282
+ median_produced: number;
283
+ passed: boolean;
284
+ }>;
285
+ unreachable_decisions: z.ZodArray<z.ZodString, "many">;
286
+ pass_fail: z.ZodObject<{
287
+ fp_ceiling: z.ZodEnum<["PASS", "FAIL"]>;
288
+ any_flag_recall_floor: z.ZodEnum<["PASS", "FAIL"]>;
289
+ per_category_any_flag_floor: z.ZodEnum<["PASS", "FAIL"]>;
290
+ strict_recall_floor: z.ZodEnum<["PASS", "FAIL"]>;
291
+ decision_vocab_completeness: z.ZodEnum<["PASS", "FAIL"]>;
292
+ latency_soft: z.ZodEnum<["PASS", "WARN"]>;
293
+ latency_hard: z.ZodEnum<["PASS", "FAIL"]>;
294
+ empty_or_malformed: z.ZodEnum<["PASS", "FAIL"]>;
295
+ overall: z.ZodEnum<["PASS", "FAIL"]>;
296
+ }, "strip", z.ZodTypeAny, {
297
+ fp_ceiling: "PASS" | "FAIL";
298
+ any_flag_recall_floor: "PASS" | "FAIL";
299
+ per_category_any_flag_floor: "PASS" | "FAIL";
300
+ strict_recall_floor: "PASS" | "FAIL";
301
+ decision_vocab_completeness: "PASS" | "FAIL";
302
+ latency_soft: "PASS" | "WARN";
303
+ latency_hard: "PASS" | "FAIL";
304
+ empty_or_malformed: "PASS" | "FAIL";
305
+ overall: "PASS" | "FAIL";
306
+ }, {
307
+ fp_ceiling: "PASS" | "FAIL";
308
+ any_flag_recall_floor: "PASS" | "FAIL";
309
+ per_category_any_flag_floor: "PASS" | "FAIL";
310
+ strict_recall_floor: "PASS" | "FAIL";
311
+ decision_vocab_completeness: "PASS" | "FAIL";
312
+ latency_soft: "PASS" | "WARN";
313
+ latency_hard: "PASS" | "FAIL";
314
+ empty_or_malformed: "PASS" | "FAIL";
315
+ overall: "PASS" | "FAIL";
316
+ }>;
317
+ recurring_bar_failures: z.ZodArray<z.ZodString, "many">;
318
+ notes: z.ZodArray<z.ZodString, "many">;
319
+ reviewer_options: z.ZodOptional<z.ZodObject<{ // the only optional top-level field; every inner field is optional too
320
+ num_ctx: z.ZodOptional<z.ZodNumber>;
321
+ temperature: z.ZodOptional<z.ZodNumber>;
322
+ seed: z.ZodOptional<z.ZodNumber>;
323
+ top_p: z.ZodOptional<z.ZodNumber>;
324
+ top_k: z.ZodOptional<z.ZodNumber>;
325
+ repeat_penalty: z.ZodOptional<z.ZodNumber>;
326
+ }, "strip", z.ZodTypeAny, {
327
+ num_ctx?: number | undefined;
328
+ temperature?: number | undefined;
329
+ seed?: number | undefined;
330
+ top_p?: number | undefined;
331
+ top_k?: number | undefined;
332
+ repeat_penalty?: number | undefined;
333
+ }, {
334
+ num_ctx?: number | undefined;
335
+ temperature?: number | undefined;
336
+ seed?: number | undefined;
337
+ top_p?: number | undefined;
338
+ top_k?: number | undefined;
339
+ repeat_penalty?: number | undefined;
340
+ }>>;
341
+ }, "strip", z.ZodTypeAny, { // end of shape; parsed output type of the whole receipt follows
342
+ status: "trusted_baseline" | "conditional_pass" | "failed" | "comparison_only";
343
+ architecture: "single-pass" | "two-pass";
344
+ schema_version: 1;
345
+ receipt_kind: "aggregate";
346
+ profile_name: string;
347
+ model: string;
348
+ fixture: string;
349
+ fixture_total_claims: number;
350
+ fixture_good_claims: number;
351
+ fixture_bad_claims: number;
352
+ runs_count: number;
353
+ run_files: string[];
354
+ aggregated_at: string;
355
+ research_os_version: string;
356
+ good_fp_count: {
357
+ median: number;
358
+ min: number;
359
+ max: number;
360
+ values: number[];
361
+ };
362
+ any_flag_recall_ratio: {
363
+ median: number;
364
+ min: number;
365
+ max: number;
366
+ values: number[];
367
+ };
368
+ strict_recall_ratio: {
369
+ median: number;
370
+ min: number;
371
+ max: number;
372
+ values: number[];
373
+ };
374
+ decisions_produced_count: {
375
+ median: number;
376
+ min: number;
377
+ max: number;
378
+ values: number[];
379
+ };
380
+ runtime_ms: {
381
+ median: number;
382
+ min: number;
383
+ max: number;
384
+ values: number[];
385
+ };
386
+ empty_or_malformed_responses: {
387
+ median: number;
388
+ min: number;
389
+ max: number;
390
+ values: number[];
391
+ };
392
+ per_category_any_flag: Record<string, {
393
+ median_ratio: number;
394
+ min_ratio: number;
395
+ max_ratio: number;
396
+ total: number;
397
+ per_run_ratios: number[];
398
+ }>;
399
+ per_category_strict: Record<string, {
400
+ median_ratio: number;
401
+ min_ratio: number;
402
+ max_ratio: number;
403
+ total: number;
404
+ per_run_ratios: number[];
405
+ }>;
406
+ decision_vocabulary: Record<string, {
407
+ median: number;
408
+ min: number;
409
+ max: number;
410
+ values: number[];
411
+ }>;
412
+ decision_vocab_bar: {
413
+ required: number;
414
+ architecture: "single-pass" | "two-pass";
415
+ median_produced: number;
416
+ passed: boolean;
417
+ };
418
+ unreachable_decisions: string[];
419
+ pass_fail: {
420
+ fp_ceiling: "PASS" | "FAIL";
421
+ any_flag_recall_floor: "PASS" | "FAIL";
422
+ per_category_any_flag_floor: "PASS" | "FAIL";
423
+ strict_recall_floor: "PASS" | "FAIL";
424
+ decision_vocab_completeness: "PASS" | "FAIL";
425
+ latency_soft: "PASS" | "WARN";
426
+ latency_hard: "PASS" | "FAIL";
427
+ empty_or_malformed: "PASS" | "FAIL";
428
+ overall: "PASS" | "FAIL";
429
+ };
430
+ recurring_bar_failures: string[];
431
+ notes: string[];
432
+ reviewer_options?: {
433
+ num_ctx?: number | undefined;
434
+ temperature?: number | undefined;
435
+ seed?: number | undefined;
436
+ top_p?: number | undefined;
437
+ top_k?: number | undefined;
438
+ repeat_penalty?: number | undefined;
439
+ } | undefined;
440
+ }, { // input type of the whole receipt follows (same fields as the output type)
441
+ status: "trusted_baseline" | "conditional_pass" | "failed" | "comparison_only";
442
+ architecture: "single-pass" | "two-pass";
443
+ schema_version: 1;
444
+ receipt_kind: "aggregate";
445
+ profile_name: string;
446
+ model: string;
447
+ fixture: string;
448
+ fixture_total_claims: number;
449
+ fixture_good_claims: number;
450
+ fixture_bad_claims: number;
451
+ runs_count: number;
452
+ run_files: string[];
453
+ aggregated_at: string;
454
+ research_os_version: string;
455
+ good_fp_count: {
456
+ median: number;
457
+ min: number;
458
+ max: number;
459
+ values: number[];
460
+ };
461
+ any_flag_recall_ratio: {
462
+ median: number;
463
+ min: number;
464
+ max: number;
465
+ values: number[];
466
+ };
467
+ strict_recall_ratio: {
468
+ median: number;
469
+ min: number;
470
+ max: number;
471
+ values: number[];
472
+ };
473
+ decisions_produced_count: {
474
+ median: number;
475
+ min: number;
476
+ max: number;
477
+ values: number[];
478
+ };
479
+ runtime_ms: {
480
+ median: number;
481
+ min: number;
482
+ max: number;
483
+ values: number[];
484
+ };
485
+ empty_or_malformed_responses: {
486
+ median: number;
487
+ min: number;
488
+ max: number;
489
+ values: number[];
490
+ };
491
+ per_category_any_flag: Record<string, {
492
+ median_ratio: number;
493
+ min_ratio: number;
494
+ max_ratio: number;
495
+ total: number;
496
+ per_run_ratios: number[];
497
+ }>;
498
+ per_category_strict: Record<string, {
499
+ median_ratio: number;
500
+ min_ratio: number;
501
+ max_ratio: number;
502
+ total: number;
503
+ per_run_ratios: number[];
504
+ }>;
505
+ decision_vocabulary: Record<string, {
506
+ median: number;
507
+ min: number;
508
+ max: number;
509
+ values: number[];
510
+ }>;
511
+ decision_vocab_bar: {
512
+ required: number;
513
+ architecture: "single-pass" | "two-pass";
514
+ median_produced: number;
515
+ passed: boolean;
516
+ };
517
+ unreachable_decisions: string[];
518
+ pass_fail: {
519
+ fp_ceiling: "PASS" | "FAIL";
520
+ any_flag_recall_floor: "PASS" | "FAIL";
521
+ per_category_any_flag_floor: "PASS" | "FAIL";
522
+ strict_recall_floor: "PASS" | "FAIL";
523
+ decision_vocab_completeness: "PASS" | "FAIL";
524
+ latency_soft: "PASS" | "WARN";
525
+ latency_hard: "PASS" | "FAIL";
526
+ empty_or_malformed: "PASS" | "FAIL";
527
+ overall: "PASS" | "FAIL";
528
+ };
529
+ recurring_bar_failures: string[];
530
+ notes: string[];
531
+ reviewer_options?: {
532
+ num_ctx?: number | undefined;
533
+ temperature?: number | undefined;
534
+ seed?: number | undefined;
535
+ top_p?: number | undefined;
536
+ top_k?: number | undefined;
537
+ repeat_penalty?: number | undefined;
538
+ } | undefined;
539
+ }>;
540
+ type AggregateMetric = z.infer<typeof AggregateMetricSchema>; // Each alias in this run is the static type inferred from the matching schema constant.
541
+ type PerCategoryAggregateEntry = z.infer<typeof PerCategoryAggregateEntrySchema>;
542
+ type PerCategoryAggregate = z.infer<typeof PerCategoryAggregateSchema>;
543
+ type AggregatePassFail = z.infer<typeof AggregatePassFailSchema>;
544
+ type AggregateDecisionVocabBar = z.infer<typeof AggregateDecisionVocabBarSchema>;
545
+ type AggregateCalibrationReceipt = z.infer<typeof AggregateCalibrationReceiptSchema>;
546
+
547
+ export { type AggregateCalibrationReceipt, AggregateCalibrationReceiptSchema, type AggregateDecisionVocabBar, AggregateDecisionVocabBarSchema, type AggregateMetric, AggregateMetricSchema, type AggregatePassFail, AggregatePassFailSchema, type PerCategoryAggregate, type PerCategoryAggregateEntry, PerCategoryAggregateEntrySchema, PerCategoryAggregateSchema }; // Schema constants are exported as values; the inferred aliases as type-only exports.
@@ -0,0 +1,160 @@
1
+ // src/calibration/aggregate-receipt-schema.ts
2
+ import { z as z3 } from "zod";
3
+
4
+ // src/calibration/receipt-schema.ts
5
+ import { z as z2 } from "zod";
6
+
7
+ // src/review/reviewer-options-schema.ts
8
+ import { z } from "zod";
9
+ var ReviewerOptionsSchema = z.object({ // Schema for reviewer options; every field is optional. Field names suggest model sampling settings — confirm against the caller.
10
+ num_ctx: z.number().int().positive().optional(), // integer > 0
11
+ temperature: z.number().min(0).max(2).optional(), // number in [0, 2]
12
+ seed: z.number().int().optional(), // any integer, negative values included
13
+ top_p: z.number().min(0).max(1).optional(), // number in [0, 1]
14
+ top_k: z.number().int().nonnegative().optional(), // integer >= 0
15
+ repeat_penalty: z.number().min(0).optional() // number >= 0, no upper bound
16
+ });
17
+
18
+ // src/calibration/receipt-schema.ts
19
+ var StatusLabelSchema = z2.enum([ // The four status labels a receipt's `status` field may carry.
20
+ "trusted_baseline",
21
+ "conditional_pass",
22
+ "failed",
23
+ "comparison_only"
24
+ ]);
25
+ var ArchitectureSchema = z2.enum(["single-pass", "two-pass"]); // The two accepted `architecture` values.
26
+ var RecallSchema = z2.object({ // One recall measurement: matched count, total count, and a ratio.
27
+ matched: z2.number().int().nonnegative(), // integer >= 0
28
+ total: z2.number().int().nonnegative(), // integer >= 0; NOTE(review): matched <= total is not enforced here
29
+ ratio: z2.number().min(0).max(1) // number in [0, 1]; not cross-checked against matched/total in this schema
30
+ });
31
+ var PerCategoryRecallSchema = z2.record(z2.string(), RecallSchema); // String-keyed record of RecallSchema values.
32
+ var PassFailSchema = z2.object({ // Per-bar verdicts for a single-run receipt, plus an overall verdict.
33
+ fp_ceiling: z2.enum(["PASS", "FAIL"]),
34
+ any_flag_recall_floor: z2.enum(["PASS", "FAIL"]),
35
+ per_category_any_flag_floor: z2.enum(["PASS", "FAIL"]),
36
+ strict_recall_floor: z2.enum(["PASS", "FAIL"]),
37
+ decision_vocab_completeness: z2.enum(["PASS", "FAIL"]),
38
+ latency_soft: z2.enum(["PASS", "WARN"]), // the only bar whose second value is "WARN" rather than "FAIL"
39
+ latency_hard: z2.enum(["PASS", "FAIL"]),
40
+ empty_or_malformed: z2.enum(["PASS", "FAIL"]),
41
+ overall: z2.enum(["PASS", "FAIL"])
42
+ });
43
+ var DecisionVocabBarSchema = z2.object({ // Decision-vocabulary bar for a single run: required vs. produced count and the pass flag.
44
+ architecture: ArchitectureSchema,
45
+ required: z2.number().int().positive(), // integer > 0
46
+ produced: z2.number().int().nonnegative(), // integer >= 0
47
+ passed: z2.boolean() // NOTE(review): not derived from produced/required by this schema; the caller sets it
48
+ });
49
+ var CalibrationReceiptSchema = z2.object({ // Single-run calibration receipt; bundled here but absent from this module's export list.
50
+ schema_version: z2.literal(1),
51
+ profile_name: z2.string(),
52
+ status: StatusLabelSchema,
53
+ model: z2.string(),
54
+ architecture: ArchitectureSchema,
55
+ fixture: z2.string(),
56
+ fixture_total_claims: z2.number().int().positive(), // integer > 0; NOTE(review): not checked against good + bad claims
57
+ fixture_good_claims: z2.number().int().nonnegative(),
58
+ fixture_bad_claims: z2.number().int().nonnegative(),
59
+ calibrated_at: z2.string(), // any string; no timestamp format is enforced
60
+ research_os_version: z2.string(),
61
+ runtime_ms: z2.number().int().nonnegative(),
62
+ good_fp_count: z2.number().int().nonnegative(),
63
+ any_flag_recall: RecallSchema,
64
+ strict_recall: RecallSchema,
65
+ per_category_any_flag: PerCategoryRecallSchema,
66
+ per_category_strict: PerCategoryRecallSchema,
67
+ decision_vocabulary: z2.record(z2.string(), z2.number().int().nonnegative()), // string key -> integer >= 0
68
+ decisions_produced_count: z2.number().int().nonnegative(),
69
+ decision_vocab_bar: DecisionVocabBarSchema,
70
+ unreachable_decisions: z2.array(z2.string()),
71
+ empty_or_malformed_responses: z2.number().int().nonnegative(),
72
+ pass_fail: PassFailSchema,
73
+ notes: z2.array(z2.string()),
74
+ reviewer_options: ReviewerOptionsSchema.optional() // the only optional field of the receipt
75
+ });
76
+
77
+ // src/calibration/aggregate-receipt-schema.ts
78
+ var AggregateMetricSchema = z3.object({ // One metric aggregated across runs; none of the numbers is range- or integer-constrained.
79
+ median: z3.number(),
80
+ min: z3.number(),
81
+ max: z3.number(), // NOTE(review): min <= median <= max is not enforced by this schema
82
+ values: z3.array(z3.number())
83
+ // per-run values in run order (run-001, run-002, ...)
84
+ });
85
+ var PerCategoryAggregateEntrySchema = z3.object({ // Aggregated ratios for one category: median/min/max, a total, and per-run ratios.
86
+ median_ratio: z3.number().min(0).max(1), // number in [0, 1]
87
+ min_ratio: z3.number().min(0).max(1), // number in [0, 1]
88
+ max_ratio: z3.number().min(0).max(1), // number in [0, 1]
89
+ total: z3.number().int().nonnegative(),
90
+ // seed count — same across all runs
91
+ per_run_ratios: z3.array(z3.number()) // NOTE(review): elements are not bounded to [0, 1], unlike the *_ratio fields — confirm this is intended
92
+ });
93
+ var PerCategoryAggregateSchema = z3.record(z3.string(), PerCategoryAggregateEntrySchema); // String-keyed record of per-category aggregate entries.
94
+ var AggregatePassFailSchema = z3.object({ // Per-bar verdicts for the aggregate receipt; same field names and enum values as PassFailSchema in this bundle.
95
+ fp_ceiling: z3.enum(["PASS", "FAIL"]),
96
+ any_flag_recall_floor: z3.enum(["PASS", "FAIL"]),
97
+ per_category_any_flag_floor: z3.enum(["PASS", "FAIL"]),
98
+ strict_recall_floor: z3.enum(["PASS", "FAIL"]),
99
+ decision_vocab_completeness: z3.enum(["PASS", "FAIL"]),
100
+ latency_soft: z3.enum(["PASS", "WARN"]), // the only bar whose second value is "WARN" rather than "FAIL"
101
+ latency_hard: z3.enum(["PASS", "FAIL"]),
102
+ empty_or_malformed: z3.enum(["PASS", "FAIL"]),
103
+ overall: z3.enum(["PASS", "FAIL"])
104
+ });
105
+ var AggregateDecisionVocabBarSchema = z3.object({ // Decision-vocabulary bar for the aggregate receipt; carries median_produced where the single-run schema has produced.
106
+ architecture: ArchitectureSchema,
107
+ required: z3.number().int().positive(), // integer > 0
108
+ median_produced: z3.number(), // unconstrained number: no integer or non-negative check
109
+ // float — median of per-run decisions_produced_count
110
+ passed: z3.boolean()
111
+ });
112
+ var AggregateCalibrationReceiptSchema = z3.object({ // Receipt summarising several calibration runs; tagged with receipt_kind "aggregate".
113
+ schema_version: z3.literal(1),
114
+ receipt_kind: z3.literal("aggregate"),
115
+ // discriminates from single-run receipt
116
+ profile_name: z3.string(),
117
+ status: StatusLabelSchema,
118
+ model: z3.string(),
119
+ architecture: ArchitectureSchema,
120
+ fixture: z3.string(),
121
+ fixture_total_claims: z3.number().int().positive(),
122
+ fixture_good_claims: z3.number().int().nonnegative(),
123
+ fixture_bad_claims: z3.number().int().nonnegative(),
124
+ runs_count: z3.number().int().min(2), // an aggregate needs at least two runs
125
+ run_files: z3.array(z3.string()), // NOTE(review): array length is not checked against runs_count by this schema
126
+ // relative paths: runs/run-001.json, etc.
127
+ aggregated_at: z3.string(), // NOTE(review): any string passes; the format named below is not enforced here
128
+ // ISO 8601
129
+ research_os_version: z3.string(),
130
+ // Aggregate metrics — median + min + max + per-run values in run order
131
+ good_fp_count: AggregateMetricSchema,
132
+ any_flag_recall_ratio: AggregateMetricSchema,
133
+ strict_recall_ratio: AggregateMetricSchema,
134
+ decisions_produced_count: AggregateMetricSchema,
135
+ runtime_ms: AggregateMetricSchema,
136
+ empty_or_malformed_responses: AggregateMetricSchema,
137
+ per_category_any_flag: PerCategoryAggregateSchema,
138
+ per_category_strict: PerCategoryAggregateSchema,
139
+ // Decision vocabulary — union of all decisions seen across runs, median count each
140
+ decision_vocabulary: z3.record(z3.string(), AggregateMetricSchema),
141
+ decision_vocab_bar: AggregateDecisionVocabBarSchema,
142
+ unreachable_decisions: z3.array(z3.string()),
143
+ pass_fail: AggregatePassFailSchema,
144
+ // Bars that FAILed in >= ceil(runs_count/2) individual runs.
145
+ // Non-empty list demotes trusted_baseline to conditional_pass.
146
+ recurring_bar_failures: z3.array(z3.string()), // the demotion rule above is not enforced by this schema
147
+ notes: z3.array(z3.string()),
148
+ // schema_version: 1 — additive-optional (Exp6 Session 2):
149
+ // Same options object stamped on every per-run receipt. Absent = stochastic run.
150
+ reviewer_options: ReviewerOptionsSchema.optional()
151
+ });
152
+ export { // Only the aggregate schemas are exported; the other schemas defined in this bundle are not.
153
+ AggregateCalibrationReceiptSchema,
154
+ AggregateDecisionVocabBarSchema,
155
+ AggregateMetricSchema,
156
+ AggregatePassFailSchema,
157
+ PerCategoryAggregateEntrySchema,
158
+ PerCategoryAggregateSchema
159
+ };
160
+ //# sourceMappingURL=aggregate-receipt-schema.js.map