@mcptoolshop/research-os 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +119 -0
- package/README.es.md +11 -3
- package/README.fr.md +14 -6
- package/README.hi.md +33 -20
- package/README.it.md +11 -3
- package/README.ja.md +15 -7
- package/README.md +20 -4
- package/README.pt-BR.md +11 -3
- package/README.zh.md +11 -3
- package/dist/calibration/aggregate-receipt-schema.d.ts +38 -0
- package/dist/calibration/aggregate-receipt-schema.js +99 -82
- package/dist/calibration/aggregate-receipt-schema.js.map +1 -1
- package/dist/calibration/aggregate.d.ts +2 -0
- package/dist/calibration/aggregate.js +123 -84
- package/dist/calibration/aggregate.js.map +1 -1
- package/dist/calibration/receipt-schema.d.ts +45 -6
- package/dist/calibration/receipt-schema.js +53 -38
- package/dist/calibration/receipt-schema.js.map +1 -1
- package/dist/calibration/receipt.d.ts +1 -0
- package/dist/calibration/receipt.js +20 -1
- package/dist/calibration/receipt.js.map +1 -1
- package/dist/cli.js +948 -885
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +145 -49
- package/dist/index.js +870 -814
- package/dist/index.js.map +1 -1
- package/dist/reviewer-options-schema-PZacF_MO.d.ts +27 -0
- package/package.json +1 -1
|
@@ -1,118 +1,132 @@
|
|
|
1
1
|
// src/calibration/aggregate-receipt-schema.ts
|
|
2
|
-
import { z as
|
|
2
|
+
import { z as z3 } from "zod";
|
|
3
3
|
|
|
4
4
|
// src/calibration/receipt-schema.ts
|
|
5
|
+
import { z as z2 } from "zod";
|
|
6
|
+
|
|
7
|
+
// src/review/reviewer-options-schema.ts
|
|
5
8
|
import { z } from "zod";
|
|
6
|
-
var
|
|
9
|
+
var ReviewerOptionsSchema = z.object({
|
|
10
|
+
num_ctx: z.number().int().positive().optional(),
|
|
11
|
+
temperature: z.number().min(0).max(2).optional(),
|
|
12
|
+
seed: z.number().int().optional(),
|
|
13
|
+
top_p: z.number().min(0).max(1).optional(),
|
|
14
|
+
top_k: z.number().int().nonnegative().optional(),
|
|
15
|
+
repeat_penalty: z.number().min(0).optional()
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
// src/calibration/receipt-schema.ts
|
|
19
|
+
var StatusLabelSchema = z2.enum([
|
|
7
20
|
"trusted_baseline",
|
|
8
21
|
"conditional_pass",
|
|
9
22
|
"failed",
|
|
10
23
|
"comparison_only"
|
|
11
24
|
]);
|
|
12
|
-
var ArchitectureSchema =
|
|
13
|
-
var RecallSchema =
|
|
14
|
-
matched:
|
|
15
|
-
total:
|
|
16
|
-
ratio:
|
|
25
|
+
var ArchitectureSchema = z2.enum(["single-pass", "two-pass"]);
|
|
26
|
+
var RecallSchema = z2.object({
|
|
27
|
+
matched: z2.number().int().nonnegative(),
|
|
28
|
+
total: z2.number().int().nonnegative(),
|
|
29
|
+
ratio: z2.number().min(0).max(1)
|
|
17
30
|
});
|
|
18
|
-
var PerCategoryRecallSchema =
|
|
19
|
-
var PassFailSchema =
|
|
20
|
-
fp_ceiling:
|
|
21
|
-
any_flag_recall_floor:
|
|
22
|
-
per_category_any_flag_floor:
|
|
23
|
-
strict_recall_floor:
|
|
24
|
-
decision_vocab_completeness:
|
|
25
|
-
latency_soft:
|
|
26
|
-
latency_hard:
|
|
27
|
-
empty_or_malformed:
|
|
28
|
-
overall:
|
|
31
|
+
var PerCategoryRecallSchema = z2.record(z2.string(), RecallSchema);
|
|
32
|
+
var PassFailSchema = z2.object({
|
|
33
|
+
fp_ceiling: z2.enum(["PASS", "FAIL"]),
|
|
34
|
+
any_flag_recall_floor: z2.enum(["PASS", "FAIL"]),
|
|
35
|
+
per_category_any_flag_floor: z2.enum(["PASS", "FAIL"]),
|
|
36
|
+
strict_recall_floor: z2.enum(["PASS", "FAIL"]),
|
|
37
|
+
decision_vocab_completeness: z2.enum(["PASS", "FAIL"]),
|
|
38
|
+
latency_soft: z2.enum(["PASS", "WARN"]),
|
|
39
|
+
latency_hard: z2.enum(["PASS", "FAIL"]),
|
|
40
|
+
empty_or_malformed: z2.enum(["PASS", "FAIL"]),
|
|
41
|
+
overall: z2.enum(["PASS", "FAIL"])
|
|
29
42
|
});
|
|
30
|
-
var DecisionVocabBarSchema =
|
|
43
|
+
var DecisionVocabBarSchema = z2.object({
|
|
31
44
|
architecture: ArchitectureSchema,
|
|
32
|
-
required:
|
|
33
|
-
produced:
|
|
34
|
-
passed:
|
|
45
|
+
required: z2.number().int().positive(),
|
|
46
|
+
produced: z2.number().int().nonnegative(),
|
|
47
|
+
passed: z2.boolean()
|
|
35
48
|
});
|
|
36
|
-
var CalibrationReceiptSchema =
|
|
37
|
-
schema_version:
|
|
38
|
-
profile_name:
|
|
49
|
+
var CalibrationReceiptSchema = z2.object({
|
|
50
|
+
schema_version: z2.literal(1),
|
|
51
|
+
profile_name: z2.string(),
|
|
39
52
|
status: StatusLabelSchema,
|
|
40
|
-
model:
|
|
53
|
+
model: z2.string(),
|
|
41
54
|
architecture: ArchitectureSchema,
|
|
42
|
-
fixture:
|
|
43
|
-
fixture_total_claims:
|
|
44
|
-
fixture_good_claims:
|
|
45
|
-
fixture_bad_claims:
|
|
46
|
-
calibrated_at:
|
|
47
|
-
research_os_version:
|
|
48
|
-
runtime_ms:
|
|
49
|
-
good_fp_count:
|
|
55
|
+
fixture: z2.string(),
|
|
56
|
+
fixture_total_claims: z2.number().int().positive(),
|
|
57
|
+
fixture_good_claims: z2.number().int().nonnegative(),
|
|
58
|
+
fixture_bad_claims: z2.number().int().nonnegative(),
|
|
59
|
+
calibrated_at: z2.string(),
|
|
60
|
+
research_os_version: z2.string(),
|
|
61
|
+
runtime_ms: z2.number().int().nonnegative(),
|
|
62
|
+
good_fp_count: z2.number().int().nonnegative(),
|
|
50
63
|
any_flag_recall: RecallSchema,
|
|
51
64
|
strict_recall: RecallSchema,
|
|
52
65
|
per_category_any_flag: PerCategoryRecallSchema,
|
|
53
66
|
per_category_strict: PerCategoryRecallSchema,
|
|
54
|
-
decision_vocabulary:
|
|
55
|
-
decisions_produced_count:
|
|
67
|
+
decision_vocabulary: z2.record(z2.string(), z2.number().int().nonnegative()),
|
|
68
|
+
decisions_produced_count: z2.number().int().nonnegative(),
|
|
56
69
|
decision_vocab_bar: DecisionVocabBarSchema,
|
|
57
|
-
unreachable_decisions:
|
|
58
|
-
empty_or_malformed_responses:
|
|
70
|
+
unreachable_decisions: z2.array(z2.string()),
|
|
71
|
+
empty_or_malformed_responses: z2.number().int().nonnegative(),
|
|
59
72
|
pass_fail: PassFailSchema,
|
|
60
|
-
notes:
|
|
73
|
+
notes: z2.array(z2.string()),
|
|
74
|
+
reviewer_options: ReviewerOptionsSchema.optional()
|
|
61
75
|
});
|
|
62
76
|
|
|
63
77
|
// src/calibration/aggregate-receipt-schema.ts
|
|
64
|
-
var AggregateMetricSchema =
|
|
65
|
-
median:
|
|
66
|
-
min:
|
|
67
|
-
max:
|
|
68
|
-
values:
|
|
78
|
+
var AggregateMetricSchema = z3.object({
|
|
79
|
+
median: z3.number(),
|
|
80
|
+
min: z3.number(),
|
|
81
|
+
max: z3.number(),
|
|
82
|
+
values: z3.array(z3.number())
|
|
69
83
|
// per-run values in run order (run-001, run-002, ...)
|
|
70
84
|
});
|
|
71
|
-
var PerCategoryAggregateEntrySchema =
|
|
72
|
-
median_ratio:
|
|
73
|
-
min_ratio:
|
|
74
|
-
max_ratio:
|
|
75
|
-
total:
|
|
85
|
+
var PerCategoryAggregateEntrySchema = z3.object({
|
|
86
|
+
median_ratio: z3.number().min(0).max(1),
|
|
87
|
+
min_ratio: z3.number().min(0).max(1),
|
|
88
|
+
max_ratio: z3.number().min(0).max(1),
|
|
89
|
+
total: z3.number().int().nonnegative(),
|
|
76
90
|
// seed count — same across all runs
|
|
77
|
-
per_run_ratios:
|
|
91
|
+
per_run_ratios: z3.array(z3.number())
|
|
78
92
|
});
|
|
79
|
-
var PerCategoryAggregateSchema =
|
|
80
|
-
var AggregatePassFailSchema =
|
|
81
|
-
fp_ceiling:
|
|
82
|
-
any_flag_recall_floor:
|
|
83
|
-
per_category_any_flag_floor:
|
|
84
|
-
strict_recall_floor:
|
|
85
|
-
decision_vocab_completeness:
|
|
86
|
-
latency_soft:
|
|
87
|
-
latency_hard:
|
|
88
|
-
empty_or_malformed:
|
|
89
|
-
overall:
|
|
93
|
+
var PerCategoryAggregateSchema = z3.record(z3.string(), PerCategoryAggregateEntrySchema);
|
|
94
|
+
var AggregatePassFailSchema = z3.object({
|
|
95
|
+
fp_ceiling: z3.enum(["PASS", "FAIL"]),
|
|
96
|
+
any_flag_recall_floor: z3.enum(["PASS", "FAIL"]),
|
|
97
|
+
per_category_any_flag_floor: z3.enum(["PASS", "FAIL"]),
|
|
98
|
+
strict_recall_floor: z3.enum(["PASS", "FAIL"]),
|
|
99
|
+
decision_vocab_completeness: z3.enum(["PASS", "FAIL"]),
|
|
100
|
+
latency_soft: z3.enum(["PASS", "WARN"]),
|
|
101
|
+
latency_hard: z3.enum(["PASS", "FAIL"]),
|
|
102
|
+
empty_or_malformed: z3.enum(["PASS", "FAIL"]),
|
|
103
|
+
overall: z3.enum(["PASS", "FAIL"])
|
|
90
104
|
});
|
|
91
|
-
var AggregateDecisionVocabBarSchema =
|
|
105
|
+
var AggregateDecisionVocabBarSchema = z3.object({
|
|
92
106
|
architecture: ArchitectureSchema,
|
|
93
|
-
required:
|
|
94
|
-
median_produced:
|
|
107
|
+
required: z3.number().int().positive(),
|
|
108
|
+
median_produced: z3.number(),
|
|
95
109
|
// float — median of per-run decisions_produced_count
|
|
96
|
-
passed:
|
|
110
|
+
passed: z3.boolean()
|
|
97
111
|
});
|
|
98
|
-
var AggregateCalibrationReceiptSchema =
|
|
99
|
-
schema_version:
|
|
100
|
-
receipt_kind:
|
|
112
|
+
var AggregateCalibrationReceiptSchema = z3.object({
|
|
113
|
+
schema_version: z3.literal(1),
|
|
114
|
+
receipt_kind: z3.literal("aggregate"),
|
|
101
115
|
// discriminates from single-run receipt
|
|
102
|
-
profile_name:
|
|
116
|
+
profile_name: z3.string(),
|
|
103
117
|
status: StatusLabelSchema,
|
|
104
|
-
model:
|
|
118
|
+
model: z3.string(),
|
|
105
119
|
architecture: ArchitectureSchema,
|
|
106
|
-
fixture:
|
|
107
|
-
fixture_total_claims:
|
|
108
|
-
fixture_good_claims:
|
|
109
|
-
fixture_bad_claims:
|
|
110
|
-
runs_count:
|
|
111
|
-
run_files:
|
|
120
|
+
fixture: z3.string(),
|
|
121
|
+
fixture_total_claims: z3.number().int().positive(),
|
|
122
|
+
fixture_good_claims: z3.number().int().nonnegative(),
|
|
123
|
+
fixture_bad_claims: z3.number().int().nonnegative(),
|
|
124
|
+
runs_count: z3.number().int().min(2),
|
|
125
|
+
run_files: z3.array(z3.string()),
|
|
112
126
|
// relative paths: runs/run-001.json, etc.
|
|
113
|
-
aggregated_at:
|
|
127
|
+
aggregated_at: z3.string(),
|
|
114
128
|
// ISO 8601
|
|
115
|
-
research_os_version:
|
|
129
|
+
research_os_version: z3.string(),
|
|
116
130
|
// Aggregate metrics — median + min + max + per-run values in run order
|
|
117
131
|
good_fp_count: AggregateMetricSchema,
|
|
118
132
|
any_flag_recall_ratio: AggregateMetricSchema,
|
|
@@ -123,14 +137,17 @@ var AggregateCalibrationReceiptSchema = z2.object({
|
|
|
123
137
|
per_category_any_flag: PerCategoryAggregateSchema,
|
|
124
138
|
per_category_strict: PerCategoryAggregateSchema,
|
|
125
139
|
// Decision vocabulary — union of all decisions seen across runs, median count each
|
|
126
|
-
decision_vocabulary:
|
|
140
|
+
decision_vocabulary: z3.record(z3.string(), AggregateMetricSchema),
|
|
127
141
|
decision_vocab_bar: AggregateDecisionVocabBarSchema,
|
|
128
|
-
unreachable_decisions:
|
|
142
|
+
unreachable_decisions: z3.array(z3.string()),
|
|
129
143
|
pass_fail: AggregatePassFailSchema,
|
|
130
144
|
// Bars that FAILed in >= ceil(runs_count/2) individual runs.
|
|
131
145
|
// Non-empty list demotes trusted_baseline to conditional_pass.
|
|
132
|
-
recurring_bar_failures:
|
|
133
|
-
notes:
|
|
146
|
+
recurring_bar_failures: z3.array(z3.string()),
|
|
147
|
+
notes: z3.array(z3.string()),
|
|
148
|
+
// schema_version: 1 — additive-optional (Exp6 Session 2):
|
|
149
|
+
// Same options object stamped on every per-run receipt. Absent = stochastic run.
|
|
150
|
+
reviewer_options: ReviewerOptionsSchema.optional()
|
|
134
151
|
});
|
|
135
152
|
|
|
136
153
|
// src/calibration/aggregate.ts
|
|
@@ -340,9 +357,30 @@ function aggregateReceipts(runs, opts) {
|
|
|
340
357
|
unreachable_decisions: first.unreachable_decisions,
|
|
341
358
|
pass_fail: aggregatePassFail,
|
|
342
359
|
recurring_bar_failures: recurringBarFailures,
|
|
343
|
-
notes
|
|
360
|
+
notes,
|
|
361
|
+
...opts.reviewerOptions && Object.keys(opts.reviewerOptions).length > 0 && {
|
|
362
|
+
reviewer_options: opts.reviewerOptions
|
|
363
|
+
}
|
|
344
364
|
});
|
|
345
365
|
}
|
|
366
|
+
var REVIEWER_OPTIONS_KEY_ORDER = [
|
|
367
|
+
"num_ctx",
|
|
368
|
+
"temperature",
|
|
369
|
+
"seed",
|
|
370
|
+
"top_p",
|
|
371
|
+
"top_k",
|
|
372
|
+
"repeat_penalty"
|
|
373
|
+
];
|
|
374
|
+
function buildReviewerOptionsSection(opts) {
|
|
375
|
+
if (!opts) return "";
|
|
376
|
+
const lines = REVIEWER_OPTIONS_KEY_ORDER.filter((k) => opts[k] !== void 0).map((k) => `- ${k}: ${opts[k]}`);
|
|
377
|
+
if (lines.length === 0) return "";
|
|
378
|
+
return `
|
|
379
|
+
## Reviewer options
|
|
380
|
+
|
|
381
|
+
${lines.join("\n")}
|
|
382
|
+
`;
|
|
383
|
+
}
|
|
346
384
|
function buildAggregateReceiptMarkdown(r) {
|
|
347
385
|
const pct = (ratio) => `${Math.round(ratio * 100)}%`;
|
|
348
386
|
const secRounded = (ms) => `${(ms / 1e3).toFixed(1)}s`;
|
|
@@ -385,6 +423,7 @@ function buildAggregateReceiptMarkdown(r) {
|
|
|
385
423
|
|
|
386
424
|
${r.notes.map((n) => `- ${n}`).join("\n")}
|
|
387
425
|
` : "";
|
|
426
|
+
const reviewerOptionsSection = buildReviewerOptionsSection(r.reviewer_options);
|
|
388
427
|
return `# Calibration Receipt \u2014 ${r.profile_name} (aggregate, N=${r.runs_count} runs)
|
|
389
428
|
|
|
390
429
|
- **Model:** ${r.model}
|
|
@@ -395,7 +434,7 @@ ${r.notes.map((n) => `- ${n}`).join("\n")}
|
|
|
395
434
|
- **Research-OS version:** ${r.research_os_version}
|
|
396
435
|
- **Run count:** ${r.runs_count}
|
|
397
436
|
- **Run files:** ${runFileList}
|
|
398
|
-
|
|
437
|
+
${reviewerOptionsSection}
|
|
399
438
|
## Headline metrics (median across runs)
|
|
400
439
|
|
|
401
440
|
- FP: median ${fp.median} / ${r.fixture_good_claims} (range ${fp.min}\u2013${fp.max})
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/calibration/aggregate-receipt-schema.ts","../../src/calibration/receipt-schema.ts","../../src/calibration/aggregate.ts"],"sourcesContent":["import { z } from 'zod';\nimport { StatusLabelSchema, ArchitectureSchema } from './receipt-schema.js';\n\nexport const AggregateMetricSchema = z.object({\n median: z.number(),\n min: z.number(),\n max: z.number(),\n values: z.array(z.number()), // per-run values in run order (run-001, run-002, ...)\n});\n\nexport const PerCategoryAggregateEntrySchema = z.object({\n median_ratio: z.number().min(0).max(1),\n min_ratio: z.number().min(0).max(1),\n max_ratio: z.number().min(0).max(1),\n total: z.number().int().nonnegative(), // seed count — same across all runs\n per_run_ratios: z.array(z.number()),\n});\n\nexport const PerCategoryAggregateSchema = z.record(z.string(), PerCategoryAggregateEntrySchema);\n\nexport const AggregatePassFailSchema = z.object({\n fp_ceiling: z.enum(['PASS', 'FAIL']),\n any_flag_recall_floor: z.enum(['PASS', 'FAIL']),\n per_category_any_flag_floor: z.enum(['PASS', 'FAIL']),\n strict_recall_floor: z.enum(['PASS', 'FAIL']),\n decision_vocab_completeness: z.enum(['PASS', 'FAIL']),\n latency_soft: z.enum(['PASS', 'WARN']),\n latency_hard: z.enum(['PASS', 'FAIL']),\n empty_or_malformed: z.enum(['PASS', 'FAIL']),\n overall: z.enum(['PASS', 'FAIL']),\n});\n\nexport const AggregateDecisionVocabBarSchema = z.object({\n architecture: ArchitectureSchema,\n required: z.number().int().positive(),\n median_produced: z.number(), // float — median of per-run decisions_produced_count\n passed: z.boolean(),\n});\n\nexport const AggregateCalibrationReceiptSchema = z.object({\n schema_version: z.literal(1),\n receipt_kind: z.literal('aggregate'), // discriminates from single-run receipt\n profile_name: z.string(),\n status: StatusLabelSchema,\n model: z.string(),\n architecture: ArchitectureSchema,\n fixture: z.string(),\n fixture_total_claims: z.number().int().positive(),\n fixture_good_claims: z.number().int().nonnegative(),\n fixture_bad_claims: z.number().int().nonnegative(),\n runs_count: z.number().int().min(2),\n run_files: z.array(z.string()), // relative paths: runs/run-001.json, etc.\n aggregated_at: z.string(), // ISO 8601\n research_os_version: z.string(),\n\n // Aggregate metrics — median + min + max + per-run values in run order\n good_fp_count: AggregateMetricSchema,\n any_flag_recall_ratio: AggregateMetricSchema,\n strict_recall_ratio: AggregateMetricSchema,\n decisions_produced_count: AggregateMetricSchema,\n runtime_ms: AggregateMetricSchema,\n empty_or_malformed_responses: AggregateMetricSchema,\n\n per_category_any_flag: PerCategoryAggregateSchema,\n per_category_strict: PerCategoryAggregateSchema,\n\n // Decision vocabulary — union of all decisions seen across runs, median count each\n decision_vocabulary: z.record(z.string(), AggregateMetricSchema),\n decision_vocab_bar: AggregateDecisionVocabBarSchema,\n unreachable_decisions: z.array(z.string()),\n\n pass_fail: AggregatePassFailSchema,\n // Bars that FAILed in >= ceil(runs_count/2) individual runs.\n // Non-empty list demotes trusted_baseline to conditional_pass.\n recurring_bar_failures: z.array(z.string()),\n\n notes: z.array(z.string()),\n});\n\nexport type AggregateMetric = z.infer<typeof AggregateMetricSchema>;\nexport type PerCategoryAggregateEntry = z.infer<typeof PerCategoryAggregateEntrySchema>;\nexport type PerCategoryAggregate = z.infer<typeof PerCategoryAggregateSchema>;\nexport type AggregatePassFail = z.infer<typeof AggregatePassFailSchema>;\nexport type AggregateDecisionVocabBar = z.infer<typeof AggregateDecisionVocabBarSchema>;\nexport type AggregateCalibrationReceipt = z.infer<typeof AggregateCalibrationReceiptSchema>;\n","import { z } from 'zod';\n\nexport const StatusLabelSchema = z.enum([\n 'trusted_baseline',\n 'conditional_pass',\n 'failed',\n 'comparison_only',\n]);\n\nexport const ArchitectureSchema = z.enum(['single-pass', 'two-pass']);\n\nexport const RecallSchema = z.object({\n matched: z.number().int().nonnegative(),\n total: z.number().int().nonnegative(),\n ratio: z.number().min(0).max(1),\n});\n\nexport const PerCategoryRecallSchema = z.record(z.string(), RecallSchema);\n\nexport const PassFailSchema = z.object({\n fp_ceiling: z.enum(['PASS', 'FAIL']),\n any_flag_recall_floor: z.enum(['PASS', 'FAIL']),\n per_category_any_flag_floor: z.enum(['PASS', 'FAIL']),\n strict_recall_floor: z.enum(['PASS', 'FAIL']),\n decision_vocab_completeness: z.enum(['PASS', 'FAIL']),\n latency_soft: z.enum(['PASS', 'WARN']),\n latency_hard: z.enum(['PASS', 'FAIL']),\n empty_or_malformed: z.enum(['PASS', 'FAIL']),\n overall: z.enum(['PASS', 'FAIL']),\n});\n\nexport const DecisionVocabBarSchema = z.object({\n architecture: ArchitectureSchema,\n required: z.number().int().positive(),\n produced: z.number().int().nonnegative(),\n passed: z.boolean(),\n});\n\nexport const CalibrationReceiptSchema = z.object({\n schema_version: z.literal(1),\n profile_name: z.string(),\n status: StatusLabelSchema,\n model: z.string(),\n architecture: ArchitectureSchema,\n fixture: z.string(),\n fixture_total_claims: z.number().int().positive(),\n fixture_good_claims: z.number().int().nonnegative(),\n fixture_bad_claims: z.number().int().nonnegative(),\n calibrated_at: z.string(),\n research_os_version: z.string(),\n runtime_ms: z.number().int().nonnegative(),\n good_fp_count: z.number().int().nonnegative(),\n any_flag_recall: RecallSchema,\n strict_recall: RecallSchema,\n per_category_any_flag: PerCategoryRecallSchema,\n per_category_strict: PerCategoryRecallSchema,\n decision_vocabulary: z.record(z.string(), z.number().int().nonnegative()),\n decisions_produced_count: z.number().int().nonnegative(),\n decision_vocab_bar: DecisionVocabBarSchema,\n unreachable_decisions: z.array(z.string()),\n empty_or_malformed_responses: z.number().int().nonnegative(),\n pass_fail: PassFailSchema,\n notes: z.array(z.string()),\n});\n\nexport type StatusLabel = z.infer<typeof StatusLabelSchema>;\nexport type Architecture = z.infer<typeof ArchitectureSchema>;\nexport type Recall = z.infer<typeof RecallSchema>;\nexport type PerCategoryRecall = z.infer<typeof PerCategoryRecallSchema>;\nexport type PassFail = z.infer<typeof PassFailSchema>;\nexport type DecisionVocabBar = z.infer<typeof DecisionVocabBarSchema>;\nexport type CalibrationReceipt = z.infer<typeof CalibrationReceiptSchema>;\n","import type { Architecture, CalibrationReceipt, PassFail, PerCategoryRecall } from './receipt-schema.js';\nimport {\n AggregateCalibrationReceiptSchema,\n type AggregateCalibrationReceipt,\n type AggregateMetric,\n type AggregatePassFail,\n type PerCategoryAggregate,\n} from './aggregate-receipt-schema.js';\nimport type { StatusLabel } from './receipt-schema.js';\n\n// Compute median of a sorted or unsorted array.\n// Throws on empty input — callers always have at least one run.\n// For even-length arrays: mean of two middle values (float, not rounded).\n// Integer-valued metrics (FP count, decisions) stay as floats here;\n// the caller's bar comparisons (>= 3, === 0) work correctly on exact floats\n// because the inputs are small integers.\nexport function median(values: number[]): number {\n if (values.length === 0) throw new Error('median: empty array');\n const sorted = [...values].sort((a, b) => a - b);\n const mid = Math.floor(sorted.length / 2);\n if (sorted.length % 2 === 1) return sorted[mid];\n return (sorted[mid - 1] + sorted[mid]) / 2;\n}\n\n// Aggregate a list of per-run scalar values into { median, min, max, values }.\n// values preserves input order (run-001, run-002, ...) for traceability.\nexport function aggregateMetric(values: number[]): AggregateMetric {\n const m = median(values);\n return {\n median: m,\n min: Math.min(...values),\n max: Math.max(...values),\n values,\n };\n}\n\n// Aggregate per-run per-category recall objects.\n// Each element of perRunBuckets is one run's PerCategoryRecall\n// (Record<category, { matched, total, ratio }>).\n// Returns PerCategoryAggregate: per-category median/min/max ratio + per-run ratios.\n// total is taken from the first run that has the category (same across runs —\n// SEEDS is static so category totals never change between runs).\nexport function aggregatePerCategoryRecall(\n perRunBuckets: PerCategoryRecall[],\n): PerCategoryAggregate {\n const cats = new Set<string>();\n for (const run of perRunBuckets) {\n for (const cat of Object.keys(run)) cats.add(cat);\n }\n\n const result: PerCategoryAggregate = {};\n for (const cat of cats) {\n const ratios = perRunBuckets.map((run) => run[cat]?.ratio ?? 0);\n const total = perRunBuckets.find((run) => run[cat] !== undefined)?.[cat]?.total ?? 0;\n result[cat] = {\n median_ratio: median(ratios),\n min_ratio: Math.min(...ratios),\n max_ratio: Math.max(...ratios),\n total,\n per_run_ratios: ratios,\n };\n }\n return result;\n}\n\n// Aggregate per-run decision vocabulary count dicts.\n// Each element is one run's decision_vocabulary (Record<decision, count>).\n// Returns Record<decision, AggregateMetric> with median count per decision.\nexport function aggregateDecisionVocabulary(\n perRunDicts: Record<string, number>[],\n): Record<string, AggregateMetric> {\n const decisions = new Set<string>();\n for (const run of perRunDicts) {\n for (const d of Object.keys(run)) decisions.add(d);\n }\n\n const result: Record<string, AggregateMetric> = {};\n for (const d of decisions) {\n const values = perRunDicts.map((run) => run[d] ?? 0);\n result[d] = aggregateMetric(values);\n }\n return result;\n}\n\n// Compute aggregate PASS/FAIL bars from aggregated metrics.\n//\n// Advisor-locked rules (gospel):\n// FP ceiling: median <= 1 AND max <= 2\n// Any-flag recall: median >= 0.65\n// Per-category: median_ratio >= 0.50 for categories with total >= 2\n// Strict recall: median >= 0.20\n// Decision vocab: median >= required (architecture-aware: two-pass=3, single-pass=4)\n// Latency soft: median <= 600_000 → WARN only, never FAIL\n// Latency hard: every-run rule — max <= 1_200_000\n// Empty/malformed: every-run rule — max === 0\nexport function computeAggregatePassFail(input: {\n good_fp_count: AggregateMetric;\n any_flag_recall_ratio: AggregateMetric;\n per_category_any_flag: PerCategoryAggregate;\n strict_recall_ratio: AggregateMetric;\n decisions_produced_count: AggregateMetric;\n architecture: Architecture;\n runtime_ms: AggregateMetric;\n empty_or_malformed_responses: AggregateMetric;\n}): AggregatePassFail {\n const fp_ceiling: 'PASS' | 'FAIL' =\n input.good_fp_count.median <= 1 && input.good_fp_count.max <= 2 ? 'PASS' : 'FAIL';\n\n const any_flag_recall_floor: 'PASS' | 'FAIL' =\n input.any_flag_recall_ratio.median >= 0.65 ? 'PASS' : 'FAIL';\n\n let per_category_any_flag_floor: 'PASS' | 'FAIL' = 'PASS';\n for (const entry of Object.values(input.per_category_any_flag)) {\n if (entry.total >= 2 && entry.median_ratio < 0.5) {\n per_category_any_flag_floor = 'FAIL';\n break;\n }\n }\n\n const strict_recall_floor: 'PASS' | 'FAIL' =\n input.strict_recall_ratio.median >= 0.2 ? 'PASS' : 'FAIL';\n\n const dvRequired = input.architecture === 'two-pass' ? 3 : 4;\n const decision_vocab_completeness: 'PASS' | 'FAIL' =\n input.decisions_produced_count.median >= dvRequired ? 'PASS' : 'FAIL';\n\n // Latency soft: WARN-only signal — no FAIL contribution\n const latency_soft: 'PASS' | 'WARN' =\n input.runtime_ms.median <= 600_000 ? 'PASS' : 'WARN';\n\n // Latency hard: every-run rule — enforced via max\n const latency_hard: 'PASS' | 'FAIL' =\n input.runtime_ms.max <= 1_200_000 ? 'PASS' : 'FAIL';\n\n // Empty/malformed: every-run rule — enforced via max\n const empty_or_malformed: 'PASS' | 'FAIL' =\n input.empty_or_malformed_responses.max === 0 ? 'PASS' : 'FAIL';\n\n const hardBars: ('PASS' | 'FAIL')[] = [\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_hard,\n empty_or_malformed,\n ];\n const overall: 'PASS' | 'FAIL' = hardBars.every((v) => v === 'PASS') ? 'PASS' : 'FAIL';\n\n return {\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_soft,\n latency_hard,\n empty_or_malformed,\n overall,\n };\n}\n\n// Compute which hard bars FAILed in >= ceil(N/2) individual runs.\n// A non-empty result means that bar was SYSTEMATICALLY unreliable —\n// not just a one-run outlier that happened to median-pass.\n// This is used by computeAggregateStatusLabel to prevent a profile from\n// earning trusted_baseline when one bar failed in the majority of runs.\n//\n// Hard bars checked (latency_soft and overall are excluded):\n// fp_ceiling, any_flag_recall_floor, per_category_any_flag_floor,\n// strict_recall_floor, decision_vocab_completeness, latency_hard, empty_or_malformed\nexport function computeRecurringBarFailures(\n perRunPassFails: PassFail[],\n totalRuns: number,\n): string[] {\n const threshold = Math.ceil(totalRuns / 2);\n const HARD_BARS: (keyof PassFail)[] = [\n 'fp_ceiling',\n 'any_flag_recall_floor',\n 'per_category_any_flag_floor',\n 'strict_recall_floor',\n 'decision_vocab_completeness',\n 'latency_hard',\n 'empty_or_malformed',\n ];\n\n const recurring: string[] = [];\n for (const bar of HARD_BARS) {\n const failCount = perRunPassFails.filter((pf) => pf[bar] === 'FAIL').length;\n if (failCount >= threshold) recurring.push(bar);\n }\n return recurring;\n}\n\n// Assign aggregate status label.\n//\n// Advisor-locked predicates (priority order):\n// 1. comparison_only — explicit mode flag OR single-pass Hermes (regardless of pass/fail)\n// 2. failed — aggregate pass_fail.overall === FAIL\n// 3. trusted_baseline — Hermes two-pass AND aggregate PASS AND median(FP) === 0\n// AND recurring_bar_failures.length === 0\n// The recurring-failure check prevents a profile from earning trusted_baseline\n// when any hard bar FAILed in >= ceil(N/2) runs even if the median still passed.\n// Intent: \"one lucky median cannot mask systemic bar weakness.\"\n// 4. conditional_pass — fallthrough (passes but doesn't earn trusted_baseline)\n// Mistral two-pass is capped at conditional_pass regardless of aggregate result.\nexport function computeAggregateStatusLabel(input: {\n profileName: string;\n architecture: Architecture;\n aggregatePassFail: AggregatePassFail;\n medianGoodFpCount: number;\n recurringBarFailures: string[];\n modeOverride?: 'comparison_only';\n}): StatusLabel {\n if (input.modeOverride === 'comparison_only') return 'comparison_only';\n\n if (input.architecture === 'single-pass' && /hermes/i.test(input.profileName)) {\n return 'comparison_only';\n }\n\n if (input.aggregatePassFail.overall === 'FAIL') return 'failed';\n\n const isHermesTwoPass =\n /hermes/i.test(input.profileName) && input.architecture === 'two-pass';\n if (\n isHermesTwoPass &&\n input.medianGoodFpCount === 0 &&\n input.recurringBarFailures.length === 0\n ) {\n return 'trusted_baseline';\n }\n\n return 'conditional_pass';\n}\n\n// Aggregate N single-run receipts into one AggregateCalibrationReceipt.\n// All receipts must be from the same profile/model/architecture.\n// opts.runFiles: relative paths for each run (e.g. 'runs/run-001.json').\n// opts.modeOverride: forward 'comparison_only' to status-label predicate.\n// opts.aggregatedAt: ISO timestamp (defaults to now).\nexport function aggregateReceipts(\n runs: CalibrationReceipt[],\n opts: {\n runFiles: string[];\n modeOverride?: 'comparison_only';\n aggregatedAt?: string;\n },\n): AggregateCalibrationReceipt {\n if (runs.length === 0) throw new Error('aggregateReceipts: no runs provided');\n const first = runs[0];\n\n const fpMetric = aggregateMetric(runs.map((r) => r.good_fp_count));\n const anyFlagRatioMetric = aggregateMetric(runs.map((r) => r.any_flag_recall.ratio));\n const strictRatioMetric = aggregateMetric(runs.map((r) => r.strict_recall.ratio));\n const decisionsMetric = aggregateMetric(runs.map((r) => r.decisions_produced_count));\n const runtimeMetric = aggregateMetric(runs.map((r) => r.runtime_ms));\n const emptyOrMalformedMetric = aggregateMetric(\n runs.map((r) => r.empty_or_malformed_responses),\n );\n\n const perCatAnyFlag = aggregatePerCategoryRecall(runs.map((r) => r.per_category_any_flag));\n const perCatStrict = aggregatePerCategoryRecall(runs.map((r) => r.per_category_strict));\n const decisionVocab = aggregateDecisionVocabulary(runs.map((r) => r.decision_vocabulary));\n\n const dvRequired = first.architecture === 'two-pass' ? 3 : 4;\n const decisionVocabBar = {\n architecture: first.architecture,\n required: dvRequired,\n median_produced: decisionsMetric.median,\n passed: decisionsMetric.median >= dvRequired,\n };\n\n const aggregatePassFail = computeAggregatePassFail({\n good_fp_count: fpMetric,\n any_flag_recall_ratio: anyFlagRatioMetric,\n per_category_any_flag: perCatAnyFlag,\n strict_recall_ratio: strictRatioMetric,\n decisions_produced_count: decisionsMetric,\n architecture: first.architecture,\n runtime_ms: runtimeMetric,\n empty_or_malformed_responses: emptyOrMalformedMetric,\n });\n\n const recurringBarFailures = computeRecurringBarFailures(\n runs.map((r) => r.pass_fail),\n runs.length,\n );\n\n const status = computeAggregateStatusLabel({\n profileName: first.profile_name,\n architecture: first.architecture,\n aggregatePassFail,\n medianGoodFpCount: fpMetric.median,\n recurringBarFailures,\n modeOverride: opts.modeOverride,\n });\n\n const notes: string[] = [];\n if (aggregatePassFail.latency_soft === 'WARN') {\n notes.push(\n `Latency warning: median ${(runtimeMetric.median / 1000).toFixed(1)}s exceeds soft limit of 600s`,\n );\n }\n if (fpMetric.median > 0) {\n notes.push(`FP at ceiling: median ${fpMetric.median} false positive(s) on good claims`);\n }\n if (recurringBarFailures.length > 0) {\n notes.push(`Recurring bar failures (>= ceil(N/2) runs): ${recurringBarFailures.join(', ')}`);\n }\n if (status === 'comparison_only') {\n notes.push(\n 'comparison_only: architectural side-run, not a production admission candidate',\n );\n }\n if (status === 'conditional_pass') {\n notes.push('conditional_pass: passes all bars but carries a production caution');\n }\n\n return AggregateCalibrationReceiptSchema.parse({\n schema_version: 1,\n receipt_kind: 'aggregate',\n profile_name: first.profile_name,\n status,\n model: first.model,\n architecture: first.architecture,\n fixture: first.fixture,\n fixture_total_claims: first.fixture_total_claims,\n fixture_good_claims: first.fixture_good_claims,\n fixture_bad_claims: first.fixture_bad_claims,\n runs_count: runs.length,\n run_files: opts.runFiles,\n aggregated_at: opts.aggregatedAt ?? new Date().toISOString(),\n research_os_version: first.research_os_version,\n good_fp_count: fpMetric,\n any_flag_recall_ratio: anyFlagRatioMetric,\n strict_recall_ratio: strictRatioMetric,\n decisions_produced_count: decisionsMetric,\n runtime_ms: runtimeMetric,\n empty_or_malformed_responses: emptyOrMalformedMetric,\n per_category_any_flag: perCatAnyFlag,\n per_category_strict: perCatStrict,\n decision_vocabulary: decisionVocab,\n decision_vocab_bar: decisionVocabBar,\n unreachable_decisions: first.unreachable_decisions,\n pass_fail: aggregatePassFail,\n recurring_bar_failures: recurringBarFailures,\n notes,\n });\n}\n\n// Render the aggregate calibration receipt as compact Markdown.\n// Operator proof artifact — no prose.\nexport function buildAggregateReceiptMarkdown(r: AggregateCalibrationReceipt): string {\n const pct = (ratio: number) => `${Math.round(ratio * 100)}%`;\n const secRounded = (ms: number) => `${(ms / 1000).toFixed(1)}s`;\n\n const af = r.any_flag_recall_ratio;\n const sr = r.strict_recall_ratio;\n const fp = r.good_fp_count;\n const dec = r.decisions_produced_count;\n const rt = r.runtime_ms;\n const pf = r.pass_fail;\n const bar = r.decision_vocab_bar;\n\n const runFileList =\n r.run_files.length > 0\n ? `${r.run_files[0]} … ${r.run_files[r.run_files.length - 1]}`\n : '(none)';\n\n const perCatAnyFlagRows = Object.entries(r.per_category_any_flag)\n .map(([cat, entry]) => {\n const st = r.per_category_strict[cat];\n return (\n `| ${cat} | ${pct(entry.median_ratio)} | ${pct(entry.min_ratio)}–${pct(entry.max_ratio)} | ${entry.total} |` +\n (st\n ? ` ${pct(st.median_ratio)} | ${pct(st.min_ratio)}–${pct(st.max_ratio)} |`\n : ' — | — |')\n );\n })\n .join('\\n');\n\n const ALL_DECISIONS = [\n 'accepted_for_synthesis',\n 'rejected',\n 'needs_scope_repair',\n 'needs_source_repair',\n 'needs_contradiction_mapping',\n 'needs_human_review',\n ];\n const dvRows = ALL_DECISIONS.map((d) => {\n const metric = r.decision_vocabulary[d];\n const unreachable = r.unreachable_decisions.includes(d)\n ? ` (unreachable from ${r.fixture})`\n : '';\n if (!metric) return `| ${d} | — | — |${unreachable}`;\n return `| ${d} | ${metric.median.toFixed(1)} | ${metric.min}–${metric.max}${unreachable} |`;\n }).join('\\n');\n\n // Per-run summary table — pulled from run_files labels for clarity\n const perRunRows = r.any_flag_recall_ratio.values\n .map((afr, i) => {\n const fp_i = r.good_fp_count.values[i] ?? '?';\n const sr_i = r.strict_recall_ratio.values[i] ?? '?';\n const dec_i = r.decisions_produced_count.values[i] ?? '?';\n const rt_i = r.runtime_ms.values[i] ?? '?';\n return `| ${i + 1} | ${fp_i}/${r.fixture_good_claims} | ${typeof afr === 'number' ? pct(afr) : '?'} | ${typeof sr_i === 'number' ? pct(sr_i) : '?'} | ${dec_i}/6 | ${typeof rt_i === 'number' ? secRounded(rt_i) : '?'} |`;\n })\n .join('\\n');\n\n const recurringSection =\n r.recurring_bar_failures.length > 0\n ? r.recurring_bar_failures.map((b) => `- ${b}`).join('\\n')\n : 'None.';\n\n const notesSection =\n r.notes.length > 0 ? `\\n## Notes\\n\\n${r.notes.map((n) => `- ${n}`).join('\\n')}\\n` : '';\n\n return `# Calibration Receipt — ${r.profile_name} (aggregate, N=${r.runs_count} runs)\n\n- **Model:** ${r.model}\n- **Architecture:** ${r.architecture}\n- **Status:** ${r.status}\n- **Fixture:** ${r.fixture} (${r.fixture_total_claims} claims = ${r.fixture_good_claims} good + ${r.fixture_bad_claims} bad)\n- **Aggregated at:** ${r.aggregated_at}\n- **Research-OS version:** ${r.research_os_version}\n- **Run count:** ${r.runs_count}\n- **Run files:** ${runFileList}\n\n## Headline metrics (median across runs)\n\n- FP: median ${fp.median} / ${r.fixture_good_claims} (range ${fp.min}–${fp.max})\n- Any-flag recall: median ${pct(af.median)} (range ${pct(af.min)}–${pct(af.max)})\n- Strict recall: median ${pct(sr.median)} (range ${pct(sr.min)}–${pct(sr.max)})\n- Decisions produced: median ${dec.median} / 6 (range ${dec.min}–${dec.max})\n\n## PASS / FAIL (aggregate)\n\n| Bar | Rule | Result |\n|---|---|---|\n| FP ceiling | median=${fp.median}, max=${fp.max} (median ≤1 AND max ≤2) | ${pf.fp_ceiling} |\n| Any-flag recall | median=${pct(af.median)} (≥65%) | ${pf.any_flag_recall_floor} |\n| Per-category any-flag | median ≥50% per cat (see below) | ${pf.per_category_any_flag_floor} |\n| Strict recall | median=${pct(sr.median)} (≥20%) | ${pf.strict_recall_floor} |\n| Decision vocab | median=${dec.median} / 6 (${bar.architecture} ≥${bar.required}) | ${pf.decision_vocab_completeness} |\n| Latency soft | median=${secRounded(rt.median)} (≤600s, WARN only) | ${pf.latency_soft} |\n| Latency hard | max=${secRounded(rt.max)} (every run ≤1200s) | ${pf.latency_hard} |\n| Empty/malformed | max=${r.empty_or_malformed_responses.max} (every run =0) | ${pf.empty_or_malformed} |\n| **OVERALL** | | **${pf.overall}** |\n\n## Recurring hard-bar failures\n\n${recurringSection}\n\n## Per-category recall (median across runs)\n\n| Category | Any-flag median | Any-flag range | Total | Strict median | Strict range |\n|---|---|---|---|---|---|\n${perCatAnyFlagRows}\n\n## Decision vocabulary (median count across runs)\n\n| Decision | Median | Range |\n|---|---|---|\n${dvRows}\n\n## Per-run summary\n\n| Run | FP | Any-flag | Strict | Decisions | Runtime |\n|---|---|---|---|---|---|\n${perRunRows}\n${notesSection}`;\n}\n"],"mappings":";AAAA,SAAS,KAAAA,UAAS;;;ACAlB,SAAS,SAAS;AAEX,IAAM,oBAAoB,EAAE,KAAK;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,IAAM,qBAAqB,EAAE,KAAK,CAAC,eAAe,UAAU,CAAC;AAE7D,IAAM,eAAe,EAAE,OAAO;AAAA,EACnC,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACtC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACpC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAChC,CAAC;AAEM,IAAM,0BAA0B,EAAE,OAAO,EAAE,OAAO,GAAG,YAAY;AAEjE,IAAM,iBAAiB,EAAE,OAAO;AAAA,EACrC,YAAY,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACnC,uBAAuB,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC9C,6BAA6B,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,qBAAqB,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC5C,6BAA6B,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,cAAc,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,cAAc,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,oBAAoB,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC3C,SAAS,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAClC,CAAC;AAEM,IAAM,yBAAyB,EAAE,OAAO;AAAA,EAC7C,cAAc;AAAA,EACd,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACpC,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvC,QAAQ,EAAE,QAAQ;AACpB,CAAC;AAEM,IAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,gBAAgB,EAAE,QAAQ,CAAC;AAAA,EAC3B,cAAc,EAAE,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,OAAO,EAAE,OAAO;AAAA,EAChB,cAAc;AAAA,EACd,SAAS,EAAE,OAAO;AAAA,EAClB,sBAAsB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChD,qBAAqB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAClD,oBAAoB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACjD,eAAe,EAAE,OAAO;AAAA,EACxB,qBAAqB,EAAE,OAAO;AAAA,EAC9B,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACzC,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC5C,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,qBAAqB;AAAA,EACrB,qBAAqB,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,CAAC;AAAA,EACxE,0BAA0B,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvD,oBAAoB;AAAA,EACpB,uBAAuB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EACzC,8BAA8B,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC3D,WAAW;AAAA,EACX,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC;AAC3B,CAAC;;;AD5DM,IAAM,wBAAwBC,GAAE,OAAO;AAAA,EAC5C,QAAQA,GAAE,OAAO;AAAA,EACjB,KAAKA,GAAE,OAAO;AAAA,EACd,KAAKA,GAAE,OAAO;AAAA,EACd,QAAQA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA;AAC5B,CAAC;AAEM,IAAM,kCAAkCA,GAAE,OAAO;AAAA,EACtD,cAAcA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EACrC,WAAWA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAClC,WAAWA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAClC,OAAOA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA;AAAA,EACpC,gBAAgBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AACpC,CAAC;AAEM,IAAM,6BAA6BA,GAAE,OAAOA,GAAE,OAAO,GAAG,+BAA+B;AAEvF,IAAM,0BAA0BA,GAAE,OAAO;AAAA,EAC9C,YAAYA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACnC,uBAAuBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC9C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,qBAAqBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC5C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,oBAAoBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC3C,SAASA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAClC,CAAC;AAEM,IAAM,kCAAkCA,GAAE,OAAO;AAAA,EACtD,cAAc;AAAA,EACd,UAAUA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACpC,iBAAiBA,GAAE,OAAO;AAAA;AAAA,EAC1B,QAAQA,GAAE,QAAQ;AACpB,CAAC;AAEM,IAAM,oCAAoCA,GAAE,OAAO;AAAA,EACxD,gBAAgBA,GAAE,QAAQ,CAAC;AAAA,EAC3B,cAAcA,GAAE,QAAQ,WAAW;AAAA;AAAA,EACnC,cAAcA,GAAE,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,OAAOA,GAAE,OAAO;AAAA,EAChB,cAAc;AAAA,EACd,SAASA,GAAE,OAAO;AAAA,EAClB,sBAAsBA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChD,qBAAqBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAClD,oBAAoBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACjD,YAAYA,GAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC;AAAA,EAClC,WAAWA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA;AAAA,EAC7B,eAAeA,GAAE,OAAO;AAAA;AAAA,EACxB,qBAAqBA,GAAE,OAAO;AAAA;AAAA,EAG9B,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,qBAAqB;AAAA,EACrB,0BAA0B;AAAA,EAC1B,YAAY;AAAA,EACZ,8BAA8B;AAAA,EAE9B,uBAAuB;AAAA,EACvB,qBAAqB;AAAA;AAAA,EAGrB,qBAAqBA,GAAE,OAAOA,GAAE,OAAO,GAAG,qBAAqB;AAAA,EAC/D,oBAAoB;AAAA,EACpB,uBAAuBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EAEzC,WAAW;AAAA;AAAA;AAAA,EAGX,wBAAwBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EAE1C,OAAOA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAC3B,CAAC;;;AE7DM,SAAS,OAAO,QAA0B;AAC/C,MAAI,OAAO,WAAW,EAAG,OAAM,IAAI,MAAM,qBAAqB;AAC9D,QAAM,SAAS,CAAC,GAAG,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC/C,QAAM,MAAM,KAAK,MAAM,OAAO,SAAS,CAAC;AACxC,MAAI,OAAO,SAAS,MAAM,EAAG,QAAO,OAAO,GAAG;AAC9C,UAAQ,OAAO,MAAM,CAAC,IAAI,OAAO,GAAG,KAAK;AAC3C;AAIO,SAAS,gBAAgB,QAAmC;AACjE,QAAM,IAAI,OAAO,MAAM;AACvB,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,KAAK,KAAK,IAAI,GAAG,MAAM;AAAA,IACvB,KAAK,KAAK,IAAI,GAAG,MAAM;AAAA,IACvB;AAAA,EACF;AACF;AAQO,SAAS,2BACd,eACsB;AACtB,QAAM,OAAO,oBAAI,IAAY;AAC7B,aAAW,OAAO,eAAe;AAC/B,eAAW,OAAO,OAAO,KAAK,GAAG,EAAG,MAAK,IAAI,GAAG;AAAA,EAClD;AAEA,QAAM,SAA+B,CAAC;AACtC,aAAW,OAAO,MAAM;AACtB,UAAM,SAAS,cAAc,IAAI,CAAC,QAAQ,IAAI,GAAG,GAAG,SAAS,CAAC;AAC9D,UAAM,QAAQ,cAAc,KAAK,CAAC,QAAQ,IAAI,GAAG,MAAM,MAAS,IAAI,GAAG,GAAG,SAAS;AACnF,WAAO,GAAG,IAAI;AAAA,MACZ,cAAc,OAAO,MAAM;AAAA,MAC3B,WAAW,KAAK,IAAI,GAAG,MAAM;AAAA,MAC7B,WAAW,KAAK,IAAI,GAAG,MAAM;AAAA,MAC7B;AAAA,MACA,gBAAgB;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAKO,SAAS,4BACd,aACiC;AACjC,QAAM,YAAY,oBAAI,IAAY;AAClC,aAAW,OAAO,aAAa;AAC7B,eAAW,KAAK,OAAO,KAAK,GAAG,EAAG,WAAU,IAAI,CAAC;AAAA,EACnD;AAEA,QAAM,SAA0C,CAAC;AACjD,aAAW,KAAK,WAAW;AACzB,UAAM,SAAS,YAAY,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC;AACnD,WAAO,CAAC,IAAI,gBAAgB,MAAM;AAAA,EACpC;AACA,SAAO;AACT;AAaO,SAAS,yBAAyB,OASnB;AACpB,QAAM,aACJ,MAAM,cAAc,UAAU,KAAK,MAAM,cAAc,OAAO,IAAI,SAAS;AAE7E,QAAM,wBACJ,MAAM,sBAAsB,UAAU,OAAO,SAAS;AAExD,MAAI,8BAA+C;AACnD,aAAW,SAAS,OAAO,OAAO,MAAM,qBAAqB,GAAG;AAC9D,QAAI,MAAM,SAAS,KAAK,MAAM,eAAe,KAAK;AAChD,oCAA8B;AAC9B;AAAA,IACF;AAAA,EACF;AAEA,QAAM,sBACJ,MAAM,oBAAoB,UAAU,MAAM,SAAS;AAErD,QAAM,aAAa,MAAM,iBAAiB,aAAa,IAAI;AAC3D,QAAM,8BACJ,MAAM,yBAAyB,UAAU,aAAa,SAAS;AAGjE,QAAM,eACJ,MAAM,WAAW,UAAU,MAAU,SAAS;AAGhD,QAAM,eACJ,MAAM,WAAW,OAAO,OAAY,SAAS;AAG/C,QAAM,qBACJ,MAAM,6BAA6B,QAAQ,IAAI,SAAS;AAE1D,QAAM,WAAgC;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,UAA2B,SAAS,MAAM,CAAC,MAAM,MAAM,MAAM,IAAI,SAAS;AAEhF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAWO,SAAS,4BACd,iBACA,WACU;AACV,QAAM,YAAY,KAAK,KAAK,YAAY,CAAC;AACzC,QAAM,YAAgC;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,QAAM,YAAsB,CAAC;AAC7B,aAAW,OAAO,WAAW;AAC3B,UAAM,YAAY,gBAAgB,OAAO,CAAC,OAAO,GAAG,GAAG,MAAM,MAAM,EAAE;AACrE,QAAI,aAAa,UAAW,WAAU,KAAK,GAAG;AAAA,EAChD;AACA,SAAO;AACT;AAcO,SAAS,4BAA4B,OAO5B;AACd,MAAI,MAAM,iBAAiB,kBAAmB,QAAO;AAErD,MAAI,MAAM,iBAAiB,iBAAiB,UAAU,KAAK,MAAM,WAAW,GAAG;AAC7E,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,kBAAkB,YAAY,OAAQ,QAAO;AAEvD,QAAM,kBACJ,UAAU,KAAK,MAAM,WAAW,KAAK,MAAM,iBAAiB;AAC9D,MACE,mBACA,MAAM,sBAAsB,KAC5B,MAAM,qBAAqB,WAAW,GACtC;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAOO,SAAS,kBACd,MACA,MAK6B;AAC7B,MAAI,KAAK,WAAW,EAAG,OAAM,IAAI,MAAM,qCAAqC;AAC5E,QAAM,QAAQ,KAAK,CAAC;AAEpB,QAAM,WAAW,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,aAAa,CAAC;AACjE,QAAM,qBAAqB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,gBAAgB,KAAK,CAAC;AACnF,QAAM,oBAAoB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,cAAc,KAAK,CAAC;AAChF,QAAM,kBAAkB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,wBAAwB,CAAC;AACnF,QAAM,gBAAgB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,UAAU,CAAC;AACnE,QAAM,yBAAyB;AAAA,IAC7B,KAAK,IAAI,CAAC,MAAM,EAAE,4BAA4B;AAAA,EAChD;AAEA,QAAM,gBAAgB,2BAA2B,KAAK,IAAI,CAAC,MAAM,EAAE,qBAAqB,CAAC;AACzF,QAAM,eAAe,2BAA2B,KAAK,IAAI,CAAC,MAAM,EAAE,mBAAmB,CAAC;AACtF,QAAM,gBAAgB,4BAA4B,KAAK,IAAI,CAAC,MAAM,EAAE,mBAAmB,CAAC;AAExF,QAAM,aAAa,MAAM,iBAAiB,aAAa,IAAI;AAC3D,QAAM,mBAAmB;AAAA,IACvB,cAAc,MAAM;AAAA,IACpB,UAAU;AAAA,IACV,iBAAiB,gBAAgB;AAAA,IACjC,QAAQ,gBAAgB,UAAU;AAAA,EACpC;AAEA,QAAM,oBAAoB,yBAAyB;AAAA,IACjD,eAAe;AAAA,IACf,uBAAuB;AAAA,IACvB,uBAAuB;AAAA,IACvB,qBAAqB;AAAA,IACrB,0BAA0B;AAAA,IAC1B,cAAc,MAAM;AAAA,IACpB,YAAY;AAAA,IACZ,8BAA8B;AAAA,EAChC,CAAC;AAED,QAAM,uBAAuB;AAAA,IAC3B,KAAK,IAAI,CAAC,MAAM,EAAE,SAAS;AAAA,IAC3B,KAAK;AAAA,EACP;AAEA,QAAM,SAAS,4BAA4B;AAAA,IACzC,aAAa,MAAM;AAAA,IACnB,cAAc,MAAM;AAAA,IACpB;AAAA,IACA,mBAAmB,SAAS;AAAA,IAC5B;AAAA,IACA,cAAc,KAAK;AAAA,EACrB,CAAC;AAED,QAAM,QAAkB,CAAC;AACzB,MAAI,kBAAkB,iBAAiB,QAAQ;AAC7C,UAAM;AAAA,MACJ,4BAA4B,cAAc,SAAS,KAAM,QAAQ,CAAC,CAAC;AAAA,IACrE;AAAA,EACF;AACA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,yBAAyB,SAAS,MAAM,mCAAmC;AAAA,EACxF;AACA,MAAI,qBAAqB,SAAS,GAAG;AACnC,UAAM,KAAK,+CAA+C,qBAAqB,KAAK,IAAI,CAAC,EAAE;AAAA,EAC7F;AACA,MAAI,WAAW,mBAAmB;AAChC,UAAM;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AACA,MAAI,WAAW,oBAAoB;AACjC,UAAM,KAAK,oEAAoE;AAAA,EACjF;AAEA,SAAO,kCAAkC,MAAM;AAAA,IAC7C,gBAAgB;AAAA,IAChB,cAAc;AAAA,IACd,cAAc,MAAM;AAAA,IACpB;AAAA,IACA,OAAO,MAAM;AAAA,IACb,cAAc,MAAM;AAAA,IACpB,SAAS,MAAM;AAAA,IACf,sBAAsB,MAAM;AAAA,IAC5B,qBAAqB,MAAM;AAAA,IAC3B,oBAAoB,MAAM;AAAA,IAC1B,YAAY,KAAK;AAAA,IACjB,WAAW,KAAK;AAAA,IAChB,eAAe,KAAK,iBAAgB,oBAAI,KAAK,GAAE,YAAY;AAAA,IAC3D,qBAAqB,MAAM;AAAA,IAC3B,eAAe;AAAA,IACf,uBAAuB;AAAA,IACvB,qBAAqB;AAAA,IACrB,0BAA0B;AAAA,IAC1B,YAAY;AAAA,IACZ,8BAA8B;AAAA,IAC9B,uBAAuB;AAAA,IACvB,qBAAqB;AAAA,IACrB,qBAAqB;AAAA,IACrB,oBAAoB;AAAA,IACpB,uBAAuB,MAAM;AAAA,IAC7B,WAAW;AAAA,IACX,wBAAwB;AAAA,IACxB;AAAA,EACF,CAAC;AACH;AAIO,SAAS,8BAA8B,GAAwC;AACpF,QAAM,MAAM,CAAC,UAAkB,GAAG,KAAK,MAAM,QAAQ,GAAG,CAAC;AACzD,QAAM,aAAa,CAAC,OAAe,IAAI,KAAK,KAAM,QAAQ,CAAC,CAAC;AAE5D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AACb,QAAM,MAAM,EAAE;AACd,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AACb,QAAM,MAAM,EAAE;AAEd,QAAM,cACJ,EAAE,UAAU,SAAS,IACjB,GAAG,EAAE,UAAU,CAAC,CAAC,WAAM,EAAE,UAAU,EAAE,UAAU,SAAS,CAAC,CAAC,KAC1D;AAEN,QAAM,oBAAoB,OAAO,QAAQ,EAAE,qBAAqB,EAC7D,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM;AACrB,UAAM,KAAK,EAAE,oBAAoB,GAAG;AACpC,WACE,KAAK,GAAG,MAAM,IAAI,MAAM,YAAY,CAAC,MAAM,IAAI,MAAM,SAAS,CAAC,SAAI,IAAI,MAAM,SAAS,CAAC,MAAM,MAAM,KAAK,QACvG,KACG,IAAI,IAAI,GAAG,YAAY,CAAC,MAAM,IAAI,GAAG,SAAS,CAAC,SAAI,IAAI,GAAG,SAAS,CAAC,OACpE;AAAA,EAER,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,SAAS,cAAc,IAAI,CAAC,MAAM;AACtC,UAAM,SAAS,EAAE,oBAAoB,CAAC;AACtC,UAAM,cAAc,EAAE,sBAAsB,SAAS,CAAC,IAClD,sBAAsB,EAAE,OAAO,MAC/B;AACJ,QAAI,CAAC,OAAQ,QAAO,KAAK,CAAC,uBAAa,WAAW;AAClD,WAAO,KAAK,CAAC,MAAM,OAAO,OAAO,QAAQ,CAAC,CAAC,MAAM,OAAO,GAAG,SAAI,OAAO,GAAG,GAAG,WAAW;AAAA,EACzF,CAAC,EAAE,KAAK,IAAI;AAGZ,QAAM,aAAa,EAAE,sBAAsB,OACxC,IAAI,CAAC,KAAK,MAAM;AACf,UAAM,OAAO,EAAE,cAAc,OAAO,CAAC,KAAK;AAC1C,UAAM,OAAO,EAAE,oBAAoB,OAAO,CAAC,KAAK;AAChD,UAAM,QAAQ,EAAE,yBAAyB,OAAO,CAAC,KAAK;AACtD,UAAM,OAAO,EAAE,WAAW,OAAO,CAAC,KAAK;AACvC,WAAO,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,EAAE,mBAAmB,MAAM,OAAO,QAAQ,WAAW,IAAI,GAAG,IAAI,GAAG,MAAM,OAAO,SAAS,WAAW,IAAI,IAAI,IAAI,GAAG,MAAM,KAAK,QAAQ,OAAO,SAAS,WAAW,WAAW,IAAI,IAAI,GAAG;AAAA,EACxN,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,mBACJ,EAAE,uBAAuB,SAAS,IAC9B,EAAE,uBAAuB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACvD;AAEN,QAAM,eACJ,EAAE,MAAM,SAAS,IAAI;AAAA;AAAA;AAAA,EAAiB,EAAE,MAAM,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,IAAO;AAEtF,SAAO,gCAA2B,EAAE,YAAY,kBAAkB,EAAE,UAAU;AAAA;AAAA,eAEjE,EAAE,KAAK;AAAA,sBACA,EAAE,YAAY;AAAA,gBACpB,EAAE,MAAM;AAAA,iBACP,EAAE,OAAO,KAAK,EAAE,oBAAoB,aAAa,EAAE,mBAAmB,WAAW,EAAE,kBAAkB;AAAA,uBAC/F,EAAE,aAAa;AAAA,6BACT,EAAE,mBAAmB;AAAA,mBAC/B,EAAE,UAAU;AAAA,mBACZ,WAAW;AAAA;AAAA;AAAA;AAAA,eAIf,GAAG,MAAM,MAAM,EAAE,mBAAmB,WAAW,GAAG,GAAG,SAAI,GAAG,GAAG;AAAA,4BAClD,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,GAAG,GAAG,CAAC,SAAI,IAAI,GAAG,GAAG,CAAC;AAAA,0BACrD,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,GAAG,GAAG,CAAC,SAAI,IAAI,GAAG,GAAG,CAAC;AAAA,+BAC9C,IAAI,MAAM,eAAe,IAAI,GAAG,SAAI,IAAI,GAAG;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,wBAMlD,GAAG,MAAM,SAAS,GAAG,GAAG,uCAA6B,GAAG,UAAU;AAAA,6BAC7D,IAAI,GAAG,MAAM,CAAC,kBAAa,GAAG,qBAAqB;AAAA,mEAClB,GAAG,2BAA2B;AAAA,2BACjE,IAAI,GAAG,MAAM,CAAC,kBAAa,GAAG,mBAAmB;AAAA,4BAChD,IAAI,MAAM,SAAS,IAAI,YAAY,UAAK,IAAI,QAAQ,OAAO,GAAG,2BAA2B;AAAA,0BAC3F,WAAW,GAAG,MAAM,CAAC,8BAAyB,GAAG,YAAY;AAAA,uBAChE,WAAW,GAAG,GAAG,CAAC,8BAAyB,GAAG,YAAY;AAAA,0BACvD,EAAE,6BAA6B,GAAG,qBAAqB,GAAG,kBAAkB;AAAA,sBAChF,GAAG,OAAO;AAAA;AAAA;AAAA;AAAA,EAI9B,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMhB,iBAAiB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMjB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMN,UAAU;AAAA,EACV,YAAY;AACd;","names":["z","z"]}
|
|
1
|
+
{"version":3,"sources":["../../src/calibration/aggregate-receipt-schema.ts","../../src/calibration/receipt-schema.ts","../../src/review/reviewer-options-schema.ts","../../src/calibration/aggregate.ts"],"sourcesContent":["import { z } from 'zod';\nimport { StatusLabelSchema, ArchitectureSchema, ReviewerOptionsSchema } from './receipt-schema.js';\n\nexport const AggregateMetricSchema = z.object({\n median: z.number(),\n min: z.number(),\n max: z.number(),\n values: z.array(z.number()), // per-run values in run order (run-001, run-002, ...)\n});\n\nexport const PerCategoryAggregateEntrySchema = z.object({\n median_ratio: z.number().min(0).max(1),\n min_ratio: z.number().min(0).max(1),\n max_ratio: z.number().min(0).max(1),\n total: z.number().int().nonnegative(), // seed count — same across all runs\n per_run_ratios: z.array(z.number()),\n});\n\nexport const PerCategoryAggregateSchema = z.record(z.string(), PerCategoryAggregateEntrySchema);\n\nexport const AggregatePassFailSchema = z.object({\n fp_ceiling: z.enum(['PASS', 'FAIL']),\n any_flag_recall_floor: z.enum(['PASS', 'FAIL']),\n per_category_any_flag_floor: z.enum(['PASS', 'FAIL']),\n strict_recall_floor: z.enum(['PASS', 'FAIL']),\n decision_vocab_completeness: z.enum(['PASS', 'FAIL']),\n latency_soft: z.enum(['PASS', 'WARN']),\n latency_hard: z.enum(['PASS', 'FAIL']),\n empty_or_malformed: z.enum(['PASS', 'FAIL']),\n overall: z.enum(['PASS', 'FAIL']),\n});\n\nexport const AggregateDecisionVocabBarSchema = z.object({\n architecture: ArchitectureSchema,\n required: z.number().int().positive(),\n median_produced: z.number(), // float — median of per-run decisions_produced_count\n passed: z.boolean(),\n});\n\nexport const AggregateCalibrationReceiptSchema = z.object({\n schema_version: z.literal(1),\n receipt_kind: z.literal('aggregate'), // discriminates from single-run receipt\n profile_name: z.string(),\n status: StatusLabelSchema,\n model: z.string(),\n architecture: ArchitectureSchema,\n fixture: z.string(),\n fixture_total_claims: z.number().int().positive(),\n fixture_good_claims: z.number().int().nonnegative(),\n fixture_bad_claims: z.number().int().nonnegative(),\n runs_count: z.number().int().min(2),\n run_files: z.array(z.string()), // relative paths: runs/run-001.json, etc.\n aggregated_at: z.string(), // ISO 8601\n research_os_version: z.string(),\n\n // Aggregate metrics — median + min + max + per-run values in run order\n good_fp_count: AggregateMetricSchema,\n any_flag_recall_ratio: AggregateMetricSchema,\n strict_recall_ratio: AggregateMetricSchema,\n decisions_produced_count: AggregateMetricSchema,\n runtime_ms: AggregateMetricSchema,\n empty_or_malformed_responses: AggregateMetricSchema,\n\n per_category_any_flag: PerCategoryAggregateSchema,\n per_category_strict: PerCategoryAggregateSchema,\n\n // Decision vocabulary — union of all decisions seen across runs, median count each\n decision_vocabulary: z.record(z.string(), AggregateMetricSchema),\n decision_vocab_bar: AggregateDecisionVocabBarSchema,\n unreachable_decisions: z.array(z.string()),\n\n pass_fail: AggregatePassFailSchema,\n // Bars that FAILed in >= ceil(runs_count/2) individual runs.\n // Non-empty list demotes trusted_baseline to conditional_pass.\n recurring_bar_failures: z.array(z.string()),\n\n notes: z.array(z.string()),\n\n // schema_version: 1 — additive-optional (Exp6 Session 2):\n // Same options object stamped on every per-run receipt. Absent = stochastic run.\n reviewer_options: ReviewerOptionsSchema.optional(),\n});\n\nexport type AggregateMetric = z.infer<typeof AggregateMetricSchema>;\nexport type PerCategoryAggregateEntry = z.infer<typeof PerCategoryAggregateEntrySchema>;\nexport type PerCategoryAggregate = z.infer<typeof PerCategoryAggregateSchema>;\nexport type AggregatePassFail = z.infer<typeof AggregatePassFailSchema>;\nexport type AggregateDecisionVocabBar = z.infer<typeof AggregateDecisionVocabBarSchema>;\nexport type AggregateCalibrationReceipt = z.infer<typeof AggregateCalibrationReceiptSchema>;\n","import { z } from 'zod';\nimport { ReviewerOptionsSchema } from '../review/reviewer-options-schema.js';\nexport { ReviewerOptionsSchema };\nexport type { ReviewerOptions } from '../review/reviewer-options-schema.js';\n\nexport const StatusLabelSchema = z.enum([\n 'trusted_baseline',\n 'conditional_pass',\n 'failed',\n 'comparison_only',\n]);\n\nexport const ArchitectureSchema = z.enum(['single-pass', 'two-pass']);\n\nexport const RecallSchema = z.object({\n matched: z.number().int().nonnegative(),\n total: z.number().int().nonnegative(),\n ratio: z.number().min(0).max(1),\n});\n\nexport const PerCategoryRecallSchema = z.record(z.string(), RecallSchema);\n\nexport const PassFailSchema = z.object({\n fp_ceiling: z.enum(['PASS', 'FAIL']),\n any_flag_recall_floor: z.enum(['PASS', 'FAIL']),\n per_category_any_flag_floor: z.enum(['PASS', 'FAIL']),\n strict_recall_floor: z.enum(['PASS', 'FAIL']),\n decision_vocab_completeness: z.enum(['PASS', 'FAIL']),\n latency_soft: z.enum(['PASS', 'WARN']),\n latency_hard: z.enum(['PASS', 'FAIL']),\n empty_or_malformed: z.enum(['PASS', 'FAIL']),\n overall: z.enum(['PASS', 'FAIL']),\n});\n\nexport const DecisionVocabBarSchema = z.object({\n architecture: ArchitectureSchema,\n required: z.number().int().positive(),\n produced: z.number().int().nonnegative(),\n passed: z.boolean(),\n});\n\n// schema_version: 1 — additive-optional additions (Exp6 Session 2):\n// reviewer_options: optional sampling params used during this calibration run.\n// Absent = stochastic run (pre-v0.6 compat preserved). Present = keys explicitly set.\nexport const CalibrationReceiptSchema = z.object({\n schema_version: z.literal(1),\n profile_name: z.string(),\n status: StatusLabelSchema,\n model: z.string(),\n architecture: ArchitectureSchema,\n fixture: z.string(),\n fixture_total_claims: z.number().int().positive(),\n fixture_good_claims: z.number().int().nonnegative(),\n fixture_bad_claims: z.number().int().nonnegative(),\n calibrated_at: z.string(),\n research_os_version: z.string(),\n runtime_ms: z.number().int().nonnegative(),\n good_fp_count: z.number().int().nonnegative(),\n any_flag_recall: RecallSchema,\n strict_recall: RecallSchema,\n per_category_any_flag: PerCategoryRecallSchema,\n per_category_strict: PerCategoryRecallSchema,\n decision_vocabulary: z.record(z.string(), z.number().int().nonnegative()),\n decisions_produced_count: z.number().int().nonnegative(),\n decision_vocab_bar: DecisionVocabBarSchema,\n unreachable_decisions: z.array(z.string()),\n empty_or_malformed_responses: z.number().int().nonnegative(),\n pass_fail: PassFailSchema,\n notes: z.array(z.string()),\n reviewer_options: ReviewerOptionsSchema.optional(),\n});\n\nexport type StatusLabel = z.infer<typeof StatusLabelSchema>;\nexport type Architecture = z.infer<typeof ArchitectureSchema>;\nexport type Recall = z.infer<typeof RecallSchema>;\nexport type PerCategoryRecall = z.infer<typeof PerCategoryRecallSchema>;\nexport type PassFail = z.infer<typeof PassFailSchema>;\nexport type DecisionVocabBar = z.infer<typeof DecisionVocabBarSchema>;\nexport type CalibrationReceipt = z.infer<typeof CalibrationReceiptSchema>;\n","import { z } from 'zod';\n\n// Sampling parameters passed verbatim to the Ollama /api/chat `options` field.\n// Used by OllamaInternReviewer to control determinism. All fields optional —\n// omitted keys fall back to Ollama/model defaults. Introduced in Experiment 6\n// Session 2 to make reviewer conditions explicit in calibration receipts.\n//\n// LOAD-BEARING: temperature: 0 is valid and must not be dropped. All merges\n// in OllamaInternReviewer use `!== undefined` checks, NOT truthiness.\nexport const ReviewerOptionsSchema = z.object({\n num_ctx: z.number().int().positive().optional(),\n temperature: z.number().min(0).max(2).optional(),\n seed: z.number().int().optional(),\n top_p: z.number().min(0).max(1).optional(),\n top_k: z.number().int().nonnegative().optional(),\n repeat_penalty: z.number().min(0).optional(),\n});\n\nexport type ReviewerOptions = z.infer<typeof ReviewerOptionsSchema>;\n","import type { Architecture, CalibrationReceipt, PassFail, PerCategoryRecall, ReviewerOptions } from './receipt-schema.js';\nimport {\n AggregateCalibrationReceiptSchema,\n type AggregateCalibrationReceipt,\n type AggregateMetric,\n type AggregatePassFail,\n type PerCategoryAggregate,\n} from './aggregate-receipt-schema.js';\nimport type { StatusLabel } from './receipt-schema.js';\n\n// Compute median of a sorted or unsorted array.\n// Throws on empty input — callers always have at least one run.\n// For even-length arrays: mean of two middle values (float, not rounded).\n// Integer-valued metrics (FP count, decisions) stay as floats here;\n// the caller's bar comparisons (>= 3, === 0) work correctly on exact floats\n// because the inputs are small integers.\nexport function median(values: number[]): number {\n if (values.length === 0) throw new Error('median: empty array');\n const sorted = [...values].sort((a, b) => a - b);\n const mid = Math.floor(sorted.length / 2);\n if (sorted.length % 2 === 1) return sorted[mid];\n return (sorted[mid - 1] + sorted[mid]) / 2;\n}\n\n// Aggregate a list of per-run scalar values into { median, min, max, values }.\n// values preserves input order (run-001, run-002, ...) for traceability.\nexport function aggregateMetric(values: number[]): AggregateMetric {\n const m = median(values);\n return {\n median: m,\n min: Math.min(...values),\n max: Math.max(...values),\n values,\n };\n}\n\n// Aggregate per-run per-category recall objects.\n// Each element of perRunBuckets is one run's PerCategoryRecall\n// (Record<category, { matched, total, ratio }>).\n// Returns PerCategoryAggregate: per-category median/min/max ratio + per-run ratios.\n// total is taken from the first run that has the category (same across runs —\n// SEEDS is static so category totals never change between runs).\nexport function aggregatePerCategoryRecall(\n perRunBuckets: PerCategoryRecall[],\n): PerCategoryAggregate {\n const cats = new Set<string>();\n for (const run of perRunBuckets) {\n for (const cat of Object.keys(run)) cats.add(cat);\n }\n\n const result: PerCategoryAggregate = {};\n for (const cat of cats) {\n const ratios = perRunBuckets.map((run) => run[cat]?.ratio ?? 0);\n const total = perRunBuckets.find((run) => run[cat] !== undefined)?.[cat]?.total ?? 0;\n result[cat] = {\n median_ratio: median(ratios),\n min_ratio: Math.min(...ratios),\n max_ratio: Math.max(...ratios),\n total,\n per_run_ratios: ratios,\n };\n }\n return result;\n}\n\n// Aggregate per-run decision vocabulary count dicts.\n// Each element is one run's decision_vocabulary (Record<decision, count>).\n// Returns Record<decision, AggregateMetric> with median count per decision.\nexport function aggregateDecisionVocabulary(\n perRunDicts: Record<string, number>[],\n): Record<string, AggregateMetric> {\n const decisions = new Set<string>();\n for (const run of perRunDicts) {\n for (const d of Object.keys(run)) decisions.add(d);\n }\n\n const result: Record<string, AggregateMetric> = {};\n for (const d of decisions) {\n const values = perRunDicts.map((run) => run[d] ?? 0);\n result[d] = aggregateMetric(values);\n }\n return result;\n}\n\n// Compute aggregate PASS/FAIL bars from aggregated metrics.\n//\n// Advisor-locked rules (gospel):\n// FP ceiling: median <= 1 AND max <= 2\n// Any-flag recall: median >= 0.65\n// Per-category: median_ratio >= 0.50 for categories with total >= 2\n// Strict recall: median >= 0.20\n// Decision vocab: median >= required (architecture-aware: two-pass=3, single-pass=4)\n// Latency soft: median <= 600_000 → WARN only, never FAIL\n// Latency hard: every-run rule — max <= 1_200_000\n// Empty/malformed: every-run rule — max === 0\nexport function computeAggregatePassFail(input: {\n good_fp_count: AggregateMetric;\n any_flag_recall_ratio: AggregateMetric;\n per_category_any_flag: PerCategoryAggregate;\n strict_recall_ratio: AggregateMetric;\n decisions_produced_count: AggregateMetric;\n architecture: Architecture;\n runtime_ms: AggregateMetric;\n empty_or_malformed_responses: AggregateMetric;\n}): AggregatePassFail {\n const fp_ceiling: 'PASS' | 'FAIL' =\n input.good_fp_count.median <= 1 && input.good_fp_count.max <= 2 ? 'PASS' : 'FAIL';\n\n const any_flag_recall_floor: 'PASS' | 'FAIL' =\n input.any_flag_recall_ratio.median >= 0.65 ? 'PASS' : 'FAIL';\n\n let per_category_any_flag_floor: 'PASS' | 'FAIL' = 'PASS';\n for (const entry of Object.values(input.per_category_any_flag)) {\n if (entry.total >= 2 && entry.median_ratio < 0.5) {\n per_category_any_flag_floor = 'FAIL';\n break;\n }\n }\n\n const strict_recall_floor: 'PASS' | 'FAIL' =\n input.strict_recall_ratio.median >= 0.2 ? 'PASS' : 'FAIL';\n\n const dvRequired = input.architecture === 'two-pass' ? 3 : 4;\n const decision_vocab_completeness: 'PASS' | 'FAIL' =\n input.decisions_produced_count.median >= dvRequired ? 'PASS' : 'FAIL';\n\n // Latency soft: WARN-only signal — no FAIL contribution\n const latency_soft: 'PASS' | 'WARN' =\n input.runtime_ms.median <= 600_000 ? 'PASS' : 'WARN';\n\n // Latency hard: every-run rule — enforced via max\n const latency_hard: 'PASS' | 'FAIL' =\n input.runtime_ms.max <= 1_200_000 ? 'PASS' : 'FAIL';\n\n // Empty/malformed: every-run rule — enforced via max\n const empty_or_malformed: 'PASS' | 'FAIL' =\n input.empty_or_malformed_responses.max === 0 ? 'PASS' : 'FAIL';\n\n const hardBars: ('PASS' | 'FAIL')[] = [\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_hard,\n empty_or_malformed,\n ];\n const overall: 'PASS' | 'FAIL' = hardBars.every((v) => v === 'PASS') ? 'PASS' : 'FAIL';\n\n return {\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_soft,\n latency_hard,\n empty_or_malformed,\n overall,\n };\n}\n\n// Compute which hard bars FAILed in >= ceil(N/2) individual runs.\n// A non-empty result means that bar was SYSTEMATICALLY unreliable —\n// not just a one-run outlier that happened to median-pass.\n// This is used by computeAggregateStatusLabel to prevent a profile from\n// earning trusted_baseline when one bar failed in the majority of runs.\n//\n// Hard bars checked (latency_soft and overall are excluded):\n// fp_ceiling, any_flag_recall_floor, per_category_any_flag_floor,\n// strict_recall_floor, decision_vocab_completeness, latency_hard, empty_or_malformed\nexport function computeRecurringBarFailures(\n perRunPassFails: PassFail[],\n totalRuns: number,\n): string[] {\n const threshold = Math.ceil(totalRuns / 2);\n const HARD_BARS: (keyof PassFail)[] = [\n 'fp_ceiling',\n 'any_flag_recall_floor',\n 'per_category_any_flag_floor',\n 'strict_recall_floor',\n 'decision_vocab_completeness',\n 'latency_hard',\n 'empty_or_malformed',\n ];\n\n const recurring: string[] = [];\n for (const bar of HARD_BARS) {\n const failCount = perRunPassFails.filter((pf) => pf[bar] === 'FAIL').length;\n if (failCount >= threshold) recurring.push(bar);\n }\n return recurring;\n}\n\n// Assign aggregate status label.\n//\n// Advisor-locked predicates (priority order):\n// 1. comparison_only — explicit mode flag OR single-pass Hermes (regardless of pass/fail)\n// 2. failed — aggregate pass_fail.overall === FAIL\n// 3. trusted_baseline — Hermes two-pass AND aggregate PASS AND median(FP) === 0\n// AND recurring_bar_failures.length === 0\n// The recurring-failure check prevents a profile from earning trusted_baseline\n// when any hard bar FAILed in >= ceil(N/2) runs even if the median still passed.\n// Intent: \"one lucky median cannot mask systemic bar weakness.\"\n// 4. conditional_pass — fallthrough (passes but doesn't earn trusted_baseline)\n// Mistral two-pass is capped at conditional_pass regardless of aggregate result.\nexport function computeAggregateStatusLabel(input: {\n profileName: string;\n architecture: Architecture;\n aggregatePassFail: AggregatePassFail;\n medianGoodFpCount: number;\n recurringBarFailures: string[];\n modeOverride?: 'comparison_only';\n}): StatusLabel {\n if (input.modeOverride === 'comparison_only') return 'comparison_only';\n\n if (input.architecture === 'single-pass' && /hermes/i.test(input.profileName)) {\n return 'comparison_only';\n }\n\n if (input.aggregatePassFail.overall === 'FAIL') return 'failed';\n\n const isHermesTwoPass =\n /hermes/i.test(input.profileName) && input.architecture === 'two-pass';\n if (\n isHermesTwoPass &&\n input.medianGoodFpCount === 0 &&\n input.recurringBarFailures.length === 0\n ) {\n return 'trusted_baseline';\n }\n\n return 'conditional_pass';\n}\n\n// Aggregate N single-run receipts into one AggregateCalibrationReceipt.\n// All receipts must be from the same profile/model/architecture.\n// opts.runFiles: relative paths for each run (e.g. 'runs/run-001.json').\n// opts.modeOverride: forward 'comparison_only' to status-label predicate.\n// opts.aggregatedAt: ISO timestamp (defaults to now).\n// opts.reviewerOptions: reviewer sampling options stamped on each per-run receipt.\n// Captured once at harness startup and reused across all N runs. The aggregate\n// carries the same object so consumers can reproduce the exact invocation.\nexport function aggregateReceipts(\n runs: CalibrationReceipt[],\n opts: {\n runFiles: string[];\n modeOverride?: 'comparison_only';\n aggregatedAt?: string;\n reviewerOptions?: ReviewerOptions;\n },\n): AggregateCalibrationReceipt {\n if (runs.length === 0) throw new Error('aggregateReceipts: no runs provided');\n const first = runs[0];\n\n const fpMetric = aggregateMetric(runs.map((r) => r.good_fp_count));\n const anyFlagRatioMetric = aggregateMetric(runs.map((r) => r.any_flag_recall.ratio));\n const strictRatioMetric = aggregateMetric(runs.map((r) => r.strict_recall.ratio));\n const decisionsMetric = aggregateMetric(runs.map((r) => r.decisions_produced_count));\n const runtimeMetric = aggregateMetric(runs.map((r) => r.runtime_ms));\n const emptyOrMalformedMetric = aggregateMetric(\n runs.map((r) => r.empty_or_malformed_responses),\n );\n\n const perCatAnyFlag = aggregatePerCategoryRecall(runs.map((r) => r.per_category_any_flag));\n const perCatStrict = aggregatePerCategoryRecall(runs.map((r) => r.per_category_strict));\n const decisionVocab = aggregateDecisionVocabulary(runs.map((r) => r.decision_vocabulary));\n\n const dvRequired = first.architecture === 'two-pass' ? 3 : 4;\n const decisionVocabBar = {\n architecture: first.architecture,\n required: dvRequired,\n median_produced: decisionsMetric.median,\n passed: decisionsMetric.median >= dvRequired,\n };\n\n const aggregatePassFail = computeAggregatePassFail({\n good_fp_count: fpMetric,\n any_flag_recall_ratio: anyFlagRatioMetric,\n per_category_any_flag: perCatAnyFlag,\n strict_recall_ratio: strictRatioMetric,\n decisions_produced_count: decisionsMetric,\n architecture: first.architecture,\n runtime_ms: runtimeMetric,\n empty_or_malformed_responses: emptyOrMalformedMetric,\n });\n\n const recurringBarFailures = computeRecurringBarFailures(\n runs.map((r) => r.pass_fail),\n runs.length,\n );\n\n const status = computeAggregateStatusLabel({\n profileName: first.profile_name,\n architecture: first.architecture,\n aggregatePassFail,\n medianGoodFpCount: fpMetric.median,\n recurringBarFailures,\n modeOverride: opts.modeOverride,\n });\n\n const notes: string[] = [];\n if (aggregatePassFail.latency_soft === 'WARN') {\n notes.push(\n `Latency warning: median ${(runtimeMetric.median / 1000).toFixed(1)}s exceeds soft limit of 600s`,\n );\n }\n if (fpMetric.median > 0) {\n notes.push(`FP at ceiling: median ${fpMetric.median} false positive(s) on good claims`);\n }\n if (recurringBarFailures.length > 0) {\n notes.push(`Recurring bar failures (>= ceil(N/2) runs): ${recurringBarFailures.join(', ')}`);\n }\n if (status === 'comparison_only') {\n notes.push(\n 'comparison_only: architectural side-run, not a production admission candidate',\n );\n }\n if (status === 'conditional_pass') {\n notes.push('conditional_pass: passes all bars but carries a production caution');\n }\n\n return AggregateCalibrationReceiptSchema.parse({\n schema_version: 1,\n receipt_kind: 'aggregate',\n profile_name: first.profile_name,\n status,\n model: first.model,\n architecture: first.architecture,\n fixture: first.fixture,\n fixture_total_claims: first.fixture_total_claims,\n fixture_good_claims: first.fixture_good_claims,\n fixture_bad_claims: first.fixture_bad_claims,\n runs_count: runs.length,\n run_files: opts.runFiles,\n aggregated_at: opts.aggregatedAt ?? new Date().toISOString(),\n research_os_version: first.research_os_version,\n good_fp_count: fpMetric,\n any_flag_recall_ratio: anyFlagRatioMetric,\n strict_recall_ratio: strictRatioMetric,\n decisions_produced_count: decisionsMetric,\n runtime_ms: runtimeMetric,\n empty_or_malformed_responses: emptyOrMalformedMetric,\n per_category_any_flag: perCatAnyFlag,\n per_category_strict: perCatStrict,\n decision_vocabulary: decisionVocab,\n decision_vocab_bar: decisionVocabBar,\n unreachable_decisions: first.unreachable_decisions,\n pass_fail: aggregatePassFail,\n recurring_bar_failures: recurringBarFailures,\n notes,\n ...(opts.reviewerOptions && Object.keys(opts.reviewerOptions).length > 0 && {\n reviewer_options: opts.reviewerOptions,\n }),\n });\n}\n\n// Stable key order for reviewer_options rendering (matches single-run receipt).\nconst REVIEWER_OPTIONS_KEY_ORDER = [\n 'num_ctx',\n 'temperature',\n 'seed',\n 'top_p',\n 'top_k',\n 'repeat_penalty',\n] as const;\n\nfunction buildReviewerOptionsSection(opts: AggregateCalibrationReceipt['reviewer_options']): string {\n if (!opts) return '';\n const lines = REVIEWER_OPTIONS_KEY_ORDER\n .filter((k) => opts[k] !== undefined)\n .map((k) => `- ${k}: ${opts[k]}`);\n if (lines.length === 0) return '';\n return `\\n## Reviewer options\\n\\n${lines.join('\\n')}\\n`;\n}\n\n// Render the aggregate calibration receipt as compact Markdown.\n// Operator proof artifact — no prose.\nexport function buildAggregateReceiptMarkdown(r: AggregateCalibrationReceipt): string {\n const pct = (ratio: number) => `${Math.round(ratio * 100)}%`;\n const secRounded = (ms: number) => `${(ms / 1000).toFixed(1)}s`;\n\n const af = r.any_flag_recall_ratio;\n const sr = r.strict_recall_ratio;\n const fp = r.good_fp_count;\n const dec = r.decisions_produced_count;\n const rt = r.runtime_ms;\n const pf = r.pass_fail;\n const bar = r.decision_vocab_bar;\n\n const runFileList =\n r.run_files.length > 0\n ? `${r.run_files[0]} … ${r.run_files[r.run_files.length - 1]}`\n : '(none)';\n\n const perCatAnyFlagRows = Object.entries(r.per_category_any_flag)\n .map(([cat, entry]) => {\n const st = r.per_category_strict[cat];\n return (\n `| ${cat} | ${pct(entry.median_ratio)} | ${pct(entry.min_ratio)}–${pct(entry.max_ratio)} | ${entry.total} |` +\n (st\n ? ` ${pct(st.median_ratio)} | ${pct(st.min_ratio)}–${pct(st.max_ratio)} |`\n : ' — | — |')\n );\n })\n .join('\\n');\n\n const ALL_DECISIONS = [\n 'accepted_for_synthesis',\n 'rejected',\n 'needs_scope_repair',\n 'needs_source_repair',\n 'needs_contradiction_mapping',\n 'needs_human_review',\n ];\n const dvRows = ALL_DECISIONS.map((d) => {\n const metric = r.decision_vocabulary[d];\n const unreachable = r.unreachable_decisions.includes(d)\n ? ` (unreachable from ${r.fixture})`\n : '';\n if (!metric) return `| ${d} | — | — |${unreachable}`;\n return `| ${d} | ${metric.median.toFixed(1)} | ${metric.min}–${metric.max}${unreachable} |`;\n }).join('\\n');\n\n // Per-run summary table — pulled from run_files labels for clarity\n const perRunRows = r.any_flag_recall_ratio.values\n .map((afr, i) => {\n const fp_i = r.good_fp_count.values[i] ?? '?';\n const sr_i = r.strict_recall_ratio.values[i] ?? '?';\n const dec_i = r.decisions_produced_count.values[i] ?? '?';\n const rt_i = r.runtime_ms.values[i] ?? '?';\n return `| ${i + 1} | ${fp_i}/${r.fixture_good_claims} | ${typeof afr === 'number' ? pct(afr) : '?'} | ${typeof sr_i === 'number' ? pct(sr_i) : '?'} | ${dec_i}/6 | ${typeof rt_i === 'number' ? secRounded(rt_i) : '?'} |`;\n })\n .join('\\n');\n\n const recurringSection =\n r.recurring_bar_failures.length > 0\n ? r.recurring_bar_failures.map((b) => `- ${b}`).join('\\n')\n : 'None.';\n\n const notesSection =\n r.notes.length > 0 ? `\\n## Notes\\n\\n${r.notes.map((n) => `- ${n}`).join('\\n')}\\n` : '';\n\n const reviewerOptionsSection = buildReviewerOptionsSection(r.reviewer_options);\n\n return `# Calibration Receipt — ${r.profile_name} (aggregate, N=${r.runs_count} runs)\n\n- **Model:** ${r.model}\n- **Architecture:** ${r.architecture}\n- **Status:** ${r.status}\n- **Fixture:** ${r.fixture} (${r.fixture_total_claims} claims = ${r.fixture_good_claims} good + ${r.fixture_bad_claims} bad)\n- **Aggregated at:** ${r.aggregated_at}\n- **Research-OS version:** ${r.research_os_version}\n- **Run count:** ${r.runs_count}\n- **Run files:** ${runFileList}\n${reviewerOptionsSection}\n## Headline metrics (median across runs)\n\n- FP: median ${fp.median} / ${r.fixture_good_claims} (range ${fp.min}–${fp.max})\n- Any-flag recall: median ${pct(af.median)} (range ${pct(af.min)}–${pct(af.max)})\n- Strict recall: median ${pct(sr.median)} (range ${pct(sr.min)}–${pct(sr.max)})\n- Decisions produced: median ${dec.median} / 6 (range ${dec.min}–${dec.max})\n\n## PASS / FAIL (aggregate)\n\n| Bar | Rule | Result |\n|---|---|---|\n| FP ceiling | median=${fp.median}, max=${fp.max} (median ≤1 AND max ≤2) | ${pf.fp_ceiling} |\n| Any-flag recall | median=${pct(af.median)} (≥65%) | ${pf.any_flag_recall_floor} |\n| Per-category any-flag | median ≥50% per cat (see below) | ${pf.per_category_any_flag_floor} |\n| Strict recall | median=${pct(sr.median)} (≥20%) | ${pf.strict_recall_floor} |\n| Decision vocab | median=${dec.median} / 6 (${bar.architecture} ≥${bar.required}) | ${pf.decision_vocab_completeness} |\n| Latency soft | median=${secRounded(rt.median)} (≤600s, WARN only) | ${pf.latency_soft} |\n| Latency hard | max=${secRounded(rt.max)} (every run ≤1200s) | ${pf.latency_hard} |\n| Empty/malformed | max=${r.empty_or_malformed_responses.max} (every run =0) | ${pf.empty_or_malformed} |\n| **OVERALL** | | **${pf.overall}** |\n\n## Recurring hard-bar failures\n\n${recurringSection}\n\n## Per-category recall (median across runs)\n\n| Category | Any-flag median | Any-flag range | Total | Strict median | Strict range |\n|---|---|---|---|---|---|\n${perCatAnyFlagRows}\n\n## Decision vocabulary (median count across runs)\n\n| Decision | Median | Range |\n|---|---|---|\n${dvRows}\n\n## Per-run summary\n\n| Run | FP | Any-flag | Strict | Decisions | Runtime |\n|---|---|---|---|---|---|\n${perRunRows}\n${notesSection}`;\n}\n"],"mappings":";AAAA,SAAS,KAAAA,UAAS;;;ACAlB,SAAS,KAAAC,UAAS;;;ACAlB,SAAS,SAAS;AASX,IAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EAC/C,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EACzC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS;AAAA,EAC/C,gBAAgB,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS;AAC7C,CAAC;;;ADXM,IAAM,oBAAoBC,GAAE,KAAK;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,IAAM,qBAAqBA,GAAE,KAAK,CAAC,eAAe,UAAU,CAAC;AAE7D,IAAM,eAAeA,GAAE,OAAO;AAAA,EACnC,SAASA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACtC,OAAOA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACpC,OAAOA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAChC,CAAC;AAEM,IAAM,0BAA0BA,GAAE,OAAOA,GAAE,OAAO,GAAG,YAAY;AAEjE,IAAM,iBAAiBA,GAAE,OAAO;AAAA,EACrC,YAAYA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACnC,uBAAuBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC9C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,qBAAqBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC5C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,oBAAoBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC3C,SAASA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAClC,CAAC;AAEM,IAAM,yBAAyBA,GAAE,OAAO;AAAA,EAC7C,cAAc;AAAA,EACd,UAAUA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACpC,UAAUA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvC,QAAQA,GAAE,QAAQ;AACpB,CAAC;AAKM,IAAM,2BAA2BA,GAAE,OAAO;AAAA,EAC/C,gBAAgBA,GAAE,QAAQ,CAAC;AAAA,EAC3B,cAAcA,GAAE,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,OAAOA,GAAE,OAAO;AAAA,EAChB,cAAc;AAAA,EACd,SAASA,GAAE,OAAO;AAAA,EAClB,sBAAsBA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChD,qBAAqBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAClD,oBAAoBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACjD,eAAeA,GAAE,OAAO;AAAA,EACxB,qBAAqBA,GAAE,OAAO;AAAA,EAC9B,YAAYA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACzC,eAAeA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC5C,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,qBAAqB;AAAA,EACrB,qBAAqBA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY,CAAC;AAAA,EACxE,0BAA0BA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvD,oBAAoB;AAAA,EACpB,uBAAuBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EACzC,8BAA8BA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC3D,WAAW;AAAA,EACX,OAAOA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EACzB,kBAAkB,sBAAsB,SAAS;AACnD,CAAC;;;ADnEM,IAAM,wBAAwBC,GAAE,OAAO;AAAA,EAC5C,QAAQA,GAAE,OAAO;AAAA,EACjB,KAAKA,GAAE,OAAO;AAAA,EACd,KAAKA,GAAE,OAAO;AAAA,EACd,QAAQA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA;AAC5B,CAAC;AAEM,IAAM,kCAAkCA,GAAE,OAAO;AAAA,EACtD,cAAcA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EACrC,WAAWA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAClC,WAAWA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAAA,EAClC,OAAOA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA;AAAA,EACpC,gBAAgBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AACpC,CAAC;AAEM,IAAM,6BAA6BA,GAAE,OAAOA,GAAE,OAAO,GAAG,+BAA+B;AAEvF,IAAM,0BAA0BA,GAAE,OAAO;AAAA,EAC9C,YAAYA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACnC,uBAAuBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC9C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,qBAAqBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC5C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,oBAAoBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC3C,SAASA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAClC,CAAC;AAEM,IAAM,kCAAkCA,GAAE,OAAO;AAAA,EACtD,cAAc;AAAA,EACd,UAAUA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACpC,iBAAiBA,GAAE,OAAO;AAAA;AAAA,EAC1B,QAAQA,GAAE,QAAQ;AACpB,CAAC;AAEM,IAAM,oCAAoCA,GAAE,OAAO;AAAA,EACxD,gBAAgBA,GAAE,QAAQ,CAAC;AAAA,EAC3B,cAAcA,GAAE,QAAQ,WAAW;AAAA;AAAA,EACnC,cAAcA,GAAE,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,OAAOA,GAAE,OAAO;AAAA,EAChB,cAAc;AAAA,EACd,SAASA,GAAE,OAAO;AAAA,EAClB,sBAAsBA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChD,qBAAqBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAClD,oBAAoBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACjD,YAAYA,GAAE,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC;AAAA,EAClC,WAAWA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA;AAAA,EAC7B,eAAeA,GAAE,OAAO;AAAA;AAAA,EACxB,qBAAqBA,GAAE,OAAO;AAAA;AAAA,EAG9B,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,qBAAqB;AAAA,EACrB,0BAA0B;AAAA,EAC1B,YAAY;AAAA,EACZ,8BAA8B;AAAA,EAE9B,uBAAuB;AAAA,EACvB,qBAAqB;AAAA;AAAA,EAGrB,qBAAqBA,GAAE,OAAOA,GAAE,OAAO,GAAG,qBAAqB;AAAA,EAC/D,oBAAoB;AAAA,EACpB,uBAAuBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EAEzC,WAAW;AAAA;AAAA;AAAA,EAGX,wBAAwBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EAE1C,OAAOA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA;AAAA;AAAA,EAIzB,kBAAkB,sBAAsB,SAAS;AACnD,CAAC;;;AGjEM,SAAS,OAAO,QAA0B;AAC/C,MAAI,OAAO,WAAW,EAAG,OAAM,IAAI,MAAM,qBAAqB;AAC9D,QAAM,SAAS,CAAC,GAAG,MAAM,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC/C,QAAM,MAAM,KAAK,MAAM,OAAO,SAAS,CAAC;AACxC,MAAI,OAAO,SAAS,MAAM,EAAG,QAAO,OAAO,GAAG;AAC9C,UAAQ,OAAO,MAAM,CAAC,IAAI,OAAO,GAAG,KAAK;AAC3C;AAIO,SAAS,gBAAgB,QAAmC;AACjE,QAAM,IAAI,OAAO,MAAM;AACvB,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,KAAK,KAAK,IAAI,GAAG,MAAM;AAAA,IACvB,KAAK,KAAK,IAAI,GAAG,MAAM;AAAA,IACvB;AAAA,EACF;AACF;AAQO,SAAS,2BACd,eACsB;AACtB,QAAM,OAAO,oBAAI,IAAY;AAC7B,aAAW,OAAO,eAAe;AAC/B,eAAW,OAAO,OAAO,KAAK,GAAG,EAAG,MAAK,IAAI,GAAG;AAAA,EAClD;AAEA,QAAM,SAA+B,CAAC;AACtC,aAAW,OAAO,MAAM;AACtB,UAAM,SAAS,cAAc,IAAI,CAAC,QAAQ,IAAI,GAAG,GAAG,SAAS,CAAC;AAC9D,UAAM,QAAQ,cAAc,KAAK,CAAC,QAAQ,IAAI,GAAG,MAAM,MAAS,IAAI,GAAG,GAAG,SAAS;AACnF,WAAO,GAAG,IAAI;AAAA,MACZ,cAAc,OAAO,MAAM;AAAA,MAC3B,WAAW,KAAK,IAAI,GAAG,MAAM;AAAA,MAC7B,WAAW,KAAK,IAAI,GAAG,MAAM;AAAA,MAC7B;AAAA,MACA,gBAAgB;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAKO,SAAS,4BACd,aACiC;AACjC,QAAM,YAAY,oBAAI,IAAY;AAClC,aAAW,OAAO,aAAa;AAC7B,eAAW,KAAK,OAAO,KAAK,GAAG,EAAG,WAAU,IAAI,CAAC;AAAA,EACnD;AAEA,QAAM,SAA0C,CAAC;AACjD,aAAW,KAAK,WAAW;AACzB,UAAM,SAAS,YAAY,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC;AACnD,WAAO,CAAC,IAAI,gBAAgB,MAAM;AAAA,EACpC;AACA,SAAO;AACT;AAaO,SAAS,yBAAyB,OASnB;AACpB,QAAM,aACJ,MAAM,cAAc,UAAU,KAAK,MAAM,cAAc,OAAO,IAAI,SAAS;AAE7E,QAAM,wBACJ,MAAM,sBAAsB,UAAU,OAAO,SAAS;AAExD,MAAI,8BAA+C;AACnD,aAAW,SAAS,OAAO,OAAO,MAAM,qBAAqB,GAAG;AAC9D,QAAI,MAAM,SAAS,KAAK,MAAM,eAAe,KAAK;AAChD,oCAA8B;AAC9B;AAAA,IACF;AAAA,EACF;AAEA,QAAM,sBACJ,MAAM,oBAAoB,UAAU,MAAM,SAAS;AAErD,QAAM,aAAa,MAAM,iBAAiB,aAAa,IAAI;AAC3D,QAAM,8BACJ,MAAM,yBAAyB,UAAU,aAAa,SAAS;AAGjE,QAAM,eACJ,MAAM,WAAW,UAAU,MAAU,SAAS;AAGhD,QAAM,eACJ,MAAM,WAAW,OAAO,OAAY,SAAS;AAG/C,QAAM,qBACJ,MAAM,6BAA6B,QAAQ,IAAI,SAAS;AAE1D,QAAM,WAAgC;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,UAA2B,SAAS,MAAM,CAAC,MAAM,MAAM,MAAM,IAAI,SAAS;AAEhF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAWO,SAAS,4BACd,iBACA,WACU;AACV,QAAM,YAAY,KAAK,KAAK,YAAY,CAAC;AACzC,QAAM,YAAgC;AAAA,IACpC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,QAAM,YAAsB,CAAC;AAC7B,aAAW,OAAO,WAAW;AAC3B,UAAM,YAAY,gBAAgB,OAAO,CAAC,OAAO,GAAG,GAAG,MAAM,MAAM,EAAE;AACrE,QAAI,aAAa,UAAW,WAAU,KAAK,GAAG;AAAA,EAChD;AACA,SAAO;AACT;AAcO,SAAS,4BAA4B,OAO5B;AACd,MAAI,MAAM,iBAAiB,kBAAmB,QAAO;AAErD,MAAI,MAAM,iBAAiB,iBAAiB,UAAU,KAAK,MAAM,WAAW,GAAG;AAC7E,WAAO;AAAA,EACT;AAEA,MAAI,MAAM,kBAAkB,YAAY,OAAQ,QAAO;AAEvD,QAAM,kBACJ,UAAU,KAAK,MAAM,WAAW,KAAK,MAAM,iBAAiB;AAC9D,MACE,mBACA,MAAM,sBAAsB,KAC5B,MAAM,qBAAqB,WAAW,GACtC;AACA,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAUO,SAAS,kBACd,MACA,MAM6B;AAC7B,MAAI,KAAK,WAAW,EAAG,OAAM,IAAI,MAAM,qCAAqC;AAC5E,QAAM,QAAQ,KAAK,CAAC;AAEpB,QAAM,WAAW,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,aAAa,CAAC;AACjE,QAAM,qBAAqB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,gBAAgB,KAAK,CAAC;AACnF,QAAM,oBAAoB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,cAAc,KAAK,CAAC;AAChF,QAAM,kBAAkB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,wBAAwB,CAAC;AACnF,QAAM,gBAAgB,gBAAgB,KAAK,IAAI,CAAC,MAAM,EAAE,UAAU,CAAC;AACnE,QAAM,yBAAyB;AAAA,IAC7B,KAAK,IAAI,CAAC,MAAM,EAAE,4BAA4B;AAAA,EAChD;AAEA,QAAM,gBAAgB,2BAA2B,KAAK,IAAI,CAAC,MAAM,EAAE,qBAAqB,CAAC;AACzF,QAAM,eAAe,2BAA2B,KAAK,IAAI,CAAC,MAAM,EAAE,mBAAmB,CAAC;AACtF,QAAM,gBAAgB,4BAA4B,KAAK,IAAI,CAAC,MAAM,EAAE,mBAAmB,CAAC;AAExF,QAAM,aAAa,MAAM,iBAAiB,aAAa,IAAI;AAC3D,QAAM,mBAAmB;AAAA,IACvB,cAAc,MAAM;AAAA,IACpB,UAAU;AAAA,IACV,iBAAiB,gBAAgB;AAAA,IACjC,QAAQ,gBAAgB,UAAU;AAAA,EACpC;AAEA,QAAM,oBAAoB,yBAAyB;AAAA,IACjD,eAAe;AAAA,IACf,uBAAuB;AAAA,IACvB,uBAAuB;AAAA,IACvB,qBAAqB;AAAA,IACrB,0BAA0B;AAAA,IAC1B,cAAc,MAAM;AAAA,IACpB,YAAY;AAAA,IACZ,8BAA8B;AAAA,EAChC,CAAC;AAED,QAAM,uBAAuB;AAAA,IAC3B,KAAK,IAAI,CAAC,MAAM,EAAE,SAAS;AAAA,IAC3B,KAAK;AAAA,EACP;AAEA,QAAM,SAAS,4BAA4B;AAAA,IACzC,aAAa,MAAM;AAAA,IACnB,cAAc,MAAM;AAAA,IACpB;AAAA,IACA,mBAAmB,SAAS;AAAA,IAC5B;AAAA,IACA,cAAc,KAAK;AAAA,EACrB,CAAC;AAED,QAAM,QAAkB,CAAC;AACzB,MAAI,kBAAkB,iBAAiB,QAAQ;AAC7C,UAAM;AAAA,MACJ,4BAA4B,cAAc,SAAS,KAAM,QAAQ,CAAC,CAAC;AAAA,IACrE;AAAA,EACF;AACA,MAAI,SAAS,SAAS,GAAG;AACvB,UAAM,KAAK,yBAAyB,SAAS,MAAM,mCAAmC;AAAA,EACxF;AACA,MAAI,qBAAqB,SAAS,GAAG;AACnC,UAAM,KAAK,+CAA+C,qBAAqB,KAAK,IAAI,CAAC,EAAE;AAAA,EAC7F;AACA,MAAI,WAAW,mBAAmB;AAChC,UAAM;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AACA,MAAI,WAAW,oBAAoB;AACjC,UAAM,KAAK,oEAAoE;AAAA,EACjF;AAEA,SAAO,kCAAkC,MAAM;AAAA,IAC7C,gBAAgB;AAAA,IAChB,cAAc;AAAA,IACd,cAAc,MAAM;AAAA,IACpB;AAAA,IACA,OAAO,MAAM;AAAA,IACb,cAAc,MAAM;AAAA,IACpB,SAAS,MAAM;AAAA,IACf,sBAAsB,MAAM;AAAA,IAC5B,qBAAqB,MAAM;AAAA,IAC3B,oBAAoB,MAAM;AAAA,IAC1B,YAAY,KAAK;AAAA,IACjB,WAAW,KAAK;AAAA,IAChB,eAAe,KAAK,iBAAgB,oBAAI,KAAK,GAAE,YAAY;AAAA,IAC3D,qBAAqB,MAAM;AAAA,IAC3B,eAAe;AAAA,IACf,uBAAuB;AAAA,IACvB,qBAAqB;AAAA,IACrB,0BAA0B;AAAA,IAC1B,YAAY;AAAA,IACZ,8BAA8B;AAAA,IAC9B,uBAAuB;AAAA,IACvB,qBAAqB;AAAA,IACrB,qBAAqB;AAAA,IACrB,oBAAoB;AAAA,IACpB,uBAAuB,MAAM;AAAA,IAC7B,WAAW;AAAA,IACX,wBAAwB;AAAA,IACxB;AAAA,IACA,GAAI,KAAK,mBAAmB,OAAO,KAAK,KAAK,eAAe,EAAE,SAAS,KAAK;AAAA,MAC1E,kBAAkB,KAAK;AAAA,IACzB;AAAA,EACF,CAAC;AACH;AAGA,IAAM,6BAA6B;AAAA,EACjC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEA,SAAS,4BAA4B,MAA+D;AAClG,MAAI,CAAC,KAAM,QAAO;AAClB,QAAM,QAAQ,2BACX,OAAO,CAAC,MAAM,KAAK,CAAC,MAAM,MAAS,EACnC,IAAI,CAAC,MAAM,KAAK,CAAC,KAAK,KAAK,CAAC,CAAC,EAAE;AAClC,MAAI,MAAM,WAAW,EAAG,QAAO;AAC/B,SAAO;AAAA;AAAA;AAAA,EAA4B,MAAM,KAAK,IAAI,CAAC;AAAA;AACrD;AAIO,SAAS,8BAA8B,GAAwC;AACpF,QAAM,MAAM,CAAC,UAAkB,GAAG,KAAK,MAAM,QAAQ,GAAG,CAAC;AACzD,QAAM,aAAa,CAAC,OAAe,IAAI,KAAK,KAAM,QAAQ,CAAC,CAAC;AAE5D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AACb,QAAM,MAAM,EAAE;AACd,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,EAAE;AACb,QAAM,MAAM,EAAE;AAEd,QAAM,cACJ,EAAE,UAAU,SAAS,IACjB,GAAG,EAAE,UAAU,CAAC,CAAC,WAAM,EAAE,UAAU,EAAE,UAAU,SAAS,CAAC,CAAC,KAC1D;AAEN,QAAM,oBAAoB,OAAO,QAAQ,EAAE,qBAAqB,EAC7D,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM;AACrB,UAAM,KAAK,EAAE,oBAAoB,GAAG;AACpC,WACE,KAAK,GAAG,MAAM,IAAI,MAAM,YAAY,CAAC,MAAM,IAAI,MAAM,SAAS,CAAC,SAAI,IAAI,MAAM,SAAS,CAAC,MAAM,MAAM,KAAK,QACvG,KACG,IAAI,IAAI,GAAG,YAAY,CAAC,MAAM,IAAI,GAAG,SAAS,CAAC,SAAI,IAAI,GAAG,SAAS,CAAC,OACpE;AAAA,EAER,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,gBAAgB;AAAA,IACpB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,SAAS,cAAc,IAAI,CAAC,MAAM;AACtC,UAAM,SAAS,EAAE,oBAAoB,CAAC;AACtC,UAAM,cAAc,EAAE,sBAAsB,SAAS,CAAC,IAClD,sBAAsB,EAAE,OAAO,MAC/B;AACJ,QAAI,CAAC,OAAQ,QAAO,KAAK,CAAC,uBAAa,WAAW;AAClD,WAAO,KAAK,CAAC,MAAM,OAAO,OAAO,QAAQ,CAAC,CAAC,MAAM,OAAO,GAAG,SAAI,OAAO,GAAG,GAAG,WAAW;AAAA,EACzF,CAAC,EAAE,KAAK,IAAI;AAGZ,QAAM,aAAa,EAAE,sBAAsB,OACxC,IAAI,CAAC,KAAK,MAAM;AACf,UAAM,OAAO,EAAE,cAAc,OAAO,CAAC,KAAK;AAC1C,UAAM,OAAO,EAAE,oBAAoB,OAAO,CAAC,KAAK;AAChD,UAAM,QAAQ,EAAE,yBAAyB,OAAO,CAAC,KAAK;AACtD,UAAM,OAAO,EAAE,WAAW,OAAO,CAAC,KAAK;AACvC,WAAO,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,EAAE,mBAAmB,MAAM,OAAO,QAAQ,WAAW,IAAI,GAAG,IAAI,GAAG,MAAM,OAAO,SAAS,WAAW,IAAI,IAAI,IAAI,GAAG,MAAM,KAAK,QAAQ,OAAO,SAAS,WAAW,WAAW,IAAI,IAAI,GAAG;AAAA,EACxN,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,mBACJ,EAAE,uBAAuB,SAAS,IAC9B,EAAE,uBAAuB,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,IACvD;AAEN,QAAM,eACJ,EAAE,MAAM,SAAS,IAAI;AAAA;AAAA;AAAA,EAAiB,EAAE,MAAM,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,IAAO;AAEtF,QAAM,yBAAyB,4BAA4B,EAAE,gBAAgB;AAE7E,SAAO,gCAA2B,EAAE,YAAY,kBAAkB,EAAE,UAAU;AAAA;AAAA,eAEjE,EAAE,KAAK;AAAA,sBACA,EAAE,YAAY;AAAA,gBACpB,EAAE,MAAM;AAAA,iBACP,EAAE,OAAO,KAAK,EAAE,oBAAoB,aAAa,EAAE,mBAAmB,WAAW,EAAE,kBAAkB;AAAA,uBAC/F,EAAE,aAAa;AAAA,6BACT,EAAE,mBAAmB;AAAA,mBAC/B,EAAE,UAAU;AAAA,mBACZ,WAAW;AAAA,EAC5B,sBAAsB;AAAA;AAAA;AAAA,eAGT,GAAG,MAAM,MAAM,EAAE,mBAAmB,WAAW,GAAG,GAAG,SAAI,GAAG,GAAG;AAAA,4BAClD,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,GAAG,GAAG,CAAC,SAAI,IAAI,GAAG,GAAG,CAAC;AAAA,0BACrD,IAAI,GAAG,MAAM,CAAC,WAAW,IAAI,GAAG,GAAG,CAAC,SAAI,IAAI,GAAG,GAAG,CAAC;AAAA,+BAC9C,IAAI,MAAM,eAAe,IAAI,GAAG,SAAI,IAAI,GAAG;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,wBAMlD,GAAG,MAAM,SAAS,GAAG,GAAG,uCAA6B,GAAG,UAAU;AAAA,6BAC7D,IAAI,GAAG,MAAM,CAAC,kBAAa,GAAG,qBAAqB;AAAA,mEAClB,GAAG,2BAA2B;AAAA,2BACjE,IAAI,GAAG,MAAM,CAAC,kBAAa,GAAG,mBAAmB;AAAA,4BAChD,IAAI,MAAM,SAAS,IAAI,YAAY,UAAK,IAAI,QAAQ,OAAO,GAAG,2BAA2B;AAAA,0BAC3F,WAAW,GAAG,MAAM,CAAC,8BAAyB,GAAG,YAAY;AAAA,uBAChE,WAAW,GAAG,GAAG,CAAC,8BAAyB,GAAG,YAAY;AAAA,0BACvD,EAAE,6BAA6B,GAAG,qBAAqB,GAAG,kBAAkB;AAAA,sBAChF,GAAG,OAAO;AAAA;AAAA;AAAA;AAAA,EAI9B,gBAAgB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMhB,iBAAiB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMjB,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMN,UAAU;AAAA,EACV,YAAY;AACd;","names":["z","z","z","z"]}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
+
export { R as ReviewerOptions, a as ReviewerOptionsSchema } from '../reviewer-options-schema-PZacF_MO.js';
|
|
2
3
|
|
|
3
4
|
declare const StatusLabelSchema: z.ZodEnum<["trusted_baseline", "conditional_pass", "failed", "comparison_only"]>;
|
|
4
5
|
declare const ArchitectureSchema: z.ZodEnum<["single-pass", "two-pass"]>;
|
|
@@ -193,19 +194,40 @@ declare const CalibrationReceiptSchema: z.ZodObject<{
|
|
|
193
194
|
overall: "PASS" | "FAIL";
|
|
194
195
|
}>;
|
|
195
196
|
notes: z.ZodArray<z.ZodString, "many">;
|
|
197
|
+
reviewer_options: z.ZodOptional<z.ZodObject<{
|
|
198
|
+
num_ctx: z.ZodOptional<z.ZodNumber>;
|
|
199
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
200
|
+
seed: z.ZodOptional<z.ZodNumber>;
|
|
201
|
+
top_p: z.ZodOptional<z.ZodNumber>;
|
|
202
|
+
top_k: z.ZodOptional<z.ZodNumber>;
|
|
203
|
+
repeat_penalty: z.ZodOptional<z.ZodNumber>;
|
|
204
|
+
}, "strip", z.ZodTypeAny, {
|
|
205
|
+
num_ctx?: number | undefined;
|
|
206
|
+
temperature?: number | undefined;
|
|
207
|
+
seed?: number | undefined;
|
|
208
|
+
top_p?: number | undefined;
|
|
209
|
+
top_k?: number | undefined;
|
|
210
|
+
repeat_penalty?: number | undefined;
|
|
211
|
+
}, {
|
|
212
|
+
num_ctx?: number | undefined;
|
|
213
|
+
temperature?: number | undefined;
|
|
214
|
+
seed?: number | undefined;
|
|
215
|
+
top_p?: number | undefined;
|
|
216
|
+
top_k?: number | undefined;
|
|
217
|
+
repeat_penalty?: number | undefined;
|
|
218
|
+
}>>;
|
|
196
219
|
}, "strip", z.ZodTypeAny, {
|
|
197
|
-
research_os_version: string;
|
|
198
220
|
status: "trusted_baseline" | "conditional_pass" | "failed" | "comparison_only";
|
|
199
|
-
|
|
221
|
+
architecture: "single-pass" | "two-pass";
|
|
200
222
|
schema_version: 1;
|
|
201
223
|
profile_name: string;
|
|
202
224
|
model: string;
|
|
203
|
-
architecture: "single-pass" | "two-pass";
|
|
204
225
|
fixture: string;
|
|
205
226
|
fixture_total_claims: number;
|
|
206
227
|
fixture_good_claims: number;
|
|
207
228
|
fixture_bad_claims: number;
|
|
208
229
|
calibrated_at: string;
|
|
230
|
+
research_os_version: string;
|
|
209
231
|
runtime_ms: number;
|
|
210
232
|
good_fp_count: number;
|
|
211
233
|
any_flag_recall: {
|
|
@@ -249,19 +271,27 @@ declare const CalibrationReceiptSchema: z.ZodObject<{
|
|
|
249
271
|
empty_or_malformed: "PASS" | "FAIL";
|
|
250
272
|
overall: "PASS" | "FAIL";
|
|
251
273
|
};
|
|
274
|
+
notes: string[];
|
|
275
|
+
reviewer_options?: {
|
|
276
|
+
num_ctx?: number | undefined;
|
|
277
|
+
temperature?: number | undefined;
|
|
278
|
+
seed?: number | undefined;
|
|
279
|
+
top_p?: number | undefined;
|
|
280
|
+
top_k?: number | undefined;
|
|
281
|
+
repeat_penalty?: number | undefined;
|
|
282
|
+
} | undefined;
|
|
252
283
|
}, {
|
|
253
|
-
research_os_version: string;
|
|
254
284
|
status: "trusted_baseline" | "conditional_pass" | "failed" | "comparison_only";
|
|
255
|
-
|
|
285
|
+
architecture: "single-pass" | "two-pass";
|
|
256
286
|
schema_version: 1;
|
|
257
287
|
profile_name: string;
|
|
258
288
|
model: string;
|
|
259
|
-
architecture: "single-pass" | "two-pass";
|
|
260
289
|
fixture: string;
|
|
261
290
|
fixture_total_claims: number;
|
|
262
291
|
fixture_good_claims: number;
|
|
263
292
|
fixture_bad_claims: number;
|
|
264
293
|
calibrated_at: string;
|
|
294
|
+
research_os_version: string;
|
|
265
295
|
runtime_ms: number;
|
|
266
296
|
good_fp_count: number;
|
|
267
297
|
any_flag_recall: {
|
|
@@ -305,6 +335,15 @@ declare const CalibrationReceiptSchema: z.ZodObject<{
|
|
|
305
335
|
empty_or_malformed: "PASS" | "FAIL";
|
|
306
336
|
overall: "PASS" | "FAIL";
|
|
307
337
|
};
|
|
338
|
+
notes: string[];
|
|
339
|
+
reviewer_options?: {
|
|
340
|
+
num_ctx?: number | undefined;
|
|
341
|
+
temperature?: number | undefined;
|
|
342
|
+
seed?: number | undefined;
|
|
343
|
+
top_p?: number | undefined;
|
|
344
|
+
top_k?: number | undefined;
|
|
345
|
+
repeat_penalty?: number | undefined;
|
|
346
|
+
} | undefined;
|
|
308
347
|
}>;
|
|
309
348
|
type StatusLabel = z.infer<typeof StatusLabelSchema>;
|
|
310
349
|
type Architecture = z.infer<typeof ArchitectureSchema>;
|