@mcptoolshop/research-os 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,60 +1,74 @@
1
1
  // src/calibration/receipt-schema.ts
2
+ import { z as z2 } from "zod";
3
+
4
+ // src/review/reviewer-options-schema.ts
2
5
  import { z } from "zod";
3
- var StatusLabelSchema = z.enum([
6
+ var ReviewerOptionsSchema = z.object({
7
+ num_ctx: z.number().int().positive().optional(),
8
+ temperature: z.number().min(0).max(2).optional(),
9
+ seed: z.number().int().optional(),
10
+ top_p: z.number().min(0).max(1).optional(),
11
+ top_k: z.number().int().nonnegative().optional(),
12
+ repeat_penalty: z.number().min(0).optional()
13
+ });
14
+
15
+ // src/calibration/receipt-schema.ts
16
+ var StatusLabelSchema = z2.enum([
4
17
  "trusted_baseline",
5
18
  "conditional_pass",
6
19
  "failed",
7
20
  "comparison_only"
8
21
  ]);
9
- var ArchitectureSchema = z.enum(["single-pass", "two-pass"]);
10
- var RecallSchema = z.object({
11
- matched: z.number().int().nonnegative(),
12
- total: z.number().int().nonnegative(),
13
- ratio: z.number().min(0).max(1)
22
+ var ArchitectureSchema = z2.enum(["single-pass", "two-pass"]);
23
+ var RecallSchema = z2.object({
24
+ matched: z2.number().int().nonnegative(),
25
+ total: z2.number().int().nonnegative(),
26
+ ratio: z2.number().min(0).max(1)
14
27
  });
15
- var PerCategoryRecallSchema = z.record(z.string(), RecallSchema);
16
- var PassFailSchema = z.object({
17
- fp_ceiling: z.enum(["PASS", "FAIL"]),
18
- any_flag_recall_floor: z.enum(["PASS", "FAIL"]),
19
- per_category_any_flag_floor: z.enum(["PASS", "FAIL"]),
20
- strict_recall_floor: z.enum(["PASS", "FAIL"]),
21
- decision_vocab_completeness: z.enum(["PASS", "FAIL"]),
22
- latency_soft: z.enum(["PASS", "WARN"]),
23
- latency_hard: z.enum(["PASS", "FAIL"]),
24
- empty_or_malformed: z.enum(["PASS", "FAIL"]),
25
- overall: z.enum(["PASS", "FAIL"])
28
+ var PerCategoryRecallSchema = z2.record(z2.string(), RecallSchema);
29
+ var PassFailSchema = z2.object({
30
+ fp_ceiling: z2.enum(["PASS", "FAIL"]),
31
+ any_flag_recall_floor: z2.enum(["PASS", "FAIL"]),
32
+ per_category_any_flag_floor: z2.enum(["PASS", "FAIL"]),
33
+ strict_recall_floor: z2.enum(["PASS", "FAIL"]),
34
+ decision_vocab_completeness: z2.enum(["PASS", "FAIL"]),
35
+ latency_soft: z2.enum(["PASS", "WARN"]),
36
+ latency_hard: z2.enum(["PASS", "FAIL"]),
37
+ empty_or_malformed: z2.enum(["PASS", "FAIL"]),
38
+ overall: z2.enum(["PASS", "FAIL"])
26
39
  });
27
- var DecisionVocabBarSchema = z.object({
40
+ var DecisionVocabBarSchema = z2.object({
28
41
  architecture: ArchitectureSchema,
29
- required: z.number().int().positive(),
30
- produced: z.number().int().nonnegative(),
31
- passed: z.boolean()
42
+ required: z2.number().int().positive(),
43
+ produced: z2.number().int().nonnegative(),
44
+ passed: z2.boolean()
32
45
  });
33
- var CalibrationReceiptSchema = z.object({
34
- schema_version: z.literal(1),
35
- profile_name: z.string(),
46
+ var CalibrationReceiptSchema = z2.object({
47
+ schema_version: z2.literal(1),
48
+ profile_name: z2.string(),
36
49
  status: StatusLabelSchema,
37
- model: z.string(),
50
+ model: z2.string(),
38
51
  architecture: ArchitectureSchema,
39
- fixture: z.string(),
40
- fixture_total_claims: z.number().int().positive(),
41
- fixture_good_claims: z.number().int().nonnegative(),
42
- fixture_bad_claims: z.number().int().nonnegative(),
43
- calibrated_at: z.string(),
44
- research_os_version: z.string(),
45
- runtime_ms: z.number().int().nonnegative(),
46
- good_fp_count: z.number().int().nonnegative(),
52
+ fixture: z2.string(),
53
+ fixture_total_claims: z2.number().int().positive(),
54
+ fixture_good_claims: z2.number().int().nonnegative(),
55
+ fixture_bad_claims: z2.number().int().nonnegative(),
56
+ calibrated_at: z2.string(),
57
+ research_os_version: z2.string(),
58
+ runtime_ms: z2.number().int().nonnegative(),
59
+ good_fp_count: z2.number().int().nonnegative(),
47
60
  any_flag_recall: RecallSchema,
48
61
  strict_recall: RecallSchema,
49
62
  per_category_any_flag: PerCategoryRecallSchema,
50
63
  per_category_strict: PerCategoryRecallSchema,
51
- decision_vocabulary: z.record(z.string(), z.number().int().nonnegative()),
52
- decisions_produced_count: z.number().int().nonnegative(),
64
+ decision_vocabulary: z2.record(z2.string(), z2.number().int().nonnegative()),
65
+ decisions_produced_count: z2.number().int().nonnegative(),
53
66
  decision_vocab_bar: DecisionVocabBarSchema,
54
- unreachable_decisions: z.array(z.string()),
55
- empty_or_malformed_responses: z.number().int().nonnegative(),
67
+ unreachable_decisions: z2.array(z2.string()),
68
+ empty_or_malformed_responses: z2.number().int().nonnegative(),
56
69
  pass_fail: PassFailSchema,
57
- notes: z.array(z.string())
70
+ notes: z2.array(z2.string()),
71
+ reviewer_options: ReviewerOptionsSchema.optional()
58
72
  });
59
73
  export {
60
74
  ArchitectureSchema,
@@ -63,6 +77,7 @@ export {
63
77
  PassFailSchema,
64
78
  PerCategoryRecallSchema,
65
79
  RecallSchema,
80
+ ReviewerOptionsSchema,
66
81
  StatusLabelSchema
67
82
  };
68
83
  //# sourceMappingURL=receipt-schema.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/calibration/receipt-schema.ts"],"sourcesContent":["import { z } from 'zod';\n\nexport const StatusLabelSchema = z.enum([\n 'trusted_baseline',\n 'conditional_pass',\n 'failed',\n 'comparison_only',\n]);\n\nexport const ArchitectureSchema = z.enum(['single-pass', 'two-pass']);\n\nexport const RecallSchema = z.object({\n matched: z.number().int().nonnegative(),\n total: z.number().int().nonnegative(),\n ratio: z.number().min(0).max(1),\n});\n\nexport const PerCategoryRecallSchema = z.record(z.string(), RecallSchema);\n\nexport const PassFailSchema = z.object({\n fp_ceiling: z.enum(['PASS', 'FAIL']),\n any_flag_recall_floor: z.enum(['PASS', 'FAIL']),\n per_category_any_flag_floor: z.enum(['PASS', 'FAIL']),\n strict_recall_floor: z.enum(['PASS', 'FAIL']),\n decision_vocab_completeness: z.enum(['PASS', 'FAIL']),\n latency_soft: z.enum(['PASS', 'WARN']),\n latency_hard: z.enum(['PASS', 'FAIL']),\n empty_or_malformed: z.enum(['PASS', 'FAIL']),\n overall: z.enum(['PASS', 'FAIL']),\n});\n\nexport const DecisionVocabBarSchema = z.object({\n architecture: ArchitectureSchema,\n required: z.number().int().positive(),\n produced: z.number().int().nonnegative(),\n passed: z.boolean(),\n});\n\nexport const CalibrationReceiptSchema = z.object({\n schema_version: z.literal(1),\n profile_name: z.string(),\n status: StatusLabelSchema,\n model: z.string(),\n architecture: ArchitectureSchema,\n fixture: z.string(),\n fixture_total_claims: z.number().int().positive(),\n fixture_good_claims: z.number().int().nonnegative(),\n fixture_bad_claims: z.number().int().nonnegative(),\n calibrated_at: z.string(),\n research_os_version: z.string(),\n runtime_ms: z.number().int().nonnegative(),\n good_fp_count: z.number().int().nonnegative(),\n any_flag_recall: RecallSchema,\n strict_recall: RecallSchema,\n per_category_any_flag: PerCategoryRecallSchema,\n per_category_strict: PerCategoryRecallSchema,\n decision_vocabulary: z.record(z.string(), z.number().int().nonnegative()),\n decisions_produced_count: z.number().int().nonnegative(),\n decision_vocab_bar: DecisionVocabBarSchema,\n unreachable_decisions: z.array(z.string()),\n empty_or_malformed_responses: z.number().int().nonnegative(),\n pass_fail: PassFailSchema,\n notes: z.array(z.string()),\n});\n\nexport type StatusLabel = z.infer<typeof StatusLabelSchema>;\nexport type Architecture = z.infer<typeof ArchitectureSchema>;\nexport type Recall = z.infer<typeof RecallSchema>;\nexport type PerCategoryRecall = z.infer<typeof PerCategoryRecallSchema>;\nexport type PassFail = z.infer<typeof PassFailSchema>;\nexport type DecisionVocabBar = z.infer<typeof DecisionVocabBarSchema>;\nexport type CalibrationReceipt = z.infer<typeof CalibrationReceiptSchema>;\n"],"mappings":";AAAA,SAAS,SAAS;AAEX,IAAM,oBAAoB,EAAE,KAAK;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,IAAM,qBAAqB,EAAE,KAAK,CAAC,eAAe,UAAU,CAAC;AAE7D,IAAM,eAAe,EAAE,OAAO;AAAA,EACnC,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACtC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACpC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAChC,CAAC;AAEM,IAAM,0BAA0B,EAAE,OAAO,EAAE,OAAO,GAAG,YAAY;AAEjE,IAAM,iBAAiB,EAAE,OAAO;AAAA,EACrC,YAAY,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACnC,uBAAuB,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC9C,6BAA6B,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,qBAAqB,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC5C,6BAA6B,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,cAAc,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,cAAc,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,oBAAoB,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC3C,SAAS,EAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAClC,CAAC;AAEM,IAAM,yBAAyB,EAAE,OAAO;AAAA,EAC7C,cAAc;AAAA,EACd,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACpC,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvC,QAAQ,EAAE,QAAQ;AACpB,CAAC;AAEM,IAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,gBAAgB,EAAE,QAAQ,CAAC;AAAA,EAC3B,cAAc,EAAE,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,OAAO,EAAE,OAAO;AAAA,EAChB,cAAc;AAAA,EACd,SAAS,EAAE,OAAO;AAAA,EAClB,sBAAsB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChD,qBAAqB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAClD,oBAAoB,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACjD,eAAe,EAAE,OAAO;AAAA,EACxB,qBAAqB,EAAE,OAAO;AAAA,EAC9B,YAAY,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACzC,eAAe,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC5C,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,qBAAqB;AAAA,EACrB,qBAAqB,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,CAAC;AAAA,EACxE,0BAA0B,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvD,oBAAoB;AAAA,EACpB,uBAAuB,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EACzC,8BAA8B,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC3D,WAAW;AAAA,EACX,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC;AAC3B,CAAC;","names":[]}
1
+ {"version":3,"sources":["../../src/calibration/receipt-schema.ts","../../src/review/reviewer-options-schema.ts"],"sourcesContent":["import { z } from 'zod';\nimport { ReviewerOptionsSchema } from '../review/reviewer-options-schema.js';\nexport { ReviewerOptionsSchema };\nexport type { ReviewerOptions } from '../review/reviewer-options-schema.js';\n\nexport const StatusLabelSchema = z.enum([\n 'trusted_baseline',\n 'conditional_pass',\n 'failed',\n 'comparison_only',\n]);\n\nexport const ArchitectureSchema = z.enum(['single-pass', 'two-pass']);\n\nexport const RecallSchema = z.object({\n matched: z.number().int().nonnegative(),\n total: z.number().int().nonnegative(),\n ratio: z.number().min(0).max(1),\n});\n\nexport const PerCategoryRecallSchema = z.record(z.string(), RecallSchema);\n\nexport const PassFailSchema = z.object({\n fp_ceiling: z.enum(['PASS', 'FAIL']),\n any_flag_recall_floor: z.enum(['PASS', 'FAIL']),\n per_category_any_flag_floor: z.enum(['PASS', 'FAIL']),\n strict_recall_floor: z.enum(['PASS', 'FAIL']),\n decision_vocab_completeness: z.enum(['PASS', 'FAIL']),\n latency_soft: z.enum(['PASS', 'WARN']),\n latency_hard: z.enum(['PASS', 'FAIL']),\n empty_or_malformed: z.enum(['PASS', 'FAIL']),\n overall: z.enum(['PASS', 'FAIL']),\n});\n\nexport const DecisionVocabBarSchema = z.object({\n architecture: ArchitectureSchema,\n required: z.number().int().positive(),\n produced: z.number().int().nonnegative(),\n passed: z.boolean(),\n});\n\n// schema_version: 1 — additive-optional additions (Exp6 Session 2):\n// reviewer_options: optional sampling params used during this calibration run.\n// Absent = stochastic run (pre-v0.6 compat preserved). Present = keys explicitly set.\nexport const CalibrationReceiptSchema = z.object({\n schema_version: z.literal(1),\n profile_name: z.string(),\n status: StatusLabelSchema,\n model: z.string(),\n architecture: ArchitectureSchema,\n fixture: z.string(),\n fixture_total_claims: z.number().int().positive(),\n fixture_good_claims: z.number().int().nonnegative(),\n fixture_bad_claims: z.number().int().nonnegative(),\n calibrated_at: z.string(),\n research_os_version: z.string(),\n runtime_ms: z.number().int().nonnegative(),\n good_fp_count: z.number().int().nonnegative(),\n any_flag_recall: RecallSchema,\n strict_recall: RecallSchema,\n per_category_any_flag: PerCategoryRecallSchema,\n per_category_strict: PerCategoryRecallSchema,\n decision_vocabulary: z.record(z.string(), z.number().int().nonnegative()),\n decisions_produced_count: z.number().int().nonnegative(),\n decision_vocab_bar: DecisionVocabBarSchema,\n unreachable_decisions: z.array(z.string()),\n empty_or_malformed_responses: z.number().int().nonnegative(),\n pass_fail: PassFailSchema,\n notes: z.array(z.string()),\n reviewer_options: ReviewerOptionsSchema.optional(),\n});\n\nexport type StatusLabel = z.infer<typeof StatusLabelSchema>;\nexport type Architecture = z.infer<typeof ArchitectureSchema>;\nexport type Recall = z.infer<typeof RecallSchema>;\nexport type PerCategoryRecall = z.infer<typeof PerCategoryRecallSchema>;\nexport type PassFail = z.infer<typeof PassFailSchema>;\nexport type DecisionVocabBar = z.infer<typeof DecisionVocabBarSchema>;\nexport type CalibrationReceipt = z.infer<typeof CalibrationReceiptSchema>;\n","import { z } from 'zod';\n\n// Sampling parameters passed verbatim to the Ollama /api/chat `options` field.\n// Used by OllamaInternReviewer to control determinism. All fields optional —\n// omitted keys fall back to Ollama/model defaults. Introduced in Experiment 6\n// Session 2 to make reviewer conditions explicit in calibration receipts.\n//\n// LOAD-BEARING: temperature: 0 is valid and must not be dropped. All merges\n// in OllamaInternReviewer use `!== undefined` checks, NOT truthiness.\nexport const ReviewerOptionsSchema = z.object({\n num_ctx: z.number().int().positive().optional(),\n temperature: z.number().min(0).max(2).optional(),\n seed: z.number().int().optional(),\n top_p: z.number().min(0).max(1).optional(),\n top_k: z.number().int().nonnegative().optional(),\n repeat_penalty: z.number().min(0).optional(),\n});\n\nexport type ReviewerOptions = z.infer<typeof ReviewerOptionsSchema>;\n"],"mappings":";AAAA,SAAS,KAAAA,UAAS;;;ACAlB,SAAS,SAAS;AASX,IAAM,wBAAwB,EAAE,OAAO;AAAA,EAC5C,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,SAAS;AAAA,EAC9C,aAAa,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EAC/C,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EACzC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,SAAS;AAAA,EAC/C,gBAAgB,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,SAAS;AAC7C,CAAC;;;ADXM,IAAM,oBAAoBC,GAAE,KAAK;AAAA,EACtC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,IAAM,qBAAqBA,GAAE,KAAK,CAAC,eAAe,UAAU,CAAC;AAE7D,IAAM,eAAeA,GAAE,OAAO;AAAA,EACnC,SAASA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACtC,OAAOA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACpC,OAAOA,GAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAC;AAChC,CAAC;AAEM,IAAM,0BAA0BA,GAAE,OAAOA,GAAE,OAAO,GAAG,YAAY;AAEjE,IAAM,iBAAiBA,GAAE,OAAO;AAAA,EACrC,YAAYA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACnC,uBAAuBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC9C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,qBAAqBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC5C,6BAA6BA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACpD,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,cAAcA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EACrC,oBAAoBA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAAA,EAC3C,SAASA,GAAE,KAAK,CAAC,QAAQ,MAAM,CAAC;AAClC,CAAC;AAEM,IAAM,yBAAyBA,GAAE,OAAO;AAAA,EAC7C,cAAc;AAAA,EACd,UAAUA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EACpC,UAAUA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvC,QAAQA,GAAE,QAAQ;AACpB,CAAC;AAKM,IAAM,2BAA2BA,GAAE,OAAO;AAAA,EAC/C,gBAAgBA,GAAE,QAAQ,CAAC;AAAA,EAC3B,cAAcA,GAAE,OAAO;AAAA,EACvB,QAAQ;AAAA,EACR,OAAOA,GAAE,OAAO;AAAA,EAChB,cAAc;AAAA,EACd,SAASA,GAAE,OAAO;AAAA,EAClB,sBAAsBA,GAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAChD,qBAAqBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAClD,oBAAoBA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACjD,eAAeA,GAAE,OAAO;AAAA,EACxB,qBAAqBA,GAAE,OAAO;AAAA,EAC9B,YAAYA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACzC,eAAeA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC5C,iBAAiB;AAAA,EACjB,eAAe;AAAA,EACf,uBAAuB;AAAA,EACvB,qBAAqB;AAAA,EACrB,qBAAqBA,GAAE,OAAOA,GAAE,OAAO,GAAGA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY,CAAC;AAAA,EACxE,0BAA0BA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EACvD,oBAAoB;AAAA,EACpB,uBAAuBA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EACzC,8BAA8BA,GAAE,OAAO,EAAE,IAAI,EAAE,YAAY;AAAA,EAC3D,WAAW;AAAA,EACX,OAAOA,GAAE,MAAMA,GAAE,OAAO,CAAC;AAAA,EACzB,kBAAkB,sBAAsB,SAAS;AACnD,CAAC;","names":["z","z"]}
@@ -1,5 +1,6 @@
1
1
  import { CalibrationReceipt, Architecture, DecisionVocabBar, Recall, PerCategoryRecall, PassFail, StatusLabel } from './receipt-schema.js';
2
2
  import 'zod';
3
+ import '../reviewer-options-schema-PZacF_MO.js';
3
4
 
4
5
  declare function computeDecisionVocabBar(architecture: Architecture, decisionsProducedCount: number): DecisionVocabBar;
5
6
  declare function computePassFail(input: {
@@ -71,6 +71,24 @@ function receiptToCalibrationSummary(receipt) {
71
71
  notes: `status=${receipt.status} model=${receipt.model} arch=${receipt.architecture} overall=${receipt.pass_fail.overall} decisions=${receipt.decisions_produced_count}/6`
72
72
  };
73
73
  }
74
+ var REVIEWER_OPTIONS_KEY_ORDER = [
75
+ "num_ctx",
76
+ "temperature",
77
+ "seed",
78
+ "top_p",
79
+ "top_k",
80
+ "repeat_penalty"
81
+ ];
82
+ function buildReviewerOptionsSection(opts) {
83
+ if (!opts) return "";
84
+ const lines = REVIEWER_OPTIONS_KEY_ORDER.filter((k) => opts[k] !== void 0).map((k) => `- ${k}: ${opts[k]}`);
85
+ if (lines.length === 0) return "";
86
+ return `
87
+ ## Reviewer options
88
+
89
+ ${lines.join("\n")}
90
+ `;
91
+ }
74
92
  function buildReceiptMarkdown(r) {
75
93
  const pct = (ratio) => `${Math.round(ratio * 100)}%`;
76
94
  const runtimeSec = (r.runtime_ms / 1e3).toFixed(1);
@@ -97,6 +115,7 @@ function buildReceiptMarkdown(r) {
97
115
 
98
116
  ${r.notes.map((n) => `- ${n}`).join("\n")}
99
117
  ` : "";
118
+ const reviewerOptionsSection = buildReviewerOptionsSection(r.reviewer_options);
100
119
  return `# Calibration Receipt \u2014 ${r.profile_name}
101
120
 
102
121
  - **Model:** ${r.model}
@@ -106,7 +125,7 @@ ${r.notes.map((n) => `- ${n}`).join("\n")}
106
125
  - **Calibrated at:** ${r.calibrated_at}
107
126
  - **Research-OS version:** ${r.research_os_version}
108
127
  - **Runtime:** ${runtimeSec} seconds
109
-
128
+ ${reviewerOptionsSection}
110
129
  ## Headline metrics
111
130
 
112
131
  - FP: ${r.good_fp_count} / ${r.fixture_good_claims}
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/calibration/receipt.ts"],"sourcesContent":["import type {\n Architecture,\n CalibrationReceipt,\n DecisionVocabBar,\n PassFail,\n PerCategoryRecall,\n Recall,\n StatusLabel,\n} from './receipt-schema.js';\n\n// Architecture-aware decision-vocab bar.\n// single-pass: narrow_critic pass is absent, so model uses full 6-decision\n// vocabulary. Bar: >= 4.\n// two-pass: narrow_critic collapses needs_human_review into harder decisions,\n// reducing diversity. Bar: >= 3 (F-49 resolution).\nexport function computeDecisionVocabBar(\n architecture: Architecture,\n decisionsProducedCount: number,\n): DecisionVocabBar {\n const required = architecture === 'two-pass' ? 3 : 4;\n return {\n architecture,\n required,\n produced: decisionsProducedCount,\n passed: decisionsProducedCount >= required,\n };\n}\n\n// Per-category any-flag floor: seeded categories with total >= 2 must have\n// ratio >= 0.50. Categories with fewer than 2 seeds are excluded (not enough\n// signal to enforce a floor — e.g. a 1-seed category with 0 misses is fine).\nfunction computePerCategoryFloor(perCategoryAnyFlag: PerCategoryRecall): 'PASS' | 'FAIL' {\n for (const [, recall] of Object.entries(perCategoryAnyFlag)) {\n if (recall.total >= 2 && recall.ratio < 0.5) return 'FAIL';\n }\n return 'PASS';\n}\n\nexport function computePassFail(input: {\n good_fp_count: number;\n any_flag_recall: Recall;\n per_category_any_flag: PerCategoryRecall;\n strict_recall: Recall;\n decision_vocab_bar: DecisionVocabBar;\n runtime_ms: number;\n empty_or_malformed_responses: number;\n}): PassFail {\n const fp_ceiling = input.good_fp_count <= 1 ? 'PASS' : 'FAIL';\n const any_flag_recall_floor = input.any_flag_recall.ratio >= 0.65 ? 'PASS' : 'FAIL';\n const per_category_any_flag_floor = computePerCategoryFloor(input.per_category_any_flag);\n const strict_recall_floor = input.strict_recall.ratio >= 0.2 ? 'PASS' : 'FAIL';\n const decision_vocab_completeness = input.decision_vocab_bar.passed ? 'PASS' : 'FAIL';\n // Latency soft: warn-only, never FAIL\n const latency_soft = input.runtime_ms <= 600_000 ? 'PASS' : 'WARN';\n const latency_hard = input.runtime_ms <= 1_200_000 ? 'PASS' : 'FAIL';\n const empty_or_malformed = input.empty_or_malformed_responses === 0 ? 'PASS' : 'FAIL';\n\n const hardBars: Array<'PASS' | 'FAIL'> = [\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_hard,\n empty_or_malformed,\n ];\n const overall = hardBars.every((v) => v === 'PASS') ? 'PASS' : 'FAIL';\n\n return {\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_soft,\n latency_hard,\n empty_or_malformed,\n overall,\n };\n}\n\n// Status-label assignment (advisor-locked predicates).\n//\n// Priority order:\n// 1. comparison_only — explicit flag OR single-pass Hermes (architectural side-run)\n// 2. failed — any hard bar FAIL\n// 3. trusted_baseline — canonical Hermes two-pass with PASS + FP=0\n// 4. conditional_pass — everything else that passes bars\n//\n// trusted_baseline encodes the canonical Hermes two-pass admission: the profile\n// is a named Hermes model run in two-pass architecture, all bars pass, and FP=0.\n// Hermes family is detected by case-insensitive substring match on profile name.\n//\n// conditional_pass is the admission status for non-baseline profiles that pass\n// all hard bars but carry a caution (FP at ceiling, non-hermes model, etc.).\n// mistral-nemo:12b two-pass = conditional_pass (FP=1, passes recalibrated bars).\nexport function computeStatusLabel(input: {\n profileName: string;\n architecture: Architecture;\n passFail: PassFail;\n goodFpCount: number;\n modeOverride?: 'comparison_only';\n}): StatusLabel {\n // comparison_only: explicit operator flag\n if (input.modeOverride === 'comparison_only') return 'comparison_only';\n\n // comparison_only: single-pass Hermes is an architectural side-run by design\n // (the canonical profile is two-pass; single-pass exists only for comparison)\n if (input.architecture === 'single-pass' && /hermes/i.test(input.profileName)) {\n return 'comparison_only';\n }\n\n // failed: any hard bar fails (latency_soft is WARN-only, never blocks)\n if (input.passFail.overall === 'FAIL') return 'failed';\n\n // trusted_baseline: canonical Hermes two-pass profile with perfect FP\n // Predicate: profile name contains \"hermes\" (case-insensitive) AND\n // architecture is two-pass AND all bars pass AND FP = 0\n const isHermesTwoPass =\n /hermes/i.test(input.profileName) && input.architecture === 'two-pass';\n if (isHermesTwoPass && input.goodFpCount === 0) return 'trusted_baseline';\n\n // conditional_pass: passes all bars but carries caution\n // (FP at ceiling, non-baseline profile, or non-hermes model)\n return 'conditional_pass';\n}\n\n// Map a receipt to the PromotionCalibrationSummary string shape used by\n// review-active.json. Called by the review-promote CLI when auto-populating\n// calibration_summary from a persisted receipt.\nexport function receiptToCalibrationSummary(receipt: CalibrationReceipt): {\n fixture: string | null;\n good_false_positive_rate: string | null;\n bad_any_flag_recall: string | null;\n strict_category_recall: string | null;\n unsupported_claim_recall: string | null;\n notes: string | null;\n} {\n const fp = receipt.good_fp_count;\n const fpTotal = receipt.fixture_good_claims;\n const fpPct = fpTotal > 0 ? Math.round((fp / fpTotal) * 100) : 0;\n\n const af = receipt.any_flag_recall;\n const sr = receipt.strict_recall;\n const unsupported = receipt.per_category_any_flag['unsupported_claim'];\n\n return {\n fixture: receipt.fixture,\n good_false_positive_rate: `${fp}/${fpTotal} (${fpPct}%)`,\n bad_any_flag_recall: `${af.matched}/${af.total} (${Math.round(af.ratio * 100)}%)`,\n strict_category_recall: `${sr.matched}/${sr.total} (${Math.round(sr.ratio * 100)}%)`,\n unsupported_claim_recall: unsupported\n ? `${unsupported.matched}/${unsupported.total} (${Math.round(unsupported.ratio * 100)}%)`\n : null,\n notes: `status=${receipt.status} model=${receipt.model} arch=${receipt.architecture} overall=${receipt.pass_fail.overall} decisions=${receipt.decisions_produced_count}/6`,\n };\n}\n\n// Render a compact Markdown receipt. Operator proof artifact — no prose.\nexport function buildReceiptMarkdown(r: CalibrationReceipt): string {\n const pct = (ratio: number) => `${Math.round(ratio * 100)}%`;\n const runtimeSec = (r.runtime_ms / 1000).toFixed(1);\n\n const perCatRows = Object.entries(r.per_category_any_flag)\n .map(([cat, af]) => {\n const st = r.per_category_strict[cat] ?? { matched: 0, total: af.total, ratio: 0 };\n return `| ${cat} | ${af.matched}/${af.total} (${pct(af.ratio)}) | ${st.matched}/${st.total} (${pct(st.ratio)}) |`;\n })\n .join('\\n');\n\n const dvRows = [\n 'accepted_for_synthesis',\n 'rejected',\n 'needs_scope_repair',\n 'needs_source_repair',\n 'needs_contradiction_mapping',\n 'needs_human_review',\n ]\n .map((d) => {\n const count = r.decision_vocabulary[d] ?? 0;\n const unreachable = r.unreachable_decisions.includes(d) ? ` (unreachable from ${r.fixture})` : '';\n return `| ${d} | ${count}${unreachable} |`;\n })\n .join('\\n');\n\n const pf = r.pass_fail;\n const bar = r.decision_vocab_bar;\n\n const notesSection =\n r.notes.length > 0 ? `\\n## Notes\\n\\n${r.notes.map((n) => `- ${n}`).join('\\n')}\\n` : '';\n\n return `# Calibration Receipt — ${r.profile_name}\n\n- **Model:** ${r.model}\n- **Architecture:** ${r.architecture}\n- **Status:** ${r.status}\n- **Fixture:** ${r.fixture} (${r.fixture_total_claims} claims = ${r.fixture_good_claims} good + ${r.fixture_bad_claims} bad)\n- **Calibrated at:** ${r.calibrated_at}\n- **Research-OS version:** ${r.research_os_version}\n- **Runtime:** ${runtimeSec} seconds\n\n## Headline metrics\n\n- FP: ${r.good_fp_count} / ${r.fixture_good_claims}\n- Any-flag recall: ${r.any_flag_recall.matched} / ${r.any_flag_recall.total} (${pct(r.any_flag_recall.ratio)})\n- Strict recall: ${r.strict_recall.matched} / ${r.strict_recall.total} (${pct(r.strict_recall.ratio)})\n- Decisions produced: ${r.decisions_produced_count} / 6\n\n## PASS / FAIL\n\n| Bar | Result |\n|---|---|\n| FP ceiling (≤1) | ${pf.fp_ceiling} |\n| Any-flag recall (≥65%) | ${pf.any_flag_recall_floor} |\n| Per-category any-flag (≥50%) | ${pf.per_category_any_flag_floor} |\n| Strict recall (≥20%) | ${pf.strict_recall_floor} |\n| Decision vocab (${bar.architecture} ≥ ${bar.required}) | ${pf.decision_vocab_completeness} |\n| Latency soft (≤10 min) | ${pf.latency_soft} |\n| Latency hard (≤20 min) | ${pf.latency_hard} |\n| Empty/malformed (=0) | ${pf.empty_or_malformed} |\n| **OVERALL** | **${pf.overall}** |\n\n## Per-category recall\n\n| Category | Any-flag | Strict |\n|---|---|---|\n${perCatRows}\n\n## Decision vocabulary\n\n| Decision | Count |\n|---|---:|\n${dvRows}\n${notesSection}`;\n}\n"],"mappings":";AAeO,SAAS,wBACd,cACA,wBACkB;AAClB,QAAM,WAAW,iBAAiB,aAAa,IAAI;AACnD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,UAAU;AAAA,IACV,QAAQ,0BAA0B;AAAA,EACpC;AACF;AAKA,SAAS,wBAAwB,oBAAwD;AACvF,aAAW,CAAC,EAAE,MAAM,KAAK,OAAO,QAAQ,kBAAkB,GAAG;AAC3D,QAAI,OAAO,SAAS,KAAK,OAAO,QAAQ,IAAK,QAAO;AAAA,EACtD;AACA,SAAO;AACT;AAEO,SAAS,gBAAgB,OAQnB;AACX,QAAM,aAAa,MAAM,iBAAiB,IAAI,SAAS;AACvD,QAAM,wBAAwB,MAAM,gBAAgB,SAAS,OAAO,SAAS;AAC7E,QAAM,8BAA8B,wBAAwB,MAAM,qBAAqB;AACvF,QAAM,sBAAsB,MAAM,cAAc,SAAS,MAAM,SAAS;AACxE,QAAM,8BAA8B,MAAM,mBAAmB,SAAS,SAAS;AAE/E,QAAM,eAAe,MAAM,cAAc,MAAU,SAAS;AAC5D,QAAM,eAAe,MAAM,cAAc,OAAY,SAAS;AAC9D,QAAM,qBAAqB,MAAM,iCAAiC,IAAI,SAAS;AAE/E,QAAM,WAAmC;AAAA,IACvC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,UAAU,SAAS,MAAM,CAAC,MAAM,MAAM,MAAM,IAAI,SAAS;AAE/D,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAiBO,SAAS,mBAAmB,OAMnB;AAEd,MAAI,MAAM,iBAAiB,kBAAmB,QAAO;AAIrD,MAAI,MAAM,iBAAiB,iBAAiB,UAAU,KAAK,MAAM,WAAW,GAAG;AAC7E,WAAO;AAAA,EACT;AAGA,MAAI,MAAM,SAAS,YAAY,OAAQ,QAAO;AAK9C,QAAM,kBACJ,UAAU,KAAK,MAAM,WAAW,KAAK,MAAM,iBAAiB;AAC9D,MAAI,mBAAmB,MAAM,gBAAgB,EAAG,QAAO;AAIvD,SAAO;AACT;AAKO,SAAS,4BAA4B,SAO1C;AACA,QAAM,KAAK,QAAQ;AACnB,QAAM,UAAU,QAAQ;AACxB,QAAM,QAAQ,UAAU,IAAI,KAAK,MAAO,KAAK,UAAW,GAAG,IAAI;AAE/D,QAAM,KAAK,QAAQ;AACnB,QAAM,KAAK,QAAQ;AACnB,QAAM,cAAc,QAAQ,sBAAsB,mBAAmB;AAErE,SAAO;AAAA,IACL,SAAS,QAAQ;AAAA,IACjB,0BAA0B,GAAG,EAAE,IAAI,OAAO,KAAK,KAAK;AAAA,IACpD,qBAAqB,GAAG,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,KAAK,MAAM,GAAG,QAAQ,GAAG,CAAC;AAAA,IAC7E,wBAAwB,GAAG,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,KAAK,MAAM,GAAG,QAAQ,GAAG,CAAC;AAAA,IAChF,0BAA0B,cACtB,GAAG,YAAY,OAAO,IAAI,YAAY,KAAK,KAAK,KAAK,MAAM,YAAY,QAAQ,GAAG,CAAC,OACnF;AAAA,IACJ,OAAO,UAAU,QAAQ,MAAM,UAAU,QAAQ,KAAK,SAAS,QAAQ,YAAY,YAAY,QAAQ,UAAU,OAAO,cAAc,QAAQ,wBAAwB;AAAA,EACxK;AACF;AAGO,SAAS,qBAAqB,GAA+B;AAClE,QAAM,MAAM,CAAC,UAAkB,GAAG,KAAK,MAAM,QAAQ,GAAG,CAAC;AACzD,QAAM,cAAc,EAAE,aAAa,KAAM,QAAQ,CAAC;AAElD,QAAM,aAAa,OAAO,QAAQ,EAAE,qBAAqB,EACtD,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM;AAClB,UAAM,KAAK,EAAE,oBAAoB,GAAG,KAAK,EAAE,SAAS,GAAG,OAAO,GAAG,OAAO,OAAO,EAAE;AACjF,WAAO,KAAK,GAAG,MAAM,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,CAAC,OAAO,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,CAAC;AAAA,EAC9G,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,SAAS;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EACG,IAAI,CAAC,MAAM;AACV,UAAM,QAAQ,EAAE,oBAAoB,CAAC,KAAK;AAC1C,UAAM,cAAc,EAAE,sBAAsB,SAAS,CAAC,IAAI,sBAAsB,EAAE,OAAO,MAAM;AAC/F,WAAO,KAAK,CAAC,MAAM,KAAK,GAAG,WAAW;AAAA,EACxC,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,KAAK,EAAE;AACb,QAAM,MAAM,EAAE;AAEd,QAAM,eACJ,EAAE,MAAM,SAAS,IAAI;AAAA;AAAA;AAAA,EAAiB,EAAE,MAAM,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,IAAO;AAEtF,SAAO,gCAA2B,EAAE,YAAY;AAAA;AAAA,eAEnC,EAAE,KAAK;AAAA,sBACA,EAAE,YAAY;AAAA,gBACpB,EAAE,MAAM;AAAA,iBACP,EAAE,OAAO,KAAK,EAAE,oBAAoB,aAAa,EAAE,mBAAmB,WAAW,EAAE,kBAAkB;AAAA,uBAC/F,EAAE,aAAa;AAAA,6BACT,EAAE,mBAAmB;AAAA,iBACjC,UAAU;AAAA;AAAA;AAAA;AAAA,QAInB,EAAE,aAAa,MAAM,EAAE,mBAAmB;AAAA,qBAC7B,EAAE,gBAAgB,OAAO,MAAM,EAAE,gBAAgB,KAAK,KAAK,IAAI,EAAE,gBAAgB,KAAK,CAAC;AAAA,mBACzF,EAAE,cAAc,OAAO,MAAM,EAAE,cAAc,KAAK,KAAK,IAAI,EAAE,cAAc,KAAK,CAAC;AAAA,wBAC5E,EAAE,wBAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,2BAM5B,GAAG,UAAU;AAAA,kCACN,GAAG,qBAAqB;AAAA,wCAClB,GAAG,2BAA2B;AAAA,gCACtC,GAAG,mBAAmB;AAAA,oBAC7B,IAAI,YAAY,WAAM,IAAI,QAAQ,OAAO,GAAG,2BAA2B;AAAA,kCAC9D,GAAG,YAAY;AAAA,kCACf,GAAG,YAAY;AAAA,2BACjB,GAAG,kBAAkB;AAAA,oBAC5B,GAAG,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM5B,UAAU;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMV,MAAM;AAAA,EACN,YAAY;AACd;","names":[]}
1
+ {"version":3,"sources":["../../src/calibration/receipt.ts"],"sourcesContent":["import type {\n Architecture,\n CalibrationReceipt,\n DecisionVocabBar,\n PassFail,\n PerCategoryRecall,\n Recall,\n ReviewerOptions,\n StatusLabel,\n} from './receipt-schema.js';\n\n// Architecture-aware decision-vocab bar.\n// single-pass: narrow_critic pass is absent, so model uses full 6-decision\n// vocabulary. Bar: >= 4.\n// two-pass: narrow_critic collapses needs_human_review into harder decisions,\n// reducing diversity. Bar: >= 3 (F-49 resolution).\nexport function computeDecisionVocabBar(\n architecture: Architecture,\n decisionsProducedCount: number,\n): DecisionVocabBar {\n const required = architecture === 'two-pass' ? 3 : 4;\n return {\n architecture,\n required,\n produced: decisionsProducedCount,\n passed: decisionsProducedCount >= required,\n };\n}\n\n// Per-category any-flag floor: seeded categories with total >= 2 must have\n// ratio >= 0.50. Categories with fewer than 2 seeds are excluded (not enough\n// signal to enforce a floor — e.g. a 1-seed category with 0 misses is fine).\nfunction computePerCategoryFloor(perCategoryAnyFlag: PerCategoryRecall): 'PASS' | 'FAIL' {\n for (const [, recall] of Object.entries(perCategoryAnyFlag)) {\n if (recall.total >= 2 && recall.ratio < 0.5) return 'FAIL';\n }\n return 'PASS';\n}\n\nexport function computePassFail(input: {\n good_fp_count: number;\n any_flag_recall: Recall;\n per_category_any_flag: PerCategoryRecall;\n strict_recall: Recall;\n decision_vocab_bar: DecisionVocabBar;\n runtime_ms: number;\n empty_or_malformed_responses: number;\n}): PassFail {\n const fp_ceiling = input.good_fp_count <= 1 ? 'PASS' : 'FAIL';\n const any_flag_recall_floor = input.any_flag_recall.ratio >= 0.65 ? 'PASS' : 'FAIL';\n const per_category_any_flag_floor = computePerCategoryFloor(input.per_category_any_flag);\n const strict_recall_floor = input.strict_recall.ratio >= 0.2 ? 'PASS' : 'FAIL';\n const decision_vocab_completeness = input.decision_vocab_bar.passed ? 'PASS' : 'FAIL';\n // Latency soft: warn-only, never FAIL\n const latency_soft = input.runtime_ms <= 600_000 ? 'PASS' : 'WARN';\n const latency_hard = input.runtime_ms <= 1_200_000 ? 'PASS' : 'FAIL';\n const empty_or_malformed = input.empty_or_malformed_responses === 0 ? 'PASS' : 'FAIL';\n\n const hardBars: Array<'PASS' | 'FAIL'> = [\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_hard,\n empty_or_malformed,\n ];\n const overall = hardBars.every((v) => v === 'PASS') ? 'PASS' : 'FAIL';\n\n return {\n fp_ceiling,\n any_flag_recall_floor,\n per_category_any_flag_floor,\n strict_recall_floor,\n decision_vocab_completeness,\n latency_soft,\n latency_hard,\n empty_or_malformed,\n overall,\n };\n}\n\n// Status-label assignment (advisor-locked predicates).\n//\n// Priority order:\n// 1. comparison_only — explicit flag OR single-pass Hermes (architectural side-run)\n// 2. failed — any hard bar FAIL\n// 3. trusted_baseline — canonical Hermes two-pass with PASS + FP=0\n// 4. conditional_pass — everything else that passes bars\n//\n// trusted_baseline encodes the canonical Hermes two-pass admission: the profile\n// is a named Hermes model run in two-pass architecture, all bars pass, and FP=0.\n// Hermes family is detected by case-insensitive substring match on profile name.\n//\n// conditional_pass is the admission status for non-baseline profiles that pass\n// all hard bars but carry a caution (FP at ceiling, non-hermes model, etc.).\n// mistral-nemo:12b two-pass = conditional_pass (FP=1, passes recalibrated bars).\nexport function computeStatusLabel(input: {\n profileName: string;\n architecture: Architecture;\n passFail: PassFail;\n goodFpCount: number;\n modeOverride?: 'comparison_only';\n}): StatusLabel {\n // comparison_only: explicit operator flag\n if (input.modeOverride === 'comparison_only') return 'comparison_only';\n\n // comparison_only: single-pass Hermes is an architectural side-run by design\n // (the canonical profile is two-pass; single-pass exists only for comparison)\n if (input.architecture === 'single-pass' && /hermes/i.test(input.profileName)) {\n return 'comparison_only';\n }\n\n // failed: any hard bar fails (latency_soft is WARN-only, never blocks)\n if (input.passFail.overall === 'FAIL') return 'failed';\n\n // trusted_baseline: canonical Hermes two-pass profile with perfect FP\n // Predicate: profile name contains \"hermes\" (case-insensitive) AND\n // architecture is two-pass AND all bars pass AND FP = 0\n const isHermesTwoPass =\n /hermes/i.test(input.profileName) && input.architecture === 'two-pass';\n if (isHermesTwoPass && input.goodFpCount === 0) return 'trusted_baseline';\n\n // conditional_pass: passes all bars but carries caution\n // (FP at ceiling, non-baseline profile, or non-hermes model)\n return 'conditional_pass';\n}\n\n// Map a receipt to the PromotionCalibrationSummary string shape used by\n// review-active.json. Called by the review-promote CLI when auto-populating\n// calibration_summary from a persisted receipt.\nexport function receiptToCalibrationSummary(receipt: CalibrationReceipt): {\n fixture: string | null;\n good_false_positive_rate: string | null;\n bad_any_flag_recall: string | null;\n strict_category_recall: string | null;\n unsupported_claim_recall: string | null;\n notes: string | null;\n} {\n const fp = receipt.good_fp_count;\n const fpTotal = receipt.fixture_good_claims;\n const fpPct = fpTotal > 0 ? Math.round((fp / fpTotal) * 100) : 0;\n\n const af = receipt.any_flag_recall;\n const sr = receipt.strict_recall;\n const unsupported = receipt.per_category_any_flag['unsupported_claim'];\n\n return {\n fixture: receipt.fixture,\n good_false_positive_rate: `${fp}/${fpTotal} (${fpPct}%)`,\n bad_any_flag_recall: `${af.matched}/${af.total} (${Math.round(af.ratio * 100)}%)`,\n strict_category_recall: `${sr.matched}/${sr.total} (${Math.round(sr.ratio * 100)}%)`,\n unsupported_claim_recall: unsupported\n ? `${unsupported.matched}/${unsupported.total} (${Math.round(unsupported.ratio * 100)}%)`\n : null,\n notes: `status=${receipt.status} model=${receipt.model} arch=${receipt.architecture} overall=${receipt.pass_fail.overall} decisions=${receipt.decisions_produced_count}/6`,\n };\n}\n\n// Stable key order for reviewer_options rendering.\n// Keys are rendered only when explicitly set (omitted keys stay absent).\nconst REVIEWER_OPTIONS_KEY_ORDER: (keyof ReviewerOptions)[] = [\n 'num_ctx',\n 'temperature',\n 'seed',\n 'top_p',\n 'top_k',\n 'repeat_penalty',\n];\n\n// Render a \"Reviewer options\" section when reviewer_options is present and non-empty.\n// Returns empty string when absent or empty (absence IS the disclosure for stochastic runs).\nfunction buildReviewerOptionsSection(opts: ReviewerOptions | undefined): string {\n if (!opts) return '';\n const lines = REVIEWER_OPTIONS_KEY_ORDER\n .filter((k) => opts[k] !== undefined)\n .map((k) => `- ${k}: ${opts[k]}`);\n if (lines.length === 0) return '';\n return `\\n## Reviewer options\\n\\n${lines.join('\\n')}\\n`;\n}\n\n// Render a compact Markdown receipt. Operator proof artifact — no prose.\nexport function buildReceiptMarkdown(r: CalibrationReceipt): string {\n const pct = (ratio: number) => `${Math.round(ratio * 100)}%`;\n const runtimeSec = (r.runtime_ms / 1000).toFixed(1);\n\n const perCatRows = Object.entries(r.per_category_any_flag)\n .map(([cat, af]) => {\n const st = r.per_category_strict[cat] ?? { matched: 0, total: af.total, ratio: 0 };\n return `| ${cat} | ${af.matched}/${af.total} (${pct(af.ratio)}) | ${st.matched}/${st.total} (${pct(st.ratio)}) |`;\n })\n .join('\\n');\n\n const dvRows = [\n 'accepted_for_synthesis',\n 'rejected',\n 'needs_scope_repair',\n 'needs_source_repair',\n 'needs_contradiction_mapping',\n 'needs_human_review',\n ]\n .map((d) => {\n const count = r.decision_vocabulary[d] ?? 0;\n const unreachable = r.unreachable_decisions.includes(d) ? ` (unreachable from ${r.fixture})` : '';\n return `| ${d} | ${count}${unreachable} |`;\n })\n .join('\\n');\n\n const pf = r.pass_fail;\n const bar = r.decision_vocab_bar;\n\n const notesSection =\n r.notes.length > 0 ? `\\n## Notes\\n\\n${r.notes.map((n) => `- ${n}`).join('\\n')}\\n` : '';\n\n const reviewerOptionsSection = buildReviewerOptionsSection(r.reviewer_options);\n\n return `# Calibration Receipt — ${r.profile_name}\n\n- **Model:** ${r.model}\n- **Architecture:** ${r.architecture}\n- **Status:** ${r.status}\n- **Fixture:** ${r.fixture} (${r.fixture_total_claims} claims = ${r.fixture_good_claims} good + ${r.fixture_bad_claims} bad)\n- **Calibrated at:** ${r.calibrated_at}\n- **Research-OS version:** ${r.research_os_version}\n- **Runtime:** ${runtimeSec} seconds\n${reviewerOptionsSection}\n## Headline metrics\n\n- FP: ${r.good_fp_count} / ${r.fixture_good_claims}\n- Any-flag recall: ${r.any_flag_recall.matched} / ${r.any_flag_recall.total} (${pct(r.any_flag_recall.ratio)})\n- Strict recall: ${r.strict_recall.matched} / ${r.strict_recall.total} (${pct(r.strict_recall.ratio)})\n- Decisions produced: ${r.decisions_produced_count} / 6\n\n## PASS / FAIL\n\n| Bar | Result |\n|---|---|\n| FP ceiling (≤1) | ${pf.fp_ceiling} |\n| Any-flag recall (≥65%) | ${pf.any_flag_recall_floor} |\n| Per-category any-flag (≥50%) | ${pf.per_category_any_flag_floor} |\n| Strict recall (≥20%) | ${pf.strict_recall_floor} |\n| Decision vocab (${bar.architecture} ≥ ${bar.required}) | ${pf.decision_vocab_completeness} |\n| Latency soft (≤10 min) | ${pf.latency_soft} |\n| Latency hard (≤20 min) | ${pf.latency_hard} |\n| Empty/malformed (=0) | ${pf.empty_or_malformed} |\n| **OVERALL** | **${pf.overall}** |\n\n## Per-category recall\n\n| Category | Any-flag | Strict |\n|---|---|---|\n${perCatRows}\n\n## Decision vocabulary\n\n| Decision | Count |\n|---|---:|\n${dvRows}\n${notesSection}`;\n}\n"],"mappings":";AAgBO,SAAS,wBACd,cACA,wBACkB;AAClB,QAAM,WAAW,iBAAiB,aAAa,IAAI;AACnD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,UAAU;AAAA,IACV,QAAQ,0BAA0B;AAAA,EACpC;AACF;AAKA,SAAS,wBAAwB,oBAAwD;AACvF,aAAW,CAAC,EAAE,MAAM,KAAK,OAAO,QAAQ,kBAAkB,GAAG;AAC3D,QAAI,OAAO,SAAS,KAAK,OAAO,QAAQ,IAAK,QAAO;AAAA,EACtD;AACA,SAAO;AACT;AAEO,SAAS,gBAAgB,OAQnB;AACX,QAAM,aAAa,MAAM,iBAAiB,IAAI,SAAS;AACvD,QAAM,wBAAwB,MAAM,gBAAgB,SAAS,OAAO,SAAS;AAC7E,QAAM,8BAA8B,wBAAwB,MAAM,qBAAqB;AACvF,QAAM,sBAAsB,MAAM,cAAc,SAAS,MAAM,SAAS;AACxE,QAAM,8BAA8B,MAAM,mBAAmB,SAAS,SAAS;AAE/E,QAAM,eAAe,MAAM,cAAc,MAAU,SAAS;AAC5D,QAAM,eAAe,MAAM,cAAc,OAAY,SAAS;AAC9D,QAAM,qBAAqB,MAAM,iCAAiC,IAAI,SAAS;AAE/E,QAAM,WAAmC;AAAA,IACvC;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACA,QAAM,UAAU,SAAS,MAAM,CAAC,MAAM,MAAM,MAAM,IAAI,SAAS;AAE/D,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AACF;AAiBO,SAAS,mBAAmB,OAMnB;AAEd,MAAI,MAAM,iBAAiB,kBAAmB,QAAO;AAIrD,MAAI,MAAM,iBAAiB,iBAAiB,UAAU,KAAK,MAAM,WAAW,GAAG;AAC7E,WAAO;AAAA,EACT;AAGA,MAAI,MAAM,SAAS,YAAY,OAAQ,QAAO;AAK9C,QAAM,kBACJ,UAAU,KAAK,MAAM,WAAW,KAAK,MAAM,iBAAiB;AAC9D,MAAI,mBAAmB,MAAM,gBAAgB,EAAG,QAAO;AAIvD,SAAO;AACT;AAKO,SAAS,4BAA4B,SAO1C;AACA,QAAM,KAAK,QAAQ;AACnB,QAAM,UAAU,QAAQ;AACxB,QAAM,QAAQ,UAAU,IAAI,KAAK,MAAO,KAAK,UAAW,GAAG,IAAI;AAE/D,QAAM,KAAK,QAAQ;AACnB,QAAM,KAAK,QAAQ;AACnB,QAAM,cAAc,QAAQ,sBAAsB,mBAAmB;AAErE,SAAO;AAAA,IACL,SAAS,QAAQ;AAAA,IACjB,0BAA0B,GAAG,EAAE,IAAI,OAAO,KAAK,KAAK;AAAA,IACpD,qBAAqB,GAAG,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,KAAK,MAAM,GAAG,QAAQ,GAAG,CAAC;AAAA,IAC7E,wBAAwB,GAAG,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,KAAK,MAAM,GAAG,QAAQ,GAAG,CAAC;AAAA,IAChF,0BAA0B,cACtB,GAAG,YAAY,OAAO,IAAI,YAAY,KAAK,KAAK,KAAK,MAAM,YAAY,QAAQ,GAAG,CAAC,OACnF;AAAA,IACJ,OAAO,UAAU,QAAQ,MAAM,UAAU,QAAQ,KAAK,SAAS,QAAQ,YAAY,YAAY,QAAQ,UAAU,OAAO,cAAc,QAAQ,wBAAwB;AAAA,EACxK;AACF;AAIA,IAAM,6BAAwD;AAAA,EAC5D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIA,SAAS,4BAA4B,MAA2C;AAC9E,MAAI,CAAC,KAAM,QAAO;AAClB,QAAM,QAAQ,2BACX,OAAO,CAAC,MAAM,KAAK,CAAC,MAAM,MAAS,EACnC,IAAI,CAAC,MAAM,KAAK,CAAC,KAAK,KAAK,CAAC,CAAC,EAAE;AAClC,MAAI,MAAM,WAAW,EAAG,QAAO;AAC/B,SAAO;AAAA;AAAA;AAAA,EAA4B,MAAM,KAAK,IAAI,CAAC;AAAA;AACrD;AAGO,SAAS,qBAAqB,GAA+B;AAClE,QAAM,MAAM,CAAC,UAAkB,GAAG,KAAK,MAAM,QAAQ,GAAG,CAAC;AACzD,QAAM,cAAc,EAAE,aAAa,KAAM,QAAQ,CAAC;AAElD,QAAM,aAAa,OAAO,QAAQ,EAAE,qBAAqB,EACtD,IAAI,CAAC,CAAC,KAAK,EAAE,MAAM;AAClB,UAAM,KAAK,EAAE,oBAAoB,GAAG,KAAK,EAAE,SAAS,GAAG,OAAO,GAAG,OAAO,OAAO,EAAE;AACjF,WAAO,KAAK,GAAG,MAAM,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,CAAC,OAAO,GAAG,OAAO,IAAI,GAAG,KAAK,KAAK,IAAI,GAAG,KAAK,CAAC;AAAA,EAC9G,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,SAAS;AAAA,IACb;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EACG,IAAI,CAAC,MAAM;AACV,UAAM,QAAQ,EAAE,oBAAoB,CAAC,KAAK;AAC1C,UAAM,cAAc,EAAE,sBAAsB,SAAS,CAAC,IAAI,sBAAsB,EAAE,OAAO,MAAM;AAC/F,WAAO,KAAK,CAAC,MAAM,KAAK,GAAG,WAAW;AAAA,EACxC,CAAC,EACA,KAAK,IAAI;AAEZ,QAAM,KAAK,EAAE;AACb,QAAM,MAAM,EAAE;AAEd,QAAM,eACJ,EAAE,MAAM,SAAS,IAAI;AAAA;AAAA;AAAA,EAAiB,EAAE,MAAM,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,EAAE,KAAK,IAAI,CAAC;AAAA,IAAO;AAEtF,QAAM,yBAAyB,4BAA4B,EAAE,gBAAgB;AAE7E,SAAO,gCAA2B,EAAE,YAAY;AAAA;AAAA,eAEnC,EAAE,KAAK;AAAA,sBACA,EAAE,YAAY;AAAA,gBACpB,EAAE,MAAM;AAAA,iBACP,EAAE,OAAO,KAAK,EAAE,oBAAoB,aAAa,EAAE,mBAAmB,WAAW,EAAE,kBAAkB;AAAA,uBAC/F,EAAE,aAAa;AAAA,6BACT,EAAE,mBAAmB;AAAA,iBACjC,UAAU;AAAA,EACzB,sBAAsB;AAAA;AAAA;AAAA,QAGhB,EAAE,aAAa,MAAM,EAAE,mBAAmB;AAAA,qBAC7B,EAAE,gBAAgB,OAAO,MAAM,EAAE,gBAAgB,KAAK,KAAK,IAAI,EAAE,gBAAgB,KAAK,CAAC;AAAA,mBACzF,EAAE,cAAc,OAAO,MAAM,EAAE,cAAc,KAAK,KAAK,IAAI,EAAE,cAAc,KAAK,CAAC;AAAA,wBAC5E,EAAE,wBAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,2BAM5B,GAAG,UAAU;AAAA,kCACN,GAAG,qBAAqB;AAAA,wCAClB,GAAG,2BAA2B;AAAA,gCACtC,GAAG,mBAAmB;AAAA,oBAC7B,IAAI,YAAY,WAAM,IAAI,QAAQ,OAAO,GAAG,2BAA2B;AAAA,kCAC9D,GAAG,YAAY;AAAA,kCACf,GAAG,YAAY;AAAA,2BACjB,GAAG,kBAAkB;AAAA,oBAC5B,GAAG,OAAO;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM5B,UAAU;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAMV,MAAM;AAAA,EACN,YAAY;AACd;","names":[]}