mentar 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,619 @@
1
+ """
2
+ verify_numeric.py — Deterministic fraction/integer verifier for Mentar.
3
+
4
+ SAFETY-CRITICAL: per SPEC §15 Layer 2, every numeric/worked step the LLM generates
5
+ must be computationally verified BEFORE display. A wrong-but-confident verification
6
+ is a safety failure. Err on safe-reject over false-pass.
7
+
8
+ Supports T1.3 (eval-time scoring) and T3.5 (runtime serve-time gate).
9
+ Stdlib only — fractions.Fraction + re. No third-party deps.
10
+
11
+ Design decisions documented inline:
12
+ - Decimals (e.g. "0.5"): SAFE_REJECT. Not in pilot scope (SPEC §23, fractions.md
13
+ "Out of scope: decimal/fraction conversion"). Accepting decimals silently could
14
+ produce a false-pass if the LLM gives "0.5" when the expected form is "1/2".
15
+ - Unicode vulgar fractions (½ ¼ ¾ etc.): mapped to their a/b equivalents before
16
+ parsing — cheap via a small lookup table and avoids SAFE_REJECT on copy-paste input.
17
+ - Mixed numbers ("1 1/2"): parsed as whole + fraction; ambiguous forms with more than
18
+ one space-separated token that look like independent fractions → SAFE_REJECT.
19
+ - Negative denominators: Fraction normalises these natively (e.g. 3/-6 → -1/2).
20
+ - Zero denominator: SAFE_REJECT (never crash, never accept).
21
+ - Multiple plausible candidates of equal precedence at the same position → SAFE_REJECT.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+ from dataclasses import dataclass
28
+ from enum import Enum
29
+ from fractions import Fraction
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Public types
34
+ # ---------------------------------------------------------------------------
35
+
36
class CheckResult(Enum):
    """Closed set of verdicts a verification attempt can end with."""

    # The checker accepted the output.
    PASS = "pass"
    # A candidate was extracted but it does not match the ground truth.
    FAIL = "fail"
    # No candidate answer could be located in the LLM output.
    EXTRACT_FAIL = "extract_fail"
    # Input was malformed or ambiguous, so the verifier refuses to judge it.
    SAFE_REJECT = "safe_reject"
41
+
42
+
43
@dataclass
class CheckOutcome:
    """Record describing the verdict of one verification attempt."""

    # Verdict of the attempt.
    result: CheckResult
    # Raw text the verifier pulled out as the candidate answer, if any.
    extracted: str | None
    # Normalised form of the candidate (e.g. "1/2") when one was produced.
    canonical: str | None
    # Human-readable explanation of the verdict.
    detail: str
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Unicode vulgar-fraction table (optional bonus — cheap lookup)
53
+ # ---------------------------------------------------------------------------
54
+
55
+ _UNICODE_FRACTIONS: dict[str, str] = {
56
+ "½": "1/2",
57
+ "⅓": "1/3",
58
+ "⅔": "2/3",
59
+ "¼": "1/4",
60
+ "¾": "3/4",
61
+ "⅕": "1/5",
62
+ "⅖": "2/5",
63
+ "⅗": "3/5",
64
+ "⅘": "4/5",
65
+ "⅙": "1/6",
66
+ "⅚": "5/6",
67
+ "⅛": "1/8",
68
+ "⅜": "3/8",
69
+ "⅝": "5/8",
70
+ "⅞": "7/8",
71
+ "⅐": "1/7",
72
+ "⅑": "1/9",
73
+ "⅒": "1/10",
74
+ }
75
+
76
+ _UNICODE_FRAC_RE = re.compile("|".join(re.escape(c) for c in _UNICODE_FRACTIONS))
77
+
78
+
79
+ def _expand_unicode_fractions(text: str) -> str:
80
+ """Replace Unicode vulgar-fraction characters with their a/b form."""
81
+ return _UNICODE_FRAC_RE.sub(lambda m: _UNICODE_FRACTIONS[m.group()], text)
82
+
83
+
84
+ # ---------------------------------------------------------------------------
85
+ # Regex patterns
86
+ # ---------------------------------------------------------------------------
87
+
88
+ # Fraction pattern: optional leading sign, optional whole number, then a/b
89
+ # We deliberately do NOT allow spaces inside the numerator/denominator tokens.
90
+ # Matches: "1/2", "-3/5", "2/4", "10/3"
91
+ _FRAC_PAT = r"-?\d+\s*/\s*-?\d+"
92
+
93
+ # Mixed-number pattern: whole SP fraction (e.g. "1 1/2", "-2 3/4")
94
+ # Requires exactly one space between whole and fraction.
95
+ _MIXED_PAT = r"-?\d+\s+\d+\s*/\s*\d+"
96
+
97
+ # Pure integer (no slash)
98
+ _INT_PAT = r"-?\d+"
99
+
100
+ # <answer> tag extraction
101
+ _ANSWER_TAG_RE = re.compile(r"<answer>\s*(.*?)\s*</answer>", re.IGNORECASE | re.DOTALL)
102
+
103
+ # Multiple-choice letter (A-D) or digit (1-4), possibly in parentheses or quoted
104
+ _MC_LETTER_RE = re.compile(r"\b([A-Da-d])\b")
105
+ _MC_DIGIT_RE = re.compile(r"\b([1-4])\b")
106
+
107
+ # Full patterns compiled
108
+ _MIXED_RE = re.compile(_MIXED_PAT)
109
+ _FRAC_RE = re.compile(_FRAC_PAT)
110
+ _INT_RE = re.compile(_INT_PAT)
111
+
112
+ # Decimal detection — reject these explicitly
113
+ _DECIMAL_RE = re.compile(r"\b\d+\.\d+\b")
114
+
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # normalise_fraction
118
+ # ---------------------------------------------------------------------------
119
+
120
def normalise_fraction(s: str) -> Fraction | None:
    """
    Parse a fraction string and return a normalised Fraction, or None on failure.

    Accepted forms:
    - "1/2", "2/4", "-3/5"       → proper/improper fraction
    - "3"                        → integer (whole number)
    - "1 1/2", "2 3/4"           → mixed number (whole + fraction)
    - Unicode vulgar fractions via the _expand_unicode_fractions pre-pass

    Returns None (callers treat this as SAFE_REJECT) for:
    - Zero denominator ("1/0", "5/0")
    - Non-integer components ("a/b", "1.5/2")
    - Empty or whitespace-only string
    - Digit runs too long for int() to convert
    - Anything that doesn't match the accepted forms
    """
    if not s or not s.strip():
        return None

    s = _expand_unicode_fractions(s.strip())

    # Decimal in the token → reject (not in pilot scope)
    if _DECIMAL_RE.search(s):
        return None

    try:
        # Mixed number first ("1 1/2"), so its whole part is not mistaken for
        # a separate token.
        mixed_m = re.fullmatch(r"\s*(-?\d+)\s+(\d+)\s*/\s*(\d+)\s*", s)
        if mixed_m:
            whole = int(mixed_m.group(1))
            num = int(mixed_m.group(2))
            den = int(mixed_m.group(3))
            if den == 0:
                return None  # SAFE_REJECT — zero denominator
            # The sign is read from the text, not from the parsed whole part:
            # int("-0") == 0, so a numeric test would turn "-0 1/2" into +1/2.
            negative = mixed_m.group(1).startswith("-")
            magnitude = Fraction(abs(whole) * den + num, den)
            return -magnitude if negative else magnitude

        # Plain fraction ("a/b"); Fraction normalises a negative denominator.
        frac_m = re.fullmatch(r"\s*(-?\d+)\s*/\s*(-?\d+)\s*", s)
        if frac_m:
            num = int(frac_m.group(1))
            den = int(frac_m.group(2))
            if den == 0:
                return None  # SAFE_REJECT — zero denominator
            return Fraction(num, den)

        # Plain integer
        int_m = re.fullmatch(r"\s*(-?\d+)\s*", s)
        if int_m:
            return Fraction(int(int_m.group(1)))
    except ValueError:
        # int() refuses digit runs beyond the interpreter's string-conversion
        # limit (Python 3.11+); report that as "cannot normalise", not a crash.
        return None

    return None
180
+
181
+
182
+ # ---------------------------------------------------------------------------
183
+ # extract_answer
184
+ # ---------------------------------------------------------------------------
185
+
186
def extract_answer(text: str, answer_type: str) -> str | None:
    """
    Pull the candidate answer string out of free-form LLM output.

    Unicode vulgar fractions are expanded to their a/b form first; the work is
    then delegated according to answer_type:
    - "fraction" / "int" → _extract_numeric (an <answer> tag, else a mixed
      number, else a fraction, else an integer)
    - "mc4"              → _extract_mc (single letter A-D or digit 1-4)
    - anything else      → None

    Returns None when text is empty/whitespace-only or when the delegated
    extractor cannot settle on a candidate.
    """
    if not (text and text.strip()):
        return None

    expanded = _expand_unicode_fractions(text)

    if answer_type in ("fraction", "int"):
        return _extract_numeric(expanded, answer_type)
    return _extract_mc(expanded) if answer_type == "mc4" else None
219
+
220
+
221
+ def _strip_punct(s: str) -> str:
222
+ """Strip trailing punctuation and whitespace."""
223
+ return s.rstrip(".!?,;: \t\n")
224
+
225
+
226
def _extract_mc(text: str) -> str | None:
    """Return the right-most stand-alone MC choice (A-D or 1-4) in text, or None.

    Letters are returned upper-cased; digits are returned unchanged. A letter
    and a digit can never start at the same offset, so the right-most match is
    always unique.
    """
    # NOTE(review): any stand-alone a-d / 1-4 token counts as a choice, so the
    # article "a", the "d" in "I'd", or the digits of a trailing "3/4" can win
    # over the intended option — confirm whether callers need stricter rules.
    rightmost = None
    for pattern in (_MC_LETTER_RE, _MC_DIGIT_RE):
        for match in pattern.finditer(text):
            if rightmost is None or match.start() > rightmost.start():
                rightmost = match

    if rightmost is None:
        return None
    # upper() leaves a digit untouched, so one return path serves both kinds.
    return rightmost.group(1).upper()
249
+
250
+
251
def _extract_numeric(text: str, answer_type: str) -> str | None:
    """
    Extract a numeric candidate from text.

    Priority:
    1. Content of the LAST <answer>…</answer> tag (a model that revises itself
       puts its final answer last; every other rule here also uses "last")
    2. Last mixed number
    3. Last fraction that is not part of a mixed number
    4. Last integer that is not part of a fraction or mixed number

    Ambiguity (returns None):
    - a mixed number and a standalone fraction starting within 10 characters
      of each other with the word "or" between them
    - the last two standalone fractions joined by "or", or separated by at
      most 5 characters

    answer_type is accepted for signature compatibility; the same precedence
    is applied for "fraction" and "int".

    Trailing punctuation and whitespace are stripped from the returned text.
    """
    # 1. Explicit <answer> tags win outright; take the last one present.
    tagged = _ANSWER_TAG_RE.findall(text)
    if tagged:
        content = _strip_punct(tagged[-1].strip())
        return content if content else None

    # Decimals are not filtered here: the checkers reject them before
    # extraction and normalise_fraction rejects them afterwards.

    # 2. Locate every mixed number and every a/b fraction.
    mixed_matches = list(_MIXED_RE.finditer(text))
    frac_matches = list(_FRAC_RE.finditer(text))

    # A fraction lying inside a mixed-number span is that number's tail, not
    # an independent candidate.
    mixed_spans = {m.span() for m in mixed_matches}
    frac_matches_standalone = [
        m for m in frac_matches
        if not any(ms <= m.start() and m.end() <= me for ms, me in mixed_spans)
    ]

    # 3. Integers count only when they are not part of a fraction/mixed number.
    all_numeric_spans = mixed_spans | {m.span() for m in frac_matches}
    int_matches_standalone = [
        m for m in _INT_RE.finditer(text)
        if not any(ms <= m.start() and m.end() <= me for ms, me in all_numeric_spans)
    ]

    # Select by precedence (mixed > fraction > int), using the last occurrence.
    if mixed_matches:
        last_mixed = mixed_matches[-1]
        if frac_matches_standalone:
            last_frac = frac_matches_standalone[-1]
            # "1 1/2 or 3/4": a nearby competing fraction joined by "or".
            if abs(last_frac.start() - last_mixed.start()) < 10:
                between = text[min(last_mixed.end(), last_frac.end()):
                               max(last_mixed.start(), last_frac.start())]
                if re.search(r'\bor\b', between, re.IGNORECASE):
                    return None  # Ambiguous
        return _strip_punct(last_mixed.group())

    if frac_matches_standalone:
        last_frac = frac_matches_standalone[-1]
        if len(frac_matches_standalone) >= 2:
            second_last = frac_matches_standalone[-2]
            gap = text[second_last.end():last_frac.start()]
            if re.search(r'\bor\b', gap, re.IGNORECASE):
                return None  # "1/2 or 3/4" → ambiguous
            # Two fractions almost touching, with no connective word.
            if last_frac.start() - second_last.end() <= 5:
                return None
        return _strip_punct(last_frac.group())

    if int_matches_standalone:
        return _strip_punct(int_matches_standalone[-1].group())

    return None
329
+
330
+
331
+ # ---------------------------------------------------------------------------
332
+ # Canonical string representation
333
+ # ---------------------------------------------------------------------------
334
+
335
+ def _canonical_str(f: Fraction) -> str:
336
+ """Return the simplest string form of a normalised Fraction."""
337
+ if f.denominator == 1:
338
+ return str(f.numerator)
339
+ return f"{f.numerator}/{f.denominator}"
340
+
341
+
342
+ # ---------------------------------------------------------------------------
343
+ # check — main entry point
344
+ # ---------------------------------------------------------------------------
345
+
346
def check(
    answer_type: str,
    checker: str,
    llm_output: str,
    ground_truth: str,
) -> CheckOutcome:
    """
    Verify an LLM-generated answer against ground truth.

    Parameters
    ----------
    answer_type : str
        One of "int", "fraction", "mc4", "free_text". Accepted for interface
        compatibility; dispatch is driven by ``checker`` alone.
    checker : str
        One of "int_exact", "fraction_equiv", "mc_choice", "none".
    llm_output : str
        The raw LLM-generated text containing (or purportedly containing) the answer.
    ground_truth : str
        The correct answer as a plain string (e.g. "3", "1/2", "A").

    Returns
    -------
    CheckOutcome
        result is PASS / FAIL / EXTRACT_FAIL / SAFE_REJECT.
        Never raises:
        - empty, whitespace-only or otherwise falsy llm_output → EXTRACT_FAIL
        - non-string llm_output → SAFE_REJECT
        - unknown checker → SAFE_REJECT
        - any exception inside a checker → SAFE_REJECT
    """
    # Guard: empty input. Only call .strip() once we know this is a str, so a
    # stray non-string value cannot raise AttributeError out of this function.
    is_text = isinstance(llm_output, str)
    if not llm_output or (is_text and not llm_output.strip()):
        return CheckOutcome(
            result=CheckResult.EXTRACT_FAIL,
            extracted=None,
            canonical=None,
            detail="Empty llm_output — nothing to verify.",
        )
    if not is_text:
        return CheckOutcome(
            result=CheckResult.SAFE_REJECT,
            extracted=None,
            canonical=None,
            detail=(
                f"llm_output must be a string, got {type(llm_output).__name__}"
                " — safe-reject."
            ),
        )

    # Dispatch
    try:
        if checker == "none":
            return _check_none()
        elif checker == "int_exact":
            return _check_int_exact(llm_output, ground_truth)
        elif checker == "fraction_equiv":
            return _check_fraction_equiv(llm_output, ground_truth)
        elif checker == "mc_choice":
            return _check_mc_choice(llm_output, ground_truth)
        else:
            return CheckOutcome(
                result=CheckResult.SAFE_REJECT,
                extracted=None,
                canonical=None,
                detail=f"Unknown checker '{checker}' — safe-reject to avoid false-pass.",
            )
    except Exception as exc:  # noqa: BLE001
        # Belt-and-suspenders: any unhandled exception → SAFE_REJECT, not crash
        return CheckOutcome(
            result=CheckResult.SAFE_REJECT,
            extracted=None,
            canonical=None,
            detail=f"Unexpected error during verification: {exc!r} — safe-reject.",
        )
406
+
407
+
408
+ # ---------------------------------------------------------------------------
409
+ # Individual checker implementations
410
+ # ---------------------------------------------------------------------------
411
+
412
def _check_none() -> CheckOutcome:
    """Checker 'none': unconditionally PASS, with no candidate extracted."""
    explanation = "Checker 'none': concept is non-checkable; auto-pass."
    return CheckOutcome(CheckResult.PASS, None, None, explanation)
420
+
421
+
422
def _check_int_exact(llm_output: str, ground_truth: str) -> CheckOutcome:
    """
    Compare the candidate extracted from llm_output with int(ground_truth).

    Outcomes:
    - ground_truth not parseable as an integer → SAFE_REJECT
    - llm_output contains a decimal            → SAFE_REJECT
    - no candidate extracted                   → EXTRACT_FAIL
    - candidate is not an integer (e.g. "3/4") → FAIL
    - otherwise PASS when the integers are equal, else FAIL
    """
    try:
        expected = int(ground_truth.strip())
    except (ValueError, AttributeError):
        return CheckOutcome(
            CheckResult.SAFE_REJECT,
            None,
            None,
            f"ground_truth '{ground_truth}' is not a valid integer — safe-reject.",
        )

    # Reject decimals before extraction: otherwise "0.5" would fall through to
    # the integer fallback, which would pick up one of its digit runs.
    if _DECIMAL_RE.search(llm_output) is not None:
        return CheckOutcome(
            CheckResult.SAFE_REJECT,
            None,
            None,
            "llm_output contains a decimal — pilot expects integer answers; safe-reject.",
        )

    candidate = extract_answer(llm_output, "int")
    if candidate is None:
        return CheckOutcome(
            CheckResult.EXTRACT_FAIL,
            None,
            None,
            "Could not extract an integer candidate from llm_output.",
        )

    try:
        actual = int(candidate.strip())
    except ValueError:
        # Something was extracted but it is not an integer: a wrong answer
        # (FAIL), not a malformed input (SAFE_REJECT).
        return CheckOutcome(
            CheckResult.FAIL,
            candidate,
            None,
            f"Extracted '{candidate}' but could not parse as integer (expected {expected}).",
        )

    if actual == expected:
        verdict, word = CheckResult.PASS, "match"
    else:
        verdict, word = CheckResult.FAIL, "mismatch"
    return CheckOutcome(
        result=verdict,
        extracted=candidate,
        canonical=str(actual),
        detail=f"Extracted {actual!r}, expected {expected!r}: {word}.",
    )
477
+
478
+
479
def _check_fraction_equiv(llm_output: str, ground_truth: str) -> CheckOutcome:
    """
    Compare the fraction/integer extracted from llm_output with ground_truth
    after normalising both to Fraction.

    Outcomes:
    - decimal in ground_truth or llm_output      → SAFE_REJECT
    - ground_truth cannot be normalised          → SAFE_REJECT
    - no candidate, but fraction-like text found → SAFE_REJECT
    - no candidate and no fraction-like text     → EXTRACT_FAIL
    - candidate cannot be normalised (including zero denominator) → SAFE_REJECT
    - otherwise PASS when the two Fractions are equal, else FAIL
    """

    def _reject(detail: str, extracted: str | None = None) -> CheckOutcome:
        # Every SAFE_REJECT built here has no canonical form.
        return CheckOutcome(CheckResult.SAFE_REJECT, extracted, None, detail)

    # A decimal ground truth is a configuration error.
    if _DECIMAL_RE.search(ground_truth):
        return _reject(
            f"ground_truth '{ground_truth}' contains a decimal — not in pilot scope; safe-reject."
        )

    # Reject decimals before extraction so the trailing-integer fallback never
    # sees a fragment of one.
    if _DECIMAL_RE.search(llm_output):
        return _reject("llm_output contains a decimal — not in pilot scope; safe-reject.")

    expected = normalise_fraction(ground_truth.strip())
    if expected is None:
        return _reject(
            f"ground_truth '{ground_truth}' could not be normalised to a fraction — safe-reject."
        )

    candidate = extract_answer(llm_output, "fraction")
    if candidate is None:
        # extract_answer returns None both for "nothing there" and for "could
        # not choose". Fraction-like text anywhere is treated as the latter.
        expanded = _expand_unicode_fractions(llm_output)
        if _FRAC_RE.search(expanded) or _MIXED_RE.search(expanded):
            return _reject(
                "Multiple fraction candidates found but could not unambiguously select one — safe-reject."
            )
        return CheckOutcome(
            CheckResult.EXTRACT_FAIL,
            None,
            None,
            "No fraction or integer candidate found in llm_output.",
        )

    if _DECIMAL_RE.search(candidate):
        return _reject(
            f"Extracted candidate '{candidate}' contains a decimal — not in pilot scope; safe-reject.",
            candidate,
        )

    actual = normalise_fraction(candidate)
    if actual is None:
        if re.search(r"/\s*0\b", candidate):
            reason = f"Extracted '{candidate}' has zero denominator — safe-reject."
        else:
            reason = f"Extracted '{candidate}' could not be normalised — safe-reject."
        return _reject(reason, candidate)

    canonical = _canonical_str(actual)
    expected_canonical = _canonical_str(expected)
    if actual == expected:
        verdict, relation = CheckResult.PASS, "equivalent"
    else:
        verdict, relation = CheckResult.FAIL, "not equivalent"
    detail = (
        f"Extracted '{candidate}' → {canonical}; "
        f"expected '{ground_truth}' → {expected_canonical}: "
        f"{relation}."
    )
    return CheckOutcome(result=verdict, extracted=candidate, canonical=canonical, detail=detail)
581
+
582
+
583
def _check_mc_choice(llm_output: str, ground_truth: str) -> CheckOutcome:
    """
    Compare the MC choice extracted from llm_output with ground_truth,
    ignoring letter case.

    Outcomes:
    - ground_truth is not a single A-D / a-d / 1-4 character → SAFE_REJECT
    - no choice extracted                                    → EXTRACT_FAIL
    - otherwise PASS when the normalised choices are equal, else FAIL
    """

    def _fold(choice: str) -> str:
        # Letters compare upper-cased; anything else is compared verbatim.
        return choice.upper() if choice.isalpha() else choice

    gt = ground_truth.strip()
    if re.fullmatch(r"[A-Da-d1-4]", gt) is None:
        return CheckOutcome(
            CheckResult.SAFE_REJECT,
            None,
            None,
            f"ground_truth '{gt}' is not a valid MC choice (A-D or 1-4) — safe-reject.",
        )

    candidate = extract_answer(llm_output, "mc4")
    if candidate is None:
        return CheckOutcome(
            CheckResult.EXTRACT_FAIL,
            None,
            None,
            "Could not extract an MC choice from llm_output.",
        )

    cand_norm = _fold(candidate)
    gt_norm = _fold(gt)
    if cand_norm == gt_norm:
        verdict, word = CheckResult.PASS, "match"
    else:
        verdict, word = CheckResult.FAIL, "mismatch"
    detail = (
        f"Extracted '{candidate}' (normalised '{cand_norm}'), "
        f"expected '{gt}' (normalised '{gt_norm}'): "
        f"{word}."
    )
    return CheckOutcome(result=verdict, extracted=candidate, canonical=cand_norm, detail=detail)
@@ -0,0 +1,65 @@
1
+ """Grounding / ZIM-reader module: resolve a curriculum node's grounding block to a passage.
2
+
3
+ Primary path (pilot scope, anchor-resolution only):
4
+ resolve_grounding(node_grounding, cfg) -> str
5
+ node_grounding: dict with keys source, anchor, passage_hint (from curriculum YAML)
6
+ cfg: dict from config/inference.yaml grounding: block
7
+ returns: inner passage text for {{grounding_passage}}, or "" on any failure
8
+
9
+ Degradation contract (SAFETY §1.5 / SPEC §15):
10
+ ZIM missing | anchor not found | empty passage → returns "", logs a warning, NEVER raises.
11
+ A grounding failure must never crash a tutoring turn.
12
+
13
+ Scope: anchor-resolution only (pilot). Title-prediction / BM25 / embeddings deferred to W7.5.
14
+ Deps: libzim (runtime, pinned). OpenZIM MCP (MIT) = reference only. No MCP server, no JSON-RPC.
15
+ Spec: docs/design/W7_grounding_reader.md; SPEC §15 (layer-1 RAG); SAFETY §1.5 (grounding-as-data).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+
22
+ from mentar.grounding.resolve import resolve_grounding_inner
23
+ from mentar.grounding.wrapper import wrap_passage
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ __all__ = ["resolve_grounding"]
28
+
29
+
30
def resolve_grounding(node_grounding: dict, cfg: dict) -> str:
    """Resolve a curriculum node's grounding block to a passage string.

    Degradation contract: this function never raises. A non-dict
    ``node_grounding`` or any exception raised while resolving or wrapping the
    passage is logged as a warning and answered with "".

    Args:
        node_grounding: The ``grounding:`` sub-dict of a curriculum concept
            node (keys ``source``, ``anchor``, ``passage_hint``).
        cfg: The ``grounding:`` section of ``config/inference.yaml``, as loaded
            by the caller.

    Returns:
        The output of ``wrap_passage`` applied to the resolved passage, or ""
        on any failure.
    """
    # Reject a missing/malformed grounding block up front; the error handler
    # below relies on node_grounding having .get().
    if not isinstance(node_grounding, dict):
        logger.warning(
            "resolve_grounding: node_grounding is not a dict (%s) — returning empty passage",
            type(node_grounding).__name__,
        )
        return ""

    try:
        # Both steps stay inside the try so a failure in either one degrades
        # to an empty passage.
        return wrap_passage(resolve_grounding_inner(node_grounding, cfg), cfg)
    except Exception:
        anchor = node_grounding.get("anchor", "<unknown>")
        logger.warning(
            "resolve_grounding: unexpected error for anchor=%r — returning empty passage",
            anchor,
            exc_info=True,
        )
        return ""