PyPI - pdfhell - Versions diffs - 0.1.2__tar.gz → 0.1.3__tar.gz - Mend

pdfhell 0.1.2.tar.gz → 0.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{pdfhell-0.1.2 → pdfhell-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,13 +1,14 @@
 Metadata-Version: 2.4
 Name: pdfhell
-Version: 0.1.2
+Version: 0.1.3
 Summary: PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge.
 Author: Multivon
 License: Apache-2.0
-Project-URL: Homepage, https://pdfhell.multivon.ai
+Project-URL: Homepage, https://multivon.ai/pdfhell
 Project-URL: Repository, https://github.com/multivon-ai/pdfhell
 Project-URL: Issues, https://github.com/multivon-ai/pdfhell/issues
-Project-URL: Leaderboard, https://pdfhell.multivon.ai/leaderboard
+Project-URL: Leaderboard, https://multivon.ai/leaderboard
+Project-URL: Documentation, https://docs.multivon.ai/pdfhell/quickstart
 Keywords: llm,evaluation,pdf,multimodal,benchmark,adversarial,document-ai,rag
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
@@ -208,7 +209,7 @@ Target full suite: 10 trap families, ~50 cases.
 For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
-Email `hello@multivon.ai` for early access, or see [multivon.ai/pricing](https://multivon.ai/pricing).
+Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
 ## Installing

{pdfhell-0.1.2 → pdfhell-0.1.3}/README.md RENAMED Viewed

@@ -179,7 +179,7 @@ Target full suite: 10 trap families, ~50 cases.
 For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
-Email `hello@multivon.ai` for early access, or see [multivon.ai/pricing](https://multivon.ai/pricing).
+Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
 ## Installing

{pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/__init__.py RENAMED Viewed

@@ -16,7 +16,7 @@ layer; the runtime, scoring, and reporting come from multivon-eval.
 """
 from __future__ import annotations
-__version__ = "0.1.2"
+__version__ = "0.1.3"
 from .case import HellCase
 from .generators import (

{pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/scorer.py RENAMED Viewed

@@ -56,6 +56,11 @@ def wilson_ci(passes: int, n: int, *, z: float = 1.959963984540054) -> tuple[flo
 _WHITESPACE_RE = re.compile(r"\s+")
 _PUNCT_NORMALIZE_RE = re.compile(r"[.,;:]+\s*$")
+# Currency markers — matched immediately before a digit so we don't
+# strip stray $ in unrelated prose. Used by _contains_loose to give the
+# match a second pass when the model omitted the currency prefix the
+# answer key happened to include.
+_LEADING_CURRENCY_RE = re.compile(r"(?<![A-Za-z0-9])[$€£¥₹]\s*(?=\d)")
 def _normalize(s: str) -> str:
@@ -68,8 +73,31 @@ def _normalize(s: str) -> str:
     return s
+def _strip_currency(s: str) -> str:
+    """Drop a leading currency symbol that sits right before a digit. So
+    '$780,803.18' → '780,803.18' but 'invoice INV-$X-1' is left alone."""
+    return _LEADING_CURRENCY_RE.sub("", s)
 def _contains_loose(haystack: str, needle: str) -> bool:
-    return _normalize(needle) in _normalize(haystack)
+    """Tolerant contains-match used as the headline correctness signal.
+    First tries the straight normalised contains. If that fails AND the
+    needle starts with a currency symbol, retries with both sides stripped
+    of the leading currency prefix — so an answer key of '$780,803.18'
+    still matches a model output of '780,803.18'. This kept popping up
+    on the split_table_across_pages trap, where models tend to omit the
+    '$' even when the table header includes it.
+    """
+    nh = _normalize(haystack)
+    nn = _normalize(needle)
+    if nn in nh:
+        return True
+    nh_stripped = _strip_currency(nh)
+    nn_stripped = _strip_currency(nn)
+    if nn_stripped != nn and nn_stripped in nh_stripped:
+        return True
+    return False
 @dataclass(slots=True)

{pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/PKG-INFO RENAMED Viewed

@@ -1,13 +1,14 @@
 Metadata-Version: 2.4
 Name: pdfhell
-Version: 0.1.2
+Version: 0.1.3
 Summary: PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge.
 Author: Multivon
 License: Apache-2.0
-Project-URL: Homepage, https://pdfhell.multivon.ai
+Project-URL: Homepage, https://multivon.ai/pdfhell
 Project-URL: Repository, https://github.com/multivon-ai/pdfhell
 Project-URL: Issues, https://github.com/multivon-ai/pdfhell/issues
-Project-URL: Leaderboard, https://pdfhell.multivon.ai/leaderboard
+Project-URL: Leaderboard, https://multivon.ai/leaderboard
+Project-URL: Documentation, https://docs.multivon.ai/pdfhell/quickstart
 Keywords: llm,evaluation,pdf,multimodal,benchmark,adversarial,document-ai,rag
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
@@ -208,7 +209,7 @@ Target full suite: 10 trap families, ~50 cases.
 For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
-Email `hello@multivon.ai` for early access, or see [multivon.ai/pricing](https://multivon.ai/pricing).
+Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
 ## Installing

{pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/SOURCES.txt RENAMED Viewed

@@ -26,4 +26,5 @@ tests/test_cli.py
 tests/test_generators.py
 tests/test_junit.py
 tests/test_scorer.py
+tests/test_scorer_currency.py
 tests/test_statistical.py

{pdfhell-0.1.2 → pdfhell-0.1.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pdfhell"
-version = "0.1.2"
+version = "0.1.3"
 description = "PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -44,10 +44,11 @@ all = []
 pdfhell = "pdfhell.cli:main"
 [project.urls]
-Homepage = "https://pdfhell.multivon.ai"
+Homepage = "https://multivon.ai/pdfhell"
 Repository = "https://github.com/multivon-ai/pdfhell"
 Issues = "https://github.com/multivon-ai/pdfhell/issues"
-Leaderboard = "https://pdfhell.multivon.ai/leaderboard"
+Leaderboard = "https://multivon.ai/leaderboard"
+Documentation = "https://docs.multivon.ai/pdfhell/quickstart"
 [tool.setuptools.packages.find]
 include = ["pdfhell*"]

pdfhell-0.1.3/tests/test_scorer_currency.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""Regression test: currency-prefix tolerance in score_case.
+Caught from user audit — GPT-4o output '780,803.18' for expected
+'$780,803.18' was previously marked incorrect. With the
+_strip_currency fallback in _contains_loose, both the prefixed and
+unprefixed forms now match (in either direction).
+"""
+from pdfhell.case import HellCase
+from pdfhell.scorer import score_case
+def _case(expected: str, forbidden=()) -> HellCase:
+    return HellCase(
+        id="t",
+        trap_family="hidden_ocr_mismatch",
+        seed=1,
+        question="q?",
+        expected_answer=expected,
+        forbidden_answers=list(forbidden),
+    )
+class TestCurrencyTolerance:
+    def test_unprefixed_output_matches_dollar_expected(self):
+        s = score_case(_case("$780,803.18"), "The total is 780,803.18.")
+        assert s.correct
+        assert s.matched_expected
+    def test_dollar_output_matches_dollar_expected(self):
+        s = score_case(_case("$780,803.18"), "Total: $780,803.18")
+        assert s.correct
+    def test_unprefixed_expected_matches_dollar_output(self):
+        s = score_case(_case("780,803.18"), "The amount is $780,803.18.")
+        assert s.correct
+    def test_euro_prefix_tolerated(self):
+        s = score_case(_case("€1,234.56"), "Refund: 1,234.56")
+        assert s.correct
+    def test_does_not_match_wrong_number(self):
+        s = score_case(_case("$780,803.18"), "Total: $780,000.")
+        assert not s.correct