pdfhell 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {pdfhell-0.1.2 → pdfhell-0.1.3}/PKG-INFO +5 -4
  2. {pdfhell-0.1.2 → pdfhell-0.1.3}/README.md +1 -1
  3. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/__init__.py +1 -1
  4. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/scorer.py +29 -1
  5. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/PKG-INFO +5 -4
  6. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/SOURCES.txt +1 -0
  7. {pdfhell-0.1.2 → pdfhell-0.1.3}/pyproject.toml +4 -3
  8. pdfhell-0.1.3/tests/test_scorer_currency.py +43 -0
  9. {pdfhell-0.1.2 → pdfhell-0.1.3}/LICENSE +0 -0
  10. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/auditpack.py +0 -0
  11. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/case.py +0 -0
  12. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/cli.py +0 -0
  13. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/__init__.py +0 -0
  14. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/_common.py +0 -0
  15. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/footnote_override.py +0 -0
  16. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/hidden_ocr_mismatch.py +0 -0
  17. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/split_table_across_pages.py +0 -0
  18. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/junit.py +0 -0
  19. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/runner.py +0 -0
  20. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/suite.py +0 -0
  21. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/vision.py +0 -0
  22. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/dependency_links.txt +0 -0
  23. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/entry_points.txt +0 -0
  24. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/requires.txt +0 -0
  25. {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/top_level.txt +0 -0
  26. {pdfhell-0.1.2 → pdfhell-0.1.3}/setup.cfg +0 -0
  27. {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_auditpack.py +0 -0
  28. {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_cli.py +0 -0
  29. {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_generators.py +0 -0
  30. {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_junit.py +0 -0
  31. {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_scorer.py +0 -0
  32. {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_statistical.py +0 -0
@@ -1,13 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdfhell
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge.
5
5
  Author: Multivon
6
6
  License: Apache-2.0
7
- Project-URL: Homepage, https://pdfhell.multivon.ai
7
+ Project-URL: Homepage, https://multivon.ai/pdfhell
8
8
  Project-URL: Repository, https://github.com/multivon-ai/pdfhell
9
9
  Project-URL: Issues, https://github.com/multivon-ai/pdfhell/issues
10
- Project-URL: Leaderboard, https://pdfhell.multivon.ai/leaderboard
10
+ Project-URL: Leaderboard, https://multivon.ai/leaderboard
11
+ Project-URL: Documentation, https://docs.multivon.ai/pdfhell/quickstart
11
12
  Keywords: llm,evaluation,pdf,multimodal,benchmark,adversarial,document-ai,rag
12
13
  Classifier: License :: OSI Approved :: Apache Software License
13
14
  Classifier: Programming Language :: Python :: 3
@@ -208,7 +209,7 @@ Target full suite: 10 trap families, ~50 cases.
208
209
 
209
210
  For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
210
211
 
211
- Email `hello@multivon.ai` for early access, or see [multivon.ai/pricing](https://multivon.ai/pricing).
212
+ Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
212
213
 
213
214
  ## Installing
214
215
 
@@ -179,7 +179,7 @@ Target full suite: 10 trap families, ~50 cases.
179
179
 
180
180
  For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
181
181
 
182
- Email `hello@multivon.ai` for early access, or see [multivon.ai/pricing](https://multivon.ai/pricing).
182
+ Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
183
183
 
184
184
  ## Installing
185
185
 
@@ -16,7 +16,7 @@ layer; the runtime, scoring, and reporting come from multivon-eval.
16
16
  """
17
17
  from __future__ import annotations
18
18
 
19
- __version__ = "0.1.2"
19
+ __version__ = "0.1.3"
20
20
 
21
21
  from .case import HellCase
22
22
  from .generators import (
@@ -56,6 +56,11 @@ def wilson_ci(passes: int, n: int, *, z: float = 1.959963984540054) -> tuple[flo
56
56
 
57
57
  _WHITESPACE_RE = re.compile(r"\s+")
58
58
  _PUNCT_NORMALIZE_RE = re.compile(r"[.,;:]+\s*$")
59
+ # Currency markers — matched immediately before a digit so we don't
60
+ # strip stray $ in unrelated prose. Used by _contains_loose to give the
61
+ # match a second pass when the model omitted the currency prefix the
62
+ # answer key happened to include.
63
+ _LEADING_CURRENCY_RE = re.compile(r"(?<![A-Za-z0-9])[$€£¥₹]\s*(?=\d)")
59
64
 
60
65
 
61
66
  def _normalize(s: str) -> str:
@@ -68,8 +73,31 @@ def _normalize(s: str) -> str:
68
73
  return s
69
74
 
70
75
 
76
+ def _strip_currency(s: str) -> str:
77
+ """Drop a leading currency symbol that sits right before a digit. So
78
+ '$780,803.18' → '780,803.18' but 'invoice INV-$X-1' is left alone."""
79
+ return _LEADING_CURRENCY_RE.sub("", s)
80
+
81
+
71
82
  def _contains_loose(haystack: str, needle: str) -> bool:
72
- return _normalize(needle) in _normalize(haystack)
83
+ """Tolerant contains-match used as the headline correctness signal.
84
+
85
+ First tries the straight normalised contains. If that fails AND the
86
+ needle starts with a currency symbol, retries with both sides stripped
87
+ of the leading currency prefix — so an answer key of '$780,803.18'
88
+ still matches a model output of '780,803.18'. This kept popping up
89
+ on the split_table_across_pages trap, where models tend to omit the
90
+ '$' even when the table header includes it.
91
+ """
92
+ nh = _normalize(haystack)
93
+ nn = _normalize(needle)
94
+ if nn in nh:
95
+ return True
96
+ nh_stripped = _strip_currency(nh)
97
+ nn_stripped = _strip_currency(nn)
98
+ if nn_stripped != nn and nn_stripped in nh_stripped:
99
+ return True
100
+ return False
73
101
 
74
102
 
75
103
  @dataclass(slots=True)
@@ -1,13 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pdfhell
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge.
5
5
  Author: Multivon
6
6
  License: Apache-2.0
7
- Project-URL: Homepage, https://pdfhell.multivon.ai
7
+ Project-URL: Homepage, https://multivon.ai/pdfhell
8
8
  Project-URL: Repository, https://github.com/multivon-ai/pdfhell
9
9
  Project-URL: Issues, https://github.com/multivon-ai/pdfhell/issues
10
- Project-URL: Leaderboard, https://pdfhell.multivon.ai/leaderboard
10
+ Project-URL: Leaderboard, https://multivon.ai/leaderboard
11
+ Project-URL: Documentation, https://docs.multivon.ai/pdfhell/quickstart
11
12
  Keywords: llm,evaluation,pdf,multimodal,benchmark,adversarial,document-ai,rag
12
13
  Classifier: License :: OSI Approved :: Apache Software License
13
14
  Classifier: Programming Language :: Python :: 3
@@ -208,7 +209,7 @@ Target full suite: 10 trap families, ~50 cases.
208
209
 
209
210
  For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
210
211
 
211
- Email `hello@multivon.ai` for early access, or see [multivon.ai/pricing](https://multivon.ai/pricing).
212
+ Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
212
213
 
213
214
  ## Installing
214
215
 
@@ -26,4 +26,5 @@ tests/test_cli.py
26
26
  tests/test_generators.py
27
27
  tests/test_junit.py
28
28
  tests/test_scorer.py
29
+ tests/test_scorer_currency.py
29
30
  tests/test_statistical.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "pdfhell"
7
- version = "0.1.2"
7
+ version = "0.1.3"
8
8
  description = "PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -44,10 +44,11 @@ all = []
44
44
  pdfhell = "pdfhell.cli:main"
45
45
 
46
46
  [project.urls]
47
- Homepage = "https://pdfhell.multivon.ai"
47
+ Homepage = "https://multivon.ai/pdfhell"
48
48
  Repository = "https://github.com/multivon-ai/pdfhell"
49
49
  Issues = "https://github.com/multivon-ai/pdfhell/issues"
50
- Leaderboard = "https://pdfhell.multivon.ai/leaderboard"
50
+ Leaderboard = "https://multivon.ai/leaderboard"
51
+ Documentation = "https://docs.multivon.ai/pdfhell/quickstart"
51
52
 
52
53
  [tool.setuptools.packages.find]
53
54
  include = ["pdfhell*"]
@@ -0,0 +1,43 @@
1
+ """Regression test: currency-prefix tolerance in score_case.
2
+
3
+ Caught from user audit — GPT-4o output '780,803.18' for expected
4
+ '$780,803.18' was previously marked incorrect. With the
5
+ _strip_currency fallback in _contains_loose, both the prefixed and
6
+ unprefixed forms now match (in either direction).
7
+ """
8
+ from pdfhell.case import HellCase
9
+ from pdfhell.scorer import score_case
10
+
11
+
12
+ def _case(expected: str, forbidden=()) -> HellCase:
13
+ return HellCase(
14
+ id="t",
15
+ trap_family="hidden_ocr_mismatch",
16
+ seed=1,
17
+ question="q?",
18
+ expected_answer=expected,
19
+ forbidden_answers=list(forbidden),
20
+ )
21
+
22
+
23
+ class TestCurrencyTolerance:
24
+ def test_unprefixed_output_matches_dollar_expected(self):
25
+ s = score_case(_case("$780,803.18"), "The total is 780,803.18.")
26
+ assert s.correct
27
+ assert s.matched_expected
28
+
29
+ def test_dollar_output_matches_dollar_expected(self):
30
+ s = score_case(_case("$780,803.18"), "Total: $780,803.18")
31
+ assert s.correct
32
+
33
+ def test_unprefixed_expected_matches_dollar_output(self):
34
+ s = score_case(_case("780,803.18"), "The amount is $780,803.18.")
35
+ assert s.correct
36
+
37
+ def test_euro_prefix_tolerated(self):
38
+ s = score_case(_case("€1,234.56"), "Refund: 1,234.56")
39
+ assert s.correct
40
+
41
+ def test_does_not_match_wrong_number(self):
42
+ s = score_case(_case("$780,803.18"), "Total: $780,000.")
43
+ assert not s.correct
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes