pdfhell 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pdfhell-0.1.2 → pdfhell-0.1.3}/PKG-INFO +5 -4
- {pdfhell-0.1.2 → pdfhell-0.1.3}/README.md +1 -1
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/__init__.py +1 -1
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/scorer.py +29 -1
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/PKG-INFO +5 -4
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/SOURCES.txt +1 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pyproject.toml +4 -3
- pdfhell-0.1.3/tests/test_scorer_currency.py +43 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/LICENSE +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/auditpack.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/case.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/cli.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/__init__.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/_common.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/footnote_override.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/hidden_ocr_mismatch.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/generators/split_table_across_pages.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/junit.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/runner.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/suite.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell/vision.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/dependency_links.txt +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/entry_points.txt +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/requires.txt +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/pdfhell.egg-info/top_level.txt +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/setup.cfg +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_auditpack.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_cli.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_generators.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_junit.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_scorer.py +0 -0
- {pdfhell-0.1.2 → pdfhell-0.1.3}/tests/test_statistical.py +0 -0
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdfhell
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge.
|
|
5
5
|
Author: Multivon
|
|
6
6
|
License: Apache-2.0
|
|
7
|
-
Project-URL: Homepage, https://
|
|
7
|
+
Project-URL: Homepage, https://multivon.ai/pdfhell
|
|
8
8
|
Project-URL: Repository, https://github.com/multivon-ai/pdfhell
|
|
9
9
|
Project-URL: Issues, https://github.com/multivon-ai/pdfhell/issues
|
|
10
|
-
Project-URL: Leaderboard, https://
|
|
10
|
+
Project-URL: Leaderboard, https://multivon.ai/leaderboard
|
|
11
|
+
Project-URL: Documentation, https://docs.multivon.ai/pdfhell/quickstart
|
|
11
12
|
Keywords: llm,evaluation,pdf,multimodal,benchmark,adversarial,document-ai,rag
|
|
12
13
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
14
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -208,7 +209,7 @@ Target full suite: 10 trap families, ~50 cases.
|
|
|
208
209
|
|
|
209
210
|
For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
|
|
210
211
|
|
|
211
|
-
Email `hello@multivon.ai` for early access, or see [multivon.ai/
|
|
212
|
+
Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
|
|
212
213
|
|
|
213
214
|
## Installing
|
|
214
215
|
|
|
@@ -179,7 +179,7 @@ Target full suite: 10 trap families, ~50 cases.
|
|
|
179
179
|
|
|
180
180
|
For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
|
|
181
181
|
|
|
182
|
-
Email `hello@multivon.ai` for early access, or see [multivon.ai/
|
|
182
|
+
Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
|
|
183
183
|
|
|
184
184
|
## Installing
|
|
185
185
|
|
|
@@ -56,6 +56,11 @@ def wilson_ci(passes: int, n: int, *, z: float = 1.959963984540054) -> tuple[flo
|
|
|
56
56
|
|
|
57
57
|
_WHITESPACE_RE = re.compile(r"\s+")
|
|
58
58
|
_PUNCT_NORMALIZE_RE = re.compile(r"[.,;:]+\s*$")
|
|
59
|
+
# Currency markers — matched immediately before a digit so we don't
|
|
60
|
+
# strip stray $ in unrelated prose. Used by _contains_loose to give the
|
|
61
|
+
# match a second pass when the model omitted the currency prefix the
|
|
62
|
+
# answer key happened to include.
|
|
63
|
+
_LEADING_CURRENCY_RE = re.compile(r"(?<![A-Za-z0-9])[$€£¥₹]\s*(?=\d)")
|
|
59
64
|
|
|
60
65
|
|
|
61
66
|
def _normalize(s: str) -> str:
|
|
@@ -68,8 +73,31 @@ def _normalize(s: str) -> str:
|
|
|
68
73
|
return s
|
|
69
74
|
|
|
70
75
|
|
|
76
|
+
def _strip_currency(s: str) -> str:
    """Return *s* with currency markers that directly precede a digit removed.

    Every match of ``_LEADING_CURRENCY_RE`` is deleted, not only one at the
    start of the string. So '$780,803.18' becomes '780,803.18', while
    'invoice INV-$X-1' comes back unchanged because the '$' there is not
    followed by a digit.
    """
    return re.sub(_LEADING_CURRENCY_RE, "", s)
|
|
80
|
+
|
|
81
|
+
|
|
71
82
|
def _contains_loose(haystack: str, needle: str) -> bool:
    """Report whether *needle* occurs in *haystack* under tolerant matching.

    This is the headline correctness signal. Two passes are made:

    1. Both strings go through ``_normalize`` and a plain substring test
       is applied.
    2. If that fails, both normalised strings go through
       ``_strip_currency`` and the substring test is repeated. This pass
       only counts when stripping actually changed the needle, i.e. the
       answer key carried a currency marker.

    The second pass lets an answer key of '$780,803.18' match a model
    output of '780,803.18'. It was added for the split_table_across_pages
    trap, where models tend to omit the '$' even when the table header
    includes it.
    """
    hay = _normalize(haystack)
    target = _normalize(needle)
    if target in hay:
        return True

    bare_hay = _strip_currency(hay)
    bare_target = _strip_currency(target)
    # A needle that had no currency marker was already tested verbatim
    # above, so the retry is only meaningful when stripping altered it.
    return bare_target != target and bare_target in bare_hay
|
|
73
101
|
|
|
74
102
|
|
|
75
103
|
@dataclass(slots=True)
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdfhell
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge.
|
|
5
5
|
Author: Multivon
|
|
6
6
|
License: Apache-2.0
|
|
7
|
-
Project-URL: Homepage, https://
|
|
7
|
+
Project-URL: Homepage, https://multivon.ai/pdfhell
|
|
8
8
|
Project-URL: Repository, https://github.com/multivon-ai/pdfhell
|
|
9
9
|
Project-URL: Issues, https://github.com/multivon-ai/pdfhell/issues
|
|
10
|
-
Project-URL: Leaderboard, https://
|
|
10
|
+
Project-URL: Leaderboard, https://multivon.ai/leaderboard
|
|
11
|
+
Project-URL: Documentation, https://docs.multivon.ai/pdfhell/quickstart
|
|
11
12
|
Keywords: llm,evaluation,pdf,multimodal,benchmark,adversarial,document-ai,rag
|
|
12
13
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
14
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -208,7 +209,7 @@ Target full suite: 10 trap families, ~50 cases.
|
|
|
208
209
|
|
|
209
210
|
For document-AI teams who need adversarial test cases tailored to *their* templates (claims forms, MSAs, medical records, KYC docs), there's a hosted generator that takes your templates and produces adversarial variants with code-based ground truth — same methodology, your data shape.
|
|
210
211
|
|
|
211
|
-
Email `hello@multivon.ai` for early access, or see [multivon.ai/
|
|
212
|
+
Email `hello@multivon.ai` for early access, or see [multivon.ai/commercial](https://multivon.ai/commercial).
|
|
212
213
|
|
|
213
214
|
## Installing
|
|
214
215
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pdfhell"
|
|
7
|
-
version = "0.1.2"
|
|
7
|
+
version = "0.1.3"
|
|
8
8
|
description = "PDF Hell — adversarial PDFs that break AI document readers. Procedural ground truth, not LLM-as-judge."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -44,10 +44,11 @@ all = []
|
|
|
44
44
|
pdfhell = "pdfhell.cli:main"
|
|
45
45
|
|
|
46
46
|
[project.urls]
|
|
47
|
-
Homepage = "https://
|
|
47
|
+
Homepage = "https://multivon.ai/pdfhell"
|
|
48
48
|
Repository = "https://github.com/multivon-ai/pdfhell"
|
|
49
49
|
Issues = "https://github.com/multivon-ai/pdfhell/issues"
|
|
50
|
-
Leaderboard = "https://
|
|
50
|
+
Leaderboard = "https://multivon.ai/leaderboard"
|
|
51
|
+
Documentation = "https://docs.multivon.ai/pdfhell/quickstart"
|
|
51
52
|
|
|
52
53
|
[tool.setuptools.packages.find]
|
|
53
54
|
include = ["pdfhell*"]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Regression test: currency-prefix tolerance in score_case.
|
|
2
|
+
|
|
3
|
+
Caught from user audit — GPT-4o output '780,803.18' for expected
|
|
4
|
+
'$780,803.18' was previously marked incorrect. With the
|
|
5
|
+
_strip_currency fallback in _contains_loose, both the prefixed and
|
|
6
|
+
unprefixed forms now match (in either direction).
|
|
7
|
+
"""
|
|
8
|
+
from pdfhell.case import HellCase
|
|
9
|
+
from pdfhell.scorer import score_case
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _case(expected: str, forbidden=()) -> HellCase:
    """Build a throwaway HellCase for scoring tests.

    Only the expected answer and the forbidden answers vary between
    tests; the remaining fields are fixed placeholder values.
    """
    placeholders = {
        "id": "t",
        "trap_family": "hidden_ocr_mismatch",
        "seed": 1,
        "question": "q?",
    }
    return HellCase(
        **placeholders,
        expected_answer=expected,
        forbidden_answers=list(forbidden),
    )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestCurrencyTolerance:
    """Checks that score_case tolerates a present or absent currency prefix."""

    def test_unprefixed_output_matches_dollar_expected(self):
        """Answer key has '$', model output omits it."""
        result = score_case(_case("$780,803.18"), "The total is 780,803.18.")
        assert result.correct
        assert result.matched_expected

    def test_dollar_output_matches_dollar_expected(self):
        """Answer key and model output both carry '$'."""
        result = score_case(_case("$780,803.18"), "Total: $780,803.18")
        assert result.correct

    def test_unprefixed_expected_matches_dollar_output(self):
        """Answer key has no prefix, model output adds '$'."""
        result = score_case(_case("780,803.18"), "The amount is $780,803.18.")
        assert result.correct

    def test_euro_prefix_tolerated(self):
        """The tolerance also applies to the euro sign."""
        result = score_case(_case("€1,234.56"), "Refund: 1,234.56")
        assert result.correct

    def test_does_not_match_wrong_number(self):
        """A different amount must still be scored incorrect."""
        result = score_case(_case("$780,803.18"), "Total: $780,000.")
        assert not result.correct
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|