pdfhell 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ """Shared utilities for trap-family generators.
2
+
3
+ Each helper here is a small, well-typed primitive that the per-trap
4
+ generators compose. The aim is that adding a new trap family means
5
+ writing one new file under ``pdfhell/generators/`` and registering it in
6
+ ``__init__.py`` — without copy-pasting reportlab boilerplate.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import io
11
+ import random
12
+ from dataclasses import dataclass
13
+ from typing import Iterable, Sequence
14
+
15
+ from reportlab.lib.pagesizes import LETTER
16
+ from reportlab.lib.styles import getSampleStyleSheet
17
+ from reportlab.pdfgen import canvas
18
+
19
+
20
# US Letter, portrait orientation. Every trap family draws on the same page
# size so that visual scoring stays comparable across the whole suite.
PAGE_WIDTH = LETTER[0]
PAGE_HEIGHT = LETTER[1]
23
+
24
+
25
+ @dataclass(slots=True)
26
+ class FontSpec:
27
+ """Font selection + size."""
28
+
29
+ family: str = "Helvetica"
30
+ size: float = 11.0
31
+ bold: bool = False
32
+
33
+ @property
34
+ def name(self) -> str:
35
+ return f"{self.family}-Bold" if self.bold else self.family
36
+
37
+
38
def rng_for(seed: int) -> random.Random:
    """Return a private, reproducible RNG seeded with ``seed``.

    Generators must draw from this object instead of the module-level
    ``random`` functions, so that identical seeds yield byte-identical PDFs
    even when many generators run in the same process.
    """
    generator = random.Random(seed)
    return generator
43
+
44
+
45
def draw_paragraph(
    c: "canvas.Canvas",
    text: str,
    x: float,
    y: float,
    *,
    width: float = PAGE_WIDTH - 144,
    font: FontSpec = FontSpec(),
    leading: float | None = None,
) -> float:
    """Draw wrapped text with simple word-wrap. Returns the next free y.

    We hand-roll wrapping rather than using Platypus flowables because
    every trap family wants pixel-precise control over where text lands
    (especially for tiny footnotes and split-table headers). Platypus
    would fight us. canvas.drawString gives us the control.

    ``text`` is split on whitespace and words are packed greedily into
    lines no wider than ``width``. The first line's baseline is at ``y``
    and each following line is ``leading`` lower; ``leading`` defaults to
    1.25x the font size. A single word that is wider than ``width`` is
    drawn on a line of its own (it overflows rather than being split).

    The default ``font`` instance is shared between calls; it is only read
    here, never mutated.
    """
    leading = leading or (font.size * 1.25)
    c.setFont(font.name, font.size)
    words = text.split()
    current: list[str] = []

    def line_width(parts: list[str]) -> float:
        return c.stringWidth(" ".join(parts), font.name, font.size)

    cursor_y = y
    for word in words:
        # Only flush when there is something to flush. Without the
        # ``current`` guard an overlong word would be popped off an empty
        # line, emitting a blank line before it.
        if current and line_width(current + [word]) > width:
            c.drawString(x, cursor_y, " ".join(current))
            cursor_y -= leading
            current = [word]
        else:
            current.append(word)
    if current:
        c.drawString(x, cursor_y, " ".join(current))
        cursor_y -= leading
    return cursor_y
82
+
83
+
84
def draw_invisible_text(c: "canvas.Canvas", text: str, x: float, y: float, *, size: float = 11.0) -> None:
    """Write ``text`` into the page's text stream without painting it.

    This is the core trick behind :mod:`hidden_ocr_mismatch`. The string
    is emitted in Helvetica at ``size`` points, starting at ``(x, y)``,
    using PDF text render mode 3 (neither stroke nor fill). A human
    reader, or a vision-only model looking at the rendered pixels, sees
    nothing; a text-extraction pipeline that reads the underlying text
    stream sees the string. The two answers diverge.

    That mirrors how scanned-then-re-OCR'd PDFs go wrong in the wild: the
    OCR layer drifts from the rendered page. Because we construct the
    mismatch ourselves, both answers are known and either can be scored.
    """
    hidden = c.beginText(x, y)
    hidden.setFont("Helvetica", size)
    # Mode 3 places the glyphs in the content stream but never rasterises
    # them; ordinary visible text is mode 0.
    hidden.setTextRenderMode(3)
    hidden.textOut(text)
    c.drawText(hidden)
108
+
109
+
110
def draw_table(
    c: "canvas.Canvas",
    rows: Sequence[Sequence[str]],
    x: float,
    y: float,
    *,
    col_widths: Sequence[float] | None = None,
    row_height: float = 24,
    font: FontSpec = FontSpec(size=10),
    header_bold: bool = True,
) -> float:
    """Draw a borderless table with fixed-width columns. Returns the next free y.

    Each generator that needs tables uses this to avoid reportlab's
    Platypus tables (which paginate awkwardly when we explicitly *want*
    to split a row across a page boundary).

    The first row is treated as the header and is drawn in the bold face
    of ``font`` when ``header_bold`` is true. Cells are left-aligned at the
    start of their column; the first row's baseline is at ``y`` and each
    later row is ``row_height`` lower. When ``col_widths`` is omitted the
    columns share ``PAGE_WIDTH - 144`` equally. Cells beyond
    ``len(col_widths)`` are not drawn.
    """
    if not rows:
        return y
    if col_widths is None:
        col_count = max(len(r) for r in rows)
        # Rows that are all empty have no columns to size; avoid dividing
        # by zero and let the loop below just advance y.
        col_widths = [(PAGE_WIDTH - 144) / col_count] * col_count if col_count else []
    # Derive the header face from the caller's font so a non-Helvetica
    # table does not get a Helvetica header.
    header_font = FontSpec(family=font.family, size=font.size, bold=True).name
    for i, row in enumerate(rows):
        cur_x = x
        is_header = i == 0
        c.setFont(
            header_font if (is_header and header_bold) else font.name,
            font.size,
        )
        for cell, w in zip(row, col_widths):
            c.drawString(cur_x, y, cell)
            cur_x += w
        y -= row_height
    return y
144
+
145
+
146
def page_break(c: "canvas.Canvas") -> None:
    """Close the current page of ``c`` via ``showPage``."""
    c.showPage()
148
+
149
+
150
def canvas_to_bytes(make: "Callable[[canvas.Canvas], None]") -> bytes:  # noqa: F821
    """Render ``make`` onto a fresh in-memory canvas and return the PDF bytes.

    Every generator ends with ``return canvas_to_bytes(draw)`` so the
    BytesIO + canvas wiring lives in exactly one place.

    The canvas is Letter-sized and created with ``invariant=True``. That
    flag is non-negotiable: it makes reportlab zero out the creation
    timestamp and use a deterministic document ID, so the same generator
    and seed always produce byte-identical PDFs. Without it the
    reproducibility claim does not hold and the published leaderboard
    cannot be re-derived.
    """
    sink = io.BytesIO()
    pdf = canvas.Canvas(sink, pagesize=LETTER, invariant=True)
    make(pdf)
    pdf.save()
    return sink.getvalue()
167
+
168
+
169
+ def fmt_money(amount: int | float, currency: str = "$") -> str:
170
+ """Render money in a stable format. Generators set the *expected
171
+ answer* using this exact function so the answer string and the
172
+ rendered PDF text agree to the byte."""
173
+ return f"{currency}{amount:,.2f}"
174
+
175
+
176
def pick_from(rng: random.Random, choices: Iterable):
    """Pick one element from any iterable using ``rng``.

    ``random.Random.choice`` requires a sequence; this lets generators
    pass generators/sets without converting upfront.

    Sets and frozensets have no stable iteration order (for strings it
    changes with hash randomisation between processes), so their elements
    are sorted before choosing. Otherwise the same seed could pick a
    different element on a different run. Elements that cannot be compared
    with each other are ordered by ``repr`` instead. Ordered iterables are
    used as given.

    Raises ``IndexError`` (from ``rng.choice``) when ``choices`` is empty.
    """
    items = list(choices)
    if isinstance(choices, (set, frozenset)):
        try:
            items.sort()
        except TypeError:
            # Mixed, mutually unorderable element types.
            items.sort(key=repr)
    return rng.choice(items)
@@ -0,0 +1,212 @@
1
+ """Trap family: footnote override.
2
+
3
+ The body of the document states a contractual position confidently — for
4
+ example, *"Customer's liability shall be capped at twelve (12) months of
5
+ fees paid."* — but a 6pt footnote near the bottom of the page overrides
6
+ it: *"Notwithstanding the foregoing, liability for breaches of Sections
7
+ 4.2 (Confidentiality) and 7.1 (Data Protection) shall be uncapped."*
8
+
9
+ A model that summarises the document by reading only the body will state
10
+ the cap is 12 months. The correct answer is "12 months, except for
11
+ breaches of Sections 4.2 and 7.1, which are uncapped." Missing the
12
+ footnote is the single most common failure mode for legal/contract AI
13
+ agents.
14
+
15
+ We procedurally fabricate this so the *exact* set of carve-outs is in
16
+ the answer key.
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import random
21
+
22
+ from reportlab.pdfgen import canvas
23
+
24
+ from ..case import HellCase
25
+ from . import _common as C
26
+
27
+
28
+ _CONTRACT_TYPES = [
29
+ "Master Services Agreement",
30
+ "Software License Agreement",
31
+ "Data Processing Addendum",
32
+ "Subscription Order Form",
33
+ "Statement of Work #4",
34
+ ]
35
+
36
+ _BODY_POSITIONS = [
37
+ # (label, body_text_template, footnote_template, expected_answer_template)
38
+ (
39
+ "liability_cap",
40
+ "The aggregate liability of either party for any claims arising out of or relating to "
41
+ "this Agreement shall not exceed an amount equal to {months} months of fees paid by Customer "
42
+ "during the twelve (12) month period immediately preceding the event giving rise to such liability.",
43
+ "Notwithstanding Section {section_num}, liability arising from "
44
+ "Sections {carveout_sections} shall be uncapped.",
45
+ "Liability is capped at {months} months of fees paid, EXCEPT that liability arising from "
46
+ "Sections {carveout_sections} is uncapped.",
47
+ "Liability is capped at {months} months of fees paid.", # the wrong/forbidden answer
48
+ ),
49
+ (
50
+ "termination_notice",
51
+ "Either party may terminate this Agreement for convenience upon "
52
+ "{notice_days} days written notice to the other party.",
53
+ "However, termination for convenience is not permitted during the "
54
+ "initial twelve (12) month term.",
55
+ "Either party may terminate for convenience on {notice_days} days notice, "
56
+ "BUT NOT during the initial 12-month term.",
57
+ "Either party may terminate for convenience on {notice_days} days notice.",
58
+ ),
59
+ (
60
+ "data_residency",
61
+ "Customer Data shall be stored and processed in the {primary_region} region.",
62
+ "Provided that, with Customer's written consent, Customer Data may also "
63
+ "be processed in {fallback_region} for purposes of disaster recovery.",
64
+ "Customer Data is stored in {primary_region}, with disaster-recovery "
65
+ "processing permitted in {fallback_region} ONLY with written consent.",
66
+ "Customer Data is stored in {primary_region}.",
67
+ ),
68
+ ]
69
+
70
+
71
+ def _random_sections(rng: random.Random) -> tuple[str, str]:
72
+ """Return ``(section_num, carveout_sections)`` for the carve-out clause."""
73
+ sec = f"{rng.randint(8, 14)}.{rng.randint(1, 5)}"
74
+ carve = ", ".join(
75
+ sorted(
76
+ {
77
+ f"{rng.randint(2, 7)}.{rng.randint(1, 4)}"
78
+ for _ in range(rng.randint(2, 3))
79
+ }
80
+ )
81
+ )
82
+ return sec, carve
83
+
84
+
85
def generate(seed: int) -> tuple[bytes, HellCase]:
    """Build one footnote-override case.

    Picks a contract type and one clause flavour from ``_BODY_POSITIONS``,
    binds its parameters, renders a one-page PDF whose body states the
    clause and whose 6pt footnote overrides it, and returns
    ``(pdf_bytes, case)``. All randomness comes from an RNG seeded with
    ``seed``.
    """
    rng = C.rng_for(seed)
    contract = rng.choice(_CONTRACT_TYPES)
    label, body_tpl, footnote_tpl, expected_tpl, wrong_tpl = rng.choice(_BODY_POSITIONS)

    # Bind the per-template parameters.
    # expected_tokens are the substrings any acceptable prose answer must
    # contain — facts, not phrasing. The scorer requires ALL tokens.
    if label == "liability_cap":
        months = rng.choice([3, 6, 12, 24])
        section_num, carveout_sections = _random_sections(rng)
        ctx = {"months": months, "section_num": section_num, "carveout_sections": carveout_sections}
        question = (
            f"Read the attached {contract}. What is the LIABILITY CAP "
            "and what carve-outs (if any) apply? Be precise about which Sections are uncapped."
        )
        # Acceptable: any prose that includes (1) the cap value, (2) the
        # carve-out section refs, (3) the word "uncapped" or equivalent.
        expected_tokens = [
            f"{months} month",
            "uncapped",
            *carveout_sections.split(", "),
        ]
    elif label == "termination_notice":
        notice_days = rng.choice([30, 60, 90])
        ctx = {"notice_days": notice_days}
        question = (
            f"Read the attached {contract}. Under what conditions can either party "
            "terminate this Agreement for convenience? Be specific about any restrictions."
        )
        # NOTE(review): the expected-answer template spells this
        # "12-month" (hyphenated), so unless the scorer normalises
        # punctuation the reference answer does not contain this token
        # itself — confirm against the scorer.
        expected_tokens = [
            f"{notice_days} day",
            "12 month",  # the initial-term restriction
        ]
    else:  # data_residency
        # The two region lists are disjoint, so the fallback can never
        # collide with the primary and no re-draw is needed.
        primary_region = rng.choice(["us-east-1", "eu-west-1", "ap-southeast-2"])
        fallback_region = rng.choice(["us-west-2", "eu-central-1", "ap-northeast-1"])
        ctx = {"primary_region": primary_region, "fallback_region": fallback_region}
        question = (
            f"Read the attached {contract}. Where is Customer Data stored, "
            "and under what conditions (if any) may it be processed elsewhere?"
        )
        expected_tokens = [
            primary_region,
            fallback_region,
            "consent",
        ]

    body_text = body_tpl.format(**ctx)
    footnote_text = footnote_tpl.format(**ctx)
    expected_answer = expected_tpl.format(**ctx)
    wrong_answer = wrong_tpl.format(**ctx)

    # The label is always drawn so the RNG sequence is the same for every
    # clause flavour. When the footnote cites a section number
    # ("Notwithstanding Section N"), the clause must be headed with that
    # same number, otherwise the footnote points at a section that does
    # not exist in the document.
    section_index = rng.randint(8, 14)
    section_label = f"{section_index}.{rng.randint(1, 5)}"
    section_label = ctx.get("section_num", section_label)

    # Drawn here, not inside draw(), so the callback is pure and would
    # render the same page if it were ever invoked more than once.
    effective_date = f"2026-{rng.randint(1, 12):02d}-{rng.randint(1, 28):02d}"

    case_id = f"footnote_override-{seed:04d}"

    def draw(c: canvas.Canvas) -> None:
        # Heading
        c.setFont("Helvetica-Bold", 16)
        c.drawString(72, 720, contract.upper())
        c.setFont("Helvetica-Oblique", 10)
        c.drawString(72, 700, f"Effective Date: {effective_date}")

        # Intro paragraph (filler so the doc looks normal)
        intro = (
            "This Agreement is entered into between the Customer and Vendor (each a "
            '"Party" and collectively the "Parties") and governs the Parties\' '
            "respective rights and obligations with respect to the Services described in the Order Form. "
            "Capitalised terms used but not defined herein have the meanings given in the Order Form."
        )
        y = C.draw_paragraph(c, intro, 72, 670, font=C.FontSpec(size=10))

        # The clause of interest (body)
        c.setFont("Helvetica-Bold", 11)
        c.drawString(72, y - 10, f"{section_label} Limitation.")
        y = C.draw_paragraph(
            c,
            body_text + "¹",  # superscript 1 — the footnote marker
            72, y - 30,
            font=C.FontSpec(size=11),
        )

        # More filler so the footnote isn't suspiciously isolated
        filler = (
            "Each Party shall comply with all applicable laws and regulations in connection with "
            "its performance under this Agreement and shall promptly notify the other Party of any "
            "material non-compliance of which it becomes aware. The provisions of this Section shall "
            "survive termination of this Agreement."
        )
        y = C.draw_paragraph(c, filler, 72, y - 10, font=C.FontSpec(size=10))

        # The footnote (6pt — the trap)
        # We deliberately place it near the bottom of page 1 to mimic
        # real legal-doc layout where footnotes get visually compressed.
        c.setFont("Helvetica", 6)
        c.drawString(72, 100, f"¹ {footnote_text}")

        # Page number
        c.setFont("Helvetica", 9)
        c.drawCentredString(C.PAGE_WIDTH / 2, 60, "Page 1 of 1")

    pdf_bytes = C.canvas_to_bytes(draw)

    case = HellCase(
        id=case_id,
        trap_family="footnote_override",
        seed=seed,
        question=question,
        expected_answer=expected_answer,
        expected_tokens=expected_tokens,
        forbidden_answers=[wrong_answer],
        metadata={
            "contract_type": contract,
            "clause_label": label,
            "section_label": section_label,
            "params": ctx,
            "footnote_text": footnote_text,
            "expected_failure_mode": (
                "Model reads the body clause and ignores the 6pt footnote, missing the "
                "material carve-out / exception."
            ),
        },
    )
    return pdf_bytes, case
@@ -0,0 +1,129 @@
1
+ """Trap family: hidden OCR mismatch.
2
+
3
+ The PDF *looks* like an invoice with an amount of ``$X``. But beneath
4
+ that visible glyph is an invisible text layer that says ``$Y`` instead
5
+ (rendered with PDF text render mode 3 — placed in the text content
6
+ stream but never rasterised).
7
+
8
+ A vision-only model reads the rendered pixels and answers ``$X``. A
9
+ text-extraction pipeline (pdfminer, PyMuPDF, pdfplumber, most RAG
10
+ loaders) reads the invisible layer and answers ``$Y``. An agent that
11
+ combines both — without resolving the conflict — answers whichever the
12
+ final stage trusted.
13
+
14
+ This is the most common silent failure mode for "PDF understanding" in
15
+ production: a scanned-then-OCR'd document where the OCR layer disagrees
16
+ with the rendered page. We procedurally fabricate it so we know exactly
17
+ which answer is correct (the visible one) and which answer is the
18
+ specific failure mode the trap was designed to catch (the hidden one).
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import random
23
+
24
+ from reportlab.pdfgen import canvas
25
+
26
+ from ..case import HellCase
27
+ from . import _common as C
28
+
29
+
30
+ # Visible amounts are sampled from a realistic invoice distribution.
31
+ # The hidden amount is *not* a small perturbation — it's a meaningfully
32
+ # different value, because if the agent's downstream user processes it,
33
+ # the dollar gap should be visible.
34
+ _VENDORS = [
35
+ "Acme Industrial Supplies LLC",
36
+ "Northwind Logistics Inc.",
37
+ "Globex Manufacturing Co.",
38
+ "Initech Software Solutions",
39
+ "Hooli Cloud Services, Inc.",
40
+ "Soylent Foods Corp.",
41
+ "Wayne Enterprises LP",
42
+ "Stark Industries Inc.",
43
+ "Tyrell Robotics Group",
44
+ "Cyberdyne Systems Inc.",
45
+ ]
46
+
47
+
48
def generate(seed: int) -> tuple[bytes, HellCase]:
    """Build one hidden-OCR-mismatch case.

    Renders a one-page invoice whose visible "TOTAL DUE" is the ground
    truth and, at the same coordinates, an invisible text-layer string
    carrying a different (larger) amount. Returns ``(pdf_bytes, case)``;
    the case expects the visible amount and forbids the hidden one. All
    randomness comes from an RNG seeded with ``seed``.
    """
    rng = C.rng_for(seed)

    # Two distinct amounts; the visible one is the ground truth.
    visible_amount = rng.choice([1234.56, 2750.00, 4980.40, 7820.99, 12345.67, 18900.25])
    hidden_amount = visible_amount + rng.choice([1000, 2000, 5000, 10000])
    vendor = rng.choice(_VENDORS)
    invoice_no = f"INV-{rng.randint(100000, 999999)}"
    due_day = rng.randint(1, 28)

    visible_str = C.fmt_money(visible_amount)
    hidden_str = C.fmt_money(hidden_amount)

    # Line items must add up to the visible total without going negative.
    # A fixed-price item is listed only while it fits under what is left;
    # the remainder is billed as the final support line. (Subtracting both
    # fixed items unconditionally produced a negative support line for the
    # smallest total.)
    fixed_items = [
        ("Configuration consulting ", 4, 250.00, 1000.00),
        ("On-site implementation ", 1, 1200.00, 1200.00),
    ]
    items = []
    remaining = visible_amount
    for item in fixed_items:
        line_total = item[3]
        if line_total < remaining:
            items.append(item)
            remaining = round(remaining - line_total, 2)
    items.append(("Annual support (Tier 2) ", 1, remaining, remaining))

    case_id = f"hidden_ocr_mismatch-{seed:04d}"
    question = (
        f"You are an accounts-payable assistant. The attached PDF is invoice "
        f"{invoice_no} from {vendor}. What is the TOTAL AMOUNT DUE? "
        "Respond with only the amount including currency symbol, no other text."
    )

    def draw(c: canvas.Canvas) -> None:
        # Header
        c.setFont("Helvetica-Bold", 18)
        c.drawString(72, 720, "INVOICE")
        c.setFont("Helvetica", 11)
        c.drawString(72, 700, vendor)
        c.drawString(72, 685, f"Invoice #: {invoice_no}")
        c.drawString(72, 670, f"Due: 2026-08-{due_day:02d}")

        # Line items (a few plausible rows so the doc doesn't look skeletal)
        C.draw_paragraph(
            c,
            "ITEM QTY UNIT LINE TOTAL",
            72, 620,
            font=C.FontSpec(size=10, bold=True),
        )
        y = 600
        for desc, qty, unit, total in items:
            c.drawString(72, y, f"{desc} {qty:>4} {C.fmt_money(unit):>10} {C.fmt_money(total):>12}")
            y -= 16

        # The "TOTAL DUE" line is what we're asking the model about.
        c.setFont("Helvetica-Bold", 13)
        c.drawString(72, y - 30, "TOTAL DUE:")
        c.drawString(220, y - 30, visible_str)

        # The trap: place the *wrong* amount in an invisible text layer.
        # It lands at the same coordinate as the visible total, so a
        # text-extraction pipeline that reads the underlying text stream
        # by position would see the hidden value, not the visible one.
        C.draw_invisible_text(c, hidden_str, x=220, y=y - 30, size=13)

        # Footer
        c.setFont("Helvetica-Oblique", 9)
        c.drawString(72, 90, "Thank you for your business. Net 30 from invoice date.")

    pdf_bytes = C.canvas_to_bytes(draw)

    case = HellCase(
        id=case_id,
        trap_family="hidden_ocr_mismatch",
        seed=seed,
        question=question,
        expected_answer=visible_str,
        forbidden_answers=[hidden_str],
        metadata={
            "vendor": vendor,
            "invoice_no": invoice_no,
            "visible_amount": visible_amount,
            "hidden_amount": hidden_amount,
            "expected_failure_mode": (
                "Model answers the hidden-OCR amount when it should answer the visible amount; "
                "indicates the model trusted a text-extraction layer over the rendered page."
            ),
        },
    )
    return pdf_bytes, case
@@ -0,0 +1,174 @@
1
+ """Trap family: split table across pages.
2
+
3
+ A 6-column financial / inventory / pricing table is drawn so that the
4
+ column *header* row appears at the bottom of page 1 and the *body* rows
5
+ appear at the top of page 2. The visual continuity is obvious to a
6
+ human flipping pages but breaks every document-pipeline that processes
7
+ pages independently (most RAG loaders, most OCR pipelines).
8
+
9
+ The trap question asks the model about a specific cell — e.g. "What is
10
+ the Q3 Net Revenue for the Northwest region?". A model that loses the
11
+ header context on page 2 will either confuse columns (returning Gross
12
+ Revenue or Operating Income instead) or refuse to answer. Procedural
13
+ ground truth means we know exactly which column the answer is in.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import random
18
+
19
+ from reportlab.pdfgen import canvas
20
+
21
+ from ..case import HellCase
22
+ from . import _common as C
23
+
24
+
25
+ _REGIONS = ["Northwest", "Northeast", "Southwest", "Southeast", "Central"]
26
+ _QUARTERS = ["Q1", "Q2", "Q3", "Q4"]
27
+ _COLUMNS = [
28
+ ("Region", "region"),
29
+ ("Quarter", "quarter"),
30
+ ("Gross Revenue", "gross"),
31
+ ("Cost of Goods", "cogs"),
32
+ ("Operating Income", "op_income"),
33
+ ("Net Revenue", "net"),
34
+ ]
35
+
36
+
37
def _generate_row(rng: random.Random) -> dict:
    """Draw one table row: a region, a quarter and four money figures.

    Gross revenue is uniform in [800k, 5M]; cost of goods is 35-55% of
    gross and operating income 15-30% of gross; net revenue is operating
    income minus a further 20k-80k. All money values are rounded to cents.
    """
    row: dict = {
        "region": rng.choice(_REGIONS),
        "quarter": rng.choice(_QUARTERS),
    }
    gross = round(rng.uniform(800_000, 5_000_000), 2)
    row["gross"] = gross
    row["cogs"] = round(gross * rng.uniform(0.35, 0.55), 2)
    operating = round(gross * rng.uniform(0.15, 0.30), 2)
    row["op_income"] = operating
    row["net"] = round(operating - rng.uniform(20_000, 80_000), 2)
    return row
52
+
53
+
54
def generate(seed: int) -> tuple[bytes, HellCase]:
    """Build one split-table case.

    Generates eight rows with unique (region, quarter) pairs, picks one
    row and one money column as the question target, and renders a
    two-page PDF: the column headers sit at the bottom of page 1 and the
    body rows, with no repeated header, at the top of page 2. Returns
    ``(pdf_bytes, case)``; the forbidden answer is the target row's value
    from a different money column. All randomness comes from an RNG seeded
    with ``seed``.
    """
    rng = C.rng_for(seed)

    # Keep drawing rows until there are 8 with distinct (region, quarter).
    used_pairs: set[tuple[str, str]] = set()
    rows: list[dict] = []
    while len(rows) < 8:
        candidate = _generate_row(rng)
        pair = (candidate["region"], candidate["quarter"])
        if pair not in used_pairs:
            used_pairs.add(pair)
            rows.append(candidate)

    # The question targets exactly one row and one money column
    # (region/quarter are identifiers, not answers).
    target_row = rng.choice(rows)
    money_columns = _COLUMNS[2:]
    target_column_label, target_column_key = rng.choice(money_columns)
    expected_value = target_row[target_column_key]
    expected_str = C.fmt_money(expected_value)

    # The most plausible wrong answer is the same row read from another
    # money column: the "column-confusion" failure that page-split tables
    # specifically elicit.
    distractor_keys = [key for _, key in money_columns if key != target_column_key]
    wrong_str = C.fmt_money(target_row[rng.choice(distractor_keys)])

    case_id = f"split_table_across_pages-{seed:04d}"
    question = (
        f"The attached PDF contains a financial-results table. "
        f"What was the {target_column_label} for the {target_row['region']} region in "
        f"{target_row['quarter']} of 2026? Respond with only the dollar amount, no other text."
    )

    col_widths = [80, 60, 100, 100, 100, 100]
    x_start = 72

    def draw_cells(c: canvas.Canvas, cells: list, baseline: float) -> None:
        # One table line: each cell left-aligned at the start of its column.
        cx = x_start
        for cell, w in zip(cells, col_widths):
            c.drawString(cx, baseline, cell)
            cx += w

    def draw(c: canvas.Canvas) -> None:
        # ---- Page 1: title, two filler paragraphs, header row at the bottom
        c.setFont("Helvetica-Bold", 16)
        c.drawString(72, 720, "FY2026 REGIONAL FINANCIAL SUMMARY")
        c.setFont("Helvetica", 10)
        C.draw_paragraph(
            c,
            "The following table summarises gross and net revenue, cost of goods sold, and operating "
            "income by region and quarter for fiscal year 2026. All amounts are reported in USD "
            "and exclude inter-regional transfers. See Appendix B for the methodology used to allocate "
            "shared infrastructure costs across regions.",
            72, 690,
            font=C.FontSpec(size=10),
        )

        # Filler so the header's position near the bottom looks natural.
        C.draw_paragraph(
            c,
            "Note that Q3 results reflect the regional reorganisation announced in our Q2 earnings "
            "call. Comparisons to prior years should account for the boundary shift between the "
            "Northwest and Central regions effective 2026-07-01.",
            72, 620,
            font=C.FontSpec(size=10),
        )

        # The trap: column headers are the last thing on page 1.
        header_y = 130
        c.setFont("Helvetica-Bold", 10)
        draw_cells(c, [label for label, _ in _COLUMNS], header_y)

        c.setFont("Helvetica", 9)
        c.drawCentredString(C.PAGE_WIDTH / 2, 60, "Page 1 of 2")

        C.page_break(c)

        # ---- Page 2: body rows only, header deliberately not repeated
        y = 720
        c.setFont("Helvetica", 10)
        for row in rows:
            cells = [row["region"], row["quarter"]]
            cells += [C.fmt_money(row[key]) for key in ("gross", "cogs", "op_income", "net")]
            draw_cells(c, cells, y)
            y -= 22

        c.setFont("Helvetica", 9)
        c.drawCentredString(C.PAGE_WIDTH / 2, 60, "Page 2 of 2")

    pdf_bytes = C.canvas_to_bytes(draw)

    metadata = {
        "target_region": target_row["region"],
        "target_quarter": target_row["quarter"],
        "target_column": target_column_label,
        "target_column_key": target_column_key,
        "expected_value": expected_value,
        "row_count": len(rows),
        "expected_failure_mode": (
            "Model loses column-header context when reading page 2 in isolation; "
            "returns a value from an adjacent column in the same row."
        ),
    }
    case = HellCase(
        id=case_id,
        trap_family="split_table_across_pages",
        seed=seed,
        question=question,
        expected_answer=expected_str,
        forbidden_answers=[wrong_str],
        metadata=metadata,
    )
    return pdf_bytes, case