pdf-form-tools 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 רומן אוסטרובסקי
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf-form-tools
3
+ Version: 2.0.0
4
+ Summary: Template-aware tools for filling scanned PDF forms with visual verification
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://github.com/ceratops-code/pdf-form-tools
7
+ Project-URL: Repository, https://github.com/ceratops-code/pdf-form-tools
8
+ Project-URL: Issues, https://github.com/ceratops-code/pdf-form-tools/issues
9
+ Keywords: pdf,forms,opencv,pymupdf,document-processing
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Multimedia :: Graphics
18
+ Classifier: Topic :: Office/Business :: Office Suites
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: numpy
23
+ Requires-Dist: opencv-python-headless
24
+ Requires-Dist: pymupdf
25
+ Requires-Dist: pillow
26
+ Requires-Dist: pypdf
27
+ Requires-Dist: reportlab
28
+ Requires-Dist: python-bidi
29
+ Provides-Extra: dev
30
+ Requires-Dist: build>=1.2; extra == "dev"
31
+ Requires-Dist: pytest>=8.0; extra == "dev"
32
+ Requires-Dist: ruff>=0.6; extra == "dev"
33
+ Requires-Dist: twine>=5.1; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # pdf-form-tools
37
+
38
+ `pdf-form-tools` is an import-only Python package for filling layout-sensitive scanned PDF forms with deterministic placement helpers and visual verification primitives.
39
+
40
+ It is intentionally small:
41
+
42
+ - render PDF pages to raster images
43
+ - detect writable regions, checkbox boxes, signature lines, and ID slots
44
+ - draw text, checks, and signatures onto an overlay
45
+ - merge the overlay back into the original PDF
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ python -m pip install pdf-form-tools
51
+ ```
52
+
53
+ ## Example
54
+
55
+ ```python
56
+ from pathlib import Path
57
+
58
+ from pdf_form_tools import Rect, merge_overlay_pdf, render_pdf_page
59
+
60
+ source_pdf = Path("form.pdf")
61
+ preview_png = Path("preview-page1.png")
62
+ render_pdf_page(source_pdf, 0, 2, preview_png)
63
+
64
+ # draw your overlay separately, then merge it back
65
+ merge_overlay_pdf(source_pdf, Path("overlay-page1.png"), Path("form-filled.pdf"))
66
+ ```
67
+
68
+ ## Development
69
+
70
+ ```bash
71
+ python -m pip install -e ".[dev]"
72
+ python -m ruff check .
73
+ python -m pytest
74
+ python -m build
75
+ ```
76
+
77
+ ## Scope
78
+
79
+ This package contains reusable low-level helpers only. Form-specific filling flows belong in project-local scripts or thin runners, not in the shared library.
@@ -0,0 +1,44 @@
1
+ # pdf-form-tools
2
+
3
+ `pdf-form-tools` is an import-only Python package for filling layout-sensitive scanned PDF forms with deterministic placement helpers and visual verification primitives.
4
+
5
+ It is intentionally small:
6
+
7
+ - render PDF pages to raster images
8
+ - detect writable regions, checkbox boxes, signature lines, and ID slots
9
+ - draw text, checks, and signatures onto an overlay
10
+ - merge the overlay back into the original PDF
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ python -m pip install pdf-form-tools
16
+ ```
17
+
18
+ ## Example
19
+
20
+ ```python
21
+ from pathlib import Path
22
+
23
+ from pdf_form_tools import Rect, merge_overlay_pdf, render_pdf_page
24
+
25
+ source_pdf = Path("form.pdf")
26
+ preview_png = Path("preview-page1.png")
27
+ render_pdf_page(source_pdf, 0, 2, preview_png)
28
+
29
+ # draw your overlay separately, then merge it back
30
+ merge_overlay_pdf(source_pdf, Path("overlay-page1.png"), Path("form-filled.pdf"))
31
+ ```
32
+
33
+ ## Development
34
+
35
+ ```bash
36
+ python -m pip install -e ".[dev]"
37
+ python -m ruff check .
38
+ python -m pytest
39
+ python -m build
40
+ ```
41
+
42
+ ## Scope
43
+
44
+ This package contains reusable low-level helpers only. Form-specific filling flows belong in project-local scripts or thin runners, not in the shared library.
@@ -0,0 +1,55 @@
1
[build-system]
# The [project] table uses PEP 639 metadata (`license = "MIT"` as an SPDX
# expression together with `license-files`). setuptools only understands that
# form from 77.0.0 onwards, so the build requirement must not allow older
# releases.
requires = ["setuptools>=77", "wheel"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pdf-form-tools"
7
+ version = "2.0.0"
8
+ description = "Template-aware tools for filling scanned PDF forms with visual verification"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ keywords = ["pdf", "forms", "opencv", "pymupdf", "document-processing"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Operating System :: OS Independent",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Topic :: Multimedia :: Graphics",
23
+ "Topic :: Office/Business :: Office Suites",
24
+ ]
25
+ urls = { Homepage = "https://github.com/ceratops-code/pdf-form-tools", Repository = "https://github.com/ceratops-code/pdf-form-tools", Issues = "https://github.com/ceratops-code/pdf-form-tools/issues" }
26
+ dependencies = [
27
+ "numpy",
28
+ "opencv-python-headless",
29
+ "pymupdf",
30
+ "pillow",
31
+ "pypdf",
32
+ "reportlab",
33
+ "python-bidi",
34
+ ]
35
+
36
+ [project.optional-dependencies]
37
+ dev = [
38
+ "build>=1.2",
39
+ "pytest>=8.0",
40
+ "ruff>=0.6",
41
+ "twine>=5.1",
42
+ ]
43
+
44
+ [tool.setuptools.package-dir]
45
+ "" = "src"
46
+
47
+ [tool.setuptools.packages.find]
48
+ where = ["src"]
49
+ include = ["pdf_form_tools*"]
50
+
51
+ [tool.pytest.ini_options]
52
+ testpaths = ["tests"]
53
+
54
+ [tool.ruff]
55
+ target-version = "py311"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,27 @@
1
"""Public API of the ``pdf_form_tools`` package.

Re-exports the helpers implemented in ``pdf_form_overlay`` so callers can
import them directly from the package root.
"""

from .pdf_form_overlay import (
    Rect,
    centered_address_box,
    detect_id_slots,
    detect_lines,
    detect_square_boxes,
    draw_check,
    draw_id_number,
    draw_text,
    merge_overlay_pdf,
    paste_signature,
    render_pdf_page,
    writable_box,
)

# ``detect_id_slots`` is exported alongside the other detectors so that every
# detection helper is reachable from the package root.
__all__ = [
    "Rect",
    "centered_address_box",
    "detect_id_slots",
    "detect_lines",
    "detect_square_boxes",
    "draw_check",
    "draw_id_number",
    "draw_text",
    "merge_overlay_pdf",
    "paste_signature",
    "render_pdf_page",
    "writable_box",
]
@@ -0,0 +1,357 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from functools import lru_cache
5
+ from io import BytesIO
6
+ from pathlib import Path
7
+
8
+ import cv2
9
+ import fitz
10
+ import numpy as np
11
+ from bidi.algorithm import get_display
12
+ from PIL import Image, ImageDraw, ImageFont
13
+ from pypdf import PdfReader, PdfWriter
14
+ from reportlab.lib.utils import ImageReader
15
+ from reportlab.pdfgen import canvas
16
+
17
+
18
# Default RGBA fill used by the drawing helpers (dark grey, fully opaque).
TEXT_COLOR = (20, 20, 20, 255)

# Font files probed in order, keyed by weight: ``False`` holds the regular
# faces and ``True`` the bold ones. Each list covers Windows, Linux and macOS
# install locations.
FONT_CANDIDATES = {
    is_bold: [Path(location) for location in locations]
    for is_bold, locations in (
        (
            False,
            (
                r"C:\Windows\Fonts\arial.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
                "/usr/share/fonts/truetype/liberation2/LiberationSans-Regular.ttf",
                "/Library/Fonts/Arial.ttf",
                "/System/Library/Fonts/Supplemental/Arial.ttf",
            ),
        ),
        (
            True,
            (
                r"C:\Windows\Fonts\arialbd.ttf",
                "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                "/usr/share/fonts/truetype/liberation2/LiberationSans-Bold.ttf",
                "/Library/Fonts/Arial Bold.ttf",
                "/System/Library/Fonts/Supplemental/Arial Bold.ttf",
            ),
        ),
    )
}
35
+
36
+
37
@dataclass(frozen=True)
class Rect:
    """Immutable rectangle anchored at ``(x, y)`` with width ``w`` and height ``h``."""

    x: int  # coordinate of the first column covered
    y: int  # coordinate of the first row covered
    w: int  # extent along x
    h: int  # extent along y

    @property
    def x2(self) -> int:
        """Return the exclusive end coordinate along x (``x + w``)."""
        far_x = self.x + self.w
        return far_x

    @property
    def y2(self) -> int:
        """Return the exclusive end coordinate along y (``y + h``)."""
        far_y = self.y + self.h
        return far_y

    def inset(self, dx: int, dy: int | None = None) -> "Rect":
        """Return a new rectangle shrunk by ``dx`` per side horizontally and ``dy`` vertically.

        When ``dy`` is omitted the horizontal amount is used for both axes.
        Negative values grow the rectangle.
        """
        vertical = dx if dy is None else dy
        return Rect(
            x=self.x + dx,
            y=self.y + vertical,
            w=self.w - dx * 2,
            h=self.h - vertical * 2,
        )
56
+
57
+
58
def contains_hebrew(text: str) -> bool:
    """Return True when *text* contains at least one Hebrew code point.

    Two Unicode ranges are recognised: the Hebrew block (U+0590-U+05FF) and
    the Hebrew part of Alphabetic Presentation Forms (U+FB1D-U+FB4F), which
    holds precomposed letters and ligatures. Checking only the first range
    would miss text written with presentation forms.
    """
    return any(
        "\u0590" <= ch <= "\u05FF" or "\uFB1D" <= ch <= "\uFB4F"
        for ch in text
    )
60
+
61
+
62
def visual_text(text: str) -> str:
    """Return *text* prepared for drawing.

    Text without Hebrew characters is returned unchanged; otherwise it is
    passed through ``get_display`` from python-bidi.
    """
    if not contains_hebrew(text):
        return text
    return get_display(text)
64
+
65
+
66
@lru_cache(maxsize=2)
def resolve_font_path(bold: bool = False) -> Path:
    """Return the first existing font file listed in ``FONT_CANDIDATES`` for the weight.

    Successful lookups are memoised per ``bold`` value.

    Raises:
        FileNotFoundError: if none of the candidate paths exists.
    """
    found = next((path for path in FONT_CANDIDATES[bold] if path.exists()), None)
    if found is None:
        raise FileNotFoundError(f"Could not find a usable {'bold' if bold else 'regular'} TrueType font.")
    return found
72
+
73
+
74
def load_font(size: int, bold: bool = False) -> ImageFont.ImageFont:
    """Load the resolved system font of the requested weight at *size*."""
    return ImageFont.truetype(str(resolve_font_path(bold=bold)), size)
77
+
78
+
79
def close_small_gaps(mask: np.ndarray, max_gap: int = 4) -> np.ndarray:
    """Return a copy of *mask* with short runs of falsy values set to True.

    A run is filled when its length is at most *max_gap*. Runs touching the
    start or the end of the mask are filled under the same rule. The input is
    not modified.
    """
    filled = mask.copy()
    total = len(mask)
    gap_start = None
    # Walk one position past the end so a trailing gap is closed by the same
    # code path as an interior one.
    for pos in range(total + 1):
        in_gap = pos < total and not mask[pos]
        if in_gap:
            if gap_start is None:
                gap_start = pos
            continue
        if gap_start is not None:
            if pos - gap_start <= max_gap:
                filled[gap_start:pos] = True
            gap_start = None
    return filled
92
+
93
+
94
def longest_true_segment(mask: np.ndarray, min_len: int) -> tuple[int, int] | None:
    """Return ``(start, stop)`` of the longest run of truthy values in *mask*.

    ``stop`` is exclusive. Runs shorter than *min_len* are ignored, and the
    earliest run wins when several share the maximum length. Returns ``None``
    when no run qualifies.
    """
    runs: list[tuple[int, int]] = []
    run_start = None
    for pos, flag in enumerate(mask):
        if flag:
            if run_start is None:
                run_start = pos
        elif run_start is not None:
            runs.append((run_start, pos))
            run_start = None
    if run_start is not None:
        runs.append((run_start, len(mask)))

    eligible = [run for run in runs if run[1] - run[0] >= min_len]
    if not eligible:
        return None
    # max() keeps the first of several equally long runs.
    return max(eligible, key=lambda run: run[1] - run[0])
109
+
110
+
111
def writable_box(page_gray: np.ndarray, rect: Rect, row_threshold: float = 0.015, col_threshold: float = 0.03) -> Rect:
    """Locate the largest mostly-blank area inside *rect* on a grayscale page.

    The search window is *rect* inset by 8 px. Pixels darker than 185 count as
    ink. The tallest band of rows whose ink fraction is below *row_threshold*
    is found first; inside that band the widest span of columns whose ink
    fraction is below *col_threshold* is chosen. Fixed fallback spans are used
    when no band or span is long enough. The result is inset by a further 4 px.
    """
    search = rect.inset(8)
    window = page_gray[search.y:search.y2, search.x:search.x2]
    rows = window.shape[0]
    cols = window.shape[1]

    blank_rows = (window < 185).mean(axis=1) < row_threshold
    # NOTE(review): close_small_gaps also fills gaps touching the array ends,
    # so these 4-row margins can be refilled when the neighbouring rows are
    # blank. Confirm that is the intended behaviour.
    blank_rows[:4] = False
    blank_rows[-4:] = False
    blank_rows = close_small_gaps(blank_rows, max_gap=5)
    row_span = longest_true_segment(blank_rows, min_len=max(18, rows // 6))
    top, bottom = row_span if row_span is not None else (rows // 3, rows - 12)

    strip = window[top:bottom, :]
    blank_cols = (strip < 185).mean(axis=0) < col_threshold
    blank_cols[:6] = False
    blank_cols[-6:] = False
    blank_cols = close_small_gaps(blank_cols, max_gap=8)
    col_span = longest_true_segment(blank_cols, min_len=max(40, cols // 6))
    left, right = col_span if col_span is not None else (10, cols - 10)

    found = Rect(search.x + left, search.y + top, right - left, bottom - top)
    return found.inset(4)
143
+
144
+
145
def fit_font(
    draw: ImageDraw.ImageDraw,
    text: str,
    rect: Rect,
    max_size: int,
    min_size: int,
    bold: bool,
) -> tuple[ImageFont.FreeTypeFont, tuple[int, int, int, int]]:
    """Pick the largest font size at which *text* fits inside *rect*.

    Sizes are tried from *max_size* downwards in steps of 2. The first size
    whose measured bounding box fits both the width and the height of *rect*
    wins. If none fits, the font at *min_size* is returned even though the
    text may overflow.

    Returns:
        The chosen font and the text bounding box measured at origin (0, 0).
    """
    shaped = visual_text(text)

    def measure(size):
        candidate = load_font(size, bold=bold)
        return candidate, draw.textbbox((0, 0), shaped, font=candidate)

    for size in range(max_size, min_size - 1, -2):
        font, bbox = measure(size)
        fits_width = bbox[2] - bbox[0] <= rect.w
        fits_height = bbox[3] - bbox[1] <= rect.h
        if fits_width and fits_height:
            return font, bbox
    return measure(min_size)
164
+
165
+
166
def draw_text(
    draw: ImageDraw.ImageDraw,
    text: str,
    rect: Rect,
    *,
    align: str,
    max_size: int,
    min_size: int,
    bold: bool = False,
    fill: tuple[int, int, int, int] = TEXT_COLOR,
) -> None:
    """Draw *text* inside *rect*, auto-sizing the font and centring it vertically.

    ``align`` may be ``"left"`` or ``"right"``; any other value centres the
    text horizontally. The font size is chosen by ``fit_font`` between
    *max_size* and *min_size*.
    """
    shaped = visual_text(text)
    font, (left, top, right, bottom) = fit_font(
        draw, text, rect, max_size=max_size, min_size=min_size, bold=bold
    )
    text_w = right - left
    text_h = bottom - top

    # Subtract the bbox offsets so the visible ink, not the glyph origin,
    # is what gets aligned.
    if align == "left":
        x = rect.x - left
    elif align == "right":
        x = rect.x2 - text_w - left
    else:
        x = rect.x + (rect.w - text_w) / 2 - left
    y = rect.y + (rect.h - text_h) / 2 - top

    draw.text((x, y), shaped, font=font, fill=fill)
191
+
192
+
193
def centered_address_box(rect: Rect, *, top_pad: int, side_pad: int, height: int, right_pad: int | None = None) -> Rect:
    """Return a box of the given *height* placed inside *rect* with padding.

    The box starts *top_pad* below the top of *rect* and *side_pad* in from
    its left edge. The right-hand padding is *right_pad*, falling back to
    *side_pad* when it is omitted.
    """
    trailing_pad = side_pad if right_pad is None else right_pad
    return Rect(
        x=rect.x + side_pad,
        y=rect.y + top_pad,
        w=rect.w - side_pad - trailing_pad,
        h=height,
    )
197
+
198
+
199
def detect_square_boxes(
    page_gray: np.ndarray,
    region: Rect,
    *,
    min_side: int = 40,
    max_side: int = 60,
) -> list[Rect]:
    """Find roughly square outlines, such as checkboxes, inside *region*.

    The region is binarised so that pixels at or below 210 become foreground,
    contours are extracted, and each contour's bounding box is kept when both
    sides lie within ``[min_side, max_side]`` pixels and the width/height
    ratio is between 0.8 and 1.25. Boxes whose origin is within 5 px of an
    already accepted box on both axes are treated as duplicates.

    Args:
        page_gray: Grayscale page image.
        region: Area of the page to search, in page pixel coordinates.
        min_side: Smallest accepted side length in pixels.
        max_side: Largest accepted side length in pixels.

    Returns:
        Detected boxes in page coordinates, sorted top-to-bottom and then
        left-to-right.
    """
    crop = page_gray[region.y:region.y2, region.x:region.x2]
    _, thresh = cv2.threshold(crop, 210, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    boxes: list[Rect] = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if not (min_side <= w <= max_side and min_side <= h <= max_side):
            continue
        if not 0.8 <= (w / h) <= 1.25:
            continue
        candidate = Rect(region.x + x, region.y + y, w, h)
        # Inner and outer edges of one outline yield near-identical boxes.
        if any(abs(candidate.x - existing.x) < 5 and abs(candidate.y - existing.y) < 5 for existing in boxes):
            continue
        boxes.append(candidate)
    return sorted(boxes, key=lambda item: (item.y, item.x))
212
+
213
+
214
def detect_lines(
    page_gray: np.ndarray,
    region: Rect,
    *,
    min_width: int = 500,
    max_width: int = 900,
    max_height: int = 12,
) -> list[Rect]:
    """Find long, thin horizontal strokes, such as signature lines, inside *region*.

    The region is binarised so that pixels at or below 200 become foreground,
    contours are extracted, and each contour's bounding box is kept when its
    width lies within ``[min_width, max_width]`` and its height is at most
    ``max_height``. Boxes whose origin is within 10 px of an already accepted
    box on both axes are treated as duplicates.

    Args:
        page_gray: Grayscale page image.
        region: Area of the page to search, in page pixel coordinates.
        min_width: Smallest accepted line width in pixels.
        max_width: Largest accepted line width in pixels.
        max_height: Largest accepted line thickness in pixels.

    Returns:
        Detected lines in page coordinates, sorted by their x coordinate.
    """
    crop = page_gray[region.y:region.y2, region.x:region.x2]
    _, thresh = cv2.threshold(crop, 200, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    lines: list[Rect] = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if not (min_width <= w <= max_width and h <= max_height):
            continue
        candidate = Rect(region.x + x, region.y + y, w, h)
        if any(abs(candidate.x - existing.x) < 10 and abs(candidate.y - existing.y) < 10 for existing in lines):
            continue
        lines.append(candidate)
    return sorted(lines, key=lambda item: item.x)
227
+
228
+
229
def detect_id_slots(page_gray: np.ndarray, rect: Rect, *, slot_count: int = 9) -> list[Rect]:
    """Split an ID field into per-digit slots using the tick marks near its bottom.

    Only the lower 30% of *rect* is inspected, with pixels darker than 180
    counting as ink. Columns holding at least 8 ink pixels are grouped into
    consecutive runs, and the centre of each run becomes a slot boundary. The
    left and right edges of the field are always boundaries.

    Args:
        page_gray: Grayscale page image.
        rect: The ID field, in page pixel coordinates.
        slot_count: Number of digit slots expected in the field.

    Returns:
        One rectangle per slot, ordered left to right.

    Raises:
        ValueError: if *slot_count* is less than 1.
        RuntimeError: if the number of detected boundaries is not
            ``slot_count + 1``.
    """
    if slot_count < 1:
        raise ValueError(f"slot_count must be at least 1, got {slot_count}.")

    crop = page_gray[rect.y:rect.y2, rect.x:rect.x2]
    guide_start = int(crop.shape[0] * 0.7)
    ink = crop[guide_start:, :] < 180

    # First row of the lower band holding at least 6 ink pixels; falls back
    # to 82% of the field height when no such row exists.
    row_sum = ink.sum(axis=1)
    first_guide_row = next(
        (idx + guide_start for idx, value in enumerate(row_sum) if value >= 6),
        int(crop.shape[0] * 0.82),
    )

    # Group adjacent peak columns into (first, last) runs.
    col_sum = ink.sum(axis=0)
    ranges: list[tuple[int, int]] = []
    for idx, value in enumerate(col_sum):
        if value < 8:
            continue
        if ranges and idx == ranges[-1][1] + 1:
            ranges[-1] = (ranges[-1][0], idx)
        else:
            ranges.append((idx, idx))

    boundaries = [0]
    for left, right in ranges:
        center = int(round((left + right) / 2))
        # Runs hugging the field edges are the frame, not separators.
        if 4 < center < crop.shape[1] - 5:
            boundaries.append(center)
    boundaries.append(crop.shape[1] - 1)
    boundaries = sorted(set(boundaries))

    expected_boundaries = slot_count + 1
    if len(boundaries) != expected_boundaries:
        raise RuntimeError(
            f"Expected {expected_boundaries} ID slot boundaries, found {len(boundaries)} for {rect}."
        )

    digit_top = rect.y + first_guide_row - int(rect.h * 0.34)
    digit_height = int(rect.h * 0.48)
    # Keep 3 px clear of each boundary on both sides of a slot.
    return [
        Rect(rect.x + left + 3, digit_top, right - left - 6, digit_height)
        for left, right in zip(boundaries, boundaries[1:])
    ]
274
+
275
+
276
def draw_id_number(draw: ImageDraw.ImageDraw, page_gray: np.ndarray, rect: Rect, number: str) -> None:
    """Draw each character of *number* centred in its detected ID slot.

    Raises:
        RuntimeError: if the length of *number* differs from the number of
            slots found by ``detect_id_slots``.
    """
    slots = detect_id_slots(page_gray, rect)
    digit_count = len(number)
    if digit_count != len(slots):
        raise RuntimeError(f"ID length {len(number)} does not match detected slot count {len(slots)}.")
    for position, slot in enumerate(slots):
        draw_text(draw, number[position], slot, align="center", max_size=74, min_size=54)
282
+
283
+
284
def draw_check(
    draw: ImageDraw.ImageDraw,
    rect: Rect,
    *,
    raise_px: int = 10,
    fill: tuple[int, int, int, int] = TEXT_COLOR,
) -> None:
    """Draw a two-stroke check mark sized relative to *rect*.

    The mark is shifted upwards by *raise_px* pixels. The stroke width is a
    quarter of the rectangle width, with a minimum of 10.
    """
    stroke = max(10, rect.w // 4)

    def point(frac_x, frac_y):
        # Fractions of the rectangle size, measured from its origin corner.
        return (rect.x + rect.w * frac_x, rect.y + rect.h * frac_y - raise_px)

    start = point(0.18, 0.54)
    corner = point(0.43, 0.80)
    tip = point(0.83, 0.20)
    for segment in ([start, corner], [corner, tip]):
        draw.line(segment, fill=fill, width=stroke)
298
+
299
+
300
def paste_signature(
    overlay: Image.Image,
    signature: Image.Image,
    line_rect: Rect,
    *,
    min_cm_width: float = 2.0,
    target_height: int | None = None,
    y_offset: int = 45,
) -> None:
    """Composite a signature image onto *overlay* above a signature line.

    The signature is cropped to the bounding box of its non-transparent
    pixels, resized, centred horizontally on *line_rect* and placed so that
    its bottom edge sits *y_offset* pixels below the top of the line.

    Args:
        overlay: Destination image; modified in place.
        signature: Signature image. Images without an alpha channel are
            converted to RGBA first.
        line_rect: Detected signature line, in overlay pixel coordinates.
        min_cm_width: Lower bound for the pasted width, expressed in
            centimetres.
        target_height: Height of the pasted signature in pixels; 260 when
            omitted.
        y_offset: How far the signature's bottom edge extends below the top
            of the line.
    """
    # getchannel("A") and alpha_composite both need an alpha band, so
    # normalise palette, greyscale and RGB inputs up front.
    if signature.mode != "RGBA":
        signature = signature.convert("RGBA")

    alpha_bbox = signature.getchannel("A").getbbox()
    if alpha_bbox:
        signature = signature.crop(alpha_bbox)

    # NOTE(review): dividing by 21.0 presumably treats the overlay as one
    # A4 page width (21 cm) - confirm against callers.
    min_signature_width = int(round((overlay.width / 21.0) * min_cm_width))
    target_width = max(min_signature_width, int(line_rect.w * 0.55))
    if target_height is None:
        target_height = 260
    # Width and height are set independently, so the aspect ratio of the
    # cropped signature is not preserved.
    resized = signature.resize((target_width, target_height), Image.Resampling.LANCZOS)
    x = int(line_rect.x + (line_rect.w - target_width) / 2)
    y = int(line_rect.y - target_height + y_offset)
    overlay.alpha_composite(resized, (x, y))
321
+
322
+
323
def render_pdf_page(pdf_path: Path, page_index: int, scale: int, out_path: Path) -> Image.Image:
    """Rasterise one PDF page, save it to *out_path* and return the image.

    The page at *page_index* is rendered without an alpha channel using a
    uniform zoom factor of *scale* on both axes. The document is closed
    whether or not rendering succeeds.
    """
    document = fitz.open(pdf_path)
    try:
        zoom = fitz.Matrix(scale, scale)
        pixmap = document[page_index].get_pixmap(matrix=zoom, alpha=False)
        dimensions = (pixmap.width, pixmap.height)
        rendered = Image.frombytes("RGB", dimensions, pixmap.samples)
        rendered.save(out_path)
    finally:
        document.close()
    return rendered
333
+
334
+
335
def merge_overlay_pdf(src_pdf: Path, overlay_png: Path, out_pdf: Path, *, page_index: int = 0) -> None:
    """Stamp an overlay image onto one page of a PDF and write the result.

    The overlay is stretched to the media box of the target page and merged
    on top of it. All other pages are copied through in their original order.

    Args:
        src_pdf: Source PDF.
        overlay_png: Overlay image; ``mask="auto"`` is passed to reportlab so
            the image's own transparency is used.
        out_pdf: Destination path for the merged PDF.
        page_index: Index of the page that receives the overlay. Defaults to
            the first page; negative indices count from the end.

    Raises:
        IndexError: if *page_index* is outside the document.
    """
    reader = PdfReader(str(src_pdf))
    pages = list(reader.pages)
    target = pages[page_index]

    width = float(target.mediabox.width)
    height = float(target.mediabox.height)

    # Build a one-page PDF in memory that holds only the overlay image.
    overlay_buffer = BytesIO()
    c = canvas.Canvas(overlay_buffer, pagesize=(width, height))
    c.drawImage(ImageReader(str(overlay_png)), 0, 0, width=width, height=height, mask="auto")
    c.save()
    overlay_buffer.seek(0)
    overlay_reader = PdfReader(overlay_buffer)

    # merge_page mutates the target in place, so the list can be written
    # out in order afterwards.
    target.merge_page(overlay_reader.pages[0])

    writer = PdfWriter()
    for page in pages:
        writer.add_page(page)

    with out_pdf.open("wb") as handle:
        writer.write(handle)
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: pdf-form-tools
3
+ Version: 2.0.0
4
+ Summary: Template-aware tools for filling scanned PDF forms with visual verification
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://github.com/ceratops-code/pdf-form-tools
7
+ Project-URL: Repository, https://github.com/ceratops-code/pdf-form-tools
8
+ Project-URL: Issues, https://github.com/ceratops-code/pdf-form-tools/issues
9
+ Keywords: pdf,forms,opencv,pymupdf,document-processing
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Multimedia :: Graphics
18
+ Classifier: Topic :: Office/Business :: Office Suites
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: numpy
23
+ Requires-Dist: opencv-python-headless
24
+ Requires-Dist: pymupdf
25
+ Requires-Dist: pillow
26
+ Requires-Dist: pypdf
27
+ Requires-Dist: reportlab
28
+ Requires-Dist: python-bidi
29
+ Provides-Extra: dev
30
+ Requires-Dist: build>=1.2; extra == "dev"
31
+ Requires-Dist: pytest>=8.0; extra == "dev"
32
+ Requires-Dist: ruff>=0.6; extra == "dev"
33
+ Requires-Dist: twine>=5.1; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ # pdf-form-tools
37
+
38
+ `pdf-form-tools` is an import-only Python package for filling layout-sensitive scanned PDF forms with deterministic placement helpers and visual verification primitives.
39
+
40
+ It is intentionally small:
41
+
42
+ - render PDF pages to raster images
43
+ - detect writable regions, checkbox boxes, signature lines, and ID slots
44
+ - draw text, checks, and signatures onto an overlay
45
+ - merge the overlay back into the original PDF
46
+
47
+ ## Install
48
+
49
+ ```bash
50
+ python -m pip install pdf-form-tools
51
+ ```
52
+
53
+ ## Example
54
+
55
+ ```python
56
+ from pathlib import Path
57
+
58
+ from pdf_form_tools import Rect, merge_overlay_pdf, render_pdf_page
59
+
60
+ source_pdf = Path("form.pdf")
61
+ preview_png = Path("preview-page1.png")
62
+ render_pdf_page(source_pdf, 0, 2, preview_png)
63
+
64
+ # draw your overlay separately, then merge it back
65
+ merge_overlay_pdf(source_pdf, Path("overlay-page1.png"), Path("form-filled.pdf"))
66
+ ```
67
+
68
+ ## Development
69
+
70
+ ```bash
71
+ python -m pip install -e ".[dev]"
72
+ python -m ruff check .
73
+ python -m pytest
74
+ python -m build
75
+ ```
76
+
77
+ ## Scope
78
+
79
+ This package contains reusable low-level helpers only. Form-specific filling flows belong in project-local scripts or thin runners, not in the shared library.
@@ -0,0 +1,11 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/pdf_form_tools/__init__.py
5
+ src/pdf_form_tools/pdf_form_overlay.py
6
+ src/pdf_form_tools.egg-info/PKG-INFO
7
+ src/pdf_form_tools.egg-info/SOURCES.txt
8
+ src/pdf_form_tools.egg-info/dependency_links.txt
9
+ src/pdf_form_tools.egg-info/requires.txt
10
+ src/pdf_form_tools.egg-info/top_level.txt
11
+ tests/test_pdf_form_overlay.py
@@ -0,0 +1,13 @@
1
+ numpy
2
+ opencv-python-headless
3
+ pymupdf
4
+ pillow
5
+ pypdf
6
+ reportlab
7
+ python-bidi
8
+
9
+ [dev]
10
+ build>=1.2
11
+ pytest>=8.0
12
+ ruff>=0.6
13
+ twine>=5.1
@@ -0,0 +1 @@
1
+ pdf_form_tools
@@ -0,0 +1,24 @@
1
+ from pathlib import Path
2
+
3
+ import pdf_form_tools.pdf_form_overlay as overlay
4
+ from pdf_form_tools import Rect, centered_address_box
5
+
6
+
7
def test_rect_inset() -> None:
    """A uniform inset moves the origin inwards and shrinks both sides by twice the amount."""
    shrunk = Rect(10, 20, 30, 40).inset(5)
    expected = Rect(15, 25, 20, 30)
    assert shrunk == expected
9
+
10
+
11
def test_centered_address_box() -> None:
    """Without ``right_pad`` the side padding is applied on both sides."""
    outer = Rect(100, 200, 400, 120)
    inner = centered_address_box(outer, top_pad=10, side_pad=20, height=50)
    assert inner == Rect(120, 210, 360, 50)
14
+
15
+
16
def test_load_font_uses_existing_system_font() -> None:
    """Loading the regular font succeeds and the resolved path exists on disk."""
    loaded = overlay.load_font(18, bold=False)
    assert loaded is not None
    resolved = Path(overlay.resolve_font_path())
    assert resolved.exists()
20
+
21
+
22
def test_contains_hebrew_detects_hebrew_characters() -> None:
    """Hebrew text is detected and plain Latin text is not."""
    hebrew_sample = "אמילי"
    latin_sample = "Emily"
    assert overlay.contains_hebrew(hebrew_sample)
    assert not overlay.contains_hebrew(latin_sample)