PyPDFForm 2.0.1__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyPDFForm might be problematic. Click here for more details.
- PyPDFForm/__init__.py +6 -2
- PyPDFForm/adapter.py +37 -3
- PyPDFForm/constants.py +12 -1
- PyPDFForm/coordinate.py +218 -59
- PyPDFForm/filler.py +104 -9
- PyPDFForm/font.py +80 -16
- PyPDFForm/image.py +32 -3
- PyPDFForm/middleware/base.py +57 -8
- PyPDFForm/middleware/checkbox.py +49 -7
- PyPDFForm/middleware/dropdown.py +41 -5
- PyPDFForm/middleware/image.py +26 -2
- PyPDFForm/middleware/radio.py +41 -5
- PyPDFForm/middleware/signature.py +49 -6
- PyPDFForm/middleware/text.py +55 -7
- PyPDFForm/patterns.py +108 -10
- PyPDFForm/template.py +181 -29
- PyPDFForm/utils.py +108 -12
- PyPDFForm/watermark.py +151 -9
- PyPDFForm/widgets/base.py +65 -9
- PyPDFForm/widgets/checkbox.py +22 -2
- PyPDFForm/widgets/dropdown.py +31 -3
- PyPDFForm/widgets/radio.py +78 -0
- PyPDFForm/widgets/text.py +19 -2
- PyPDFForm/wrapper.py +338 -27
- {pypdfform-2.0.1.dist-info → pypdfform-2.1.0.dist-info}/METADATA +1 -1
- pypdfform-2.1.0.dist-info/RECORD +31 -0
- pypdfform-2.0.1.dist-info/RECORD +0 -30
- {pypdfform-2.0.1.dist-info → pypdfform-2.1.0.dist-info}/WHEEL +0 -0
- {pypdfform-2.0.1.dist-info → pypdfform-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {pypdfform-2.0.1.dist-info → pypdfform-2.1.0.dist-info}/top_level.txt +0 -0
PyPDFForm/template.py
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
2
|
+
"""Provides template processing utilities for PDF forms.
|
|
3
|
+
|
|
4
|
+
This module contains functions for:
|
|
5
|
+
- Building and managing PDF form widgets
|
|
6
|
+
- Handling widget properties and attributes
|
|
7
|
+
- Processing text fields and paragraphs
|
|
8
|
+
- Managing widget keys and names
|
|
9
|
+
- Supporting comb fields and multiline text
|
|
10
|
+
"""
|
|
3
11
|
|
|
4
12
|
from functools import lru_cache
|
|
5
13
|
from io import BytesIO
|
|
@@ -33,7 +41,15 @@ from .utils import (extract_widget_property, find_pattern_match, handle_color,
|
|
|
33
41
|
def set_character_x_paddings(
|
|
34
42
|
pdf_stream: bytes, widgets: Dict[str, WIDGET_TYPES]
|
|
35
43
|
) -> Dict[str, WIDGET_TYPES]:
|
|
36
|
-
"""
|
|
44
|
+
"""Calculates and sets character spacing for comb text fields.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
pdf_stream: PDF form as bytes
|
|
48
|
+
widgets: Dictionary of widget middleware objects
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
Dict[str, WIDGET_TYPES]: Updated widgets with character paddings
|
|
52
|
+
"""
|
|
37
53
|
|
|
38
54
|
for _widgets in get_widgets_by_page(pdf_stream).values():
|
|
39
55
|
for widget in _widgets:
|
|
@@ -51,7 +67,16 @@ def build_widgets(
|
|
|
51
67
|
use_full_widget_name: bool,
|
|
52
68
|
render_widgets: bool,
|
|
53
69
|
) -> Dict[str, WIDGET_TYPES]:
|
|
54
|
-
"""
|
|
70
|
+
"""Constructs widget middleware objects from a PDF form.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
pdf_stream: PDF form as bytes
|
|
74
|
+
use_full_widget_name: Whether to include parent widget names
|
|
75
|
+
render_widgets: Whether widgets should be rendered visibly
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
Dict[str, WIDGET_TYPES]: Dictionary mapping field names to widgets
|
|
79
|
+
"""
|
|
55
80
|
|
|
56
81
|
results = {}
|
|
57
82
|
|
|
@@ -113,7 +138,14 @@ def build_widgets(
|
|
|
113
138
|
|
|
114
139
|
|
|
115
140
|
def dropdown_to_text(dropdown: Dropdown) -> Text:
|
|
116
|
-
"""Converts a dropdown widget to a text widget.
|
|
141
|
+
"""Converts a dropdown widget to a text widget while preserving properties.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
dropdown: Dropdown widget to convert
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
Text: New text widget with dropdown's selected value and styling
|
|
148
|
+
"""
|
|
117
149
|
|
|
118
150
|
result = Text(dropdown.name)
|
|
119
151
|
result.border_color = dropdown.border_color
|
|
@@ -135,7 +167,18 @@ def update_text_field_attributes(
|
|
|
135
167
|
template_stream: bytes,
|
|
136
168
|
widgets: Dict[str, WIDGET_TYPES],
|
|
137
169
|
) -> None:
|
|
138
|
-
"""
|
|
170
|
+
"""Updates text field properties based on PDF template settings.
|
|
171
|
+
|
|
172
|
+
Handles:
|
|
173
|
+
- Font detection and sizing
|
|
174
|
+
- Color properties
|
|
175
|
+
- Paragraph wrapping
|
|
176
|
+
- Auto font size adjustment
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
template_stream: PDF form as bytes
|
|
180
|
+
widgets: Dictionary of widget middleware objects
|
|
181
|
+
"""
|
|
139
182
|
|
|
140
183
|
for _widgets in get_widgets_by_page(template_stream).values():
|
|
141
184
|
for _widget in _widgets:
|
|
@@ -172,7 +215,14 @@ def update_text_field_attributes(
|
|
|
172
215
|
|
|
173
216
|
@lru_cache()
|
|
174
217
|
def get_widgets_by_page(pdf: bytes) -> Dict[int, List[dict]]:
|
|
175
|
-
"""
|
|
218
|
+
"""Extracts all form widgets from a PDF grouped by page number.
|
|
219
|
+
|
|
220
|
+
Args:
|
|
221
|
+
pdf: PDF form as bytes
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
Dict[int, List[dict]]: Mapping of page numbers to widget dictionaries
|
|
225
|
+
"""
|
|
176
226
|
|
|
177
227
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
178
228
|
|
|
@@ -197,9 +247,14 @@ def get_widgets_by_page(pdf: bytes) -> Dict[int, List[dict]]:
|
|
|
197
247
|
|
|
198
248
|
|
|
199
249
|
def get_widget_full_key(widget: dict) -> Union[str, None]:
|
|
200
|
-
"""
|
|
201
|
-
|
|
202
|
-
|
|
250
|
+
"""Constructs a widget's full hierarchical key including parent names.
|
|
251
|
+
|
|
252
|
+
Args:
|
|
253
|
+
widget: PDF widget dictionary
|
|
254
|
+
|
|
255
|
+
Returns:
|
|
256
|
+
Union[str, None]: Full key in format "parent.child" if parent exists,
|
|
257
|
+
otherwise None
|
|
203
258
|
"""
|
|
204
259
|
|
|
205
260
|
key = extract_widget_property(widget, WIDGET_KEY_PATTERNS, None, str)
|
|
@@ -215,7 +270,16 @@ def get_widget_full_key(widget: dict) -> Union[str, None]:
|
|
|
215
270
|
|
|
216
271
|
|
|
217
272
|
def construct_widget(widget: dict, key: str) -> Union[WIDGET_TYPES, None]:
|
|
218
|
-
"""
|
|
273
|
+
"""Creates appropriate widget middleware based on PDF widget type.
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
widget: PDF widget dictionary
|
|
277
|
+
key: Field name/key for the widget
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
Union[WIDGET_TYPES, None]: Appropriate widget middleware instance
|
|
281
|
+
or None if type not recognized
|
|
282
|
+
"""
|
|
219
283
|
|
|
220
284
|
result = None
|
|
221
285
|
for each in WIDGET_TYPE_PATTERNS:
|
|
@@ -230,13 +294,28 @@ def construct_widget(widget: dict, key: str) -> Union[WIDGET_TYPES, None]:
|
|
|
230
294
|
|
|
231
295
|
|
|
232
296
|
def get_text_field_max_length(widget: dict) -> Union[int, None]:
|
|
233
|
-
"""
|
|
297
|
+
"""Extracts max length constraint from a text field widget.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
widget: PDF widget dictionary
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
Union[int, None]: Max character length if specified, otherwise None
|
|
304
|
+
"""
|
|
234
305
|
|
|
235
306
|
return int(widget[MaxLen]) or None if MaxLen in widget else None
|
|
236
307
|
|
|
237
308
|
|
|
238
309
|
def check_field_flag_bit(widget: dict, bit: int) -> bool:
|
|
239
|
-
"""
|
|
310
|
+
"""Tests whether a specific flag bit is set in a widget's field flags.
|
|
311
|
+
|
|
312
|
+
Args:
|
|
313
|
+
widget: PDF widget dictionary
|
|
314
|
+
bit: Flag bit to check (e.g. COMB, MULTILINE)
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
bool: True if the bit is set, False otherwise
|
|
318
|
+
"""
|
|
240
319
|
|
|
241
320
|
field_flag = extract_widget_property(widget, TEXT_FIELD_FLAG_PATTERNS, None, int)
|
|
242
321
|
|
|
@@ -247,19 +326,41 @@ def check_field_flag_bit(widget: dict, bit: int) -> bool:
|
|
|
247
326
|
|
|
248
327
|
|
|
249
328
|
def is_text_field_comb(widget: dict) -> bool:
|
|
250
|
-
"""
|
|
329
|
+
"""Determines if a text field uses comb formatting (fixed character spacing).
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
widget: PDF widget dictionary
|
|
333
|
+
|
|
334
|
+
Returns:
|
|
335
|
+
bool: True if field uses comb formatting
|
|
336
|
+
"""
|
|
251
337
|
|
|
252
338
|
return check_field_flag_bit(widget, COMB)
|
|
253
339
|
|
|
254
340
|
|
|
255
341
|
def is_text_multiline(widget: dict) -> bool:
|
|
256
|
-
"""
|
|
342
|
+
"""Determines if a text field supports multiple lines/paragraphs.
|
|
343
|
+
|
|
344
|
+
Args:
|
|
345
|
+
widget: PDF widget dictionary
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
bool: True if field supports multiline text
|
|
349
|
+
"""
|
|
257
350
|
|
|
258
351
|
return check_field_flag_bit(widget, MULTILINE)
|
|
259
352
|
|
|
260
353
|
|
|
261
354
|
def get_dropdown_choices(widget: dict) -> Union[Tuple[str, ...], None]:
|
|
262
|
-
"""
|
|
355
|
+
"""Extracts available choices from a dropdown widget.
|
|
356
|
+
|
|
357
|
+
Args:
|
|
358
|
+
widget: PDF widget dictionary
|
|
359
|
+
|
|
360
|
+
Returns:
|
|
361
|
+
Union[Tuple[str, ...], None]: Tuple of choice strings if found,
|
|
362
|
+
otherwise None
|
|
363
|
+
"""
|
|
263
364
|
|
|
264
365
|
return tuple(
|
|
265
366
|
(each if isinstance(each, str) else str(each[1]))
|
|
@@ -270,14 +371,30 @@ def get_dropdown_choices(widget: dict) -> Union[Tuple[str, ...], None]:
|
|
|
270
371
|
|
|
271
372
|
|
|
272
373
|
def get_char_rect_width(widget: dict, widget_middleware: Text) -> float:
|
|
273
|
-
"""
|
|
374
|
+
"""Calculates per-character width for comb text fields.
|
|
375
|
+
|
|
376
|
+
Args:
|
|
377
|
+
widget: PDF widget dictionary
|
|
378
|
+
widget_middleware: Text middleware instance
|
|
379
|
+
|
|
380
|
+
Returns:
|
|
381
|
+
float: Width in points allocated per character
|
|
382
|
+
"""
|
|
274
383
|
|
|
275
384
|
rect_width = abs(float(widget[Rect][0]) - float(widget[Rect][2]))
|
|
276
385
|
return rect_width / widget_middleware.max_length
|
|
277
386
|
|
|
278
387
|
|
|
279
388
|
def get_character_x_paddings(widget: dict, widget_middleware: Text) -> List[float]:
|
|
280
|
-
"""
|
|
389
|
+
"""Calculates horizontal positioning for each character in comb fields.
|
|
390
|
+
|
|
391
|
+
Args:
|
|
392
|
+
widget: PDF widget dictionary
|
|
393
|
+
widget_middleware: Text middleware instance
|
|
394
|
+
|
|
395
|
+
Returns:
|
|
396
|
+
List[float]: X-offsets for centering each character in its comb cell
|
|
397
|
+
"""
|
|
281
398
|
|
|
282
399
|
length = min(len(widget_middleware.value or ""), widget_middleware.max_length)
|
|
283
400
|
char_rect_width = get_char_rect_width(widget, widget_middleware)
|
|
@@ -299,10 +416,15 @@ def get_character_x_paddings(widget: dict, widget_middleware: Text) -> List[floa
|
|
|
299
416
|
def split_characters_into_lines(
|
|
300
417
|
split_by_new_line_symbol: List[str], middleware: Text, width: float
|
|
301
418
|
) -> List[str]:
|
|
302
|
-
"""
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
419
|
+
"""Splits text into lines that fit within a paragraph field's width.
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
split_by_new_line_symbol: Text already split by newlines
|
|
423
|
+
middleware: Text middleware with font properties
|
|
424
|
+
width: Available width in points
|
|
425
|
+
|
|
426
|
+
Returns:
|
|
427
|
+
List[str]: Lines of text fitting within the specified width
|
|
306
428
|
"""
|
|
307
429
|
|
|
308
430
|
lines = []
|
|
@@ -329,11 +451,15 @@ def split_characters_into_lines(
|
|
|
329
451
|
|
|
330
452
|
|
|
331
453
|
def adjust_each_line(lines: List[str], middleware: Text, width: float) -> List[str]:
|
|
332
|
-
"""
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
454
|
+
"""Optimizes line breaks to minimize empty space in paragraph fields.
|
|
455
|
+
|
|
456
|
+
Args:
|
|
457
|
+
lines: Text lines from split_characters_into_lines()
|
|
458
|
+
middleware: Text middleware with font properties
|
|
459
|
+
width: Available width in points
|
|
460
|
+
|
|
461
|
+
Returns:
|
|
462
|
+
List[str]: Optimized lines with balanced word wrapping
|
|
337
463
|
"""
|
|
338
464
|
|
|
339
465
|
result = []
|
|
@@ -373,7 +499,15 @@ def adjust_each_line(lines: List[str], middleware: Text, width: float) -> List[s
|
|
|
373
499
|
|
|
374
500
|
|
|
375
501
|
def get_paragraph_lines(widget: dict, widget_middleware: Text) -> List[str]:
|
|
376
|
-
"""
|
|
502
|
+
"""Generates properly wrapped lines for a paragraph text field.
|
|
503
|
+
|
|
504
|
+
Args:
|
|
505
|
+
widget: PDF widget dictionary
|
|
506
|
+
widget_middleware: Text middleware instance
|
|
507
|
+
|
|
508
|
+
Returns:
|
|
509
|
+
List[str]: Wrapped lines fitting the paragraph field dimensions
|
|
510
|
+
"""
|
|
377
511
|
|
|
378
512
|
value = widget_middleware.value or ""
|
|
379
513
|
if widget_middleware.max_length is not None:
|
|
@@ -390,7 +524,14 @@ def get_paragraph_lines(widget: dict, widget_middleware: Text) -> List[str]:
|
|
|
390
524
|
|
|
391
525
|
|
|
392
526
|
def get_paragraph_auto_wrap_length(widget_middleware: Text) -> int:
|
|
393
|
-
"""
|
|
527
|
+
"""Determines optimal line length for paragraph text wrapping.
|
|
528
|
+
|
|
529
|
+
Args:
|
|
530
|
+
widget_middleware: Text middleware instance
|
|
531
|
+
|
|
532
|
+
Returns:
|
|
533
|
+
int: Suggested maximum characters per line
|
|
534
|
+
"""
|
|
394
535
|
|
|
395
536
|
result = maxsize
|
|
396
537
|
for line in widget_middleware.text_lines:
|
|
@@ -406,7 +547,18 @@ def update_widget_keys(
|
|
|
406
547
|
new_keys: List[str],
|
|
407
548
|
indices: List[int],
|
|
408
549
|
) -> bytes:
|
|
409
|
-
"""
|
|
550
|
+
"""Renames widget fields in a PDF template.
|
|
551
|
+
|
|
552
|
+
Args:
|
|
553
|
+
template: PDF form as bytes
|
|
554
|
+
widgets: Dictionary of widget middleware
|
|
555
|
+
old_keys: List of current field names
|
|
556
|
+
new_keys: List of new field names
|
|
557
|
+
indices: List of indices for handling radio button groups
|
|
558
|
+
|
|
559
|
+
Returns:
|
|
560
|
+
bytes: Modified PDF with updated field names
|
|
561
|
+
"""
|
|
410
562
|
# pylint: disable=R0801
|
|
411
563
|
|
|
412
564
|
pdf = PdfReader(stream_to_io(template))
|
PyPDFForm/utils.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
2
|
+
"""Provides core utility functions for PDF form processing.
|
|
3
|
+
|
|
4
|
+
This module contains general-purpose utilities used throughout PyPDFForm:
|
|
5
|
+
- Stream/file handling conversions
|
|
6
|
+
- Color space transformations
|
|
7
|
+
- Widget preview generation
|
|
8
|
+
- PDF merging and splitting
|
|
9
|
+
- Pattern matching for PDF structures
|
|
10
|
+
- Unique ID generation
|
|
11
|
+
"""
|
|
3
12
|
|
|
4
13
|
from collections.abc import Callable
|
|
5
14
|
from io import BytesIO
|
|
@@ -21,7 +30,14 @@ from .middleware.text import Text
|
|
|
21
30
|
|
|
22
31
|
|
|
23
32
|
def stream_to_io(stream: bytes) -> BinaryIO:
|
|
24
|
-
"""Converts a byte stream to a binary
|
|
33
|
+
"""Converts a byte stream to a seekable binary IO object.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
stream: Input byte stream to convert
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
BinaryIO: Seekable file-like object containing the stream data
|
|
40
|
+
"""
|
|
25
41
|
|
|
26
42
|
result = BytesIO()
|
|
27
43
|
result.write(stream)
|
|
@@ -31,7 +47,19 @@ def stream_to_io(stream: bytes) -> BinaryIO:
|
|
|
31
47
|
|
|
32
48
|
|
|
33
49
|
def handle_color(color: Union[list, ArrayObject]) -> Union[Color, CMYKColor, None]:
|
|
34
|
-
"""Converts
|
|
50
|
+
"""Converts PDF color specifications to reportlab color objects.
|
|
51
|
+
|
|
52
|
+
Supports:
|
|
53
|
+
- Grayscale (1 component)
|
|
54
|
+
- RGB (3 components)
|
|
55
|
+
- CMYK (4 components)
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
color: Color array from PDF specification
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
Union[Color, CMYKColor, None]: Color object or None if invalid format
|
|
62
|
+
"""
|
|
35
63
|
|
|
36
64
|
result = None
|
|
37
65
|
|
|
@@ -50,7 +78,15 @@ def handle_color(color: Union[list, ArrayObject]) -> Union[Color, CMYKColor, Non
|
|
|
50
78
|
def checkbox_radio_to_draw(
|
|
51
79
|
widget: Union[Checkbox, Radio], font_size: Union[float, int]
|
|
52
80
|
) -> Text:
|
|
53
|
-
"""Converts
|
|
81
|
+
"""Converts checkbox/radio widgets to text symbols for drawing.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
widget: Checkbox or Radio widget to convert
|
|
85
|
+
font_size: Size for the drawn symbol
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
Text: Text widget configured to draw the appropriate symbol
|
|
89
|
+
"""
|
|
54
90
|
|
|
55
91
|
new_widget = Text(
|
|
56
92
|
name=widget.name,
|
|
@@ -67,7 +103,15 @@ def checkbox_radio_to_draw(
|
|
|
67
103
|
|
|
68
104
|
|
|
69
105
|
def preview_widget_to_draw(widget: WIDGET_TYPES, with_preview_text: bool) -> Text:
|
|
70
|
-
"""
|
|
106
|
+
"""Creates preview version of a widget showing field name/location.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
widget: Widget to generate preview for
|
|
110
|
+
with_preview_text: Whether to include field name in preview
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
Text: Text widget configured for preview display
|
|
114
|
+
"""
|
|
71
115
|
|
|
72
116
|
new_widget = Text(
|
|
73
117
|
name=widget.name,
|
|
@@ -85,7 +129,14 @@ def preview_widget_to_draw(widget: WIDGET_TYPES, with_preview_text: bool) -> Tex
|
|
|
85
129
|
|
|
86
130
|
|
|
87
131
|
def remove_all_widgets(pdf: bytes) -> bytes:
|
|
88
|
-
"""Removes all
|
|
132
|
+
"""Removes all interactive form fields from a PDF document.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
pdf: Input PDF as bytes
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
bytes: Flattened PDF with form fields removed
|
|
139
|
+
"""
|
|
89
140
|
|
|
90
141
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
91
142
|
result_stream = BytesIO()
|
|
@@ -101,7 +152,14 @@ def remove_all_widgets(pdf: bytes) -> bytes:
|
|
|
101
152
|
|
|
102
153
|
|
|
103
154
|
def get_page_streams(pdf: bytes) -> List[bytes]:
|
|
104
|
-
"""
|
|
155
|
+
"""Splits a PDF into individual page streams.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
pdf: Input PDF as bytes
|
|
159
|
+
|
|
160
|
+
Returns:
|
|
161
|
+
List[bytes]: List where each element contains a single PDF page
|
|
162
|
+
"""
|
|
105
163
|
|
|
106
164
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
107
165
|
result = []
|
|
@@ -118,7 +176,15 @@ def get_page_streams(pdf: bytes) -> List[bytes]:
|
|
|
118
176
|
|
|
119
177
|
|
|
120
178
|
def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
|
|
121
|
-
"""
|
|
179
|
+
"""Combines two PDF documents into a single multipage PDF.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
pdf: First PDF as bytes
|
|
183
|
+
other: Second PDF as bytes
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
bytes: Combined PDF containing all pages from both inputs
|
|
187
|
+
"""
|
|
122
188
|
|
|
123
189
|
output = PdfWriter()
|
|
124
190
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
@@ -136,7 +202,15 @@ def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
|
|
|
136
202
|
|
|
137
203
|
|
|
138
204
|
def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) -> bool:
|
|
139
|
-
"""
|
|
205
|
+
"""Tests whether a widget matches the specified PDF attribute pattern.
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
pattern: Dictionary of PDF attributes and expected values
|
|
209
|
+
widget: PDF widget to test against the pattern
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
bool: True if widget matches all pattern criteria
|
|
213
|
+
"""
|
|
140
214
|
|
|
141
215
|
for key, value in widget.items():
|
|
142
216
|
result = False
|
|
@@ -159,7 +233,15 @@ def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) ->
|
|
|
159
233
|
def traverse_pattern(
|
|
160
234
|
pattern: dict, widget: Union[dict, DictionaryObject]
|
|
161
235
|
) -> Union[str, list, None]:
|
|
162
|
-
"""
|
|
236
|
+
"""Recursively searches a widget for a matching pattern and returns its value.
|
|
237
|
+
|
|
238
|
+
Args:
|
|
239
|
+
pattern: Dictionary of PDF attributes specifying the search path
|
|
240
|
+
widget: PDF widget to search through
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
Union[str, list, None]: Found value or None if not matched
|
|
244
|
+
"""
|
|
163
245
|
|
|
164
246
|
for key, value in widget.items():
|
|
165
247
|
result = None
|
|
@@ -183,7 +265,17 @@ def extract_widget_property(
|
|
|
183
265
|
default_value: Any,
|
|
184
266
|
func_before_return: Union[Callable, None],
|
|
185
267
|
) -> Any:
|
|
186
|
-
"""
|
|
268
|
+
"""Extracts a widget property using pattern matching with fallback.
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
widget: PDF widget dictionary to examine
|
|
272
|
+
patterns: List of patterns to try in order
|
|
273
|
+
default_value: Value to return if no patterns match
|
|
274
|
+
func_before_return: Optional function to transform the extracted value
|
|
275
|
+
|
|
276
|
+
Returns:
|
|
277
|
+
Any: Extracted property value or default_value
|
|
278
|
+
"""
|
|
187
279
|
|
|
188
280
|
result = default_value
|
|
189
281
|
|
|
@@ -197,7 +289,11 @@ def extract_widget_property(
|
|
|
197
289
|
|
|
198
290
|
|
|
199
291
|
def generate_unique_suffix() -> str:
|
|
200
|
-
"""Generates a
|
|
292
|
+
"""Generates a random string for disambiguating field names during merging.
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
str: Random string containing letters, digits and symbols
|
|
296
|
+
"""
|
|
201
297
|
|
|
202
298
|
return "".join(
|
|
203
299
|
[
|