PyPDFForm 2.4.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyPDFForm might be problematic. Click here for more details.
- PyPDFForm/__init__.py +22 -6
- PyPDFForm/adapter.py +28 -26
- PyPDFForm/constants.py +29 -34
- PyPDFForm/coordinate.py +23 -399
- PyPDFForm/filler.py +79 -303
- PyPDFForm/font.py +166 -164
- PyPDFForm/hooks.py +256 -0
- PyPDFForm/image.py +72 -22
- PyPDFForm/middleware/base.py +54 -48
- PyPDFForm/middleware/checkbox.py +29 -56
- PyPDFForm/middleware/dropdown.py +41 -30
- PyPDFForm/middleware/image.py +10 -22
- PyPDFForm/middleware/radio.py +30 -31
- PyPDFForm/middleware/signature.py +32 -47
- PyPDFForm/middleware/text.py +59 -48
- PyPDFForm/patterns.py +61 -141
- PyPDFForm/template.py +80 -427
- PyPDFForm/utils.py +142 -128
- PyPDFForm/watermark.py +77 -208
- PyPDFForm/widgets/base.py +57 -76
- PyPDFForm/widgets/checkbox.py +18 -21
- PyPDFForm/widgets/dropdown.py +18 -25
- PyPDFForm/widgets/image.py +11 -9
- PyPDFForm/widgets/radio.py +25 -35
- PyPDFForm/widgets/signature.py +29 -40
- PyPDFForm/widgets/text.py +18 -17
- PyPDFForm/wrapper.py +373 -437
- {pypdfform-2.4.0.dist-info → pypdfform-3.0.0.dist-info}/METADATA +6 -7
- pypdfform-3.0.0.dist-info/RECORD +35 -0
- {pypdfform-2.4.0.dist-info → pypdfform-3.0.0.dist-info}/WHEEL +1 -1
- pypdfform-2.4.0.dist-info/RECORD +0 -34
- {pypdfform-2.4.0.dist-info → pypdfform-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {pypdfform-2.4.0.dist-info → pypdfform-3.0.0.dist-info}/top_level.txt +0 -0
PyPDFForm/utils.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
2
|
+
"""
|
|
3
|
+
This module provides a collection of utility functions used throughout the PyPDFForm library.
|
|
4
|
+
|
|
5
|
+
It includes functions for:
|
|
6
|
+
- Converting byte streams to BinaryIO objects.
|
|
7
|
+
- Removing all widgets (form fields) from a PDF.
|
|
8
|
+
- Extracting the content stream of each page in a PDF.
|
|
9
|
+
- Merging two PDFs into one.
|
|
10
|
+
- Finding and traversing patterns within PDF widgets.
|
|
11
|
+
- Extracting widget properties based on defined patterns.
|
|
12
|
+
- Generating unique suffixes for internal use.
|
|
13
|
+
- Enabling Adobe-specific settings in the PDF to ensure proper rendering of form fields.
|
|
11
14
|
"""
|
|
12
15
|
|
|
13
16
|
from collections.abc import Callable
|
|
@@ -18,29 +21,30 @@ from string import ascii_letters, digits, punctuation
|
|
|
18
21
|
from typing import Any, BinaryIO, List, Union
|
|
19
22
|
|
|
20
23
|
from pypdf import PdfReader, PdfWriter
|
|
21
|
-
from pypdf.generic import ArrayObject, DictionaryObject
|
|
22
|
-
|
|
24
|
+
from pypdf.generic import (ArrayObject, BooleanObject, DictionaryObject,
|
|
25
|
+
NameObject)
|
|
23
26
|
|
|
24
|
-
from .constants import (
|
|
25
|
-
|
|
26
|
-
DEFAULT_RADIO_STYLE, PREVIEW_FONT_COLOR,
|
|
27
|
-
UNIQUE_SUFFIX_LENGTH, WIDGET_TYPES)
|
|
28
|
-
from .middleware.checkbox import Checkbox
|
|
29
|
-
from .middleware.radio import Radio
|
|
30
|
-
from .middleware.text import Text
|
|
27
|
+
from .constants import (UNIQUE_SUFFIX_LENGTH, AcroForm, Annots,
|
|
28
|
+
NeedAppearances, Root)
|
|
31
29
|
|
|
32
30
|
|
|
33
31
|
@lru_cache
|
|
34
32
|
def stream_to_io(stream: bytes) -> BinaryIO:
|
|
35
|
-
"""
|
|
33
|
+
"""
|
|
34
|
+
Converts a bytes stream to a BinaryIO object, which can be used by PyPDFForm.
|
|
35
|
+
|
|
36
|
+
This function takes a bytes stream as input and returns a BinaryIO object
|
|
37
|
+
that represents the same data. This is useful because PyPDFForm often
|
|
38
|
+
works with BinaryIO objects, so this function allows you to easily convert
|
|
39
|
+
a bytes stream to the correct format. The result is cached using lru_cache
|
|
40
|
+
for performance.
|
|
36
41
|
|
|
37
42
|
Args:
|
|
38
|
-
stream:
|
|
43
|
+
stream (bytes): The bytes stream to convert.
|
|
39
44
|
|
|
40
45
|
Returns:
|
|
41
|
-
BinaryIO:
|
|
46
|
+
BinaryIO: A BinaryIO object representing the stream.
|
|
42
47
|
"""
|
|
43
|
-
|
|
44
48
|
result = BytesIO()
|
|
45
49
|
result.write(stream)
|
|
46
50
|
result.seek(0)
|
|
@@ -48,101 +52,56 @@ def stream_to_io(stream: bytes) -> BinaryIO:
|
|
|
48
52
|
return result
|
|
49
53
|
|
|
50
54
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Supports:
|
|
55
|
-
- Grayscale (1 component)
|
|
56
|
-
- RGB (3 components)
|
|
57
|
-
- CMYK (4 components)
|
|
58
|
-
|
|
59
|
-
Args:
|
|
60
|
-
color: Color array from PDF specification
|
|
61
|
-
|
|
62
|
-
Returns:
|
|
63
|
-
Union[Color, CMYKColor, None]: Color object or None if invalid format
|
|
64
|
-
"""
|
|
65
|
-
|
|
66
|
-
result = None
|
|
67
|
-
|
|
68
|
-
if len(color) == 1:
|
|
69
|
-
result = CMYKColor(black=1 - color[0])
|
|
70
|
-
elif len(color) == 3:
|
|
71
|
-
result = Color(red=color[0], green=color[1], blue=color[2])
|
|
72
|
-
elif len(color) == 4:
|
|
73
|
-
result = CMYKColor(
|
|
74
|
-
cyan=color[0], magenta=color[1], yellow=color[2], black=color[3]
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
return result
|
|
78
|
-
|
|
55
|
+
@lru_cache
|
|
56
|
+
def enable_adobe_mode(pdf: bytes) -> bytes:
|
|
57
|
+
"""Enables Adobe-specific settings in the PDF to ensure proper rendering of form fields.
|
|
79
58
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
59
|
+
This function modifies the PDF's AcroForm dictionary to include the `NeedAppearances` flag,
|
|
60
|
+
which forces Adobe Reader to generate appearance streams for form fields. This ensures that
|
|
61
|
+
the form fields are rendered correctly in Adobe Reader, especially when the form is filled
|
|
62
|
+
programmatically.
|
|
84
63
|
|
|
85
64
|
Args:
|
|
86
|
-
|
|
87
|
-
font_size: Size for the drawn symbol
|
|
65
|
+
pdf (bytes): The PDF content as bytes.
|
|
88
66
|
|
|
89
67
|
Returns:
|
|
90
|
-
|
|
68
|
+
bytes: The modified PDF content with Adobe mode enabled.
|
|
91
69
|
"""
|
|
70
|
+
reader = PdfReader(stream_to_io(pdf))
|
|
71
|
+
writer = PdfWriter()
|
|
92
72
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
73
|
+
# https://stackoverflow.com/questions/47288578/pdf-form-filled-with-pypdf2-does-not-show-in-print
|
|
74
|
+
if AcroForm in reader.trailer[Root]:
|
|
75
|
+
if NeedAppearances in reader.trailer[Root][AcroForm]:
|
|
76
|
+
return pdf
|
|
77
|
+
else:
|
|
78
|
+
reader.trailer[Root].update({NameObject(AcroForm): DictionaryObject()})
|
|
79
|
+
reader.trailer[Root][AcroForm].update(
|
|
80
|
+
{NameObject(NeedAppearances): BooleanObject(True)}
|
|
96
81
|
)
|
|
97
|
-
|
|
98
|
-
new_widget.font_size = font_size
|
|
99
|
-
new_widget.font_color = DEFAULT_FONT_COLOR
|
|
100
|
-
new_widget.value = BUTTON_STYLES.get(widget.button_style) or (
|
|
101
|
-
DEFAULT_CHECKBOX_STYLE if type(widget) is Checkbox else DEFAULT_RADIO_STYLE
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
return new_widget
|
|
105
|
-
|
|
82
|
+
writer.append(reader)
|
|
106
83
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
)
|
|
110
|
-
|
|
84
|
+
with BytesIO() as f:
|
|
85
|
+
writer.write(f)
|
|
86
|
+
f.seek(0)
|
|
87
|
+
return f.read()
|
|
111
88
|
|
|
112
|
-
Args:
|
|
113
|
-
widget_name: Name of the widget to generate preview for
|
|
114
|
-
widget: Widget to generate preview for
|
|
115
|
-
with_preview_text: Whether to include field name in preview
|
|
116
89
|
|
|
117
|
-
|
|
118
|
-
|
|
90
|
+
@lru_cache
|
|
91
|
+
def remove_all_widgets(pdf: bytes) -> bytes:
|
|
119
92
|
"""
|
|
93
|
+
Removes all widgets (form fields) from a PDF, effectively flattening the form.
|
|
120
94
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
)
|
|
125
|
-
new_widget.font = DEFAULT_FONT
|
|
126
|
-
new_widget.font_size = DEFAULT_FONT_SIZE
|
|
127
|
-
new_widget.font_color = PREVIEW_FONT_COLOR
|
|
128
|
-
new_widget.preview = with_preview_text
|
|
129
|
-
new_widget.border_color = handle_color([0, 0, 0])
|
|
130
|
-
new_widget.border_width = 1
|
|
131
|
-
new_widget.render_widget = True
|
|
132
|
-
|
|
133
|
-
return new_widget
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def remove_all_widgets(pdf: bytes) -> bytes:
|
|
137
|
-
"""Removes all interactive form fields from a PDF document.
|
|
95
|
+
This function takes a PDF as a bytes stream, removes all of its interactive
|
|
96
|
+
form fields (widgets), and returns the modified PDF as a bytes stream. This
|
|
97
|
+
is useful for creating a non-interactive version of a PDF form.
|
|
138
98
|
|
|
139
99
|
Args:
|
|
140
|
-
pdf:
|
|
100
|
+
pdf (bytes): The PDF as a bytes stream.
|
|
141
101
|
|
|
142
102
|
Returns:
|
|
143
|
-
bytes:
|
|
103
|
+
bytes: The PDF with all widgets removed, as a bytes stream.
|
|
144
104
|
"""
|
|
145
|
-
|
|
146
105
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
147
106
|
result_stream = BytesIO()
|
|
148
107
|
writer = PdfWriter()
|
|
@@ -157,15 +116,18 @@ def remove_all_widgets(pdf: bytes) -> bytes:
|
|
|
157
116
|
|
|
158
117
|
|
|
159
118
|
def get_page_streams(pdf: bytes) -> List[bytes]:
|
|
160
|
-
"""
|
|
119
|
+
"""
|
|
120
|
+
Extracts the content stream of each page in a PDF as a list of byte streams.
|
|
121
|
+
|
|
122
|
+
This function takes a PDF as a bytes stream and returns a list of bytes streams,
|
|
123
|
+
where each element in the list represents the content stream of a page in the PDF.
|
|
161
124
|
|
|
162
125
|
Args:
|
|
163
|
-
pdf:
|
|
126
|
+
pdf (bytes): The PDF as a bytes stream.
|
|
164
127
|
|
|
165
128
|
Returns:
|
|
166
|
-
List[bytes]:
|
|
129
|
+
List[bytes]: A list of bytes streams, one for each page.
|
|
167
130
|
"""
|
|
168
|
-
|
|
169
131
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
170
132
|
result = []
|
|
171
133
|
|
|
@@ -181,16 +143,20 @@ def get_page_streams(pdf: bytes) -> List[bytes]:
|
|
|
181
143
|
|
|
182
144
|
|
|
183
145
|
def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
|
|
184
|
-
"""
|
|
146
|
+
"""
|
|
147
|
+
Merges two PDF files into a single PDF file.
|
|
148
|
+
|
|
149
|
+
This function takes two PDF files as byte streams, merges them, and returns the result as a single PDF byte stream.
|
|
150
|
+
It handles the merging of pages from both PDFs and also attempts to preserve form field widgets from both input PDFs
|
|
151
|
+
in the final merged PDF. The form fields are cloned and added to the output pages.
|
|
185
152
|
|
|
186
153
|
Args:
|
|
187
|
-
pdf:
|
|
188
|
-
other:
|
|
154
|
+
pdf (bytes): The first PDF file as a byte stream.
|
|
155
|
+
other (bytes): The second PDF file as a byte stream.
|
|
189
156
|
|
|
190
157
|
Returns:
|
|
191
|
-
bytes:
|
|
158
|
+
bytes: The merged PDF file as a byte stream.
|
|
192
159
|
"""
|
|
193
|
-
|
|
194
160
|
output = PdfWriter()
|
|
195
161
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
196
162
|
other_file = PdfReader(stream_to_io(other))
|
|
@@ -203,20 +169,52 @@ def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
|
|
|
203
169
|
|
|
204
170
|
output.write(result)
|
|
205
171
|
result.seek(0)
|
|
172
|
+
|
|
173
|
+
merged_no_widgets = PdfReader(stream_to_io(remove_all_widgets(result.read())))
|
|
174
|
+
output = PdfWriter()
|
|
175
|
+
output.append(merged_no_widgets)
|
|
176
|
+
|
|
177
|
+
# TODO: refactor duplicate logic with copy_watermark_widgets
|
|
178
|
+
widgets_to_copy = {}
|
|
179
|
+
for i, page in enumerate(pdf_file.pages):
|
|
180
|
+
widgets_to_copy[i] = []
|
|
181
|
+
for annot in page.get(Annots, []):
|
|
182
|
+
widgets_to_copy[i].append(annot.clone(output))
|
|
183
|
+
|
|
184
|
+
for i, page in enumerate(other_file.pages):
|
|
185
|
+
widgets_to_copy[i + len(pdf_file.pages)] = []
|
|
186
|
+
for annot in page.get(Annots, []):
|
|
187
|
+
widgets_to_copy[i + len(pdf_file.pages)].append(annot.clone(output))
|
|
188
|
+
|
|
189
|
+
for i, page in enumerate(output.pages):
|
|
190
|
+
page[NameObject(Annots)] = (
|
|
191
|
+
(page[NameObject(Annots)] + ArrayObject(widgets_to_copy[i]))
|
|
192
|
+
if Annots in page
|
|
193
|
+
else ArrayObject(widgets_to_copy[i])
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
result = BytesIO()
|
|
197
|
+
output.write(result)
|
|
198
|
+
result.seek(0)
|
|
206
199
|
return result.read()
|
|
207
200
|
|
|
208
201
|
|
|
209
202
|
def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) -> bool:
|
|
210
|
-
"""
|
|
203
|
+
"""
|
|
204
|
+
Recursively finds a pattern match within a PDF widget (annotation dictionary).
|
|
205
|
+
|
|
206
|
+
This function searches for a specific pattern within a PDF widget's properties.
|
|
207
|
+
It recursively traverses the widget's dictionary, comparing keys and values
|
|
208
|
+
to the provided pattern.
|
|
211
209
|
|
|
212
210
|
Args:
|
|
213
|
-
pattern:
|
|
214
|
-
widget:
|
|
211
|
+
pattern (dict): The pattern to search for, represented as a dictionary.
|
|
212
|
+
widget (Union[dict, DictionaryObject]): The widget to search within, which
|
|
213
|
+
can be a dictionary or a DictionaryObject.
|
|
215
214
|
|
|
216
215
|
Returns:
|
|
217
|
-
bool: True if
|
|
216
|
+
bool: True if a match is found, False otherwise.
|
|
218
217
|
"""
|
|
219
|
-
|
|
220
218
|
for key, value in widget.items():
|
|
221
219
|
result = False
|
|
222
220
|
if key in pattern:
|
|
@@ -238,16 +236,21 @@ def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) ->
|
|
|
238
236
|
def traverse_pattern(
|
|
239
237
|
pattern: dict, widget: Union[dict, DictionaryObject]
|
|
240
238
|
) -> Union[str, list, None]:
|
|
241
|
-
"""
|
|
239
|
+
"""
|
|
240
|
+
Recursively traverses a pattern within a PDF widget (annotation dictionary) and returns the value.
|
|
241
|
+
|
|
242
|
+
This function searches for a specific pattern within a PDF widget's properties.
|
|
243
|
+
It recursively traverses the widget's dictionary, comparing keys and values
|
|
244
|
+
to the provided pattern and returns the value if the pattern is True.
|
|
242
245
|
|
|
243
246
|
Args:
|
|
244
|
-
pattern:
|
|
245
|
-
widget:
|
|
247
|
+
pattern (dict): The pattern to traverse, represented as a dictionary.
|
|
248
|
+
widget (Union[dict, DictionaryObject]): The widget to traverse within, which
|
|
249
|
+
can be a dictionary or a DictionaryObject.
|
|
246
250
|
|
|
247
251
|
Returns:
|
|
248
|
-
Union[str, list, None]:
|
|
252
|
+
Union[str, list, None]: The value found, or None if not found.
|
|
249
253
|
"""
|
|
250
|
-
|
|
251
254
|
for key, value in widget.items():
|
|
252
255
|
result = None
|
|
253
256
|
if key in pattern:
|
|
@@ -270,18 +273,25 @@ def extract_widget_property(
|
|
|
270
273
|
default_value: Any,
|
|
271
274
|
func_before_return: Union[Callable, None],
|
|
272
275
|
) -> Any:
|
|
273
|
-
"""
|
|
276
|
+
"""
|
|
277
|
+
Extracts a specific property from a PDF widget based on a list of patterns.
|
|
278
|
+
|
|
279
|
+
This function iterates through a list of patterns, attempting to find a match
|
|
280
|
+
within the provided widget. If a match is found, the corresponding value is
|
|
281
|
+
extracted and returned. If no match is found, a default value is returned.
|
|
274
282
|
|
|
275
283
|
Args:
|
|
276
|
-
widget:
|
|
277
|
-
patterns:
|
|
278
|
-
|
|
279
|
-
|
|
284
|
+
widget (Union[dict, DictionaryObject]): The widget to extract the property from.
|
|
285
|
+
patterns (list): A list of patterns to search for. Each pattern should be a
|
|
286
|
+
dictionary representing the structure of the property to extract.
|
|
287
|
+
default_value (Any): The default value to return if no pattern is found.
|
|
288
|
+
func_before_return (Union[Callable, None]): An optional function to call before
|
|
289
|
+
returning the extracted value. This can be used to perform additional
|
|
290
|
+
processing or formatting on the value.
|
|
280
291
|
|
|
281
292
|
Returns:
|
|
282
|
-
Any:
|
|
293
|
+
Any: The extracted property value, or the default value if no pattern is found.
|
|
283
294
|
"""
|
|
284
|
-
|
|
285
295
|
result = default_value
|
|
286
296
|
|
|
287
297
|
for pattern in patterns:
|
|
@@ -294,12 +304,16 @@ def extract_widget_property(
|
|
|
294
304
|
|
|
295
305
|
|
|
296
306
|
def generate_unique_suffix() -> str:
|
|
297
|
-
"""
|
|
307
|
+
"""
|
|
308
|
+
Generates a unique suffix string for internal use, such as to avoid naming conflicts.
|
|
309
|
+
|
|
310
|
+
This function creates a random string of characters with a predefined length
|
|
311
|
+
(UNIQUE_SUFFIX_LENGTH) using a combination of ASCII letters, digits, and
|
|
312
|
+
punctuation characters (excluding hyphens).
|
|
298
313
|
|
|
299
314
|
Returns:
|
|
300
|
-
str:
|
|
315
|
+
str: A unique suffix string.
|
|
301
316
|
"""
|
|
302
|
-
|
|
303
317
|
return "".join(
|
|
304
318
|
[
|
|
305
319
|
choice(ascii_letters + digits + punctuation.replace("-", ""))
|