PyPDFForm 2.5.0__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyPDFForm might be problematic. Click here for more details.
- PyPDFForm/__init__.py +22 -6
- PyPDFForm/adapter.py +28 -26
- PyPDFForm/constants.py +30 -35
- PyPDFForm/coordinate.py +23 -399
- PyPDFForm/filler.py +79 -303
- PyPDFForm/font.py +166 -164
- PyPDFForm/hooks.py +109 -69
- PyPDFForm/image.py +72 -22
- PyPDFForm/middleware/base.py +42 -60
- PyPDFForm/middleware/checkbox.py +27 -58
- PyPDFForm/middleware/dropdown.py +41 -30
- PyPDFForm/middleware/image.py +10 -22
- PyPDFForm/middleware/radio.py +30 -31
- PyPDFForm/middleware/signature.py +32 -47
- PyPDFForm/middleware/text.py +54 -48
- PyPDFForm/patterns.py +61 -106
- PyPDFForm/template.py +80 -427
- PyPDFForm/utils.py +136 -128
- PyPDFForm/watermark.py +77 -208
- PyPDFForm/widgets/base.py +57 -76
- PyPDFForm/widgets/checkbox.py +18 -21
- PyPDFForm/widgets/dropdown.py +18 -25
- PyPDFForm/widgets/image.py +11 -9
- PyPDFForm/widgets/radio.py +25 -35
- PyPDFForm/widgets/signature.py +29 -40
- PyPDFForm/widgets/text.py +18 -17
- PyPDFForm/wrapper.py +351 -443
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/METADATA +6 -7
- pypdfform-3.0.1.dist-info/RECORD +35 -0
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/WHEEL +1 -1
- pypdfform-2.5.0.dist-info/RECORD +0 -35
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/licenses/LICENSE +0 -0
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/top_level.txt +0 -0
PyPDFForm/utils.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
-
|
|
7
|
-
-
|
|
8
|
-
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
2
|
+
"""
|
|
3
|
+
This module provides a collection of utility functions used throughout the PyPDFForm library.
|
|
4
|
+
|
|
5
|
+
It includes functions for:
|
|
6
|
+
- Converting byte streams to BinaryIO objects.
|
|
7
|
+
- Removing all widgets (form fields) from a PDF.
|
|
8
|
+
- Extracting the content stream of each page in a PDF.
|
|
9
|
+
- Merging two PDFs into one.
|
|
10
|
+
- Finding and traversing patterns within PDF widgets.
|
|
11
|
+
- Extracting widget properties based on defined patterns.
|
|
12
|
+
- Generating unique suffixes for internal use.
|
|
13
|
+
- Enabling Adobe-specific settings in the PDF to ensure proper rendering of form fields.
|
|
11
14
|
"""
|
|
12
15
|
|
|
13
16
|
from collections.abc import Callable
|
|
@@ -18,29 +21,28 @@ from string import ascii_letters, digits, punctuation
|
|
|
18
21
|
from typing import Any, BinaryIO, List, Union
|
|
19
22
|
|
|
20
23
|
from pypdf import PdfReader, PdfWriter
|
|
21
|
-
from pypdf.generic import ArrayObject, DictionaryObject
|
|
22
|
-
from reportlab.lib.colors import CMYKColor, Color
|
|
24
|
+
from pypdf.generic import ArrayObject, DictionaryObject, NameObject
|
|
23
25
|
|
|
24
|
-
from .constants import (BUTTON_STYLES, DEFAULT_CHECKBOX_STYLE, DEFAULT_FONT,
|
|
25
|
-
DEFAULT_FONT_COLOR, DEFAULT_FONT_SIZE,
|
|
26
|
-
DEFAULT_RADIO_STYLE, PREVIEW_FONT_COLOR,
|
|
27
|
-
UNIQUE_SUFFIX_LENGTH, WIDGET_TYPES)
|
|
28
|
-
from .middleware.checkbox import Checkbox
|
|
29
|
-
from .middleware.radio import Radio
|
|
30
|
-
from .middleware.text import Text
|
|
26
|
+
from .constants import UNIQUE_SUFFIX_LENGTH, XFA, AcroForm, Annots, Root
|
|
31
27
|
|
|
32
28
|
|
|
33
29
|
@lru_cache
|
|
34
30
|
def stream_to_io(stream: bytes) -> BinaryIO:
|
|
35
|
-
"""
|
|
31
|
+
"""
|
|
32
|
+
Converts a bytes stream to a BinaryIO object, which can be used by PyPDFForm.
|
|
33
|
+
|
|
34
|
+
This function takes a bytes stream as input and returns a BinaryIO object
|
|
35
|
+
that represents the same data. This is useful because PyPDFForm often
|
|
36
|
+
works with BinaryIO objects, so this function allows you to easily convert
|
|
37
|
+
a bytes stream to the correct format. The result is cached using lru_cache
|
|
38
|
+
for performance.
|
|
36
39
|
|
|
37
40
|
Args:
|
|
38
|
-
stream:
|
|
41
|
+
stream (bytes): The bytes stream to convert.
|
|
39
42
|
|
|
40
43
|
Returns:
|
|
41
|
-
BinaryIO:
|
|
44
|
+
BinaryIO: A BinaryIO object representing the stream.
|
|
42
45
|
"""
|
|
43
|
-
|
|
44
46
|
result = BytesIO()
|
|
45
47
|
result.write(stream)
|
|
46
48
|
result.seek(0)
|
|
@@ -48,101 +50,52 @@ def stream_to_io(stream: bytes) -> BinaryIO:
|
|
|
48
50
|
return result
|
|
49
51
|
|
|
50
52
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Supports:
|
|
55
|
-
- Grayscale (1 component)
|
|
56
|
-
- RGB (3 components)
|
|
57
|
-
- CMYK (4 components)
|
|
58
|
-
|
|
59
|
-
Args:
|
|
60
|
-
color: Color array from PDF specification
|
|
61
|
-
|
|
62
|
-
Returns:
|
|
63
|
-
Union[Color, CMYKColor, None]: Color object or None if invalid format
|
|
64
|
-
"""
|
|
65
|
-
|
|
66
|
-
result = None
|
|
67
|
-
|
|
68
|
-
if len(color) == 1:
|
|
69
|
-
result = CMYKColor(black=1 - color[0])
|
|
70
|
-
elif len(color) == 3:
|
|
71
|
-
result = Color(red=color[0], green=color[1], blue=color[2])
|
|
72
|
-
elif len(color) == 4:
|
|
73
|
-
result = CMYKColor(
|
|
74
|
-
cyan=color[0], magenta=color[1], yellow=color[2], black=color[3]
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
return result
|
|
78
|
-
|
|
53
|
+
@lru_cache
|
|
54
|
+
def enable_adobe_mode(pdf: bytes) -> bytes:
|
|
55
|
+
"""Enables Adobe-specific settings in the PDF to ensure proper rendering of form fields.
|
|
79
56
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
)
|
|
83
|
-
|
|
57
|
+
This function modifies the PDF's AcroForm dictionary to include the `NeedAppearances` flag,
|
|
58
|
+
which forces Adobe Reader to generate appearance streams for form fields. It also handles
|
|
59
|
+
XFA (XML Forms Architecture) forms by removing the XFA entry from the AcroForm dictionary
|
|
60
|
+
if it exists, ensuring compatibility and proper rendering. This ensures that the form fields
|
|
61
|
+
are rendered correctly in Adobe Reader, especially when the form is filled programmatically.
|
|
84
62
|
|
|
85
63
|
Args:
|
|
86
|
-
|
|
87
|
-
font_size: Size for the drawn symbol
|
|
64
|
+
pdf (bytes): The PDF content as bytes.
|
|
88
65
|
|
|
89
66
|
Returns:
|
|
90
|
-
|
|
67
|
+
bytes: The modified PDF content with Adobe mode enabled.
|
|
91
68
|
"""
|
|
69
|
+
reader = PdfReader(stream_to_io(pdf))
|
|
70
|
+
writer = PdfWriter()
|
|
92
71
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
value="",
|
|
96
|
-
)
|
|
97
|
-
new_widget.font = DEFAULT_FONT
|
|
98
|
-
new_widget.font_size = font_size
|
|
99
|
-
new_widget.font_color = DEFAULT_FONT_COLOR
|
|
100
|
-
new_widget.value = BUTTON_STYLES.get(widget.button_style) or (
|
|
101
|
-
DEFAULT_CHECKBOX_STYLE if type(widget) is Checkbox else DEFAULT_RADIO_STYLE
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
return new_widget
|
|
72
|
+
if AcroForm in reader.trailer[Root] and XFA in reader.trailer[Root][AcroForm]:
|
|
73
|
+
del reader.trailer[Root][AcroForm][XFA]
|
|
105
74
|
|
|
75
|
+
writer.append(reader)
|
|
76
|
+
writer.set_need_appearances_writer()
|
|
106
77
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
)
|
|
110
|
-
|
|
78
|
+
with BytesIO() as f:
|
|
79
|
+
writer.write(f)
|
|
80
|
+
f.seek(0)
|
|
81
|
+
return f.read()
|
|
111
82
|
|
|
112
|
-
Args:
|
|
113
|
-
widget_name: Name of the widget to generate preview for
|
|
114
|
-
widget: Widget to generate preview for
|
|
115
|
-
with_preview_text: Whether to include field name in preview
|
|
116
83
|
|
|
117
|
-
|
|
118
|
-
|
|
84
|
+
@lru_cache
|
|
85
|
+
def remove_all_widgets(pdf: bytes) -> bytes:
|
|
119
86
|
"""
|
|
87
|
+
Removes all widgets (form fields) from a PDF, effectively flattening the form.
|
|
120
88
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
)
|
|
125
|
-
new_widget.font = DEFAULT_FONT
|
|
126
|
-
new_widget.font_size = DEFAULT_FONT_SIZE
|
|
127
|
-
new_widget.font_color = PREVIEW_FONT_COLOR
|
|
128
|
-
new_widget.preview = with_preview_text
|
|
129
|
-
new_widget.border_color = handle_color([0, 0, 0])
|
|
130
|
-
new_widget.border_width = 1
|
|
131
|
-
new_widget.render_widget = True
|
|
132
|
-
|
|
133
|
-
return new_widget
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def remove_all_widgets(pdf: bytes) -> bytes:
|
|
137
|
-
"""Removes all interactive form fields from a PDF document.
|
|
89
|
+
This function takes a PDF as a bytes stream, removes all of its interactive
|
|
90
|
+
form fields (widgets), and returns the modified PDF as a bytes stream. This
|
|
91
|
+
is useful for creating a non-interactive version of a PDF form.
|
|
138
92
|
|
|
139
93
|
Args:
|
|
140
|
-
pdf:
|
|
94
|
+
pdf (bytes): The PDF as a bytes stream.
|
|
141
95
|
|
|
142
96
|
Returns:
|
|
143
|
-
bytes:
|
|
97
|
+
bytes: The PDF with all widgets removed, as a bytes stream.
|
|
144
98
|
"""
|
|
145
|
-
|
|
146
99
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
147
100
|
result_stream = BytesIO()
|
|
148
101
|
writer = PdfWriter()
|
|
@@ -157,15 +110,18 @@ def remove_all_widgets(pdf: bytes) -> bytes:
|
|
|
157
110
|
|
|
158
111
|
|
|
159
112
|
def get_page_streams(pdf: bytes) -> List[bytes]:
|
|
160
|
-
"""
|
|
113
|
+
"""
|
|
114
|
+
Extracts the content stream of each page in a PDF as a list of byte streams.
|
|
115
|
+
|
|
116
|
+
This function takes a PDF as a bytes stream and returns a list of bytes streams,
|
|
117
|
+
where each element in the list represents the content stream of a page in the PDF.
|
|
161
118
|
|
|
162
119
|
Args:
|
|
163
|
-
pdf:
|
|
120
|
+
pdf (bytes): The PDF as a bytes stream.
|
|
164
121
|
|
|
165
122
|
Returns:
|
|
166
|
-
List[bytes]:
|
|
123
|
+
List[bytes]: A list of bytes streams, one for each page.
|
|
167
124
|
"""
|
|
168
|
-
|
|
169
125
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
170
126
|
result = []
|
|
171
127
|
|
|
@@ -181,16 +137,20 @@ def get_page_streams(pdf: bytes) -> List[bytes]:
|
|
|
181
137
|
|
|
182
138
|
|
|
183
139
|
def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
|
|
184
|
-
"""
|
|
140
|
+
"""
|
|
141
|
+
Merges two PDF files into a single PDF file.
|
|
142
|
+
|
|
143
|
+
This function takes two PDF files as byte streams, merges them, and returns the result as a single PDF byte stream.
|
|
144
|
+
It handles the merging of pages from both PDFs and also attempts to preserve form field widgets from both input PDFs
|
|
145
|
+
in the final merged PDF. The form fields are cloned and added to the output pages.
|
|
185
146
|
|
|
186
147
|
Args:
|
|
187
|
-
pdf:
|
|
188
|
-
other:
|
|
148
|
+
pdf (bytes): The first PDF file as a byte stream.
|
|
149
|
+
other (bytes): The second PDF file as a byte stream.
|
|
189
150
|
|
|
190
151
|
Returns:
|
|
191
|
-
bytes:
|
|
152
|
+
bytes: The merged PDF file as a byte stream.
|
|
192
153
|
"""
|
|
193
|
-
|
|
194
154
|
output = PdfWriter()
|
|
195
155
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
196
156
|
other_file = PdfReader(stream_to_io(other))
|
|
@@ -203,20 +163,52 @@ def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
|
|
|
203
163
|
|
|
204
164
|
output.write(result)
|
|
205
165
|
result.seek(0)
|
|
166
|
+
|
|
167
|
+
merged_no_widgets = PdfReader(stream_to_io(remove_all_widgets(result.read())))
|
|
168
|
+
output = PdfWriter()
|
|
169
|
+
output.append(merged_no_widgets)
|
|
170
|
+
|
|
171
|
+
# TODO: refactor duplicate logic with copy_watermark_widgets
|
|
172
|
+
widgets_to_copy = {}
|
|
173
|
+
for i, page in enumerate(pdf_file.pages):
|
|
174
|
+
widgets_to_copy[i] = []
|
|
175
|
+
for annot in page.get(Annots, []):
|
|
176
|
+
widgets_to_copy[i].append(annot.clone(output))
|
|
177
|
+
|
|
178
|
+
for i, page in enumerate(other_file.pages):
|
|
179
|
+
widgets_to_copy[i + len(pdf_file.pages)] = []
|
|
180
|
+
for annot in page.get(Annots, []):
|
|
181
|
+
widgets_to_copy[i + len(pdf_file.pages)].append(annot.clone(output))
|
|
182
|
+
|
|
183
|
+
for i, page in enumerate(output.pages):
|
|
184
|
+
page[NameObject(Annots)] = (
|
|
185
|
+
(page[NameObject(Annots)] + ArrayObject(widgets_to_copy[i]))
|
|
186
|
+
if Annots in page
|
|
187
|
+
else ArrayObject(widgets_to_copy[i])
|
|
188
|
+
)
|
|
189
|
+
|
|
190
|
+
result = BytesIO()
|
|
191
|
+
output.write(result)
|
|
192
|
+
result.seek(0)
|
|
206
193
|
return result.read()
|
|
207
194
|
|
|
208
195
|
|
|
209
196
|
def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) -> bool:
|
|
210
|
-
"""
|
|
197
|
+
"""
|
|
198
|
+
Recursively finds a pattern match within a PDF widget (annotation dictionary).
|
|
199
|
+
|
|
200
|
+
This function searches for a specific pattern within a PDF widget's properties.
|
|
201
|
+
It recursively traverses the widget's dictionary, comparing keys and values
|
|
202
|
+
to the provided pattern.
|
|
211
203
|
|
|
212
204
|
Args:
|
|
213
|
-
pattern:
|
|
214
|
-
widget:
|
|
205
|
+
pattern (dict): The pattern to search for, represented as a dictionary.
|
|
206
|
+
widget (Union[dict, DictionaryObject]): The widget to search within, which
|
|
207
|
+
can be a dictionary or a DictionaryObject.
|
|
215
208
|
|
|
216
209
|
Returns:
|
|
217
|
-
bool: True if
|
|
210
|
+
bool: True if a match is found, False otherwise.
|
|
218
211
|
"""
|
|
219
|
-
|
|
220
212
|
for key, value in widget.items():
|
|
221
213
|
result = False
|
|
222
214
|
if key in pattern:
|
|
@@ -238,16 +230,21 @@ def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) ->
|
|
|
238
230
|
def traverse_pattern(
|
|
239
231
|
pattern: dict, widget: Union[dict, DictionaryObject]
|
|
240
232
|
) -> Union[str, list, None]:
|
|
241
|
-
"""
|
|
233
|
+
"""
|
|
234
|
+
Recursively traverses a pattern within a PDF widget (annotation dictionary) and returns the value.
|
|
235
|
+
|
|
236
|
+
This function searches for a specific pattern within a PDF widget's properties.
|
|
237
|
+
It recursively traverses the widget's dictionary, comparing keys and values
|
|
238
|
+
to the provided pattern, and returns the widget's value at the position where the pattern entry is True.
|
|
242
239
|
|
|
243
240
|
Args:
|
|
244
|
-
pattern:
|
|
245
|
-
widget:
|
|
241
|
+
pattern (dict): The pattern to traverse, represented as a dictionary.
|
|
242
|
+
widget (Union[dict, DictionaryObject]): The widget to traverse within, which
|
|
243
|
+
can be a dictionary or a DictionaryObject.
|
|
246
244
|
|
|
247
245
|
Returns:
|
|
248
|
-
Union[str, list, None]:
|
|
246
|
+
Union[str, list, None]: The value found, or None if not found.
|
|
249
247
|
"""
|
|
250
|
-
|
|
251
248
|
for key, value in widget.items():
|
|
252
249
|
result = None
|
|
253
250
|
if key in pattern:
|
|
@@ -270,18 +267,25 @@ def extract_widget_property(
|
|
|
270
267
|
default_value: Any,
|
|
271
268
|
func_before_return: Union[Callable, None],
|
|
272
269
|
) -> Any:
|
|
273
|
-
"""
|
|
270
|
+
"""
|
|
271
|
+
Extracts a specific property from a PDF widget based on a list of patterns.
|
|
272
|
+
|
|
273
|
+
This function iterates through a list of patterns, attempting to find a match
|
|
274
|
+
within the provided widget. If a match is found, the corresponding value is
|
|
275
|
+
extracted and returned. If no match is found, a default value is returned.
|
|
274
276
|
|
|
275
277
|
Args:
|
|
276
|
-
widget:
|
|
277
|
-
patterns:
|
|
278
|
-
|
|
279
|
-
|
|
278
|
+
widget (Union[dict, DictionaryObject]): The widget to extract the property from.
|
|
279
|
+
patterns (list): A list of patterns to search for. Each pattern should be a
|
|
280
|
+
dictionary representing the structure of the property to extract.
|
|
281
|
+
default_value (Any): The default value to return if no pattern is found.
|
|
282
|
+
func_before_return (Union[Callable, None]): An optional function to call before
|
|
283
|
+
returning the extracted value. This can be used to perform additional
|
|
284
|
+
processing or formatting on the value.
|
|
280
285
|
|
|
281
286
|
Returns:
|
|
282
|
-
Any:
|
|
287
|
+
Any: The extracted property value, or the default value if no pattern is found.
|
|
283
288
|
"""
|
|
284
|
-
|
|
285
289
|
result = default_value
|
|
286
290
|
|
|
287
291
|
for pattern in patterns:
|
|
@@ -294,12 +298,16 @@ def extract_widget_property(
|
|
|
294
298
|
|
|
295
299
|
|
|
296
300
|
def generate_unique_suffix() -> str:
|
|
297
|
-
"""
|
|
301
|
+
"""
|
|
302
|
+
Generates a unique suffix string for internal use, such as to avoid naming conflicts.
|
|
303
|
+
|
|
304
|
+
This function creates a random string of characters with a predefined length
|
|
305
|
+
(UNIQUE_SUFFIX_LENGTH) using a combination of ASCII letters, digits, and
|
|
306
|
+
punctuation characters (excluding hyphens).
|
|
298
307
|
|
|
299
308
|
Returns:
|
|
300
|
-
str:
|
|
309
|
+
str: A unique suffix string.
|
|
301
310
|
"""
|
|
302
|
-
|
|
303
311
|
return "".join(
|
|
304
312
|
[
|
|
305
313
|
choice(ascii_letters + digits + punctuation.replace("-", ""))
|