PyPDFForm 2.5.0__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of PyPDFForm might be problematic. Click here for more details.
- PyPDFForm/__init__.py +22 -6
- PyPDFForm/adapter.py +28 -26
- PyPDFForm/constants.py +29 -34
- PyPDFForm/coordinate.py +23 -399
- PyPDFForm/filler.py +79 -303
- PyPDFForm/font.py +166 -164
- PyPDFForm/hooks.py +109 -69
- PyPDFForm/image.py +72 -22
- PyPDFForm/middleware/base.py +42 -60
- PyPDFForm/middleware/checkbox.py +27 -58
- PyPDFForm/middleware/dropdown.py +41 -30
- PyPDFForm/middleware/image.py +10 -22
- PyPDFForm/middleware/radio.py +30 -31
- PyPDFForm/middleware/signature.py +32 -47
- PyPDFForm/middleware/text.py +54 -48
- PyPDFForm/patterns.py +61 -106
- PyPDFForm/template.py +80 -427
- PyPDFForm/utils.py +142 -128
- PyPDFForm/watermark.py +77 -208
- PyPDFForm/widgets/base.py +57 -76
- PyPDFForm/widgets/checkbox.py +18 -21
- PyPDFForm/widgets/dropdown.py +18 -25
- PyPDFForm/widgets/image.py +11 -9
- PyPDFForm/widgets/radio.py +25 -35
- PyPDFForm/widgets/signature.py +29 -40
- PyPDFForm/widgets/text.py +18 -17
- PyPDFForm/wrapper.py +351 -443
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/METADATA +6 -7
- pypdfform-3.0.0.dist-info/RECORD +35 -0
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/WHEEL +1 -1
- pypdfform-2.5.0.dist-info/RECORD +0 -35
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/top_level.txt +0 -0
PyPDFForm/font.py
CHANGED
|
@@ -1,43 +1,46 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
-
"""Provides font handling utilities for PDF forms.
|
|
3
|
-
|
|
4
|
-
This module contains functions for:
|
|
5
|
-
- Registering custom fonts from TTF files
|
|
6
|
-
- Extracting font information from PDF text appearances
|
|
7
|
-
- Calculating font sizes based on widget dimensions
|
|
8
|
-
- Adjusting font sizes to fit text within fields
|
|
9
|
-
- Managing font colors and properties
|
|
10
2
|
"""
|
|
3
|
+
This module provides functionalities for handling custom fonts within PDF documents.
|
|
11
4
|
|
|
5
|
+
It includes functions for registering fonts with ReportLab and within the PDF's AcroForm,
|
|
6
|
+
allowing these fonts to be used when filling form fields. The module also provides utilities
|
|
7
|
+
for extracting font information from TTF streams and managing font names within a PDF.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from functools import lru_cache
|
|
12
11
|
from io import BytesIO
|
|
13
|
-
from math import sqrt
|
|
14
|
-
from re import findall
|
|
15
|
-
from typing import Tuple, Union
|
|
16
12
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
|
|
13
|
+
from pypdf import PdfReader, PdfWriter
|
|
14
|
+
from pypdf.generic import (ArrayObject, DictionaryObject, NameObject,
|
|
15
|
+
NumberObject, StreamObject)
|
|
16
|
+
from reportlab.pdfbase.pdfmetrics import registerFont
|
|
20
17
|
from reportlab.pdfbase.ttfonts import TTFError, TTFont
|
|
21
18
|
|
|
22
|
-
from .constants import (
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
from .
|
|
27
|
-
from .utils import extract_widget_property
|
|
19
|
+
from .constants import (DR, FONT_NAME_PREFIX, AcroForm, BaseFont, Encoding,
|
|
20
|
+
Fields, Font, FontDescriptor, FontFile2, FontName,
|
|
21
|
+
Length1, Resources, Subtype, TrueType, Type,
|
|
22
|
+
WinAnsiEncoding)
|
|
23
|
+
from .utils import stream_to_io
|
|
28
24
|
|
|
29
25
|
|
|
26
|
+
@lru_cache
|
|
30
27
|
def register_font(font_name: str, ttf_stream: bytes) -> bool:
|
|
31
|
-
"""
|
|
28
|
+
"""
|
|
29
|
+
Registers a TrueType font with the ReportLab library.
|
|
30
|
+
|
|
31
|
+
This allows the font to be used for generating PDF documents with ReportLab.
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
|
-
font_name:
|
|
35
|
-
|
|
34
|
+
font_name (str): The name to register the font under. This name will be used
|
|
35
|
+
to reference the font when creating PDF documents with ReportLab.
|
|
36
|
+
ttf_stream (bytes): The font file data in TTF format. This should be the raw
|
|
37
|
+
bytes of the TTF file.
|
|
36
38
|
|
|
37
39
|
Returns:
|
|
38
|
-
bool: True if
|
|
40
|
+
bool: True if the font was registered successfully, False otherwise.
|
|
41
|
+
Returns False if a TTFError occurs during registration, which usually
|
|
42
|
+
indicates an invalid TTF stream.
|
|
39
43
|
"""
|
|
40
|
-
|
|
41
44
|
buff = BytesIO()
|
|
42
45
|
buff.write(ttf_stream)
|
|
43
46
|
buff.seek(0)
|
|
@@ -52,194 +55,193 @@ def register_font(font_name: str, ttf_stream: bytes) -> bool:
|
|
|
52
55
|
return result
|
|
53
56
|
|
|
54
57
|
|
|
55
|
-
def
|
|
56
|
-
"""
|
|
58
|
+
def get_additional_font_params(pdf: bytes, base_font_name: str) -> tuple:
|
|
59
|
+
"""
|
|
60
|
+
Retrieves additional font parameters from a PDF document for a given base font name.
|
|
57
61
|
|
|
58
|
-
|
|
62
|
+
This function searches the PDF's resources for a font dictionary matching the provided
|
|
63
|
+
base font name. If a match is found, it extracts the font descriptor parameters and
|
|
64
|
+
the font dictionary parameters. These parameters can be used to further describe
|
|
65
|
+
and define the font within the PDF.
|
|
59
66
|
|
|
60
67
|
Args:
|
|
61
|
-
|
|
68
|
+
pdf (bytes): The PDF file data as bytes.
|
|
69
|
+
base_font_name (str): The base font name to search for within the PDF's font resources.
|
|
62
70
|
|
|
63
71
|
Returns:
|
|
64
|
-
|
|
72
|
+
tuple: A tuple containing two dictionaries:
|
|
73
|
+
- font_descriptor_params (dict): A dictionary of font descriptor parameters.
|
|
74
|
+
- font_dict_params (dict): A dictionary of font dictionary parameters.
|
|
75
|
+
Returns empty dictionaries if the font is not found.
|
|
65
76
|
"""
|
|
77
|
+
font_descriptor_params = {}
|
|
78
|
+
font_dict_params = {}
|
|
79
|
+
reader = PdfReader(stream_to_io(pdf))
|
|
66
80
|
|
|
67
|
-
|
|
81
|
+
for font in reader.pages[0][Resources][Font].values():
|
|
82
|
+
if base_font_name.replace("/", "") in font[BaseFont]:
|
|
83
|
+
font_descriptor_params = dict(font[FontDescriptor])
|
|
84
|
+
font_dict_params = dict(font)
|
|
85
|
+
break
|
|
68
86
|
|
|
69
|
-
|
|
70
|
-
if each.startswith("/"):
|
|
71
|
-
text_segments = findall("[A-Z][^A-Z]*", each.replace("/", ""))
|
|
87
|
+
return font_descriptor_params, font_dict_params
|
|
72
88
|
|
|
73
|
-
if len(text_segments) == 1:
|
|
74
|
-
for k, v in AcroForm.formFontNames.items():
|
|
75
|
-
if v == text_segments[0]:
|
|
76
|
-
return k
|
|
77
89
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
continue
|
|
82
|
-
|
|
83
|
-
found = True
|
|
84
|
-
for i, val in enumerate(font_segments):
|
|
85
|
-
if not val.startswith(text_segments[i]):
|
|
86
|
-
found = False
|
|
87
|
-
|
|
88
|
-
if found:
|
|
89
|
-
return font
|
|
90
|
-
|
|
91
|
-
return None
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def auto_detect_font(widget: dict) -> str:
|
|
95
|
-
"""Attempts to detect the font used in a PDF text field widget.
|
|
90
|
+
def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> tuple:
|
|
91
|
+
"""
|
|
92
|
+
Registers a TrueType font within the PDF's AcroForm dictionary.
|
|
96
93
|
|
|
97
|
-
|
|
94
|
+
This allows the font to be used when filling form fields within the PDF.
|
|
95
|
+
The function adds the font as a resource to the PDF, making it available
|
|
96
|
+
for use in form fields.
|
|
98
97
|
|
|
99
98
|
Args:
|
|
100
|
-
|
|
99
|
+
pdf (bytes): The PDF file data as bytes. This is the PDF document that
|
|
100
|
+
will be modified to include the new font.
|
|
101
|
+
ttf_stream (bytes): The font file data in TTF format as bytes. This is the
|
|
102
|
+
raw data of the TrueType font file.
|
|
103
|
+
adobe_mode (bool): A flag indicating whether to use Adobe-specific font parameters.
|
|
101
104
|
|
|
102
105
|
Returns:
|
|
103
|
-
|
|
106
|
+
tuple: A tuple containing the modified PDF data as bytes and the new font name
|
|
107
|
+
(str) that was assigned to the registered font within the PDF.
|
|
104
108
|
"""
|
|
109
|
+
base_font_name = get_base_font_name(ttf_stream)
|
|
110
|
+
reader = PdfReader(stream_to_io(pdf))
|
|
111
|
+
writer = PdfWriter()
|
|
112
|
+
writer.append(reader)
|
|
113
|
+
|
|
114
|
+
font_descriptor_params = {}
|
|
115
|
+
font_dict_params = {}
|
|
116
|
+
if adobe_mode:
|
|
117
|
+
font_descriptor_params, font_dict_params = get_additional_font_params(
|
|
118
|
+
pdf, base_font_name
|
|
119
|
+
)
|
|
105
120
|
|
|
106
|
-
|
|
107
|
-
|
|
121
|
+
font_file_stream = StreamObject()
|
|
122
|
+
font_file_stream.set_data(ttf_stream)
|
|
123
|
+
font_file_stream.update(
|
|
124
|
+
{
|
|
125
|
+
NameObject(Length1): NumberObject(len(ttf_stream)),
|
|
126
|
+
}
|
|
127
|
+
)
|
|
128
|
+
font_file_ref = writer._add_object(font_file_stream) # type: ignore # noqa: SLF001 # # pylint: disable=W0212
|
|
129
|
+
|
|
130
|
+
font_descriptor = DictionaryObject()
|
|
131
|
+
font_descriptor.update(
|
|
132
|
+
{
|
|
133
|
+
NameObject(Type): NameObject(FontDescriptor),
|
|
134
|
+
NameObject(FontName): NameObject(base_font_name),
|
|
135
|
+
NameObject(FontFile2): font_file_ref,
|
|
136
|
+
}
|
|
137
|
+
)
|
|
138
|
+
font_descriptor.update(
|
|
139
|
+
{k: v for k, v in font_descriptor_params.items() if k not in font_descriptor}
|
|
108
140
|
)
|
|
141
|
+
font_descriptor_ref = writer._add_object(font_descriptor) # type: ignore # noqa: SLF001 # # pylint: disable=W0212
|
|
142
|
+
|
|
143
|
+
font_dict = DictionaryObject()
|
|
144
|
+
font_dict.update(
|
|
145
|
+
{
|
|
146
|
+
NameObject(Type): NameObject(Font),
|
|
147
|
+
NameObject(Subtype): NameObject(TrueType),
|
|
148
|
+
NameObject(BaseFont): NameObject(base_font_name),
|
|
149
|
+
NameObject(FontDescriptor): font_descriptor_ref,
|
|
150
|
+
NameObject(Encoding): NameObject(WinAnsiEncoding),
|
|
151
|
+
}
|
|
152
|
+
)
|
|
153
|
+
font_dict.update({k: v for k, v in font_dict_params.items() if k not in font_dict})
|
|
154
|
+
font_dict_ref = writer._add_object(font_dict) # type: ignore # noqa: SLF001 # # pylint: disable=W0212
|
|
109
155
|
|
|
110
|
-
if not
|
|
111
|
-
|
|
156
|
+
if AcroForm not in writer._root_object: # type: ignore # noqa: SLF001 # # pylint: disable=W0212
|
|
157
|
+
writer._root_object[NameObject(AcroForm)] = DictionaryObject({NameObject(Fields): ArrayObject([])}) # type: ignore # noqa: SLF001 # # pylint: disable=W0212
|
|
158
|
+
acroform = writer._root_object[AcroForm] # type: ignore # noqa: SLF001 # # pylint: disable=W0212
|
|
112
159
|
|
|
113
|
-
|
|
160
|
+
if DR not in acroform:
|
|
161
|
+
acroform[NameObject(DR)] = DictionaryObject()
|
|
162
|
+
dr = acroform[DR]
|
|
114
163
|
|
|
164
|
+
if Font not in dr:
|
|
165
|
+
dr[NameObject(Font)] = DictionaryObject()
|
|
166
|
+
fonts = dr[Font]
|
|
115
167
|
|
|
116
|
-
|
|
117
|
-
|
|
168
|
+
new_font_name = get_new_font_name(fonts)
|
|
169
|
+
fonts[NameObject(new_font_name)] = font_dict_ref
|
|
118
170
|
|
|
119
|
-
|
|
120
|
-
|
|
171
|
+
with BytesIO() as f:
|
|
172
|
+
writer.write(f)
|
|
173
|
+
f.seek(0)
|
|
174
|
+
return f.read(), new_font_name
|
|
121
175
|
|
|
122
|
-
Returns:
|
|
123
|
-
Union[float, int]: Suggested font size in points
|
|
124
|
-
"""
|
|
125
|
-
|
|
126
|
-
height = abs(float(widget[Rect][1]) - float(widget[Rect][3]))
|
|
127
|
-
|
|
128
|
-
return height * 2 / 3
|
|
129
176
|
|
|
177
|
+
@lru_cache
|
|
178
|
+
def get_base_font_name(ttf_stream: bytes) -> str:
|
|
179
|
+
"""
|
|
180
|
+
Extracts the base font name from a TrueType font stream.
|
|
130
181
|
|
|
131
|
-
|
|
132
|
-
|
|
182
|
+
This function parses the TTF stream to extract the font's face name,
|
|
183
|
+
which is used as the base font name. The result is cached using lru_cache
|
|
184
|
+
for performance.
|
|
133
185
|
|
|
134
186
|
Args:
|
|
135
|
-
|
|
187
|
+
ttf_stream (bytes): The font file data in TTF format.
|
|
136
188
|
|
|
137
189
|
Returns:
|
|
138
|
-
|
|
190
|
+
str: The base font name, prefixed with a forward slash.
|
|
139
191
|
"""
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
float(widget[Rect][1]) - float(widget[Rect][3])
|
|
192
|
+
return (
|
|
193
|
+
f"/{TTFont(name='new_font', filename=stream_to_io(ttf_stream)).face.name.ustr}"
|
|
143
194
|
)
|
|
144
195
|
|
|
145
|
-
return sqrt(area) * 72 / 96
|
|
146
196
|
|
|
197
|
+
def get_new_font_name(fonts: dict) -> str:
|
|
198
|
+
"""
|
|
199
|
+
Generates a new unique font name to avoid conflicts with existing fonts in the PDF.
|
|
147
200
|
|
|
148
|
-
|
|
149
|
-
|
|
201
|
+
This function iterates through the existing fonts in the PDF and generates a new
|
|
202
|
+
font name with the prefix '/F' followed by a unique integer.
|
|
150
203
|
|
|
151
204
|
Args:
|
|
152
|
-
|
|
205
|
+
fonts (dict): A dictionary of existing fonts in the PDF.
|
|
153
206
|
|
|
154
207
|
Returns:
|
|
155
|
-
|
|
208
|
+
str: A new unique font name.
|
|
156
209
|
"""
|
|
210
|
+
existing = set()
|
|
211
|
+
for key in fonts:
|
|
212
|
+
if isinstance(key, str) and key.startswith(FONT_NAME_PREFIX):
|
|
213
|
+
existing.add(int(key[2:]))
|
|
157
214
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
if text_appearance:
|
|
163
|
-
properties = text_appearance.split(" ")
|
|
164
|
-
for i, val in enumerate(properties):
|
|
165
|
-
if val.startswith(FONT_SIZE_IDENTIFIER):
|
|
166
|
-
return float(properties[i - 1])
|
|
215
|
+
n = 1
|
|
216
|
+
while n in existing:
|
|
217
|
+
n += 1
|
|
218
|
+
return f"{FONT_NAME_PREFIX}{n}"
|
|
167
219
|
|
|
168
|
-
return result
|
|
169
220
|
|
|
221
|
+
@lru_cache
|
|
222
|
+
def get_all_available_fonts(pdf: bytes) -> dict:
|
|
223
|
+
"""
|
|
224
|
+
Retrieves all available fonts from a PDF document's AcroForm.
|
|
170
225
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
) -> Union[Tuple[float, float, float], None]:
|
|
174
|
-
"""Extracts font color from PDF text field appearance properties.
|
|
226
|
+
This function extracts the font resources from the PDF's AcroForm dictionary
|
|
227
|
+
and returns them as a dictionary.
|
|
175
228
|
|
|
176
229
|
Args:
|
|
177
|
-
|
|
230
|
+
pdf (bytes): The PDF file data.
|
|
178
231
|
|
|
179
232
|
Returns:
|
|
180
|
-
|
|
181
|
-
|
|
233
|
+
dict: A dictionary of available fonts, where the keys are the font names
|
|
234
|
+
(without the leading slash) and the values are the corresponding font
|
|
235
|
+
identifiers in the PDF. Returns an empty dictionary if no fonts are found.
|
|
182
236
|
"""
|
|
237
|
+
reader = PdfReader(stream_to_io(pdf))
|
|
238
|
+
try:
|
|
239
|
+
fonts = reader.root_object[AcroForm][DR][Font]
|
|
240
|
+
except KeyError:
|
|
241
|
+
return {}
|
|
183
242
|
|
|
184
|
-
result =
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
)
|
|
188
|
-
if text_appearance:
|
|
189
|
-
if FONT_COLOR_IDENTIFIER not in text_appearance:
|
|
190
|
-
return result
|
|
191
|
-
|
|
192
|
-
text_appearance = text_appearance.split(" ")
|
|
193
|
-
for i, val in enumerate(text_appearance):
|
|
194
|
-
if val.startswith(FONT_COLOR_IDENTIFIER.replace(" ", "")):
|
|
195
|
-
result = (
|
|
196
|
-
float(text_appearance[i - 3]),
|
|
197
|
-
float(text_appearance[i - 2]),
|
|
198
|
-
float(text_appearance[i - 1]),
|
|
199
|
-
)
|
|
200
|
-
break
|
|
243
|
+
result = {}
|
|
244
|
+
for key, value in fonts.items():
|
|
245
|
+
result[value[BaseFont].replace("/", "")] = key
|
|
201
246
|
|
|
202
247
|
return result
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
def adjust_paragraph_font_size(widget: dict, widget_middleware: Text) -> None:
|
|
206
|
-
"""Dynamically reduces font size until text fits in paragraph field.
|
|
207
|
-
|
|
208
|
-
Args:
|
|
209
|
-
widget: PDF form widget dictionary
|
|
210
|
-
widget_middleware: Text middleware instance containing text properties
|
|
211
|
-
"""
|
|
212
|
-
|
|
213
|
-
# pylint: disable=C0415, R0401
|
|
214
|
-
from .template import get_paragraph_lines
|
|
215
|
-
|
|
216
|
-
height = abs(float(widget[Rect][1]) - float(widget[Rect][3]))
|
|
217
|
-
|
|
218
|
-
while (
|
|
219
|
-
widget_middleware.font_size > FONT_SIZE_REDUCE_STEP
|
|
220
|
-
and len(widget_middleware.text_lines)
|
|
221
|
-
* (widget_middleware.font_size + MARGIN_BETWEEN_LINES)
|
|
222
|
-
> height
|
|
223
|
-
):
|
|
224
|
-
widget_middleware.font_size -= FONT_SIZE_REDUCE_STEP
|
|
225
|
-
widget_middleware.text_lines = get_paragraph_lines(widget, widget_middleware)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def adjust_text_field_font_size(widget: dict, widget_middleware: Text) -> None:
|
|
229
|
-
"""Dynamically reduces font size until text fits in text field.
|
|
230
|
-
|
|
231
|
-
Args:
|
|
232
|
-
widget: PDF form widget dictionary
|
|
233
|
-
widget_middleware: Text middleware instance containing text properties
|
|
234
|
-
"""
|
|
235
|
-
|
|
236
|
-
width = abs(float(widget[Rect][0]) - float(widget[Rect][2]))
|
|
237
|
-
|
|
238
|
-
while (
|
|
239
|
-
widget_middleware.font_size > FONT_SIZE_REDUCE_STEP
|
|
240
|
-
and stringWidth(
|
|
241
|
-
widget_middleware.value, widget_middleware.font, widget_middleware.font_size
|
|
242
|
-
)
|
|
243
|
-
> width
|
|
244
|
-
):
|
|
245
|
-
widget_middleware.font_size -= FONT_SIZE_REDUCE_STEP
|