PyPDFForm 3.5.3__py3-none-any.whl → 4.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyPDFForm/__init__.py +5 -3
- PyPDFForm/adapter.py +33 -1
- PyPDFForm/ap.py +99 -0
- PyPDFForm/assets/__init__.py +0 -0
- PyPDFForm/assets/blank.py +100 -0
- PyPDFForm/constants.py +20 -2
- PyPDFForm/coordinate.py +7 -11
- PyPDFForm/deprecation.py +30 -0
- PyPDFForm/filler.py +17 -36
- PyPDFForm/font.py +16 -16
- PyPDFForm/hooks.py +153 -30
- PyPDFForm/image.py +0 -3
- PyPDFForm/middleware/__init__.py +35 -0
- PyPDFForm/middleware/base.py +24 -5
- PyPDFForm/middleware/checkbox.py +18 -1
- PyPDFForm/middleware/signature.py +0 -1
- PyPDFForm/patterns.py +44 -13
- PyPDFForm/raw/__init__.py +37 -0
- PyPDFForm/raw/circle.py +65 -0
- PyPDFForm/raw/ellipse.py +69 -0
- PyPDFForm/raw/image.py +79 -0
- PyPDFForm/raw/line.py +65 -0
- PyPDFForm/raw/rect.py +70 -0
- PyPDFForm/raw/text.py +73 -0
- PyPDFForm/template.py +114 -12
- PyPDFForm/types.py +49 -0
- PyPDFForm/utils.py +31 -41
- PyPDFForm/watermark.py +153 -44
- PyPDFForm/widgets/__init__.py +1 -0
- PyPDFForm/widgets/base.py +79 -59
- PyPDFForm/widgets/checkbox.py +30 -30
- PyPDFForm/widgets/dropdown.py +42 -40
- PyPDFForm/widgets/image.py +17 -16
- PyPDFForm/widgets/radio.py +27 -28
- PyPDFForm/widgets/signature.py +96 -60
- PyPDFForm/widgets/text.py +40 -40
- PyPDFForm/wrapper.py +256 -240
- {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/METADATA +33 -26
- pypdfform-4.2.0.dist-info/RECORD +47 -0
- {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/licenses/LICENSE +1 -1
- pypdfform-3.5.3.dist-info/RECORD +0 -35
- /PyPDFForm/{widgets → assets}/bedrock.py +0 -0
- {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/WHEEL +0 -0
- {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/top_level.txt +0 -0
PyPDFForm/__init__.py
CHANGED
|
@@ -20,10 +20,12 @@ The library supports various PDF form features, including:
|
|
|
20
20
|
PyPDFForm aims to simplify PDF form manipulation, making it accessible to developers of all skill levels.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
-
__version__ = "3.5.3"
|
|
23
|
+
__version__ = "4.2.0"
|
|
24
24
|
|
|
25
|
-
from .
|
|
25
|
+
from .assets.blank import BlankPage
|
|
26
|
+
from .middleware import Widgets
|
|
27
|
+
from .raw import RawElements
|
|
26
28
|
from .widgets import Fields
|
|
27
29
|
from .wrapper import PdfWrapper
|
|
28
30
|
|
|
29
|
-
__all__ = ["PdfWrapper", "
|
|
31
|
+
__all__ = ["PdfWrapper", "Fields", "BlankPage", "RawElements", "Widgets"]
|
PyPDFForm/adapter.py
CHANGED
|
@@ -9,7 +9,6 @@ filling operations, where the input PDF template can be provided in different
|
|
|
9
9
|
forms. The module ensures that the input is properly converted into a byte
|
|
10
10
|
stream before further processing.
|
|
11
11
|
"""
|
|
12
|
-
# TODO: For large PDF files, reading the entire file into memory using `_file.read()` in `fp_or_f_obj_or_stream_to_stream` can be inefficient. Consider streaming or chunking if downstream processing allows.
|
|
13
12
|
|
|
14
13
|
from os.path import isfile
|
|
15
14
|
from typing import Any, BinaryIO, Union
|
|
@@ -67,3 +66,36 @@ def fp_or_f_obj_or_stream_to_stream(
|
|
|
67
66
|
with open(fp_or_f_obj_or_stream, "rb") as _file:
|
|
68
67
|
result = _file.read()
|
|
69
68
|
return result
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def fp_or_f_obj_or_f_content_to_content(
|
|
72
|
+
fp_or_f_obj_or_f_content: Union[str, BinaryIO],
|
|
73
|
+
) -> str:
|
|
74
|
+
"""
|
|
75
|
+
Adapt a file path, file object, or file content to file content.
|
|
76
|
+
|
|
77
|
+
This function takes a file path, a file object, or file content and adapts it to a consistent string.
|
|
78
|
+
It handles different input types, including:
|
|
79
|
+
- file content (str)
|
|
80
|
+
- file paths (str)
|
|
81
|
+
- file-like objects with a read() method (BinaryIO)
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
fp_or_f_obj_or_f_content (Union[str, BinaryIO]): The input to adapt.
|
|
85
|
+
It can be file content, a file path (string), or a file object.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
str: The file content representation of the input.
|
|
89
|
+
"""
|
|
90
|
+
result = ""
|
|
91
|
+
if readable(fp_or_f_obj_or_f_content):
|
|
92
|
+
result = str(fp_or_f_obj_or_f_content.read())
|
|
93
|
+
|
|
94
|
+
elif isinstance(fp_or_f_obj_or_f_content, str):
|
|
95
|
+
if isfile(fp_or_f_obj_or_f_content):
|
|
96
|
+
with open(fp_or_f_obj_or_f_content, "r", encoding="utf-8") as _file:
|
|
97
|
+
result = _file.read()
|
|
98
|
+
else:
|
|
99
|
+
result = fp_or_f_obj_or_f_content
|
|
100
|
+
|
|
101
|
+
return result
|
PyPDFForm/ap.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
A module for handling PDF appearance streams.
|
|
4
|
+
|
|
5
|
+
This module provides functionality to manage appearance streams in PDF forms,
|
|
6
|
+
which are necessary for form fields to display correctly after being filled.
|
|
7
|
+
It uses both pypdf and pikepdf for manipulation.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from functools import lru_cache
|
|
11
|
+
from io import BytesIO
|
|
12
|
+
|
|
13
|
+
from pikepdf import Pdf
|
|
14
|
+
from pypdf import PdfReader, PdfWriter
|
|
15
|
+
|
|
16
|
+
from .constants import XFA, AcroForm, Root
|
|
17
|
+
from .template import (get_on_open_javascript, get_pdf_title,
|
|
18
|
+
set_on_open_javascript, set_pdf_title)
|
|
19
|
+
from .utils import stream_to_io
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@lru_cache
|
|
23
|
+
def appearance_streams_handler(pdf: bytes, generate_appearance_streams: bool) -> bytes:
|
|
24
|
+
"""
|
|
25
|
+
Handles appearance streams and the /NeedAppearances flag for a PDF form.
|
|
26
|
+
|
|
27
|
+
This function prepares a PDF for form filling by:
|
|
28
|
+
1. Removing the XFA dictionary if present, as it can interfere with standard
|
|
29
|
+
AcroForm processing.
|
|
30
|
+
2. Setting the /NeedAppearances flag in the AcroForm dictionary, which instructs
|
|
31
|
+
PDF viewers to generate appearance streams for form fields.
|
|
32
|
+
3. Optionally generating appearance streams explicitly using pikepdf if
|
|
33
|
+
`generate_appearance_streams` is True.
|
|
34
|
+
4. Preserving the title from the original PDF.
|
|
35
|
+
5. Preserving the on-open JavaScript from the original PDF.
|
|
36
|
+
|
|
37
|
+
The result is cached using lru_cache for performance.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
pdf (bytes): The PDF file content as a bytes stream.
|
|
41
|
+
generate_appearance_streams (bool): Whether to explicitly generate appearance streams for all form fields.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
bytes: The modified PDF content as a bytes stream.
|
|
45
|
+
"""
|
|
46
|
+
reader = PdfReader(stream_to_io(pdf))
|
|
47
|
+
writer = PdfWriter()
|
|
48
|
+
|
|
49
|
+
if AcroForm in reader.trailer[Root] and XFA in reader.trailer[Root][AcroForm]:
|
|
50
|
+
del reader.trailer[Root][AcroForm][XFA]
|
|
51
|
+
|
|
52
|
+
writer.append(reader)
|
|
53
|
+
writer.set_need_appearances_writer()
|
|
54
|
+
|
|
55
|
+
with BytesIO() as f:
|
|
56
|
+
writer.write(f)
|
|
57
|
+
f.seek(0)
|
|
58
|
+
result = f.read()
|
|
59
|
+
|
|
60
|
+
if generate_appearance_streams:
|
|
61
|
+
with Pdf.open(stream_to_io(result)) as f:
|
|
62
|
+
f.generate_appearance_streams()
|
|
63
|
+
with BytesIO() as r:
|
|
64
|
+
f.save(r)
|
|
65
|
+
r.seek(0)
|
|
66
|
+
result = r.read()
|
|
67
|
+
|
|
68
|
+
result = preserve_title(pdf, result)
|
|
69
|
+
return preserve_on_open_javascript(pdf, result)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def preserve_title(src: bytes, dest: bytes) -> bytes:
|
|
73
|
+
"""
|
|
74
|
+
Preserves the title from the source PDF to the destination PDF.
|
|
75
|
+
|
|
76
|
+
Args:
|
|
77
|
+
src (bytes): The source PDF file content as a bytes stream.
|
|
78
|
+
dest (bytes): The destination PDF file content as a bytes stream.
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
bytes: The modified destination PDF content as a bytes stream.
|
|
82
|
+
"""
|
|
83
|
+
title = get_pdf_title(src)
|
|
84
|
+
return set_pdf_title(dest, title)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def preserve_on_open_javascript(src: bytes, dest: bytes) -> bytes:
|
|
88
|
+
"""
|
|
89
|
+
Preserves the on-open JavaScript from the source PDF to the destination PDF.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
src (bytes): The source PDF file content as a bytes stream.
|
|
93
|
+
dest (bytes): The destination PDF file content as a bytes stream.
|
|
94
|
+
|
|
95
|
+
Returns:
|
|
96
|
+
bytes: The modified destination PDF content as a bytes stream.
|
|
97
|
+
"""
|
|
98
|
+
script = get_on_open_javascript(src)
|
|
99
|
+
return set_on_open_javascript(dest, script)
|
|
PyPDFForm/assets/__init__.py
ADDED
|
File without changes
|
|
PyPDFForm/assets/blank.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Module for creating and managing a blank PDF page asset.
|
|
4
|
+
|
|
5
|
+
This module provides the BlankPage class, which acts as a utility to generate
|
|
6
|
+
a simple, empty PDF page with customizable dimensions (width and height).
|
|
7
|
+
The primary use case is creating new PDF documents starting with a blank canvas
|
|
8
|
+
or adding blank pages to existing documents. It supports multiplication to
|
|
9
|
+
easily generate a PDF containing multiple identical blank pages.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from functools import cached_property
|
|
15
|
+
from io import BytesIO
|
|
16
|
+
|
|
17
|
+
from reportlab.pdfgen.canvas import Canvas
|
|
18
|
+
|
|
19
|
+
from ..constants import BLANK_PAGE_DEFAULT_HEIGHT, BLANK_PAGE_DEFAULT_WIDTH
|
|
20
|
+
from ..utils import merge_pdfs
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BlankPage:
|
|
24
|
+
"""
|
|
25
|
+
Class for creating a blank PDF page asset.
|
|
26
|
+
|
|
27
|
+
This class manages the generation and representation of a single blank PDF page
|
|
28
|
+
using reportlab. It provides a simple interface to access the page content as
|
|
29
|
+
a byte stream and supports page duplication via the multiplication operator.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
width: float = BLANK_PAGE_DEFAULT_WIDTH,
|
|
35
|
+
height: float = BLANK_PAGE_DEFAULT_HEIGHT,
|
|
36
|
+
) -> None:
|
|
37
|
+
"""
|
|
38
|
+
Initializes a BlankPage object.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
width (float): The width of the blank page in points. Defaults to
|
|
42
|
+
BLANK_PAGE_DEFAULT_WIDTH (612 points).
|
|
43
|
+
height (float): The height of the blank page in points. Defaults to
|
|
44
|
+
BLANK_PAGE_DEFAULT_HEIGHT (792 points).
|
|
45
|
+
"""
|
|
46
|
+
super().__init__()
|
|
47
|
+
self.width = width
|
|
48
|
+
self.height = height
|
|
49
|
+
|
|
50
|
+
def __mul__(self, count: int) -> bytes:
|
|
51
|
+
"""
|
|
52
|
+
Multiplication operator to merge multiple blank pages into one PDF.
|
|
53
|
+
|
|
54
|
+
This allows syntax like `BlankPage() * 3` to create a 3-page PDF.
|
|
55
|
+
It merges copies of the current blank page asset using the efficient
|
|
56
|
+
pairwise merging strategy implemented in `merge_pdfs`.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
count (int): The number of blank pages to merge. Must be an integer >= 1.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
bytes: The byte stream of the resulting PDF containing `count` blank pages.
|
|
63
|
+
"""
|
|
64
|
+
if count == 1:
|
|
65
|
+
return self.read()
|
|
66
|
+
|
|
67
|
+
return merge_pdfs([self.read() for _ in range(count)])
|
|
68
|
+
|
|
69
|
+
def read(self) -> bytes:
|
|
70
|
+
"""
|
|
71
|
+
Read the generated blank page PDF content.
|
|
72
|
+
|
|
73
|
+
This is a public interface to retrieve the cached byte stream of the single
|
|
74
|
+
blank PDF page created by this instance.
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
bytes: The byte stream of the single blank PDF page.
|
|
78
|
+
"""
|
|
79
|
+
return self._stream
|
|
80
|
+
|
|
81
|
+
@cached_property
|
|
82
|
+
def _stream(self) -> bytes:
|
|
83
|
+
"""
|
|
84
|
+
Generates and returns the PDF byte stream of a single blank page.
|
|
85
|
+
|
|
86
|
+
This is a cached property that uses `reportlab.pdfgen.canvas.Canvas` to create
|
|
87
|
+
a minimal PDF document consisting of one blank page with the configured
|
|
88
|
+
dimensions (`self.width`, `self.height`). This generation occurs only once.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
bytes: The byte stream of the generated blank PDF page.
|
|
92
|
+
"""
|
|
93
|
+
result = BytesIO()
|
|
94
|
+
|
|
95
|
+
canvas = Canvas(result, pagesize=(self.width, self.height))
|
|
96
|
+
canvas.showPage()
|
|
97
|
+
canvas.save()
|
|
98
|
+
result.seek(0)
|
|
99
|
+
|
|
100
|
+
return result.read()
|
PyPDFForm/constants.py
CHANGED
|
@@ -41,9 +41,8 @@ WIDGET_TYPES = Union[Text, Checkbox, Radio, Dropdown, Signature, Image]
|
|
|
41
41
|
|
|
42
42
|
DEPRECATION_NOTICE = "{} will be deprecated soon. Use {} instead."
|
|
43
43
|
|
|
44
|
+
Title = "/Title"
|
|
44
45
|
Annots = "/Annots"
|
|
45
|
-
A = "/A"
|
|
46
|
-
JS = "/JS"
|
|
47
46
|
T = "/T"
|
|
48
47
|
TU = "/TU"
|
|
49
48
|
Rect = "/Rect"
|
|
@@ -68,6 +67,21 @@ AS = "/AS"
|
|
|
68
67
|
Yes = "/Yes"
|
|
69
68
|
Off = "/Off"
|
|
70
69
|
|
|
70
|
+
# javascript
|
|
71
|
+
A = "/A"
|
|
72
|
+
AA = "/AA"
|
|
73
|
+
Action = "/Action"
|
|
74
|
+
S = "/S"
|
|
75
|
+
JavaScript = "/JavaScript"
|
|
76
|
+
JS = "/JS"
|
|
77
|
+
OpenAction = "/OpenAction"
|
|
78
|
+
E = "/E"
|
|
79
|
+
X = "/X"
|
|
80
|
+
D = "/D"
|
|
81
|
+
U = "/U"
|
|
82
|
+
Fo = "/Fo"
|
|
83
|
+
Bl = "/Bl"
|
|
84
|
+
|
|
71
85
|
# Font dict
|
|
72
86
|
Length = "/Length"
|
|
73
87
|
Length1 = "/Length1"
|
|
@@ -129,3 +143,7 @@ COORDINATE_GRID_FONT_SIZE_MARGIN_RATIO = DEFAULT_FONT_SIZE / 100
|
|
|
129
143
|
UNIQUE_SUFFIX_LENGTH = 20
|
|
130
144
|
|
|
131
145
|
SLASH = "/"
|
|
146
|
+
|
|
147
|
+
# blank page
|
|
148
|
+
BLANK_PAGE_DEFAULT_WIDTH = 612
|
|
149
|
+
BLANK_PAGE_DEFAULT_HEIGHT = 792
|
PyPDFForm/coordinate.py
CHANGED
|
@@ -6,8 +6,6 @@ This module provides functionality to generate coordinate grids on existing PDF
|
|
|
6
6
|
It allows developers to visualize the coordinate system of each page in a PDF, which can be helpful
|
|
7
7
|
for debugging and precisely positioning elements when filling or drawing on PDF forms.
|
|
8
8
|
"""
|
|
9
|
-
# TODO: The `PdfReader` object is initialized twice (lines 42 and implicitly within `create_watermarks_and_draw` if it re-reads the PDF). Consider initializing it once and passing the object or its relevant parts to avoid redundant parsing, especially for large PDFs.
|
|
10
|
-
# TODO: Drawing operations for lines and texts are performed and merged separately. It might be more efficient to combine all drawing operations for a page into a single `create_watermarks_and_draw` call or to merge all watermarks in one final step to reduce PDF processing overhead.
|
|
11
9
|
|
|
12
10
|
from typing import Tuple
|
|
13
11
|
|
|
@@ -44,7 +42,6 @@ def generate_coordinate_grid(
|
|
|
44
42
|
pdf_file = PdfReader(stream_to_io(pdf))
|
|
45
43
|
lines_by_page = {}
|
|
46
44
|
texts_by_page = {}
|
|
47
|
-
watermarks = []
|
|
48
45
|
|
|
49
46
|
for i, page in enumerate(pdf_file.pages):
|
|
50
47
|
lines_by_page[i + 1] = []
|
|
@@ -98,16 +95,15 @@ def generate_coordinate_grid(
|
|
|
98
95
|
y += margin
|
|
99
96
|
x += margin
|
|
100
97
|
|
|
98
|
+
to_draw = []
|
|
99
|
+
|
|
101
100
|
for page, lines in lines_by_page.items():
|
|
102
|
-
|
|
103
|
-
|
|
101
|
+
to_draw.extend(
|
|
102
|
+
[{"page_number": page, "type": "line", **line} for line in lines]
|
|
104
103
|
)
|
|
105
|
-
|
|
106
|
-
result = merge_watermarks_with_pdf(pdf, watermarks)
|
|
107
|
-
watermarks = []
|
|
108
104
|
for page, texts in texts_by_page.items():
|
|
109
|
-
|
|
110
|
-
|
|
105
|
+
to_draw.extend(
|
|
106
|
+
[{"page_number": page, "type": "text", **text} for text in texts]
|
|
111
107
|
)
|
|
112
108
|
|
|
113
|
-
return merge_watermarks_with_pdf(
|
|
109
|
+
return merge_watermarks_with_pdf(pdf, create_watermarks_and_draw(pdf, to_draw))
|
PyPDFForm/deprecation.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
A module for handling deprecation notices within the PyPDFForm library.
|
|
4
|
+
|
|
5
|
+
This module provides utility functions to issue standard DeprecationWarning
|
|
6
|
+
messages, ensuring consistency across the library when notifying users of
|
|
7
|
+
deprecated features.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from warnings import warn
|
|
11
|
+
|
|
12
|
+
from .constants import DEPRECATION_NOTICE
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def deprecation_notice(to_deprecate: str, to_replace: str) -> None:
|
|
16
|
+
"""
|
|
17
|
+
Issues a DeprecationWarning for a feature that is being deprecated.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
to_deprecate (str): The name of the feature or function being deprecated.
|
|
21
|
+
to_replace (str): The name of the feature or function that should be used instead.
|
|
22
|
+
"""
|
|
23
|
+
warn(
|
|
24
|
+
DEPRECATION_NOTICE.format(
|
|
25
|
+
to_deprecate,
|
|
26
|
+
to_replace,
|
|
27
|
+
),
|
|
28
|
+
DeprecationWarning, # noqa: PT030
|
|
29
|
+
stacklevel=3,
|
|
30
|
+
)
|
PyPDFForm/filler.py
CHANGED
|
@@ -7,11 +7,6 @@ It includes functions for handling various form field types, such as text fields
|
|
|
7
7
|
checkboxes, radio buttons, dropdowns, images, and signatures. The module also
|
|
8
8
|
supports flattening the filled form to prevent further modifications.
|
|
9
9
|
"""
|
|
10
|
-
# TODO: In `fill` function, `PdfReader(stream_to_io(template))` and `out.append(pdf)` might involve re-parsing or copying the entire PDF. For very large PDFs, consider if `pypdf` offers more efficient ways to modify in-place or stream processing.
|
|
11
|
-
# TODO: The `get_widget_key` function is called repeatedly in a loop. If its internal logic is complex, consider caching its results or optimizing its implementation to avoid redundant computations.
|
|
12
|
-
# TODO: The `signature_image_handler` function involves `get_image_dimensions` and `get_draw_image_resolutions`. If image processing is a bottleneck, consider optimizing these image-related operations, perhaps by using faster image libraries or pre-calculating dimensions if images are reused.
|
|
13
|
-
# TODO: Similar to `coordinate.py`, `get_drawn_stream` involves multiple `create_watermarks_and_draw` and `merge_watermarks_with_pdf` calls. Combining drawing operations or merging watermarks in a single pass could reduce overhead.
|
|
14
|
-
# TODO: The `radio_button_tracker` logic involves iterating through all radio buttons. For forms with many radio buttons, consider optimizing the lookup or update mechanism if performance becomes an issue.
|
|
15
10
|
|
|
16
11
|
from io import BytesIO
|
|
17
12
|
from typing import Dict, Union, cast
|
|
@@ -75,37 +70,10 @@ def signature_image_handler(
|
|
|
75
70
|
return any_image_to_draw
|
|
76
71
|
|
|
77
72
|
|
|
78
|
-
def get_drawn_stream(to_draw: dict, stream: bytes, action: str) -> bytes:
|
|
79
|
-
"""Applies watermarks to specific pages of a PDF based on the provided drawing instructions.
|
|
80
|
-
|
|
81
|
-
This function takes a dictionary of drawing instructions and applies watermarks to the
|
|
82
|
-
specified pages of a PDF. It iterates through the drawing instructions, creates watermarks
|
|
83
|
-
for each page, and merges the watermarks with the original PDF content. The function
|
|
84
|
-
supports various drawing actions, such as adding images or text.
|
|
85
|
-
|
|
86
|
-
Args:
|
|
87
|
-
to_draw (dict): A dictionary containing page numbers as keys and lists of drawing instructions as values.
|
|
88
|
-
Each drawing instruction specifies the type of drawing, position, dimensions, and content.
|
|
89
|
-
stream (bytes): The PDF content as bytes.
|
|
90
|
-
action (str): The type of action to perform (e.g., "image", "text").
|
|
91
|
-
|
|
92
|
-
Returns:
|
|
93
|
-
bytes: The modified PDF content with watermarks applied.
|
|
94
|
-
"""
|
|
95
|
-
watermark_list = []
|
|
96
|
-
for page, stuffs in to_draw.items():
|
|
97
|
-
watermark_list.append(b"")
|
|
98
|
-
watermarks = create_watermarks_and_draw(stream, page, action, stuffs)
|
|
99
|
-
for i, watermark in enumerate(watermarks):
|
|
100
|
-
if watermark:
|
|
101
|
-
watermark_list[i] = watermark
|
|
102
|
-
|
|
103
|
-
return merge_watermarks_with_pdf(stream, watermark_list)
|
|
104
|
-
|
|
105
|
-
|
|
106
73
|
def fill(
|
|
107
74
|
template: bytes,
|
|
108
75
|
widgets: Dict[str, WIDGET_TYPES],
|
|
76
|
+
need_appearances: bool,
|
|
109
77
|
use_full_widget_name: bool,
|
|
110
78
|
flatten: bool = False,
|
|
111
79
|
) -> tuple:
|
|
@@ -121,6 +89,9 @@ def fill(
|
|
|
121
89
|
template (bytes): The PDF template as bytes.
|
|
122
90
|
widgets (Dict[str, WIDGET_TYPES]): A dictionary of widgets to fill, where the keys are the
|
|
123
91
|
widget names and the values are the widget objects.
|
|
92
|
+
need_appearances (bool): If True, skips updating the appearance stream (AP) for
|
|
93
|
+
text and dropdown fields to maintain compatibility with Adobe Reader's
|
|
94
|
+
behavior for certain fields.
|
|
124
95
|
use_full_widget_name (bool): Whether to use the full widget name when looking up widgets
|
|
125
96
|
in the `widgets` dictionary.
|
|
126
97
|
flatten (bool): Whether to flatten the filled PDF. Defaults to False.
|
|
@@ -130,6 +101,7 @@ def fill(
|
|
|
130
101
|
The image drawn stream is only returned if there are any image or signature widgets
|
|
131
102
|
in the form.
|
|
132
103
|
"""
|
|
104
|
+
# pylint: disable=R0912
|
|
133
105
|
pdf = PdfReader(stream_to_io(template))
|
|
134
106
|
out = PdfWriter()
|
|
135
107
|
out.append(pdf)
|
|
@@ -169,15 +141,24 @@ def fill(
|
|
|
169
141
|
if widget.value == radio_button_tracker[key] - 1:
|
|
170
142
|
update_radio_value(annot)
|
|
171
143
|
elif isinstance(widget, Dropdown):
|
|
172
|
-
update_dropdown_value(annot, widget)
|
|
144
|
+
update_dropdown_value(annot, widget, need_appearances)
|
|
173
145
|
elif isinstance(widget, Text):
|
|
174
|
-
update_text_value(annot, widget)
|
|
146
|
+
update_text_value(annot, widget, need_appearances)
|
|
175
147
|
|
|
176
148
|
with BytesIO() as f:
|
|
177
149
|
out.write(f)
|
|
178
150
|
f.seek(0)
|
|
179
151
|
result = f.read()
|
|
180
152
|
|
|
153
|
+
if not any_image_to_draw:
|
|
154
|
+
return result, None
|
|
155
|
+
|
|
156
|
+
images = []
|
|
157
|
+
for page, elements in images_to_draw.items():
|
|
158
|
+
images.extend(
|
|
159
|
+
[{"page_number": page, "type": "image", **element} for element in elements]
|
|
160
|
+
)
|
|
161
|
+
|
|
181
162
|
return result, (
|
|
182
|
-
|
|
163
|
+
merge_watermarks_with_pdf(result, create_watermarks_and_draw(result, images))
|
|
183
164
|
)
|
PyPDFForm/font.py
CHANGED
|
@@ -6,11 +6,6 @@ It includes functions for registering fonts with ReportLab and within the PDF's
|
|
|
6
6
|
allowing these fonts to be used when filling form fields. The module also provides utilities
|
|
7
7
|
for extracting font information from TTF streams and managing font names within a PDF.
|
|
8
8
|
"""
|
|
9
|
-
# TODO: In `get_additional_font_params`, iterating through `reader.pages[0][Resources][Font].values()` can be inefficient for PDFs with many fonts. Consider building a font lookup dictionary once per PDF or caching results if this function is called frequently with the same PDF.
|
|
10
|
-
# TODO: In `register_font_acroform`, `PdfReader(stream_to_io(pdf))` and `writer.append(reader)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
|
|
11
|
-
# TODO: In `register_font_acroform`, `compress(ttf_stream)` can be CPU-intensive. If the same font stream is registered multiple times within a single PDF processing session, consider caching the compressed stream to avoid redundant compression.
|
|
12
|
-
# TODO: In `get_new_font_name`, while `existing` is a set, if `n` needs to increment many times due to a dense range of existing font names, the `while` loop could be slow. However, this is likely a minor bottleneck in typical scenarios.
|
|
13
|
-
# TODO: In `get_all_available_fonts`, the `replace("/", "")` operation on `BaseFont` could be avoided if font names are consistently handled with or without the leading slash to prevent string manipulation overhead in a loop.
|
|
14
9
|
|
|
15
10
|
from functools import lru_cache
|
|
16
11
|
from io import BytesIO
|
|
@@ -99,7 +94,7 @@ def get_additional_font_params(pdf: bytes, base_font_name: str) -> tuple:
|
|
|
99
94
|
return font_descriptor_params, font_dict_params
|
|
100
95
|
|
|
101
96
|
|
|
102
|
-
def compute_font_glyph_widths(ttf_file: BytesIO, missing_width: float):
|
|
97
|
+
def compute_font_glyph_widths(ttf_file: BytesIO, missing_width: float) -> list[float]:
|
|
103
98
|
"""
|
|
104
99
|
Computes the advance widths for all glyphs in a TrueType font, scaled for PDF text space.
|
|
105
100
|
|
|
@@ -134,20 +129,23 @@ def compute_font_glyph_widths(ttf_file: BytesIO, missing_width: float):
|
|
|
134
129
|
widths: list[float] = []
|
|
135
130
|
if head_table and cmap_table and hmtx_table:
|
|
136
131
|
cmap = cmap_table.getBestCmap()
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
132
|
+
if cmap:
|
|
133
|
+
units_per_em: int = head_table.unitsPerEm or 1
|
|
134
|
+
|
|
135
|
+
for codepoint in range(ENCODING_TABLE_SIZE):
|
|
136
|
+
glyph_name: str = cmap.get(codepoint, FontNotdef)
|
|
137
|
+
advance_width, _ = hmtx_table[glyph_name]
|
|
138
|
+
pdf_width: float = (advance_width / units_per_em) * EM_TO_PDF_FACTOR
|
|
139
|
+
widths.append(pdf_width)
|
|
144
140
|
else:
|
|
145
141
|
widths: list[float] = [missing_width] * ENCODING_TABLE_SIZE
|
|
146
142
|
|
|
147
143
|
return widths
|
|
148
144
|
|
|
149
145
|
|
|
150
|
-
def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> tuple:
|
|
146
|
+
def register_font_acroform(
|
|
147
|
+
pdf: bytes, ttf_stream: bytes, need_appearances: bool
|
|
148
|
+
) -> tuple:
|
|
151
149
|
"""
|
|
152
150
|
Registers a TrueType font within the PDF's AcroForm dictionary.
|
|
153
151
|
|
|
@@ -160,7 +158,9 @@ def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> t
|
|
|
160
158
|
will be modified to include the new font.
|
|
161
159
|
ttf_stream (bytes): The font file data in TTF format as bytes. This is the
|
|
162
160
|
raw data of the TrueType font file.
|
|
163
|
-
|
|
161
|
+
need_appearances (bool): If True, attempts to retrieve existing font parameters
|
|
162
|
+
from the PDF's resources to ensure compatibility when appearance streams are
|
|
163
|
+
required.
|
|
164
164
|
|
|
165
165
|
Returns:
|
|
166
166
|
tuple: A tuple containing the modified PDF data as bytes and the new font name
|
|
@@ -173,7 +173,7 @@ def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> t
|
|
|
173
173
|
|
|
174
174
|
font_descriptor_params = {}
|
|
175
175
|
font_dict_params = {}
|
|
176
|
-
if adobe_mode:
|
|
176
|
+
if need_appearances:
|
|
177
177
|
font_descriptor_params, font_dict_params = get_additional_font_params(
|
|
178
178
|
pdf, base_font_name
|
|
179
179
|
)
|