PyPI - PyPDFForm - Versions diffs - 3.1.3__py3-none-any.whl → 3.3.0__py3-none-any.whl - Mend

PyPDFForm 3.1.3 (py3-none-any.whl) → 3.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of PyPDFForm might be problematic. Click here for more details.

Files changed (27)

PyPDFForm/__init__.py +1 -1
PyPDFForm/adapter.py +2 -1
PyPDFForm/constants.py +1 -0
PyPDFForm/coordinate.py +2 -0
PyPDFForm/filler.py +5 -0
PyPDFForm/font.py +5 -0
PyPDFForm/hooks.py +51 -1
PyPDFForm/image.py +3 -0
PyPDFForm/middleware/base.py +4 -0
PyPDFForm/middleware/dropdown.py +48 -0
PyPDFForm/middleware/signature.py +1 -0
PyPDFForm/patterns.py +4 -0
PyPDFForm/template.py +5 -0
PyPDFForm/utils.py +8 -0
PyPDFForm/watermark.py +7 -0
PyPDFForm/widgets/base.py +2 -0
PyPDFForm/widgets/checkbox.py +2 -1
PyPDFForm/widgets/radio.py +1 -0
PyPDFForm/widgets/signature.py +9 -0
PyPDFForm/widgets/text.py +2 -1
PyPDFForm/wrapper.py +27 -0
{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/METADATA +3 -3
pypdfform-3.3.0.dist-info/RECORD +35 -0
pypdfform-3.1.3.dist-info/RECORD +0 -35
{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/WHEEL +0 -0
{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/licenses/LICENSE +0 -0
{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/top_level.txt +0 -0

PyPDFForm/__init__.py CHANGED Viewed

@@ -20,7 +20,7 @@ The library supports various PDF form features, including:
 PyPDFForm aims to simplify PDF form manipulation, making it accessible to developers of all skill levels.
 """
-__version__ = "3.1.3"
+__version__ = "3.3.0"
 from .middleware.text import Text  # exposing for setting global font attrs
 from .wrapper import PdfWrapper

PyPDFForm/adapter.py CHANGED Viewed

@@ -9,6 +9,7 @@ filling operations, where the input PDF template can be provided in different
 forms. The module ensures that the input is properly converted into a byte
 stream before further processing.
 """
+# TODO: For large PDF files, reading the entire file into memory using `_file.read()` in `fp_or_f_obj_or_stream_to_stream` can be inefficient. Consider streaming or chunking if downstream processing allows.
 from os.path import isfile
 from typing import Any, BinaryIO, Union
@@ -63,6 +64,6 @@ def fp_or_f_obj_or_stream_to_stream(
         if not isfile(fp_or_f_obj_or_stream):
             pass
         else:
-            with open(fp_or_f_obj_or_stream, "rb+") as _file:
+            with open(fp_or_f_obj_or_stream, "rb") as _file:
                 result = _file.read()
     return result

PyPDFForm/constants.py CHANGED Viewed

@@ -95,6 +95,7 @@ XFA = "/XFA"
 # Field flag bits
 READ_ONLY = 1 << 0
+REQUIRED = 1 << 1
 MULTILINE = 1 << 12
 COMB = 1 << 24

PyPDFForm/coordinate.py CHANGED Viewed

@@ -6,6 +6,8 @@ This module provides functionality to generate coordinate grids on existing PDF
 It allows developers to visualize the coordinate system of each page in a PDF, which can be helpful
 for debugging and precisely positioning elements when filling or drawing on PDF forms.
 """
+# TODO: The `PdfReader` object is initialized twice (lines 42 and implicitly within `create_watermarks_and_draw` if it re-reads the PDF). Consider initializing it once and passing the object or its relevant parts to avoid redundant parsing, especially for large PDFs.
+# TODO: Drawing operations for lines and texts are performed and merged separately. It might be more efficient to combine all drawing operations for a page into a single `create_watermarks_and_draw` call or to merge all watermarks in one final step to reduce PDF processing overhead.
 from typing import Tuple

PyPDFForm/filler.py CHANGED Viewed

@@ -7,6 +7,11 @@ It includes functions for handling various form field types, such as text fields
 checkboxes, radio buttons, dropdowns, images, and signatures. The module also
 supports flattening the filled form to prevent further modifications.
 """
+# TODO: In `fill` function, `PdfReader(stream_to_io(template))` and `out.append(pdf)` might involve re-parsing or copying the entire PDF. For very large PDFs, consider if `pypdf` offers more efficient ways to modify in-place or stream processing.
+# TODO: The `get_widget_key` function is called repeatedly in a loop. If its internal logic is complex, consider caching its results or optimizing its implementation to avoid redundant computations.
+# TODO: The `signature_image_handler` function involves `get_image_dimensions` and `get_draw_image_resolutions`. If image processing is a bottleneck, consider optimizing these image-related operations, perhaps by using faster image libraries or pre-calculating dimensions if images are reused.
+# TODO: Similar to `coordinate.py`, `get_drawn_stream` involves multiple `create_watermarks_and_draw` and `merge_watermarks_with_pdf` calls. Combining drawing operations or merging watermarks in a single pass could reduce overhead.
+# TODO: The `radio_button_tracker` logic involves iterating through all radio buttons. For forms with many radio buttons, consider optimizing the lookup or update mechanism if performance becomes an issue.
 from io import BytesIO
 from typing import Dict, Union, cast

PyPDFForm/font.py CHANGED Viewed

@@ -6,6 +6,11 @@ It includes functions for registering fonts with ReportLab and within the PDF's
 allowing these fonts to be used when filling form fields. The module also provides utilities
 for extracting font information from TTF streams and managing font names within a PDF.
 """
+# TODO: In `get_additional_font_params`, iterating through `reader.pages[0][Resources][Font].values()` can be inefficient for PDFs with many fonts. Consider building a font lookup dictionary once per PDF or caching results if this function is called frequently with the same PDF.
+# TODO: In `register_font_acroform`, `PdfReader(stream_to_io(pdf))` and `writer.append(reader)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
+# TODO: In `register_font_acroform`, `compress(ttf_stream)` can be CPU-intensive. If the same font stream is registered multiple times within a single PDF processing session, consider caching the compressed stream to avoid redundant compression.
+# TODO: In `get_new_font_name`, while `existing` is a set, if `n` needs to increment many times due to a dense range of existing font names, the `while` loop could be slow. However, this is likely a minor bottleneck in typical scenarios.
+# TODO: In `get_all_available_fonts`, the `replace("/", "")` operation on `BaseFont` could be avoided if font names are consistently handled with or without the leading slash to prevent string manipulation overhead in a loop.
 from functools import lru_cache
 from io import BytesIO

PyPDFForm/hooks.py CHANGED Viewed

@@ -8,6 +8,10 @@ of checkbox and radio button widgets. It also provides functions for flattening
 generic and radio button widgets. These hooks are triggered during the PDF form
 filling process, allowing for customization of the form's appearance and behavior.
 """
+# TODO: In `trigger_widget_hooks`, the PDF is read and written in each call. If this function is part of a larger workflow, consider passing `PdfReader` and `PdfWriter` objects to avoid redundant parsing and writing, allowing modifications to be accumulated and written once.
+# TODO: String manipulations (split/join) in `update_text_field_font`, `update_text_field_font_size`, and `update_text_field_font_color` could be optimized for very long `DA` strings, potentially using more efficient string manipulation techniques or regex if the structure is consistent.
+# TODO: The `get_widget_key` function is called in a loop within `trigger_widget_hooks`. If its internal logic is complex, consider caching its results or optimizing its implementation to avoid redundant computations.
+# TODO: In `flatten_radio` and `flatten_generic`, `annot.get(NameObject(Ff), 0)` is called twice within the conditional. Store this value in a local variable to avoid redundant dictionary lookups.
 import sys
 from io import BytesIO
@@ -18,7 +22,8 @@ from pypdf.generic import (ArrayObject, DictionaryObject, FloatObject,
                            NameObject, NumberObject, TextStringObject)
 from .constants import (COMB, DA, FONT_COLOR_IDENTIFIER, FONT_SIZE_IDENTIFIER,
-                        MULTILINE, READ_ONLY, Annots, Ff, Opt, Parent, Q, Rect)
+                        MULTILINE, READ_ONLY, REQUIRED, TU, Annots, Ff, Opt,
+                        Parent, Q, Rect)
 from .template import get_widget_key
 from .utils import stream_to_io
@@ -325,3 +330,48 @@ def flatten_generic(annot: DictionaryObject, val: bool) -> None:
                 else int(annot.get(NameObject(Ff), 0)) & ~READ_ONLY
             )
         )
+def update_field_tooltip(annot: DictionaryObject, val: str) -> None:
+    """
+    Updates the tooltip (alternate field name) of a form field annotation.
+    This function sets the 'TU' entry in the annotation dictionary, which
+    provides a text string that can be used as a tooltip for the field.
+    Args:
+        annot (DictionaryObject): The annotation dictionary for the form field.
+        val (str): The new tooltip string for the field.
+    """
+    if val:
+        annot[NameObject(TU)] = TextStringObject(val)
+def update_field_required(annot: DictionaryObject, val: bool) -> None:
+    """
+    Updates the 'Required' flag of a form field annotation.
+    This function modifies the Ff (flags) entry in the annotation dictionary
+    (or its parent if applicable) to set or unset the 'Required' flag,
+    making the field mandatory or optional.
+    Args:
+        annot (DictionaryObject): The annotation dictionary for the form field.
+        val (bool): True to set the field as required, False to make it optional.
+    """
+    if Parent in annot and Ff not in annot:
+        annot[NameObject(Parent)][NameObject(Ff)] = NumberObject(
+            (
+                int(annot.get(NameObject(Ff), 0)) | REQUIRED
+                if val
+                else int(annot.get(NameObject(Ff), 0)) & ~REQUIRED
+            )
+        )
+    else:
+        annot[NameObject(Ff)] = NumberObject(
+            (
+                int(annot.get(NameObject(Ff), 0)) | REQUIRED
+                if val
+                else int(annot.get(NameObject(Ff), 0)) & ~REQUIRED
+            )
+        )

PyPDFForm/image.py CHANGED Viewed

@@ -6,6 +6,9 @@ It includes functions for rotating images, retrieving image dimensions, and
 calculating the resolutions for drawing an image on a PDF page, taking into
 account whether to preserve the aspect ratio.
 """
+# TODO: In `rotate_image` and `get_image_dimensions`, `BytesIO` is used to wrap the image stream. While necessary for PIL, consider if the `image_stream` is already a file-like object in some calling contexts, which could avoid redundant copying to `BytesIO`.
+# TODO: The `rotate_image` function creates a new `BytesIO` object and saves the image to it. For multiple rotations or image manipulations, consider keeping the `PIL.Image.Image` object in memory and performing operations on it directly before a final save to bytes, to avoid repeated I/O operations.
+# TODO: The `get_image_dimensions` function opens the image to get its size. If image dimensions are frequently needed for the same image, consider caching the dimensions to avoid re-opening and re-parsing the image data.
 from io import BytesIO
 from typing import Tuple, Union

PyPDFForm/middleware/base.py CHANGED Viewed

@@ -23,6 +23,8 @@ class Widget:
     SET_ATTR_TRIGGER_HOOK_MAP = {
         "readonly": "flatten_generic",
+        "required": "update_field_required",
+        "tooltip": "update_field_tooltip",
     }
     def __init__(
@@ -41,7 +43,9 @@ class Widget:
         self._name = name
         self._value = value
         self.desc: str = None
+        self.tooltip: str = None  # TODO: sync tooltip and desc
         self.readonly: bool = None
+        self.required: bool = None
         self.hooks_to_trigger: list = []
     def __setattr__(self, name: str, value: Any) -> None:

PyPDFForm/middleware/dropdown.py CHANGED Viewed

@@ -55,6 +55,54 @@ class Dropdown(Widget):
         self.font: str = None
         self.choices: Union[tuple, list] = None
+    @property
+    def value(self) -> int:
+        """
+        Gets the current value of the dropdown.
+        Returns:
+            int: The index of the selected choice.
+        """
+        return super().value
+    @value.setter
+    def value(self, value: Union[str, int]) -> None:
+        """
+        Sets the value of the dropdown.
+        If the value is a string, it attempts to find the corresponding
+        index in the choices list. If not found, the string value is
+        added to the choices, and its new index is used.
+        Args:
+            value (Union[str, int]): The value to set. Can be a string
+                                      (option text) or an integer (index).
+        """
+        if isinstance(value, str):
+            index = self._get_option_index(value)
+            if index is None:
+                self.choices = list(self.choices) + [value]
+                index = len(self.choices) - 1
+            value = index
+        self._value = value
+    def _get_option_index(self, value: str) -> Union[int, None]:
+        """
+        Gets the index of a given option value in the dropdown's choices.
+        Args:
+            value (str): The option value to search for.
+        Returns:
+            Union[int, None]: The index of the option if found, otherwise None.
+        """
+        for i, each in enumerate(self.choices):
+            if value == each:
+                return i
+        return None
     @property
     def schema_definition(self) -> dict:
         """

PyPDFForm/middleware/signature.py CHANGED Viewed

@@ -6,6 +6,7 @@ This module defines the Signature class, which is a subclass of the
 Widget class. It represents a signature form field in a PDF document,
 allowing users to add their signature as an image.
 """
+# TODO: In the `stream` property, `fp_or_f_obj_or_stream_to_stream` is called every time the property is accessed. If the signature image is large or the property is accessed frequently, consider caching the result of `fp_or_f_obj_or_stream_to_stream` to avoid redundant file reads.
 from os.path import expanduser
 from typing import Union

PyPDFForm/patterns.py CHANGED Viewed

@@ -7,6 +7,10 @@ checkboxes, radio buttons, dropdowns, images, and signatures) based on their
 properties in the PDF's annotation dictionary. It also provides utility functions
 for updating these widgets.
 """
+# TODO: The `WIDGET_TYPE_PATTERNS` list is iterated through to determine widget types. For very large numbers of annotations or complex pattern matching, consider optimizing this lookup, perhaps by pre-compiling regexes or using a more efficient data structure if the patterns allow.
+# TODO: In `update_checkbox_value` and `update_radio_value`, iterating through `annot[AP][N]` to find the correct appearance state might be slow if `N` contains many entries. If possible, a direct lookup or a more optimized search could improve performance.
+# TODO: In `update_dropdown_value`, the list comprehension for `ArrayObject` can be computationally intensive for dropdowns with many choices, as it creates new `TextStringObject` and `ArrayObject` instances for each choice. Consider optimizing this if dropdowns have a very large number of options.
+# TODO: The `get_checkbox_value` and `get_radio_value` functions involve dictionary lookups and comparisons. While generally fast, repeated calls in a tight loop for many widgets could accumulate overhead.
 from typing import Union

PyPDFForm/template.py CHANGED Viewed

@@ -7,6 +7,11 @@ in PDF form templates. It leverages the pypdf library for PDF manipulation
 and defines specific patterns for identifying and constructing different
 types of widgets.
 """
+# TODO: In `build_widgets`, the `get_widgets_by_page` function is called, which then iterates through pages and annotations. For very large PDFs, this initial parsing and iteration can be a bottleneck. Consider optimizing the widget extraction process if possible, perhaps by using a more direct method to access annotations if `pypdf` allows.
+# TODO: The `construct_widget` function iterates through `WIDGET_TYPE_PATTERNS` for each widget. If there are many patterns or many widgets, this repeated iteration could be optimized by pre-compiling patterns or using a more efficient lookup mechanism.
+# TODO: In `get_widget_key`, the recursive call for `Parent` can lead to deep recursion for deeply nested widgets, potentially impacting performance or hitting recursion limits for extremely complex forms. Consider an iterative approach if deep nesting is common.
+# TODO: In `update_widget_keys`, the nested loops iterating through `old_keys`, `out.pages`, and `page.get(Annots, [])` can be very inefficient for large numbers of keys, pages, or annotations. Consider creating a lookup structure for annotations by key to avoid repeated linear scans.
+# TODO: In `update_widget_keys`, `PdfReader(stream_to_io(template))` and `out.append(pdf)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
 from functools import lru_cache
 from io import BytesIO

PyPDFForm/utils.py CHANGED Viewed

@@ -12,6 +12,14 @@ It includes functions for:
 - Generating unique suffixes for internal use.
 - Enabling Adobe-specific settings in the PDF to ensure proper rendering of form fields.
 """
+# TODO: In `enable_adobe_mode`, `PdfReader(stream_to_io(pdf))` and `writer.append(reader)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
+# TODO: In `remove_all_widgets`, `PdfReader(stream_to_io(pdf))` and iterating through pages to add them to a new writer can be inefficient for large PDFs. Consider if `pypdf` offers a more direct way to remove annotations without reconstructing the entire PDF.
+# TODO: In `get_page_streams`, `PdfReader(stream_to_io(pdf))` and then creating a new `PdfWriter` for each page can be very inefficient. It would be more performant to iterate through the pages of a single `PdfReader` and extract their content streams directly if possible, or to use a single `PdfWriter` to extract multiple pages.
+# TODO: In `merge_two_pdfs`, the function reads and writes PDFs multiple times (`PdfReader`, `PdfWriter`, `remove_all_widgets`, then another `PdfReader` and `PdfWriter`). This is highly inefficient. The PDF objects should be passed around and modified in-place as much as possible, with a single final write operation.
+# TODO: The `merge_two_pdfs` function has a `TODO: refactor duplicate logic with copy_watermark_widgets` comment. This indicates a potential for code duplication and inefficiency. Refactoring this to a shared helper function would improve maintainability and potentially performance.
+# TODO: In `find_pattern_match` and `traverse_pattern`, the recursive nature and repeated dictionary lookups (`widget.items()`, `value.get_object()`) can be slow for deeply nested or complex widget structures. Consider optimizing these traversals, perhaps by pre-flattening the widget dictionary or using a more direct access method if `pypdf` allows.
+# TODO: In `extract_widget_property`, the loop iterates through `patterns` and calls `traverse_pattern` for each. If `patterns` is long or `traverse_pattern` is expensive, this could be a bottleneck. Consider optimizing the pattern matching or lookup.
+# TODO: `generate_unique_suffix` uses `choice` in a loop. While generally fast, for extremely high call volumes, pre-generating a pool of characters or using a faster random string generation method might offer minor improvements.
 from collections.abc import Callable
 from functools import lru_cache

PyPDFForm/watermark.py CHANGED Viewed

@@ -7,6 +7,13 @@ It supports drawing text, lines, and images as watermarks.
 The module also includes functions to merge these watermarks with the original PDF content
 and to copy specific widgets from the watermarks to the original PDF.
 """
+# TODO: In `draw_image`, `ImageReader(image_buff)` is created for each image. If the same image is drawn multiple times, consider caching `ImageReader` objects or passing pre-processed image data to avoid redundant processing.
+# TODO: In `create_watermarks_and_draw`, `PdfReader(stream_to_io(pdf))` is called, which re-parses the PDF. If this function is called repeatedly for the same PDF, consider passing the `PdfReader` object directly to avoid redundant parsing.
+# TODO: In `create_watermarks_and_draw`, the function returns a list of watermarks where only one element is populated. This can be inefficient for memory if there are many pages but only one watermark is created. Consider returning only the created watermark and its page number, and let the caller handle placement.
+# TODO: In `merge_watermarks_with_pdf`, `PdfReader(stream_to_io(pdf))` and `PdfReader(stream_to_io(watermarks[i]))` are called in a loop. This leads to repeated parsing of the base PDF and each watermark. It would be more efficient to parse the base PDF once and then merge watermark pages directly into the existing `PdfWriter` object.
+# TODO: In `copy_watermark_widgets`, the function reads the PDF and watermarks multiple times. Similar to `merge_watermarks_with_pdf`, optimize by parsing the base PDF and watermarks once and then manipulating the `PdfWriter` object.
+# TODO: The `copy_watermark_widgets` function has a `TODO: refactor duplicate logic with merge_two_pdfs` comment. This indicates a potential for code duplication and inefficiency. Refactoring this to a shared helper function would improve maintainability and potentially performance.
+# TODO: In `copy_watermark_widgets`, the nested loops iterating through `watermarks`, `watermark_file.pages`, and `page.get(Annots, [])` can be very inefficient for large numbers of watermarks, pages, or annotations. Consider creating a lookup structure for annotations by key to avoid repeated linear scans.
 from io import BytesIO
 from typing import List, Union

PyPDFForm/widgets/base.py CHANGED Viewed

@@ -7,6 +7,8 @@ such as text fields, checkboxes, and radio buttons. The Widget class handles
 basic properties like name, page number, and coordinates, and provides methods
 for rendering the widget on a PDF page.
 """
+# TODO: In `watermarks`, `PdfReader(stream_to_io(stream))` is called, which re-parses the PDF for each widget. If multiple widgets are being processed, consider passing the `PdfReader` object directly to avoid redundant parsing.
+# TODO: In `watermarks`, the list comprehension `[watermark.read() if i == self.page_number - 1 else b"" for i in range(page_count)]` creates a new `BytesIO` object and reads from it for each widget. If many widgets are created, this could be optimized by creating the `BytesIO` object once and passing it around, or by directly returning the watermark bytes and its page number.
 from io import BytesIO
 from typing import List, Union

PyPDFForm/widgets/checkbox.py CHANGED Viewed

@@ -25,6 +25,7 @@ class CheckBoxWidget(Widget):
     """
     USER_PARAMS = [
+        ("tooltip", "tooltip"),
         ("button_style", "buttonStyle"),
         ("tick_color", "textColor"),
         ("bg_color", "fillColor"),
@@ -32,5 +33,5 @@ class CheckBoxWidget(Widget):
         ("border_width", "borderWidth"),
     ]
     COLOR_PARAMS = ["tick_color", "bg_color", "border_color"]
-    ALLOWED_HOOK_PARAMS = ["size"]
+    ALLOWED_HOOK_PARAMS = ["required", "size"]
     ACRO_FORM_FUNC = "checkbox"

PyPDFForm/widgets/radio.py CHANGED Viewed

@@ -4,6 +4,7 @@ This module defines the RadioWidget class, which is a subclass of the
 CheckBoxWidget class. It represents a radio button form field in a PDF
 document.
 """
+# TODO: In `canvas_operations`, `self.acro_form_params.copy()` creates a shallow copy of the dictionary in each iteration of the loop. For a large number of radio buttons, this repeated copying can be inefficient. Consider modifying the dictionary in place and then reverting changes if necessary, or restructuring the data to avoid repeated copying.
 from typing import List

PyPDFForm/widgets/signature.py CHANGED Viewed

@@ -5,6 +5,9 @@ representing signature fields in a PDF form. It handles the creation and
 rendering of signature widgets, as well as the integration of signatures
 into the PDF document.
 """
+# TODO: In `watermarks`, `PdfReader(stream_to_io(BEDROCK_PDF))` is called every time the method is invoked. If `BEDROCK_PDF` is static, consider parsing it once and caching the `PdfReader` object to avoid redundant I/O and parsing.
+# TODO: In `watermarks`, the list comprehension `[f.read() if i == self.page_number - 1 else b"" for i in range(page_count)]` reads the entire `BytesIO` object `f` multiple times if `page_count` is large. Read `f` once into a variable and then use that variable in the list comprehension.
+# TODO: The `input_pdf` is created in `watermarks` but only its page count is used. If the `PdfReader` object is not needed for other operations, consider a lighter way to get the page count or pass the `PdfReader` object from the caller if it's already available.
 from io import BytesIO
 from typing import List
@@ -31,6 +34,8 @@ class SignatureWidget:
     Attributes:
         OPTIONAL_PARAMS (list): A list of tuples, where each tuple contains the
             parameter name and its default value.
+        ALLOWED_HOOK_PARAMS (list): A list of parameter names that can be
+            used as hooks to trigger dynamic modifications.
         BEDROCK_WIDGET_TO_COPY (str): The name of the bedrock widget to copy.
     """
@@ -38,6 +43,7 @@ class SignatureWidget:
         ("width", 160),
         ("height", 90),
     ]
+    ALLOWED_HOOK_PARAMS = ["required", "tooltip"]
     BEDROCK_WIDGET_TO_COPY = "signature"
     def __init__(
@@ -68,6 +74,9 @@ class SignatureWidget:
         self.optional_params = {
             each[0]: kwargs.get(each[0], each[1]) for each in self.OPTIONAL_PARAMS
         }
+        for each in self.ALLOWED_HOOK_PARAMS:
+            if each in kwargs:
+                self.hook_params.append((each, kwargs.get(each)))
     def watermarks(self, stream: bytes) -> List[bytes]:
         """

PyPDFForm/widgets/text.py CHANGED Viewed

@@ -27,6 +27,7 @@ class TextWidget(Widget):
     """
     USER_PARAMS = [
+        ("tooltip", "tooltip"),
         ("width", "width"),
         ("height", "height"),
         ("font_size", "fontSize"),
@@ -37,6 +38,6 @@ class TextWidget(Widget):
         ("max_length", "maxlen"),
     ]
     COLOR_PARAMS = ["font_color", "bg_color", "border_color"]
-    ALLOWED_HOOK_PARAMS = ["alignment", "multiline", "comb", "font"]
+    ALLOWED_HOOK_PARAMS = ["required", "alignment", "multiline", "comb", "font"]
     NONE_DEFAULTS = ["max_length"]
     ACRO_FORM_FUNC = "textfield"

PyPDFForm/wrapper.py CHANGED Viewed

@@ -15,6 +15,17 @@ methods for interacting with its form fields and content. It leverages
 lower-level modules within the `PyPDFForm` library to handle the
 underlying PDF manipulation.
 """
+# TODO: The `__add__` method (merging PDFs) involves multiple `self.read()` and `other.read()` calls, leading to redundant PDF parsing. Consider optimizing by passing `PdfReader` objects directly or by performing a single read and then merging.
+# TODO: In `_init_helper`, `build_widgets` and `get_all_available_fonts` both call `self.read()`, causing the PDF to be parsed multiple times. Optimize by parsing the PDF once and passing the `PdfReader` object to these functions.
+# TODO: The `pages` property's implementation involves `get_page_streams(remove_all_widgets(self.read()))` and `copy_watermark_widgets(each, self.read(), None, i)`. This leads to excessive PDF parsing, widget removal, and copying for each page. Refactor to minimize PDF I/O operations, possibly by working with `pypdf` page objects directly.
+# TODO: The `read` method triggers `trigger_widget_hooks` and `enable_adobe_mode`, both of which can involve PDF parsing and writing. Since `read` is called frequently, this can be a performance bottleneck. Consider a more granular dirty-flag system to only apply changes when necessary, or accumulate changes and apply them in a single PDF write operation.
+# TODO: The `write` method calls `self.read()`, which in turn triggers all pending operations. This can lead to redundant processing if `read()` has already been called or if multiple `write()` calls are made.
+# TODO: In `change_version`, replacing a byte string in the entire PDF stream can be inefficient for very large PDFs. Consider if `pypdf` offers a more direct way to update the PDF version without full stream manipulation.
+# TODO: In `generate_coordinate_grid`, `self.read()` is called multiple times, and then `remove_all_widgets`, `generate_coordinate_grid`, and `copy_watermark_widgets` are called, all of which involve PDF parsing and manipulation. Optimize by minimizing PDF I/O and object re-creation.
+# TODO: In `fill`, `self.read()` is called, and then `fill` (from `filler.py`), `remove_all_widgets`, and `copy_watermark_widgets` are called. This is a major operation and likely a performance hotspot due to repeated PDF processing. Streamline the PDF modification workflow to reduce redundant parsing and writing.
+# TODO: In `create_widget`, `obj.watermarks(self.read())` and `copy_watermark_widgets(self.read(), watermarks, [name], None)` involve reading the PDF multiple times. Optimize by passing the PDF stream or `PdfReader` object more efficiently.
+# TODO: The `commit_widget_key_updates` method calls `update_widget_keys`, which involves re-parsing and writing the PDF. For bulk updates, consider a mechanism to apply all key changes in a single PDF modification operation.
+# TODO: General: Many methods repeatedly call `self.read()`, which re-parses the PDF. Consider maintaining a persistent `pypdf.PdfReader` and `pypdf.PdfWriter` object internally and only writing to a byte stream when explicitly requested (e.g., by the `read()` or `write()` methods) to avoid redundant I/O and parsing overhead.
 from __future__ import annotations
@@ -218,6 +229,22 @@ class PdfWrapper:
             },
         }
+    @property
+    def data(self) -> dict:
+        """
+        Returns a dictionary of the current data in the PDF form fields.
+        The keys of the dictionary are the form field names, and the values are
+        the current values of those fields. This property provides a convenient
+        way to extract all filled data from the PDF.
+        Returns:
+            dict: A dictionary where keys are form field names (str) and values are
+                  their corresponding data (Union[str, bool, int, None]).
+        """
+        return {key: value.value for key, value in self.widgets.items()}
     @property
     def sample_data(self) -> dict:
         """

{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: PyPDFForm
-Version: 3.1.3
+Version: 3.3.0
 Summary: The Python library for PDF forms.
 Author: Jinge Li
 License-Expression: MIT
@@ -70,7 +70,7 @@ pip install PyPDFForm
 ## Quick Example
 ![Check out the GitHub repository for a live demo if you can't see it here.](https://github.com/chinapandaman/PyPDFForm/raw/master/docs/img/demo.gif)
-A sample PDF form can be found [here](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/sample_template.pdf). Download it and try:
+A sample PDF form can be found [here](https://chinapandaman.github.io/PyPDFForm/pdfs/sample_template.pdf). Download it and try:
 ```python
 from PyPDFForm import PdfWrapper
@@ -90,7 +90,7 @@ filled.write("output.pdf")
 ```
 After running the above code snippet you can find `output.pdf` at the location you specified,
-and it should look like [this](https://github.com/chinapandaman/PyPDFForm/raw/master/pdf_samples/adobe_mode/sample_filled.pdf).
+and it should look like [this](https://chinapandaman.github.io/PyPDFForm/pdfs/sample_filled.pdf).
 ## Documentation

pypdfform-3.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,35 @@
+PyPDFForm/__init__.py,sha256=ToO1r5zbCDJutiMz36Y_5mpA35NPoRtwTAsABtKUGEQ,925
+PyPDFForm/adapter.py,sha256=LBxHth0qJFB6rdByRJbsn4x0dftCOAolKVutZeFZm9E,2634
+PyPDFForm/constants.py,sha256=Y5l1qIZGPsSoMl55bOsXaHf3yAY36_b-8KRxTLxXGmk,2541
+PyPDFForm/coordinate.py,sha256=veYOlRyFKIvzLISYA_f-drNBiKOzFwr0EIFCaUAzGgo,4428
+PyPDFForm/filler.py,sha256=fqGIxT3FR3cWo3SMTDYud6Ocs9SZBmSpFv5yg1v19Wk,8450
+PyPDFForm/font.py,sha256=opZjAacsIRFcERXWegPXkOSpmnRrv4y_50yD0_BjWPM,10273
+PyPDFForm/hooks.py,sha256=3ugnhnrB4nFsGL6fc1TtT5Nf_J2QOtM5ZQsm6WVpErY,14279
+PyPDFForm/image.py,sha256=P1P3Ejm8PVPQwpJFGAesjtwS5hxnVItrj75TE3WnFhM,4607
+PyPDFForm/patterns.py,sha256=HbTqzFllQ1cW3CqyNEfVh0qUMeFerbvOd0-HQnkifQQ,9765
+PyPDFForm/template.py,sha256=Jvx99HjLcEG8fZQeGSPZEFcITa4jauPSvenj3XgAf3c,11046
+PyPDFForm/utils.py,sha256=JavhAO4HmYRdujlsPXcZWGXTf7wDXzj4uU1XGRFsAaA,13257
+PyPDFForm/watermark.py,sha256=BJ8NeZLKf-MuJ2XusHiALaQpoqE8j6hHGbWcNhpjxN0,11299
+PyPDFForm/wrapper.py,sha256=KTFou6cXrHtLHVKwngoIr4Pwu4vOfjXY0cWRNNDlW0U,28866
+PyPDFForm/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+PyPDFForm/middleware/base.py,sha256=ZmJFh3nSxj6PFjqBqsLih0pXKtcm1o-ctJVWn0v6bbI,3278
+PyPDFForm/middleware/checkbox.py,sha256=OCSZEFD8wQG_Y9qO7Os6VXTaxJCpkRYTxI4wDgG0GZc,1870
+PyPDFForm/middleware/dropdown.py,sha256=pfiMuAOr3ze7eboCB55UKaSR89oLNhvHGvNmDGWHVS0,3855
+PyPDFForm/middleware/image.py,sha256=eKM7anU56jbaECnK6rq0jGsBRY3HW_fM86fgA3hq7xA,585
+PyPDFForm/middleware/radio.py,sha256=PuGDJ8RN1C-MkL9Jf14ABWYV67cN18R66dI4nR-03DU,2211
+PyPDFForm/middleware/signature.py,sha256=P6Mg9AZP5jML7GawsteVZjDaunKb9Yazu5iy0qF60bo,2432
+PyPDFForm/middleware/text.py,sha256=GLKuYvG4BUtNvj-3NkDeIlV1jcouhn7gAqfm9TBWduQ,3936
+PyPDFForm/widgets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+PyPDFForm/widgets/base.py,sha256=vudfjwlybj82pxQhy6K8Qds9osFqn219Ze3yMs8QQuU,5786
+PyPDFForm/widgets/bedrock.py,sha256=j6beU04kaQzpAIFZHI5VJLaDT5RVAAa6LzkU1luJpN8,137660
+PyPDFForm/widgets/checkbox.py,sha256=5Cg07d3SmRehkSiROtkK2vl0WKITxmv9BAKVnem8keM,1325
+PyPDFForm/widgets/dropdown.py,sha256=6zZwt6eU9Hgwl-57QfyT3G6c37FkQTJ-XSsXGluWevs,1459
+PyPDFForm/widgets/image.py,sha256=aSD-3MEZFIRL7HYVuO6Os8irfSUOLHA_rHGkqcEIPPA,855
+PyPDFForm/widgets/radio.py,sha256=oFw8Um4g414UH93QJv6dZHRxpq0yuYog09B2W3eE8wo,2612
+PyPDFForm/widgets/signature.py,sha256=L4Et6pxtrEh7U-lnnLZrnvb_dKwGNpI6TZ11HCD0lvY,5147
+PyPDFForm/widgets/text.py,sha256=GjPwajoP20dZMlJGhJrQtwOa4VHGInjYkjUYmLwtRWs,1584
+pypdfform-3.3.0.dist-info/licenses/LICENSE,sha256=43awmYkI6opyTpg19me731iO1WfXZwViqb67oWtCsFY,1065
+pypdfform-3.3.0.dist-info/METADATA,sha256=K7q2yHg1rUw5hwO4gnmgNB8fR-UjPWce-z-0YY7gbWU,4538
+pypdfform-3.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pypdfform-3.3.0.dist-info/top_level.txt,sha256=GQQKuWqPUjT9YZqwK95NlAQzxjwoQrsxQ8ureM8lWOY,10
+pypdfform-3.3.0.dist-info/RECORD,,

pypdfform-3.1.3.dist-info/RECORD DELETED Viewed

@@ -1,35 +0,0 @@
-PyPDFForm/__init__.py,sha256=Si2S1NESLXeTXj-HLtHdFCntkfhCP3WIG8wCgLdWkbk,925
-PyPDFForm/adapter.py,sha256=8E_PZlXU1ALWez_pWF_U52cynzowK_NQFYzMJoH9VUk,2428
-PyPDFForm/constants.py,sha256=GU0LcNbN-ttYQVVoFGQLysKByJYF4lKoMideU65z_wI,2523
-PyPDFForm/coordinate.py,sha256=VMVkqa-VAGJSGVvetZwOxeMzIgQtQdvtn_DI_qSecCE,3876
-PyPDFForm/filler.py,sha256=KwStL6YzrNBcDd919ig83MnAxopi8Vnz3QNJzN_CjNM,7272
-PyPDFForm/font.py,sha256=Nyk1dHgC9NBkXDTYiGz9eyCwHadpU-JjR-xOM604cpA,9053
-PyPDFForm/hooks.py,sha256=A8p67ubWvCvzRt346Q7BjOvbi4_NXBcynXmq6fJTadY,11679
-PyPDFForm/image.py,sha256=CAC69jEfSbWbyNJcjLhjWVSNJuFh7frMI70eaiFayHw,3823
-PyPDFForm/patterns.py,sha256=RiQKqsOMrB9u4KWj5Kv6GUmcuGI77xMvdOcOcHy_9qE,8717
-PyPDFForm/template.py,sha256=lKkja_8Sx6vun1tOklSpdNT1pdelhfVl10kX-G4sLlA,9673
-PyPDFForm/utils.py,sha256=hLSVUG6qnE0iTMB-yPNQQIhmm3R69X7fcnbCTDvSUQs,11001
-PyPDFForm/watermark.py,sha256=9p1tjaIqicXngTNai_iOEkCoXRYnR66azB4s7wNsZUw,9349
-PyPDFForm/wrapper.py,sha256=Ysd1mkvE5OfDkjUI6mNFIt16-JUKwj2ifbp1MioWPUo,25257
-PyPDFForm/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-PyPDFForm/middleware/base.py,sha256=zBO9YP01dAEfFKoHKDg10XcpXEuYdFd-pb5wSFmJJj0,3091
-PyPDFForm/middleware/checkbox.py,sha256=OCSZEFD8wQG_Y9qO7Os6VXTaxJCpkRYTxI4wDgG0GZc,1870
-PyPDFForm/middleware/dropdown.py,sha256=4HkVNHoYzH0isdBIdjNtViBx263j4KmYtW0SYzER5zQ,2412
-PyPDFForm/middleware/image.py,sha256=eKM7anU56jbaECnK6rq0jGsBRY3HW_fM86fgA3hq7xA,585
-PyPDFForm/middleware/radio.py,sha256=PuGDJ8RN1C-MkL9Jf14ABWYV67cN18R66dI4nR-03DU,2211
-PyPDFForm/middleware/signature.py,sha256=a2IfD36zpEWXWNNWRvtJ6nG6TszkF6Wil82Szsbjfns,2149
-PyPDFForm/middleware/text.py,sha256=GLKuYvG4BUtNvj-3NkDeIlV1jcouhn7gAqfm9TBWduQ,3936
-PyPDFForm/widgets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-PyPDFForm/widgets/base.py,sha256=kOfxV8HSmwXoy0vFYPgJKKDQ7RUNMACWfX6T4HeJeOU,5177
-PyPDFForm/widgets/bedrock.py,sha256=j6beU04kaQzpAIFZHI5VJLaDT5RVAAa6LzkU1luJpN8,137660
-PyPDFForm/widgets/checkbox.py,sha256=s4a0a1pAemQyrz3SpZHzIPoVLvJZAV72KEfxKp15dyk,1281
-PyPDFForm/widgets/dropdown.py,sha256=6zZwt6eU9Hgwl-57QfyT3G6c37FkQTJ-XSsXGluWevs,1459
-PyPDFForm/widgets/image.py,sha256=aSD-3MEZFIRL7HYVuO6Os8irfSUOLHA_rHGkqcEIPPA,855
-PyPDFForm/widgets/radio.py,sha256=nWSQQp06kRISO7Q7FVFeB3PXYvMOSc0SMhRs1bHTxeQ,2261
-PyPDFForm/widgets/signature.py,sha256=EqIRIuKSQEg8LJZ_Mu859eEvs0dwO-mzkMNuhHG1Vsg,4034
-PyPDFForm/widgets/text.py,sha256=gtheE6_w0vQPRJJ9oj_l9FaMDEGnPtvVR6_axsrmxKI,1540
-pypdfform-3.1.3.dist-info/licenses/LICENSE,sha256=43awmYkI6opyTpg19me731iO1WfXZwViqb67oWtCsFY,1065
-pypdfform-3.1.3.dist-info/METADATA,sha256=2CB-pD0wqfNYa7yfdRcwCsxqzv2nfXE7asKLaSkyT20,4587
-pypdfform-3.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pypdfform-3.1.3.dist-info/top_level.txt,sha256=GQQKuWqPUjT9YZqwK95NlAQzxjwoQrsxQ8ureM8lWOY,10
-pypdfform-3.1.3.dist-info/RECORD,,

{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pypdfform-3.1.3.dist-info → pypdfform-3.3.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

PyPDFForm 3.1.3__py3-none-any.whl → 3.3.0__py3-none-any.whl

Potentially problematic release.

PyPDFForm 3.1.3py3-none-any.whl → 3.3.0py3-none-any.whl