PyPI - PyPDFForm - Versions diffs - 2.5.0__py3-none-any.whl → 3.0.1__py3-none-any.whl - Mend

PyPDFForm 2.5.0__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of PyPDFForm might be problematic. Click here for more details.

Files changed (33)

PyPDFForm/__init__.py +22 -6
PyPDFForm/adapter.py +28 -26
PyPDFForm/constants.py +30 -35
PyPDFForm/coordinate.py +23 -399
PyPDFForm/filler.py +79 -303
PyPDFForm/font.py +166 -164
PyPDFForm/hooks.py +109 -69
PyPDFForm/image.py +72 -22
PyPDFForm/middleware/base.py +42 -60
PyPDFForm/middleware/checkbox.py +27 -58
PyPDFForm/middleware/dropdown.py +41 -30
PyPDFForm/middleware/image.py +10 -22
PyPDFForm/middleware/radio.py +30 -31
PyPDFForm/middleware/signature.py +32 -47
PyPDFForm/middleware/text.py +54 -48
PyPDFForm/patterns.py +61 -106
PyPDFForm/template.py +80 -427
PyPDFForm/utils.py +136 -128
PyPDFForm/watermark.py +77 -208
PyPDFForm/widgets/base.py +57 -76
PyPDFForm/widgets/checkbox.py +18 -21
PyPDFForm/widgets/dropdown.py +18 -25
PyPDFForm/widgets/image.py +11 -9
PyPDFForm/widgets/radio.py +25 -35
PyPDFForm/widgets/signature.py +29 -40
PyPDFForm/widgets/text.py +18 -17
PyPDFForm/wrapper.py +351 -443
{pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/METADATA +6 -7
pypdfform-3.0.1.dist-info/RECORD +35 -0
{pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/WHEEL +1 -1
pypdfform-2.5.0.dist-info/RECORD +0 -35
{pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/licenses/LICENSE +0 -0
{pypdfform-2.5.0.dist-info → pypdfform-3.0.1.dist-info}/top_level.txt +0 -0

PyPDFForm/utils.py CHANGED Viewed

@@ -1,13 +1,16 @@
 # -*- coding: utf-8 -*-
-"""Provides core utility functions for PDF form processing.
-This module contains general-purpose utilities used throughout PyPDFForm:
-- Stream/file handling conversions
-- Color space transformations
-- Widget preview generation
-- PDF merging and splitting
-- Pattern matching for PDF structures
-- Unique ID generation
+"""
+This module provides a collection of utility functions used throughout the PyPDFForm library.
+It includes functions for:
+- Converting byte streams to BinaryIO objects.
+- Removing all widgets (form fields) from a PDF.
+- Extracting the content stream of each page in a PDF.
+- Merging two PDFs into one.
+- Finding and traversing patterns within PDF widgets.
+- Extracting widget properties based on defined patterns.
+- Generating unique suffixes for internal use.
+- Enabling Adobe-specific settings in the PDF to ensure proper rendering of form fields.
 """
 from collections.abc import Callable
@@ -18,29 +21,28 @@ from string import ascii_letters, digits, punctuation
 from typing import Any, BinaryIO, List, Union
 from pypdf import PdfReader, PdfWriter
-from pypdf.generic import ArrayObject, DictionaryObject
-from reportlab.lib.colors import CMYKColor, Color
+from pypdf.generic import ArrayObject, DictionaryObject, NameObject
-from .constants import (BUTTON_STYLES, DEFAULT_CHECKBOX_STYLE, DEFAULT_FONT,
-                        DEFAULT_FONT_COLOR, DEFAULT_FONT_SIZE,
-                        DEFAULT_RADIO_STYLE, PREVIEW_FONT_COLOR,
-                        UNIQUE_SUFFIX_LENGTH, WIDGET_TYPES)
-from .middleware.checkbox import Checkbox
-from .middleware.radio import Radio
-from .middleware.text import Text
+from .constants import UNIQUE_SUFFIX_LENGTH, XFA, AcroForm, Annots, Root
 @lru_cache
 def stream_to_io(stream: bytes) -> BinaryIO:
-    """Converts a byte stream to a seekable binary IO object.
+    """
+    Converts a bytes stream to a BinaryIO object, which can be used by PyPDFForm.
+    This function takes a bytes stream as input and returns a BinaryIO object
+    that represents the same data. This is useful because PyPDFForm often
+    works with BinaryIO objects, so this function allows you to easily convert
+    a bytes stream to the correct format. The result is cached using lru_cache
+    for performance.
     Args:
-        stream: Input byte stream to convert
+        stream (bytes): The bytes stream to convert.
     Returns:
-        BinaryIO: Seekable file-like object containing the stream data
+        BinaryIO: A BinaryIO object representing the stream.
     """
     result = BytesIO()
     result.write(stream)
     result.seek(0)
@@ -48,101 +50,52 @@ def stream_to_io(stream: bytes) -> BinaryIO:
     return result
-def handle_color(color: Union[list, ArrayObject]) -> Union[Color, CMYKColor, None]:
-    """Converts PDF color specifications to reportlab color objects.
-    Supports:
-    - Grayscale (1 component)
-    - RGB (3 components)
-    - CMYK (4 components)
-    Args:
-        color: Color array from PDF specification
-    Returns:
-        Union[Color, CMYKColor, None]: Color object or None if invalid format
-    """
-    result = None
-    if len(color) == 1:
-        result = CMYKColor(black=1 - color[0])
-    elif len(color) == 3:
-        result = Color(red=color[0], green=color[1], blue=color[2])
-    elif len(color) == 4:
-        result = CMYKColor(
-            cyan=color[0], magenta=color[1], yellow=color[2], black=color[3]
-        )
-    return result
+@lru_cache
+def enable_adobe_mode(pdf: bytes) -> bytes:
+    """Enables Adobe-specific settings in the PDF to ensure proper rendering of form fields.
-def checkbox_radio_to_draw(
-    widget: Union[Checkbox, Radio], font_size: Union[float, int]
-) -> Text:
-    """Converts checkbox/radio widgets to text symbols for drawing.
+    This function modifies the PDF's AcroForm dictionary to include the `NeedAppearances` flag,
+    which forces Adobe Reader to generate appearance streams for form fields. It also handles
+    XFA (XML Forms Architecture) forms by removing the XFA entry from the AcroForm dictionary
+    if it exists, ensuring compatibility and proper rendering. This ensures that the form fields
+    are rendered correctly in Adobe Reader, especially when the form is filled programmatically.
     Args:
-        widget: Checkbox or Radio widget to convert
-        font_size: Size for the drawn symbol
+        pdf (bytes): The PDF content as bytes.
     Returns:
-        Text: Text widget configured to draw the appropriate symbol
+        bytes: The modified PDF content with Adobe mode enabled.
     """
+    reader = PdfReader(stream_to_io(pdf))
+    writer = PdfWriter()
-    new_widget = Text(
-        name=widget.name,
-        value="",
-    )
-    new_widget.font = DEFAULT_FONT
-    new_widget.font_size = font_size
-    new_widget.font_color = DEFAULT_FONT_COLOR
-    new_widget.value = BUTTON_STYLES.get(widget.button_style) or (
-        DEFAULT_CHECKBOX_STYLE if type(widget) is Checkbox else DEFAULT_RADIO_STYLE
-    )
-    return new_widget
+    if AcroForm in reader.trailer[Root] and XFA in reader.trailer[Root][AcroForm]:
+        del reader.trailer[Root][AcroForm][XFA]
+    writer.append(reader)
+    writer.set_need_appearances_writer()
-def preview_widget_to_draw(
-    widget_name: str, widget: WIDGET_TYPES, with_preview_text: bool
-) -> Text:
-    """Creates preview version of a widget showing field name/location.
+    with BytesIO() as f:
+        writer.write(f)
+        f.seek(0)
+        return f.read()
-    Args:
-        widget_name: Name of the widget to generate preview for
-        widget: Widget to generate preview for
-        with_preview_text: Whether to include field name in preview
-    Returns:
-        Text: Text widget configured for preview display
+@lru_cache
+def remove_all_widgets(pdf: bytes) -> bytes:
     """
+    Removes all widgets (form fields) from a PDF, effectively flattening the form.
-    new_widget = Text(
-        name=widget.name,
-        value="{" + f" {widget_name} " + "}" if with_preview_text else None,
-    )
-    new_widget.font = DEFAULT_FONT
-    new_widget.font_size = DEFAULT_FONT_SIZE
-    new_widget.font_color = PREVIEW_FONT_COLOR
-    new_widget.preview = with_preview_text
-    new_widget.border_color = handle_color([0, 0, 0])
-    new_widget.border_width = 1
-    new_widget.render_widget = True
-    return new_widget
-def remove_all_widgets(pdf: bytes) -> bytes:
-    """Removes all interactive form fields from a PDF document.
+    This function takes a PDF as a bytes stream, removes all of its interactive
+    form fields (widgets), and returns the modified PDF as a bytes stream. This
+    is useful for creating a non-interactive version of a PDF form.
     Args:
-        pdf: Input PDF as bytes
+        pdf (bytes): The PDF as a bytes stream.
     Returns:
-        bytes: Flattened PDF with form fields removed
+        bytes: The PDF with all widgets removed, as a bytes stream.
     """
     pdf_file = PdfReader(stream_to_io(pdf))
     result_stream = BytesIO()
     writer = PdfWriter()
@@ -157,15 +110,18 @@ def remove_all_widgets(pdf: bytes) -> bytes:
 def get_page_streams(pdf: bytes) -> List[bytes]:
-    """Splits a PDF into individual page streams.
+    """
+    Extracts the content stream of each page in a PDF as a list of byte streams.
+    This function takes a PDF as a bytes stream and returns a list of bytes streams,
+    where each element in the list represents the content stream of a page in the PDF.
     Args:
-        pdf: Input PDF as bytes
+        pdf (bytes): The PDF as a bytes stream.
     Returns:
-        List[bytes]: List where each element contains a single PDF page
+        List[bytes]: A list of bytes streams, one for each page.
     """
     pdf_file = PdfReader(stream_to_io(pdf))
     result = []
@@ -181,16 +137,20 @@ def get_page_streams(pdf: bytes) -> List[bytes]:
 def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
-    """Combines two PDF documents into a single multipage PDF.
+    """
+    Merges two PDF files into a single PDF file.
+    This function takes two PDF files as byte streams, merges them, and returns the result as a single PDF byte stream.
+    It handles the merging of pages from both PDFs and also attempts to preserve form field widgets from both input PDFs
+    in the final merged PDF. The form fields are cloned and added to the output pages.
     Args:
-        pdf: First PDF as bytes
-        other: Second PDF as bytes
+        pdf (bytes): The first PDF file as a byte stream.
+        other (bytes): The second PDF file as a byte stream.
     Returns:
-        bytes: Combined PDF containing all pages from both inputs
+        bytes: The merged PDF file as a byte stream.
     """
     output = PdfWriter()
     pdf_file = PdfReader(stream_to_io(pdf))
     other_file = PdfReader(stream_to_io(other))
@@ -203,20 +163,52 @@ def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
     output.write(result)
     result.seek(0)
+    merged_no_widgets = PdfReader(stream_to_io(remove_all_widgets(result.read())))
+    output = PdfWriter()
+    output.append(merged_no_widgets)
+    # TODO: refactor duplicate logic with copy_watermark_widgets
+    widgets_to_copy = {}
+    for i, page in enumerate(pdf_file.pages):
+        widgets_to_copy[i] = []
+        for annot in page.get(Annots, []):
+            widgets_to_copy[i].append(annot.clone(output))
+    for i, page in enumerate(other_file.pages):
+        widgets_to_copy[i + len(pdf_file.pages)] = []
+        for annot in page.get(Annots, []):
+            widgets_to_copy[i + len(pdf_file.pages)].append(annot.clone(output))
+    for i, page in enumerate(output.pages):
+        page[NameObject(Annots)] = (
+            (page[NameObject(Annots)] + ArrayObject(widgets_to_copy[i]))
+            if Annots in page
+            else ArrayObject(widgets_to_copy[i])
+        )
+    result = BytesIO()
+    output.write(result)
+    result.seek(0)
     return result.read()
 def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) -> bool:
-    """Tests whether a widget matches the specified PDF attribute pattern.
+    """
+    Recursively finds a pattern match within a PDF widget (annotation dictionary).
+    This function searches for a specific pattern within a PDF widget's properties.
+    It recursively traverses the widget's dictionary, comparing keys and values
+    to the provided pattern.
     Args:
-        pattern: Dictionary of PDF attributes and expected values
-        widget: PDF widget to test against the pattern
+        pattern (dict): The pattern to search for, represented as a dictionary.
+        widget (Union[dict, DictionaryObject]): The widget to search within, which
+            can be a dictionary or a DictionaryObject.
     Returns:
-        bool: True if widget matches all pattern criteria
+        bool: True if a match is found, False otherwise.
     """
     for key, value in widget.items():
         result = False
         if key in pattern:
@@ -238,16 +230,21 @@ def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) ->
 def traverse_pattern(
     pattern: dict, widget: Union[dict, DictionaryObject]
 ) -> Union[str, list, None]:
-    """Recursively searches a widget for a matching pattern and returns its value.
+    """
+    Recursively traverses a pattern within a PDF widget (annotation dictionary) and returns the value.
+    This function searches for a specific pattern within a PDF widget's properties.
+    It recursively traverses the widget's dictionary, comparing keys and values
+    to the provided pattern and returns the value if the pattern is True.
     Args:
-        pattern: Dictionary of PDF attributes specifying the search path
-        widget: PDF widget to search through
+        pattern (dict): The pattern to traverse, represented as a dictionary.
+        widget (Union[dict, DictionaryObject]): The widget to traverse within, which
+            can be a dictionary or a DictionaryObject.
     Returns:
-        Union[str, list, None]: Found value or None if not matched
+        Union[str, list, None]: The value found, or None if not found.
     """
     for key, value in widget.items():
         result = None
         if key in pattern:
@@ -270,18 +267,25 @@ def extract_widget_property(
     default_value: Any,
     func_before_return: Union[Callable, None],
 ) -> Any:
-    """Extracts a widget property using pattern matching with fallback.
+    """
+    Extracts a specific property from a PDF widget based on a list of patterns.
+    This function iterates through a list of patterns, attempting to find a match
+    within the provided widget. If a match is found, the corresponding value is
+    extracted and returned. If no match is found, a default value is returned.
     Args:
-        widget: PDF widget dictionary to examine
-        patterns: List of patterns to try in order
-        default_value: Value to return if no patterns match
-        func_before_return: Optional function to transform the extracted value
+        widget (Union[dict, DictionaryObject]): The widget to extract the property from.
+        patterns (list): A list of patterns to search for. Each pattern should be a
+            dictionary representing the structure of the property to extract.
+        default_value (Any): The default value to return if no pattern is found.
+        func_before_return (Union[Callable, None]): An optional function to call before
+            returning the extracted value. This can be used to perform additional
+            processing or formatting on the value.
     Returns:
-        Any: Extracted property value or default_value
+        Any: The extracted property value, or the default value if no pattern is found.
     """
     result = default_value
     for pattern in patterns:
@@ -294,12 +298,16 @@ def extract_widget_property(
 def generate_unique_suffix() -> str:
-    """Generates a random string for disambiguating field names during merging.
+    """
+    Generates a unique suffix string for internal use, such as to avoid naming conflicts.
+    This function creates a random string of characters with a predefined length
+    (UNIQUE_SUFFIX_LENGTH) using a combination of ASCII letters, digits, and
+    punctuation characters (excluding hyphens).
     Returns:
-        str: Random string containing letters, digits and symbols
+        str: A unique suffix string.
     """
     return "".join(
         [
             choice(ascii_letters + digits + punctuation.replace("-", ""))

PyPDFForm 2.5.0__py3-none-any.whl → 3.0.1__py3-none-any.whl

Potentially problematic release.

PyPDFForm 2.5.0__py3-none-any.whl → 3.0.1__py3-none-any.whl