PyPI - PyPDFForm - Versions diffs - 2.5.0__py3-none-any.whl → 3.0.0__py3-none-any.whl - Mend

PyPDFForm 2.5.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of PyPDFForm might be problematic. Click here for more details.

Files changed (33)

PyPDFForm/__init__.py +22 -6
PyPDFForm/adapter.py +28 -26
PyPDFForm/constants.py +29 -34
PyPDFForm/coordinate.py +23 -399
PyPDFForm/filler.py +79 -303
PyPDFForm/font.py +166 -164
PyPDFForm/hooks.py +109 -69
PyPDFForm/image.py +72 -22
PyPDFForm/middleware/base.py +42 -60
PyPDFForm/middleware/checkbox.py +27 -58
PyPDFForm/middleware/dropdown.py +41 -30
PyPDFForm/middleware/image.py +10 -22
PyPDFForm/middleware/radio.py +30 -31
PyPDFForm/middleware/signature.py +32 -47
PyPDFForm/middleware/text.py +54 -48
PyPDFForm/patterns.py +61 -106
PyPDFForm/template.py +80 -427
PyPDFForm/utils.py +142 -128
PyPDFForm/watermark.py +77 -208
PyPDFForm/widgets/base.py +57 -76
PyPDFForm/widgets/checkbox.py +18 -21
PyPDFForm/widgets/dropdown.py +18 -25
PyPDFForm/widgets/image.py +11 -9
PyPDFForm/widgets/radio.py +25 -35
PyPDFForm/widgets/signature.py +29 -40
PyPDFForm/widgets/text.py +18 -17
PyPDFForm/wrapper.py +351 -443
{pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/METADATA +6 -7
pypdfform-3.0.0.dist-info/RECORD +35 -0
{pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/WHEEL +1 -1
pypdfform-2.5.0.dist-info/RECORD +0 -35
{pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/licenses/LICENSE +0 -0
{pypdfform-2.5.0.dist-info → pypdfform-3.0.0.dist-info}/top_level.txt +0 -0

PyPDFForm/font.py CHANGED

@@ -1,43 +1,46 @@
 # -*- coding: utf-8 -*-
-"""Provides font handling utilities for PDF forms.
-This module contains functions for:
-- Registering custom fonts from TTF files
-- Extracting font information from PDF text appearances
-- Calculating font sizes based on widget dimensions
-- Adjusting font sizes to fit text within fields
-- Managing font colors and properties
 """
+This module provides functionalities for handling custom fonts within PDF documents.
+It includes functions for registering fonts with ReportLab and within the PDF's AcroForm,
+allowing these fonts to be used when filling form fields. The module also provides utilities
+for extracting font information from TTF streams and managing font names within a PDF.
+"""
+from functools import lru_cache
 from io import BytesIO
-from math import sqrt
-from re import findall
-from typing import Tuple, Union
-from reportlab.pdfbase.acroform import AcroForm
-from reportlab.pdfbase.pdfmetrics import (registerFont, standardFonts,
-                                          stringWidth)
+from pypdf import PdfReader, PdfWriter
+from pypdf.generic import (ArrayObject, DictionaryObject, NameObject,
+                           NumberObject, StreamObject)
+from reportlab.pdfbase.pdfmetrics import registerFont
 from reportlab.pdfbase.ttfonts import TTFError, TTFont
-from .constants import (DEFAULT_FONT, FONT_COLOR_IDENTIFIER,
-                        FONT_SIZE_IDENTIFIER, FONT_SIZE_REDUCE_STEP,
-                        MARGIN_BETWEEN_LINES, Rect)
-from .middleware.text import Text
-from .patterns import TEXT_FIELD_APPEARANCE_PATTERNS
-from .utils import extract_widget_property
+from .constants import (DR, FONT_NAME_PREFIX, AcroForm, BaseFont, Encoding,
+                        Fields, Font, FontDescriptor, FontFile2, FontName,
+                        Length1, Resources, Subtype, TrueType, Type,
+                        WinAnsiEncoding)
+from .utils import stream_to_io
+@lru_cache
 def register_font(font_name: str, ttf_stream: bytes) -> bool:
-    """Registers a TrueType font for use in PDF generation.
+    """
+    Registers a TrueType font with the ReportLab library.
+    This allows the font to be used for generating PDF documents with ReportLab.
     Args:
-        font_name: Name to register the font under
-        ttf_stream: TTF font data as bytes
+        font_name (str): The name to register the font under. This name will be used
+            to reference the font when creating PDF documents with ReportLab.
+        ttf_stream (bytes): The font file data in TTF format. This should be the raw
+            bytes of the TTF file.
     Returns:
-        bool: True if registration succeeded, False if failed
+        bool: True if the font was registered successfully, False otherwise.
+            Returns False if a TTFError occurs during registration, which usually
+            indicates an invalid TTF stream.
     """
     buff = BytesIO()
     buff.write(ttf_stream)
     buff.seek(0)
@@ -52,194 +55,193 @@ def register_font(font_name: str, ttf_stream: bytes) -> bool:
     return result
-def extract_font_from_text_appearance(text_appearance: str) -> Union[str, None]:
-    """Extracts font name from PDF text appearance string.
+def get_additional_font_params(pdf: bytes, base_font_name: str) -> tuple:
+    """
+    Retrieves additional font parameters from a PDF document for a given base font name.
-    Parses the font information embedded in PDF text field appearance strings.
+    This function searches the PDF's resources for a font dictionary matching the provided
+    base font name. If a match is found, it extracts the font descriptor parameters and
+    the font dictionary parameters. These parameters can be used to further describe
+    and define the font within the PDF.
     Args:
-        text_appearance: PDF text appearance string (/DA field)
+        pdf (bytes): The PDF file data as bytes.
+        base_font_name (str): The base font name to search for within the PDF's font resources.
     Returns:
-        Union[str, None]: Font name if found, None if not found
+        tuple: A tuple containing two dictionaries:
+            - font_descriptor_params (dict): A dictionary of font descriptor parameters.
+            - font_dict_params (dict): A dictionary of font dictionary parameters.
+            Returns empty dictionaries if the font is not found.
     """
+    font_descriptor_params = {}
+    font_dict_params = {}
+    reader = PdfReader(stream_to_io(pdf))
-    text_appearances = text_appearance.split(" ")
+    for font in reader.pages[0][Resources][Font].values():
+        if base_font_name.replace("/", "") in font[BaseFont]:
+            font_descriptor_params = dict(font[FontDescriptor])
+            font_dict_params = dict(font)
+            break
-    for each in text_appearances:
-        if each.startswith("/"):
-            text_segments = findall("[A-Z][^A-Z]*", each.replace("/", ""))
+    return font_descriptor_params, font_dict_params
-            if len(text_segments) == 1:
-                for k, v in AcroForm.formFontNames.items():
-                    if v == text_segments[0]:
-                        return k
-            for font in standardFonts:
-                font_segments = findall("[A-Z][^A-Z]*", font.replace("-", ""))
-                if len(font_segments) != len(text_segments):
-                    continue
-                found = True
-                for i, val in enumerate(font_segments):
-                    if not val.startswith(text_segments[i]):
-                        found = False
-                if found:
-                    return font
-    return None
-def auto_detect_font(widget: dict) -> str:
-    """Attempts to detect the font used in a PDF text field widget.
+def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> tuple:
+    """
+    Registers a TrueType font within the PDF's AcroForm dictionary.
-    Falls back to DEFAULT_FONT if detection fails.
+    This allows the font to be used when filling form fields within the PDF.
+    The function adds the font as a resource to the PDF, making it available
+    for use in form fields.
     Args:
-        widget: PDF form widget dictionary
+        pdf (bytes): The PDF file data as bytes. This is the PDF document that
+            will be modified to include the new font.
+        ttf_stream (bytes): The font file data in TTF format as bytes. This is the
+            raw data of the TrueType font file.
+        adobe_mode (bool): A flag indicating whether to use Adobe-specific font parameters.
     Returns:
-        str: Detected font name or DEFAULT_FONT
+        tuple: A tuple containing the modified PDF data as bytes and the new font name
+            (str) that was assigned to the registered font within the PDF.
     """
+    base_font_name = get_base_font_name(ttf_stream)
+    reader = PdfReader(stream_to_io(pdf))
+    writer = PdfWriter()
+    writer.append(reader)
+    font_descriptor_params = {}
+    font_dict_params = {}
+    if adobe_mode:
+        font_descriptor_params, font_dict_params = get_additional_font_params(
+            pdf, base_font_name
+        )
-    text_appearance = extract_widget_property(
-        widget, TEXT_FIELD_APPEARANCE_PATTERNS, None, None
+    font_file_stream = StreamObject()
+    font_file_stream.set_data(ttf_stream)
+    font_file_stream.update(
+        {
+            NameObject(Length1): NumberObject(len(ttf_stream)),
+        }
+    )
+    font_file_ref = writer._add_object(font_file_stream)  # type: ignore # noqa: SLF001 # # pylint: disable=W0212
+    font_descriptor = DictionaryObject()
+    font_descriptor.update(
+        {
+            NameObject(Type): NameObject(FontDescriptor),
+            NameObject(FontName): NameObject(base_font_name),
+            NameObject(FontFile2): font_file_ref,
+        }
+    )
+    font_descriptor.update(
+        {k: v for k, v in font_descriptor_params.items() if k not in font_descriptor}
     )
+    font_descriptor_ref = writer._add_object(font_descriptor)  # type: ignore # noqa: SLF001 # # pylint: disable=W0212
+    font_dict = DictionaryObject()
+    font_dict.update(
+        {
+            NameObject(Type): NameObject(Font),
+            NameObject(Subtype): NameObject(TrueType),
+            NameObject(BaseFont): NameObject(base_font_name),
+            NameObject(FontDescriptor): font_descriptor_ref,
+            NameObject(Encoding): NameObject(WinAnsiEncoding),
+        }
+    )
+    font_dict.update({k: v for k, v in font_dict_params.items() if k not in font_dict})
+    font_dict_ref = writer._add_object(font_dict)  # type: ignore # noqa: SLF001 # # pylint: disable=W0212
-    if not text_appearance:
-        return DEFAULT_FONT
+    if AcroForm not in writer._root_object:  # type: ignore # noqa: SLF001 # # pylint: disable=W0212
+        writer._root_object[NameObject(AcroForm)] = DictionaryObject({NameObject(Fields): ArrayObject([])})  # type: ignore # noqa: SLF001 # # pylint: disable=W0212
+    acroform = writer._root_object[AcroForm]  # type: ignore # noqa: SLF001 # # pylint: disable=W0212
-    return extract_font_from_text_appearance(text_appearance) or DEFAULT_FONT
+    if DR not in acroform:
+        acroform[NameObject(DR)] = DictionaryObject()
+    dr = acroform[DR]
+    if Font not in dr:
+        dr[NameObject(Font)] = DictionaryObject()
+    fonts = dr[Font]
-def text_field_font_size(widget: dict) -> Union[float, int]:
-    """Calculates an appropriate font size based on text field dimensions.
+    new_font_name = get_new_font_name(fonts)
+    fonts[NameObject(new_font_name)] = font_dict_ref
-    Args:
-        widget: PDF form widget dictionary containing Rect coordinates
+    with BytesIO() as f:
+        writer.write(f)
+        f.seek(0)
+        return f.read(), new_font_name
-    Returns:
-        Union[float, int]: Suggested font size in points
-    """
-    height = abs(float(widget[Rect][1]) - float(widget[Rect][3]))
-    return height * 2 / 3
+@lru_cache
+def get_base_font_name(ttf_stream: bytes) -> str:
+    """
+    Extracts the base font name from a TrueType font stream.
-def checkbox_radio_font_size(widget: dict) -> Union[float, int]:
-    """Calculates appropriate symbol size for checkbox/radio widgets.
+    This function parses the TTF stream to extract the font's face name,
+    which is used as the base font name. The result is cached using lru_cache
+    for performance.
     Args:
-        widget: PDF form widget dictionary containing Rect coordinates
+        ttf_stream (bytes): The font file data in TTF format.
     Returns:
-        Union[float, int]: Suggested symbol size in points
+        str: The base font name, prefixed with a forward slash.
     """
-    area = abs(float(widget[Rect][0]) - float(widget[Rect][2])) * abs(
-        float(widget[Rect][1]) - float(widget[Rect][3])
+    return (
+        f"/{TTFont(name='new_font', filename=stream_to_io(ttf_stream)).face.name.ustr}"
     )
-    return sqrt(area) * 72 / 96
+def get_new_font_name(fonts: dict) -> str:
+    """
+    Generates a new unique font name to avoid conflicts with existing fonts in the PDF.
-def get_text_field_font_size(widget: dict) -> Union[float, int]:
-    """Extracts font size from PDF text field appearance properties.
+    This function iterates through the existing fonts in the PDF and generates a new
+    font name with the prefix '/F' followed by a unique integer.
     Args:
-        widget: PDF form widget dictionary
+        fonts (dict): A dictionary of existing fonts in the PDF.
     Returns:
-        Union[float, int]: Font size in points if found, otherwise 0
+        str: A new unique font name.
     """
+    existing = set()
+    for key in fonts:
+        if isinstance(key, str) and key.startswith(FONT_NAME_PREFIX):
+            existing.add(int(key[2:]))
-    result = 0
-    text_appearance = extract_widget_property(
-        widget, TEXT_FIELD_APPEARANCE_PATTERNS, None, None
-    )
-    if text_appearance:
-        properties = text_appearance.split(" ")
-        for i, val in enumerate(properties):
-            if val.startswith(FONT_SIZE_IDENTIFIER):
-                return float(properties[i - 1])
+    n = 1
+    while n in existing:
+        n += 1
+    return f"{FONT_NAME_PREFIX}{n}"
-    return result
+@lru_cache
+def get_all_available_fonts(pdf: bytes) -> dict:
+    """
+    Retrieves all available fonts from a PDF document's AcroForm.
-def get_text_field_font_color(
-    widget: dict,
-) -> Union[Tuple[float, float, float], None]:
-    """Extracts font color from PDF text field appearance properties.
+    This function extracts the font resources from the PDF's AcroForm dictionary
+    and returns them as a dictionary.
     Args:
-        widget: PDF form widget dictionary
+        pdf (bytes): The PDF file data.
     Returns:
-        Union[Tuple[float, float, float], None]: RGB color tuple (0-1 range)
-            or black by default if not specified
+        dict: A dictionary of available fonts, where the keys are the font names
+            (without the leading slash) and the values are the corresponding font
+            identifiers in the PDF. Returns an empty dictionary if no fonts are found.
     """
+    reader = PdfReader(stream_to_io(pdf))
+    try:
+        fonts = reader.root_object[AcroForm][DR][Font]
+    except KeyError:
+        return {}
-    result = (0, 0, 0)
-    text_appearance = extract_widget_property(
-        widget, TEXT_FIELD_APPEARANCE_PATTERNS, None, None
-    )
-    if text_appearance:
-        if FONT_COLOR_IDENTIFIER not in text_appearance:
-            return result
-        text_appearance = text_appearance.split(" ")
-        for i, val in enumerate(text_appearance):
-            if val.startswith(FONT_COLOR_IDENTIFIER.replace(" ", "")):
-                result = (
-                    float(text_appearance[i - 3]),
-                    float(text_appearance[i - 2]),
-                    float(text_appearance[i - 1]),
-                )
-                break
+    result = {}
+    for key, value in fonts.items():
+        result[value[BaseFont].replace("/", "")] = key
     return result
-def adjust_paragraph_font_size(widget: dict, widget_middleware: Text) -> None:
-    """Dynamically reduces font size until text fits in paragraph field.
-    Args:
-        widget: PDF form widget dictionary
-        widget_middleware: Text middleware instance containing text properties
-    """
-    # pylint: disable=C0415, R0401
-    from .template import get_paragraph_lines
-    height = abs(float(widget[Rect][1]) - float(widget[Rect][3]))
-    while (
-        widget_middleware.font_size > FONT_SIZE_REDUCE_STEP
-        and len(widget_middleware.text_lines)
-        * (widget_middleware.font_size + MARGIN_BETWEEN_LINES)
-        > height
-    ):
-        widget_middleware.font_size -= FONT_SIZE_REDUCE_STEP
-        widget_middleware.text_lines = get_paragraph_lines(widget, widget_middleware)
-def adjust_text_field_font_size(widget: dict, widget_middleware: Text) -> None:
-    """Dynamically reduces font size until text fits in text field.
-    Args:
-        widget: PDF form widget dictionary
-        widget_middleware: Text middleware instance containing text properties
-    """
-    width = abs(float(widget[Rect][0]) - float(widget[Rect][2]))
-    while (
-        widget_middleware.font_size > FONT_SIZE_REDUCE_STEP
-        and stringWidth(
-            widget_middleware.value, widget_middleware.font, widget_middleware.font_size
-        )
-        > width
-    ):
-        widget_middleware.font_size -= FONT_SIZE_REDUCE_STEP

PyPDFForm 2.5.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

Potentially problematic release.

PyPDFForm 2.5.0__py3-none-any.whl → 3.0.0__py3-none-any.whl