PyPDFForm 3.5.3__tar.gz → 3.5.5__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PyPDFForm might be problematic. Click here for more details.

Files changed (52)
  1. {pypdfform-3.5.3 → pypdfform-3.5.5}/PKG-INFO +4 -4
  2. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/__init__.py +1 -1
  3. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/adapter.py +0 -1
  4. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/coordinate.py +0 -2
  5. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/filler.py +0 -5
  6. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/font.py +0 -5
  7. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/hooks.py +18 -25
  8. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/image.py +0 -3
  9. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/base.py +3 -4
  10. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/signature.py +0 -1
  11. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/patterns.py +0 -4
  12. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/template.py +1 -6
  13. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/utils.py +0 -8
  14. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/watermark.py +0 -7
  15. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/base.py +29 -32
  16. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/radio.py +25 -27
  17. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/signature.py +20 -24
  18. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/wrapper.py +0 -11
  19. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm.egg-info/PKG-INFO +4 -4
  20. {pypdfform-3.5.3 → pypdfform-3.5.5}/README.md +1 -1
  21. {pypdfform-3.5.3 → pypdfform-3.5.5}/pyproject.toml +7 -2
  22. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_adobe_mode.py +3 -0
  23. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_create_widget.py +39 -0
  24. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_dropdown.py +11 -9
  25. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_font_widths.py +3 -1
  26. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_functional.py +20 -12
  27. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_paragraph.py +26 -24
  28. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_signature.py +9 -3
  29. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_widget_attr_trigger.py +26 -0
  30. {pypdfform-3.5.3 → pypdfform-3.5.5}/LICENSE +0 -0
  31. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/constants.py +0 -0
  32. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/__init__.py +0 -0
  33. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/checkbox.py +0 -0
  34. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/dropdown.py +0 -0
  35. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/image.py +0 -0
  36. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/radio.py +0 -0
  37. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/middleware/text.py +0 -0
  38. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/__init__.py +0 -0
  39. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/bedrock.py +0 -0
  40. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/checkbox.py +29 -29
  41. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/dropdown.py +37 -37
  42. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/image.py +16 -16
  43. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm/widgets/text.py +37 -37
  44. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm.egg-info/SOURCES.txt +0 -0
  45. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm.egg-info/dependency_links.txt +0 -0
  46. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm.egg-info/requires.txt +0 -0
  47. {pypdfform-3.5.3 → pypdfform-3.5.5}/PyPDFForm.egg-info/top_level.txt +0 -0
  48. {pypdfform-3.5.3 → pypdfform-3.5.5}/setup.cfg +0 -0
  49. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_extract_values.py +0 -0
  50. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_fill_max_length_text_field.py +0 -0
  51. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_fill_method.py +0 -0
  52. {pypdfform-3.5.3 → pypdfform-3.5.5}/tests/test_use_full_widget_name.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyPDFForm
3
- Version: 3.5.3
3
+ Version: 3.5.5
4
4
  Summary: The Python library for PDF forms.
5
5
  Author: Jinge Li
6
6
  License-Expression: MIT
@@ -10,14 +10,14 @@ Classifier: Development Status :: 5 - Production/Stable
10
10
  Classifier: Intended Audience :: Developers
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3 :: Only
13
- Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
15
  Classifier: Programming Language :: Python :: 3.12
17
16
  Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
18
  Classifier: Operating System :: OS Independent
19
19
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
- Requires-Python: >=3.9
20
+ Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: cryptography
@@ -49,7 +49,7 @@ Dynamic: license-file
49
49
  <a href="https://github.com/chinapandaman/PyPDFForm/actions/workflows/python-package.yml"><img src="https://img.shields.io/badge/coverage-100%25-green"></a>
50
50
  <a href="https://github.com/chinapandaman/PyPDFForm/raw/master/LICENSE"><img src="https://img.shields.io/github/license/chinapandaman/pypdfform?label=license&color=orange"></a>
51
51
  <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/pypi/pyversions/pypdfform?label=python&color=gold"></a>
52
- <a href="https://pepy.tech/projects/pypdfform"><img src="https://static.pepy.tech/badge/pypdfform/month"></a>
52
+ <a href="https://pypistats.org/packages/pypdfform"><img src="https://img.shields.io/pypi/dm/pypdfform?color=blue"></a>
53
53
  </p>
54
54
 
55
55
  ## Introduction
@@ -20,7 +20,7 @@ The library supports various PDF form features, including:
20
20
  PyPDFForm aims to simplify PDF form manipulation, making it accessible to developers of all skill levels.
21
21
  """
22
22
 
23
- __version__ = "3.5.3"
23
+ __version__ = "3.5.5"
24
24
 
25
25
  from .middleware.text import Text # exposing for setting global font attrs
26
26
  from .widgets import Fields
@@ -9,7 +9,6 @@ filling operations, where the input PDF template can be provided in different
9
9
  forms. The module ensures that the input is properly converted into a byte
10
10
  stream before further processing.
11
11
  """
12
- # TODO: For large PDF files, reading the entire file into memory using `_file.read()` in `fp_or_f_obj_or_stream_to_stream` can be inefficient. Consider streaming or chunking if downstream processing allows.
13
12
 
14
13
  from os.path import isfile
15
14
  from typing import Any, BinaryIO, Union
@@ -6,8 +6,6 @@ This module provides functionality to generate coordinate grids on existing PDF
6
6
  It allows developers to visualize the coordinate system of each page in a PDF, which can be helpful
7
7
  for debugging and precisely positioning elements when filling or drawing on PDF forms.
8
8
  """
9
- # TODO: The `PdfReader` object is initialized twice (lines 42 and implicitly within `create_watermarks_and_draw` if it re-reads the PDF). Consider initializing it once and passing the object or its relevant parts to avoid redundant parsing, especially for large PDFs.
10
- # TODO: Drawing operations for lines and texts are performed and merged separately. It might be more efficient to combine all drawing operations for a page into a single `create_watermarks_and_draw` call or to merge all watermarks in one final step to reduce PDF processing overhead.
11
9
 
12
10
  from typing import Tuple
13
11
 
@@ -7,11 +7,6 @@ It includes functions for handling various form field types, such as text fields
7
7
  checkboxes, radio buttons, dropdowns, images, and signatures. The module also
8
8
  supports flattening the filled form to prevent further modifications.
9
9
  """
10
- # TODO: In `fill` function, `PdfReader(stream_to_io(template))` and `out.append(pdf)` might involve re-parsing or copying the entire PDF. For very large PDFs, consider if `pypdf` offers more efficient ways to modify in-place or stream processing.
11
- # TODO: The `get_widget_key` function is called repeatedly in a loop. If its internal logic is complex, consider caching its results or optimizing its implementation to avoid redundant computations.
12
- # TODO: The `signature_image_handler` function involves `get_image_dimensions` and `get_draw_image_resolutions`. If image processing is a bottleneck, consider optimizing these image-related operations, perhaps by using faster image libraries or pre-calculating dimensions if images are reused.
13
- # TODO: Similar to `coordinate.py`, `get_drawn_stream` involves multiple `create_watermarks_and_draw` and `merge_watermarks_with_pdf` calls. Combining drawing operations or merging watermarks in a single pass could reduce overhead.
14
- # TODO: The `radio_button_tracker` logic involves iterating through all radio buttons. For forms with many radio buttons, consider optimizing the lookup or update mechanism if performance becomes an issue.
15
10
 
16
11
  from io import BytesIO
17
12
  from typing import Dict, Union, cast
@@ -6,11 +6,6 @@ It includes functions for registering fonts with ReportLab and within the PDF's
6
6
  allowing these fonts to be used when filling form fields. The module also provides utilities
7
7
  for extracting font information from TTF streams and managing font names within a PDF.
8
8
  """
9
- # TODO: In `get_additional_font_params`, iterating through `reader.pages[0][Resources][Font].values()` can be inefficient for PDFs with many fonts. Consider building a font lookup dictionary once per PDF or caching results if this function is called frequently with the same PDF.
10
- # TODO: In `register_font_acroform`, `PdfReader(stream_to_io(pdf))` and `writer.append(reader)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
11
- # TODO: In `register_font_acroform`, `compress(ttf_stream)` can be CPU-intensive. If the same font stream is registered multiple times within a single PDF processing session, consider caching the compressed stream to avoid redundant compression.
12
- # TODO: In `get_new_font_name`, while `existing` is a set, if `n` needs to increment many times due to a dense range of existing font names, the `while` loop could be slow. However, this is likely a minor bottleneck in typical scenarios.
13
- # TODO: In `get_all_available_fonts`, the `replace("/", "")` operation on `BaseFont` could be avoided if font names are consistently handled with or without the leading slash to prevent string manipulation overhead in a loop.
14
9
 
15
10
  from functools import lru_cache
16
11
  from io import BytesIO
@@ -8,10 +8,6 @@ of checkbox and radio button widgets. It also provides functions for flattening
8
8
  generic and radio button widgets. These hooks are triggered during the PDF form
9
9
  filling process, allowing for customization of the form's appearance and behavior.
10
10
  """
11
- # TODO: In `trigger_widget_hooks`, the PDF is read and written in each call. If this function is part of a larger workflow, consider passing `PdfReader` and `PdfWriter` objects to avoid redundant parsing and writing, allowing modifications to be accumulated and written once.
12
- # TODO: String manipulations (split/join) in `update_text_field_font`, `update_text_field_font_size`, and `update_text_field_font_color` could be optimized for very long `DA` strings, potentially using more efficient string manipulation techniques or regex if the structure is consistent.
13
- # TODO: The `get_widget_key` function is called in a loop within `trigger_widget_hooks`. If its internal logic is complex, consider caching its results or optimizing its implementation to avoid redundant computations.
14
- # TODO: In `flatten_radio` and `flatten_generic`, `annot.get(NameObject(Ff), 0)` is called twice within the conditional. Store this value in a local variable to avoid redundant dictionary lookups.
15
11
 
16
12
  import sys
17
13
  from io import BytesIO
@@ -216,9 +212,7 @@ def update_text_field_multiline(annot: DictionaryObject, val: bool) -> None:
216
212
  val (bool): True to enable multiline, False to disable.
217
213
  """
218
214
  if val:
219
- # TODO: investigate this more
220
- # may need to change everywhere how feature flags precedence work
221
- # https://github.com/chinapandaman/PyPDFForm/issues/1162#issuecomment-3326233842
215
+ # Ff in annot[Parent] only in hooks.py, or when editing instead of retrieving
222
216
  if Parent in annot and Ff in annot[Parent]:
223
217
  annot[NameObject(Parent)][NameObject(Ff)] = NumberObject(
224
218
  int(
@@ -247,7 +241,7 @@ def update_text_field_comb(annot: DictionaryObject, val: bool) -> None:
247
241
  val (bool): True to enable comb, False to disable.
248
242
  """
249
243
  if val:
250
- if Parent in annot and Ff not in annot:
244
+ if Parent in annot and Ff in annot[Parent]:
251
245
  annot[NameObject(Parent)][NameObject(Ff)] = NumberObject(
252
246
  int(
253
247
  annot[NameObject(Parent)][NameObject(Ff)]
@@ -367,7 +361,7 @@ def flatten_generic(annot: DictionaryObject, val: bool) -> None:
367
361
  annot (DictionaryObject): The annotation dictionary.
368
362
  val (bool): True to flatten (make read-only), False to unflatten (make editable).
369
363
  """
370
- if Parent in annot and Ff not in annot:
364
+ if Parent in annot and (Ff in annot[Parent] or Ff not in annot):
371
365
  annot[NameObject(Parent)][NameObject(Ff)] = NumberObject(
372
366
  (
373
367
  int(annot.get(NameObject(Ff), 0)) | READ_ONLY
@@ -412,20 +406,19 @@ def update_field_required(annot: DictionaryObject, val: bool) -> None:
412
406
  annot (DictionaryObject): The annotation dictionary for the form field.
413
407
  val (bool): True to set the field as required, False to make it optional.
414
408
  """
415
- # TODO: add a test case when supporting edit required
416
- # if Parent in annot and Ff not in annot:
417
- # annot[NameObject(Parent)][NameObject(Ff)] = NumberObject(
418
- # (
419
- # int(annot.get(NameObject(Ff), 0)) | REQUIRED
420
- # if val
421
- # else int(annot.get(NameObject(Ff), 0)) & ~REQUIRED
422
- # )
423
- # )
424
- # else:
425
- annot[NameObject(Ff)] = NumberObject(
426
- (
427
- int(annot.get(NameObject(Ff), 0)) | REQUIRED
428
- if val
429
- else int(annot.get(NameObject(Ff), 0)) & ~REQUIRED
409
+ if Parent in annot and Ff in annot[Parent]:
410
+ annot[NameObject(Parent)][NameObject(Ff)] = NumberObject(
411
+ (
412
+ int(annot.get(NameObject(Ff), 0)) | REQUIRED
413
+ if val
414
+ else int(annot.get(NameObject(Ff), 0)) & ~REQUIRED
415
+ )
416
+ )
417
+ else:
418
+ annot[NameObject(Ff)] = NumberObject(
419
+ (
420
+ int(annot.get(NameObject(Ff), 0)) | REQUIRED
421
+ if val
422
+ else int(annot.get(NameObject(Ff), 0)) & ~REQUIRED
423
+ )
430
424
  )
431
- )
@@ -6,9 +6,6 @@ It includes functions for rotating images, retrieving image dimensions, and
6
6
  calculating the resolutions for drawing an image on a PDF page, taking into
7
7
  account whether to preserve the aspect ratio.
8
8
  """
9
- # TODO: In `rotate_image` and `get_image_dimensions`, `BytesIO` is used to wrap the image stream. While necessary for PIL, consider if the `image_stream` is already a file-like object in some calling contexts, which could avoid redundant copying to `BytesIO`.
10
- # TODO: The `rotate_image` function creates a new `BytesIO` object and saves the image to it. For multiple rotations or image manipulations, consider keeping the `PIL.Image.Image` object in memory and performing operations on it directly before a final save to bytes, to avoid repeated I/O operations.
11
- # TODO: The `get_image_dimensions` function opens the image to get its size. If image dimensions are frequently needed for the same image, consider caching the dimensions to avoid re-opening and re-parsing the image data.
12
9
 
13
10
  from io import BytesIO
14
11
  from typing import Tuple, Union
@@ -42,8 +42,7 @@ class Widget:
42
42
  super().__init__()
43
43
  self._name = name
44
44
  self._value = value
45
- self.desc: str = None
46
- self.tooltip: str = None # TODO: sync tooltip and desc
45
+ self.tooltip: str = None
47
46
  self.readonly: bool = None
48
47
  self.required: bool = None
49
48
  self.hooks_to_trigger: list = []
@@ -107,8 +106,8 @@ class Widget:
107
106
  """
108
107
  result = {}
109
108
 
110
- if self.desc is not None:
111
- result["description"] = self.desc
109
+ if self.tooltip is not None:
110
+ result["description"] = self.tooltip
112
111
 
113
112
  return result
114
113
 
@@ -6,7 +6,6 @@ This module defines the Signature class, which is a subclass of the
6
6
  Widget class. It represents a signature form field in a PDF document,
7
7
  allowing users to add their signature as an image.
8
8
  """
9
- # TODO: In the `stream` property, `fp_or_f_obj_or_stream_to_stream` is called every time the property is accessed. If the signature image is large or the property is accessed frequently, consider caching the result of `fp_or_f_obj_or_stream_to_stream` to avoid redundant file reads.
10
9
 
11
10
  from os.path import expanduser
12
11
  from typing import Union
@@ -7,10 +7,6 @@ checkboxes, radio buttons, dropdowns, images, and signatures) based on their
7
7
  properties in the PDF's annotation dictionary. It also provides utility functions
8
8
  for updating these widgets.
9
9
  """
10
- # TODO: The `WIDGET_TYPE_PATTERNS` list is iterated through to determine widget types. For very large numbers of annotations or complex pattern matching, consider optimizing this lookup, perhaps by pre-compiling regexes or using a more efficient data structure if the patterns allow.
11
- # TODO: In `update_checkbox_value` and `update_radio_value`, iterating through `annot[AP][N]` to find the correct appearance state might be slow if `N` contains many entries. If possible, a direct lookup or a more optimized search could improve performance.
12
- # TODO: In `update_dropdown_value`, the list comprehension for `ArrayObject` can be computationally intensive for dropdowns with many choices, as it creates new `TextStringObject` and `ArrayObject` instances for each choice. Consider optimizing this if dropdowns have a very large number of options.
13
- # TODO: The `get_checkbox_value` and `get_radio_value` functions involve dictionary lookups and comparisons. While generally fast, repeated calls in a tight loop for many widgets could accumulate overhead.
14
10
 
15
11
  from typing import Union
16
12
 
@@ -7,11 +7,6 @@ in PDF form templates. It leverages the pypdf library for PDF manipulation
7
7
  and defines specific patterns for identifying and constructing different
8
8
  types of widgets.
9
9
  """
10
- # TODO: In `build_widgets`, the `get_widgets_by_page` function is called, which then iterates through pages and annotations. For very large PDFs, this initial parsing and iteration can be a bottleneck. Consider optimizing the widget extraction process if possible, perhaps by using a more direct method to access annotations if `pypdf` allows.
11
- # TODO: The `construct_widget` function iterates through `WIDGET_TYPE_PATTERNS` for each widget. If there are many patterns or many widgets, this repeated iteration could be optimized by pre-compiling patterns or using a more efficient lookup mechanism.
12
- # TODO: In `get_widget_key`, the recursive call for `Parent` can lead to deep recursion for deeply nested widgets, potentially impacting performance or hitting recursion limits for extremely complex forms. Consider an iterative approach if deep nesting is common.
13
- # TODO: In `update_widget_keys`, the nested loops iterating through `old_keys`, `out.pages`, and `page.get(Annots, [])` can be very inefficient for large numbers of keys, pages, or annotations. Consider creating a lookup structure for annotations by key to avoid repeated linear scans.
14
- # TODO: In `update_widget_keys`, `PdfReader(stream_to_io(template))` and `out.append(pdf)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
15
10
 
16
11
  from functools import lru_cache
17
12
  from io import BytesIO
@@ -62,7 +57,7 @@ def build_widgets(
62
57
  key = get_widget_key(widget, use_full_widget_name)
63
58
  _widget = construct_widget(widget, key)
64
59
  if _widget is not None:
65
- _widget.desc = extract_widget_property(
60
+ _widget.__dict__["tooltip"] = extract_widget_property(
66
61
  widget, WIDGET_DESCRIPTION_PATTERNS, None, str
67
62
  )
68
63
 
@@ -12,14 +12,6 @@ It includes functions for:
12
12
  - Generating unique suffixes for internal use.
13
13
  - Enabling Adobe-specific settings in the PDF to ensure proper rendering of form fields.
14
14
  """
15
- # TODO: In `enable_adobe_mode`, `PdfReader(stream_to_io(pdf))` and `writer.append(reader)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
16
- # TODO: In `remove_all_widgets`, `PdfReader(stream_to_io(pdf))` and iterating through pages to add them to a new writer can be inefficient for large PDFs. Consider if `pypdf` offers a more direct way to remove annotations without reconstructing the entire PDF.
17
- # TODO: In `get_page_streams`, `PdfReader(stream_to_io(pdf))` and then creating a new `PdfWriter` for each page can be very inefficient. It would be more performant to iterate through the pages of a single `PdfReader` and extract their content streams directly if possible, or to use a single `PdfWriter` to extract multiple pages.
18
- # TODO: In `merge_two_pdfs`, the function reads and writes PDFs multiple times (`PdfReader`, `PdfWriter`, `remove_all_widgets`, then another `PdfReader` and `PdfWriter`). This is highly inefficient. The PDF objects should be passed around and modified in-place as much as possible, with a single final write operation.
19
- # TODO: The `merge_two_pdfs` function has a `TODO: refactor duplicate logic with copy_watermark_widgets` comment. This indicates a potential for code duplication and inefficiency. Refactoring this to a shared helper function would improve maintainability and potentially performance.
20
- # TODO: In `find_pattern_match` and `traverse_pattern`, the recursive nature and repeated dictionary lookups (`widget.items()`, `value.get_object()`) can be slow for deeply nested or complex widget structures. Consider optimizing these traversals, perhaps by pre-flattening the widget dictionary or using a more direct access method if `pypdf` allows.
21
- # TODO: In `extract_widget_property`, the loop iterates through `patterns` and calls `traverse_pattern` for each. If `patterns` is long or `traverse_pattern` is expensive, this could be a bottleneck. Consider optimizing the pattern matching or lookup.
22
- # TODO: `generate_unique_suffix` uses `choice` in a loop. While generally fast, for extremely high call volumes, pre-generating a pool of characters or using a faster random string generation method might offer minor improvements.
23
15
 
24
16
  from collections.abc import Callable
25
17
  from functools import lru_cache
@@ -7,13 +7,6 @@ It supports drawing text, lines, and images as watermarks.
7
7
  The module also includes functions to merge these watermarks with the original PDF content
8
8
  and to copy specific widgets from the watermarks to the original PDF.
9
9
  """
10
- # TODO: In `draw_image`, `ImageReader(image_buff)` is created for each image. If the same image is drawn multiple times, consider caching `ImageReader` objects or passing pre-processed image data to avoid redundant processing.
11
- # TODO: In `create_watermarks_and_draw`, `PdfReader(stream_to_io(pdf))` is called, which re-parses the PDF. If this function is called repeatedly for the same PDF, consider passing the `PdfReader` object directly to avoid redundant parsing.
12
- # TODO: In `create_watermarks_and_draw`, the function returns a list of watermarks where only one element is populated. This can be inefficient for memory if there are many pages but only one watermark is created. Consider returning only the created watermark and its page number, and let the caller handle placement.
13
- # TODO: In `merge_watermarks_with_pdf`, `PdfReader(stream_to_io(pdf))` and `PdfReader(stream_to_io(watermarks[i]))` are called in a loop. This leads to repeated parsing of the base PDF and each watermark. It would be more efficient to parse the base PDF once and then merge watermark pages directly into the existing `PdfWriter` object.
14
- # TODO: In `copy_watermark_widgets`, the function reads the PDF and watermarks multiple times. Similar to `merge_watermarks_with_pdf`, optimize by parsing the base PDF and watermarks once and then manipulating the `PdfWriter` object.
15
- # TODO: The `copy_watermark_widgets` function has a `TODO: refactor duplicate logic with merge_two_pdfs` comment. This indicates a potential for code duplication and inefficiency. Refactoring this to a shared helper function would improve maintainability and potentially performance.
16
- # TODO: In `copy_watermark_widgets`, the nested loops iterating through `watermarks`, `watermark_file.pages`, and `page.get(Annots, [])` can be very inefficient for large numbers of watermarks, pages, or annotations. Consider creating a lookup structure for annotations by key to avoid repeated linear scans.
17
10
 
18
11
  from io import BytesIO
19
12
  from typing import List, Union
@@ -12,9 +12,6 @@ Classes:
12
12
  functionality for rendering and manipulation.
13
13
  """
14
14
 
15
- # TODO: In `watermarks`, `PdfReader(stream_to_io(stream))` is called, which re-parses the PDF for each widget. If multiple widgets are being processed, consider passing the `PdfReader` object directly to avoid redundant parsing.
16
- # TODO: In `watermarks`, the list comprehension `[watermark.read() if i == self.page_number - 1 else b"" for i in range(page_count)]` creates a new `BytesIO` object and reads from it for each widget. If many widgets are created, this could be optimized by creating the `BytesIO` object once and passing it around, or by directly returning the watermark bytes and its page number.
17
-
18
15
  from dataclasses import dataclass
19
16
  from inspect import signature
20
17
  from io import BytesIO
@@ -28,35 +25,6 @@ from ..constants import fieldFlags, required
28
25
  from ..utils import stream_to_io
29
26
 
30
27
 
31
- @dataclass
32
- class Field:
33
- """
34
- Base dataclass for all PDF form fields.
35
-
36
- This class defines the common properties that all types of form fields
37
- (e.g., text fields, checkboxes, radio buttons) share. Specific field types
38
- will extend this class to add their unique attributes.
39
-
40
- Attributes:
41
- name (str): The name of the form field. This is used to identify the
42
- field within the PDF document.
43
- page_number (int): The 1-based page number on which the field is located.
44
- x (float): The x-coordinate of the field's position on the page.
45
- y (float): The y-coordinate of the field's position on the page.
46
- required (Optional[bool]): Indicates whether the field is required to be
47
- filled by the user. Defaults to None, meaning not explicitly set.
48
- tooltip (Optional[str]): A tooltip message that appears when the user
49
- hovers over the field. Defaults to None.
50
- """
51
-
52
- name: str
53
- page_number: int
54
- x: float
55
- y: float
56
- required: Optional[bool] = None
57
- tooltip: Optional[str] = None
58
-
59
-
60
28
  class Widget:
61
29
  """
62
30
  Base class for all widgets in PyPDFForm.
@@ -222,3 +190,32 @@ class Widget:
222
190
  watermark.read() if i == self.page_number - 1 else b""
223
191
  for i in range(page_count)
224
192
  ]
193
+
194
+
195
+ @dataclass
196
+ class Field:
197
+ """
198
+ Base dataclass for all PDF form fields.
199
+
200
+ This class defines the common properties that all types of form fields
201
+ (e.g., text fields, checkboxes, radio buttons) share. Specific field types
202
+ will extend this class to add their unique attributes.
203
+
204
+ Attributes:
205
+ name (str): The name of the form field. This is used to identify the
206
+ field within the PDF document.
207
+ page_number (int): The 1-based page number on which the field is located.
208
+ x (float): The x-coordinate of the field's position on the page.
209
+ y (float): The y-coordinate of the field's position on the page.
210
+ required (Optional[bool]): Indicates whether the field is required to be
211
+ filled by the user. Defaults to None, meaning not explicitly set.
212
+ tooltip (Optional[str]): A tooltip message that appears when the user
213
+ hovers over the field. Defaults to None.
214
+ """
215
+
216
+ name: str
217
+ page_number: int
218
+ x: float
219
+ y: float
220
+ required: Optional[bool] = None
221
+ tooltip: Optional[str] = None
@@ -10,8 +10,6 @@ The `RadioWidget` class extends the base `CheckBoxWidget` class to provide
10
10
  specific functionality for interacting with radio button form fields in PDFs.
11
11
  """
12
12
 
13
- # TODO: In `canvas_operations`, `self.acro_form_params.copy()` creates a shallow copy of the dictionary in each iteration of the loop. For a large number of radio buttons, this repeated copying can be inefficient. Consider modifying the dictionary in place and then reverting changes if necessary, or restructuring the data to avoid repeated copying.
14
-
15
13
  from dataclasses import dataclass
16
14
  from typing import List, Optional
17
15
 
@@ -20,31 +18,6 @@ from reportlab.pdfgen.canvas import Canvas
20
18
  from .checkbox import CheckBoxField, CheckBoxWidget
21
19
 
22
20
 
23
- @dataclass
24
- class RadioGroup(CheckBoxField):
25
- """
26
- Represents a group of radio buttons in a PDF document.
27
-
28
- This dataclass extends the `CheckBoxField` base class and defines the specific
29
- attributes that can be configured for a radio button group. Unlike a single
30
- checkbox, a radio group allows for multiple positions (x, y coordinates)
31
- where individual radio buttons can be placed, but only one can be selected.
32
-
33
- Attributes:
34
- _field_type (str): The type of the field, fixed as "radio".
35
- x (List[float]): A list of x-coordinates for each radio button in the group.
36
- y (List[float]): A list of y-coordinates for each radio button in the group.
37
- shape (Optional[str]): The shape of the radio button. Valid values are
38
- "circle" or "square". Defaults to None, which typically means a default circle shape.
39
- """
40
-
41
- _field_type: str = "radio"
42
-
43
- x: List[float]
44
- y: List[float]
45
- shape: Optional[str] = None
46
-
47
-
48
21
  class RadioWidget(CheckBoxWidget):
49
22
  """
50
23
  Represents a radio button widget in a PDF form.
@@ -99,3 +72,28 @@ class RadioWidget(CheckBoxWidget):
99
72
  new_acro_form_params["y"] = y
100
73
  new_acro_form_params["value"] = str(i)
101
74
  getattr(canvas.acroForm, self.ACRO_FORM_FUNC)(**new_acro_form_params)
75
+
76
+
77
+ @dataclass
78
+ class RadioGroup(CheckBoxField):
79
+ """
80
+ Represents a group of radio buttons in a PDF document.
81
+
82
+ This dataclass extends the `CheckBoxField` base class and defines the specific
83
+ attributes that can be configured for a radio button group. Unlike a single
84
+ checkbox, a radio group allows for multiple positions (x, y coordinates)
85
+ where individual radio buttons can be placed, but only one can be selected.
86
+
87
+ Attributes:
88
+ _field_type (str): The type of the field, fixed as "radio".
89
+ x (List[float]): A list of x-coordinates for each radio button in the group.
90
+ y (List[float]): A list of y-coordinates for each radio button in the group.
91
+ shape (Optional[str]): The shape of the radio button. Valid values are
92
+ "circle" or "square". Defaults to None, which typically means a default circle shape.
93
+ """
94
+
95
+ _field_type: str = "radio"
96
+
97
+ x: List[float]
98
+ y: List[float]
99
+ shape: Optional[str] = None
@@ -11,10 +11,6 @@ signature form fields in PDFs, including handling their creation, rendering, and
11
11
  integration into the document.
12
12
  """
13
13
 
14
- # TODO: In `watermarks`, `PdfReader(stream_to_io(BEDROCK_PDF))` is called every time the method is invoked. If `BEDROCK_PDF` is static, consider parsing it once and caching the `PdfReader` object to avoid redundant I/O and parsing.
15
- # TODO: In `watermarks`, the list comprehension `[f.read() if i == self.page_number - 1 else b"" for i in range(page_count)]` reads the entire `BytesIO` object `f` multiple times if `page_count` is large. Read `f` once into a variable and then use that variable in the list comprehension.
16
- # TODO: The `input_pdf` is created in `watermarks` but only its page count is used. If the `PdfReader` object is not needed for other operations, consider a lighter way to get the page count or pass the `PdfReader` object from the caller if it's already available.
17
-
18
14
  from dataclasses import dataclass
19
15
  from io import BytesIO
20
16
  from typing import List, Optional
@@ -30,26 +26,6 @@ from .base import Field
30
26
  from .bedrock import BEDROCK_PDF
31
27
 
32
28
 
33
- @dataclass
34
- class SignatureField(Field):
35
- """
36
- Represents a signature field in a PDF document.
37
-
38
- This dataclass extends the `Field` base class and defines the specific
39
- attributes that can be configured for a signature input field.
40
-
41
- Attributes:
42
- _field_type (str): The type of the field, fixed as "signature".
43
- width (Optional[float]): The width of the signature field.
44
- height (Optional[float]): The height of the signature field.
45
- """
46
-
47
- _field_type: str = "signature"
48
-
49
- width: Optional[float] = None
50
- height: Optional[float] = None
51
-
52
-
53
29
  class SignatureWidget:
54
30
  """
55
31
  Represents a signature widget in a PDF form.
@@ -155,3 +131,23 @@ class SignatureWidget:
155
131
  f.read() if i == self.page_number - 1 else b""
156
132
  for i in range(page_count)
157
133
  ]
134
+
135
+
136
+ @dataclass
137
+ class SignatureField(Field):
138
+ """
139
+ Represents a signature field in a PDF document.
140
+
141
+ This dataclass extends the `Field` base class and defines the specific
142
+ attributes that can be configured for a signature input field.
143
+
144
+ Attributes:
145
+ _field_type (str): The type of the field, fixed as "signature".
146
+ width (Optional[float]): The width of the signature field.
147
+ height (Optional[float]): The height of the signature field.
148
+ """
149
+
150
+ _field_type: str = "signature"
151
+
152
+ width: Optional[float] = None
153
+ height: Optional[float] = None
@@ -15,17 +15,6 @@ methods for interacting with its form fields and content. It leverages
15
15
  lower-level modules within the `PyPDFForm` library to handle the
16
16
  underlying PDF manipulation.
17
17
  """
18
- # TODO: The `__add__` method (merging PDFs) involves multiple `self.read()` and `other.read()` calls, leading to redundant PDF parsing. Consider optimizing by passing `PdfReader` objects directly or by performing a single read and then merging.
19
- # TODO: In `_init_helper`, `build_widgets` and `get_all_available_fonts` both call `self.read()`, causing the PDF to be parsed multiple times. Optimize by parsing the PDF once and passing the `PdfReader` object to these functions.
20
- # TODO: The `pages` property's implementation involves `get_page_streams(remove_all_widgets(self.read()))` and `copy_watermark_widgets(each, self.read(), None, i)`. This leads to excessive PDF parsing, widget removal, and copying for each page. Refactor to minimize PDF I/O operations, possibly by working with `pypdf` page objects directly.
21
- # TODO: The `read` method triggers `trigger_widget_hooks` and `enable_adobe_mode`, both of which can involve PDF parsing and writing. Since `read` is called frequently, this can be a performance bottleneck. Consider a more granular dirty-flag system to only apply changes when necessary, or accumulate changes and apply them in a single PDF write operation.
22
- # TODO: The `write` method calls `self.read()`, which in turn triggers all pending operations. This can lead to redundant processing if `read()` has already been called or if multiple `write()` calls are made.
23
- # TODO: In `change_version`, replacing a byte string in the entire PDF stream can be inefficient for very large PDFs. Consider if `pypdf` offers a more direct way to update the PDF version without full stream manipulation.
24
- # TODO: In `generate_coordinate_grid`, `self.read()` is called multiple times, and then `remove_all_widgets`, `generate_coordinate_grid`, and `copy_watermark_widgets` are called, all of which involve PDF parsing and manipulation. Optimize by minimizing PDF I/O and object re-creation.
25
- # TODO: In `fill`, `self.read()` is called, and then `fill` (from `filler.py`), `remove_all_widgets`, and `copy_watermark_widgets` are called. This is a major operation and likely a performance hotspot due to repeated PDF processing. Streamline the PDF modification workflow to reduce redundant parsing and writing.
26
- # TODO: In `create_widget`, `obj.watermarks(self.read())` and `copy_watermark_widgets(self.read(), watermarks, [name], None)` involve reading the PDF multiple times. Optimize by passing the PDF stream or `PdfReader` object more efficiently.
27
- # TODO: The `commit_widget_key_updates` method calls `update_widget_keys`, which involves re-parsing and writing the PDF. For bulk updates, consider a mechanism to apply all key changes in a single PDF modification operation.
28
- # TODO: General: Many methods repeatedly call `self.read()`, which re-parses the PDF. Consider maintaining a persistent `pypdf.PdfReader` and `pypdf.PdfWriter` object internally and only writing to a byte stream when explicitly requested (e.g., by the `read()` or `write()` methods) to avoid redundant I/O and parsing overhead.
29
18
 
30
19
  from __future__ import annotations
31
20
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyPDFForm
3
- Version: 3.5.3
3
+ Version: 3.5.5
4
4
  Summary: The Python library for PDF forms.
5
5
  Author: Jinge Li
6
6
  License-Expression: MIT
@@ -10,14 +10,14 @@ Classifier: Development Status :: 5 - Production/Stable
10
10
  Classifier: Intended Audience :: Developers
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3 :: Only
13
- Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
15
  Classifier: Programming Language :: Python :: 3.12
17
16
  Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
18
  Classifier: Operating System :: OS Independent
19
19
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
- Requires-Python: >=3.9
20
+ Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: cryptography
@@ -49,7 +49,7 @@ Dynamic: license-file
49
49
  <a href="https://github.com/chinapandaman/PyPDFForm/actions/workflows/python-package.yml"><img src="https://img.shields.io/badge/coverage-100%25-green"></a>
50
50
  <a href="https://github.com/chinapandaman/PyPDFForm/raw/master/LICENSE"><img src="https://img.shields.io/github/license/chinapandaman/pypdfform?label=license&color=orange"></a>
51
51
  <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/pypi/pyversions/pypdfform?label=python&color=gold"></a>
52
- <a href="https://pepy.tech/projects/pypdfform"><img src="https://static.pepy.tech/badge/pypdfform/month"></a>
52
+ <a href="https://pypistats.org/packages/pypdfform"><img src="https://img.shields.io/pypi/dm/pypdfform?color=blue"></a>
53
53
  </p>
54
54
 
55
55
  ## Introduction
@@ -8,7 +8,7 @@
8
8
  <a href="https://github.com/chinapandaman/PyPDFForm/actions/workflows/python-package.yml"><img src="https://img.shields.io/badge/coverage-100%25-green"></a>
9
9
  <a href="https://github.com/chinapandaman/PyPDFForm/raw/master/LICENSE"><img src="https://img.shields.io/github/license/chinapandaman/pypdfform?label=license&color=orange"></a>
10
10
  <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/pypi/pyversions/pypdfform?label=python&color=gold"></a>
11
- <a href="https://pepy.tech/projects/pypdfform"><img src="https://static.pepy.tech/badge/pypdfform/month"></a>
11
+ <a href="https://pypistats.org/packages/pypdfform"><img src="https://img.shields.io/pypi/dm/pypdfform?color=blue"></a>
12
12
  </p>
13
13
 
14
14
  ## Introduction
@@ -17,15 +17,15 @@ classifiers = [
17
17
  "Intended Audience :: Developers",
18
18
  "Programming Language :: Python :: 3",
19
19
  "Programming Language :: Python :: 3 :: Only",
20
- "Programming Language :: Python :: 3.9",
21
20
  "Programming Language :: Python :: 3.10",
22
21
  "Programming Language :: Python :: 3.11",
23
22
  "Programming Language :: Python :: 3.12",
24
23
  "Programming Language :: Python :: 3.13",
24
+ "Programming Language :: Python :: 3.14",
25
25
  "Operating System :: OS Independent",
26
26
  "Topic :: Software Development :: Libraries :: Python Modules",
27
27
  ]
28
- requires-python = ">=3.9"
28
+ requires-python = ">=3.10"
29
29
  dependencies = [
30
30
  "cryptography",
31
31
  "fonttools",
@@ -132,3 +132,8 @@ version = {attr = "PyPDFForm.__version__"}
132
132
 
133
133
  [tool.setuptools.packages.find]
134
134
  include = ["PyPDFForm*"]
135
+
136
+ [tool.pytest.ini_options]
137
+ markers = [
138
+ "posix_only",
139
+ ]
@@ -2,6 +2,8 @@
2
2
 
3
3
  import os
4
4
 
5
+ import pytest
6
+
5
7
  from PyPDFForm import Fields, PdfWrapper
6
8
 
7
9
 
@@ -110,6 +112,7 @@ def test_issue_613(pdf_samples, request):
110
112
  assert obj.read() == expected
111
113
 
112
114
 
115
+ @pytest.mark.posix_only
113
116
  def test_sample_template_library(
114
117
  pdf_samples, image_samples, sample_font_stream, request
115
118
  ):