PyPDFForm 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PyPDFForm might be problematic. Click here for more details.

PyPDFForm/template.py CHANGED
@@ -1,5 +1,13 @@
1
1
  # -*- coding: utf-8 -*-
2
- """Contains helpers for generic template related processing."""
2
+ """Provides template processing utilities for PDF forms.
3
+
4
+ This module contains functions for:
5
+ - Building and managing PDF form widgets
6
+ - Handling widget properties and attributes
7
+ - Processing text fields and paragraphs
8
+ - Managing widget keys and names
9
+ - Supporting comb fields and multiline text
10
+ """
3
11
 
4
12
  from functools import lru_cache
5
13
  from io import BytesIO
@@ -33,7 +41,15 @@ from .utils import (extract_widget_property, find_pattern_match, handle_color,
33
41
  def set_character_x_paddings(
34
42
  pdf_stream: bytes, widgets: Dict[str, WIDGET_TYPES]
35
43
  ) -> Dict[str, WIDGET_TYPES]:
36
- """Sets paddings between characters for combed text fields."""
44
+ """Calculates and sets character spacing for comb text fields.
45
+
46
+ Args:
47
+ pdf_stream: PDF form as bytes
48
+ widgets: Dictionary of widget middleware objects
49
+
50
+ Returns:
51
+ Dict[str, WIDGET_TYPES]: Updated widgets with character paddings
52
+ """
37
53
 
38
54
  for _widgets in get_widgets_by_page(pdf_stream).values():
39
55
  for widget in _widgets:
@@ -51,7 +67,16 @@ def build_widgets(
51
67
  use_full_widget_name: bool,
52
68
  render_widgets: bool,
53
69
  ) -> Dict[str, WIDGET_TYPES]:
54
- """Builds a widget dict given a PDF form stream."""
70
+ """Constructs widget middleware objects from a PDF form.
71
+
72
+ Args:
73
+ pdf_stream: PDF form as bytes
74
+ use_full_widget_name: Whether to include parent widget names
75
+ render_widgets: Whether widgets should be rendered visibly
76
+
77
+ Returns:
78
+ Dict[str, WIDGET_TYPES]: Dictionary mapping field names to widgets
79
+ """
55
80
 
56
81
  results = {}
57
82
 
@@ -113,7 +138,14 @@ def build_widgets(
113
138
 
114
139
 
115
140
  def dropdown_to_text(dropdown: Dropdown) -> Text:
116
- """Converts a dropdown widget to a text widget."""
141
+ """Converts a dropdown widget to a text widget while preserving properties.
142
+
143
+ Args:
144
+ dropdown: Dropdown widget to convert
145
+
146
+ Returns:
147
+ Text: New text widget with dropdown's selected value and styling
148
+ """
117
149
 
118
150
  result = Text(dropdown.name)
119
151
  result.border_color = dropdown.border_color
@@ -135,7 +167,18 @@ def update_text_field_attributes(
135
167
  template_stream: bytes,
136
168
  widgets: Dict[str, WIDGET_TYPES],
137
169
  ) -> None:
138
- """Auto updates text fields' attributes."""
170
+ """Updates text field properties based on PDF template settings.
171
+
172
+ Handles:
173
+ - Font detection and sizing
174
+ - Color properties
175
+ - Paragraph wrapping
176
+ - Auto font size adjustment
177
+
178
+ Args:
179
+ template_stream: PDF form as bytes
180
+ widgets: Dictionary of widget middleware objects
181
+ """
139
182
 
140
183
  for _widgets in get_widgets_by_page(template_stream).values():
141
184
  for _widget in _widgets:
@@ -172,7 +215,14 @@ def update_text_field_attributes(
172
215
 
173
216
  @lru_cache()
174
217
  def get_widgets_by_page(pdf: bytes) -> Dict[int, List[dict]]:
175
- """Iterates through a PDF and returns all widgets found grouped by page."""
218
+ """Extracts all form widgets from a PDF grouped by page number.
219
+
220
+ Args:
221
+ pdf: PDF form as bytes
222
+
223
+ Returns:
224
+ Dict[int, List[dict]]: Mapping of page numbers to widget dictionaries
225
+ """
176
226
 
177
227
  pdf_file = PdfReader(stream_to_io(pdf))
178
228
 
@@ -197,9 +247,14 @@ def get_widgets_by_page(pdf: bytes) -> Dict[int, List[dict]]:
197
247
 
198
248
 
199
249
  def get_widget_full_key(widget: dict) -> Union[str, None]:
200
- """
201
- Returns a PDF widget's full annotated key by prepending its
202
- parent widget's key.
250
+ """Constructs a widget's full hierarchical key including parent names.
251
+
252
+ Args:
253
+ widget: PDF widget dictionary
254
+
255
+ Returns:
256
+ Union[str, None]: Full key in format "parent.child" if parent exists,
257
+ otherwise None
203
258
  """
204
259
 
205
260
  key = extract_widget_property(widget, WIDGET_KEY_PATTERNS, None, str)
@@ -215,7 +270,16 @@ def get_widget_full_key(widget: dict) -> Union[str, None]:
215
270
 
216
271
 
217
272
  def construct_widget(widget: dict, key: str) -> Union[WIDGET_TYPES, None]:
218
- """Finds a PDF widget's annotated type by pattern matching."""
273
+ """Creates appropriate widget middleware based on PDF widget type.
274
+
275
+ Args:
276
+ widget: PDF widget dictionary
277
+ key: Field name/key for the widget
278
+
279
+ Returns:
280
+ Union[WIDGET_TYPES, None]: Appropriate widget middleware instance
281
+ or None if type not recognized
282
+ """
219
283
 
220
284
  result = None
221
285
  for each in WIDGET_TYPE_PATTERNS:
@@ -230,13 +294,28 @@ def construct_widget(widget: dict, key: str) -> Union[WIDGET_TYPES, None]:
230
294
 
231
295
 
232
296
  def get_text_field_max_length(widget: dict) -> Union[int, None]:
233
- """Returns the max length of the text field if presented or None."""
297
+ """Extracts max length constraint from a text field widget.
298
+
299
+ Args:
300
+ widget: PDF widget dictionary
301
+
302
+ Returns:
303
+ Union[int, None]: Max character length if specified, otherwise None
304
+ """
234
305
 
235
306
  return int(widget[MaxLen]) or None if MaxLen in widget else None
236
307
 
237
308
 
238
309
  def check_field_flag_bit(widget: dict, bit: int) -> bool:
239
- """Checks if a bit is set in a widget's field flag."""
310
+ """Tests whether a specific flag bit is set in a widget's field flags.
311
+
312
+ Args:
313
+ widget: PDF widget dictionary
314
+ bit: Flag bit to check (e.g. COMB, MULTILINE)
315
+
316
+ Returns:
317
+ bool: True if the bit is set, False otherwise
318
+ """
240
319
 
241
320
  field_flag = extract_widget_property(widget, TEXT_FIELD_FLAG_PATTERNS, None, int)
242
321
 
@@ -247,19 +326,41 @@ def check_field_flag_bit(widget: dict, bit: int) -> bool:
247
326
 
248
327
 
249
328
  def is_text_field_comb(widget: dict) -> bool:
250
- """Returns true if characters in a text field needs to be formatted into combs."""
329
+ """Determines if a text field uses comb formatting (fixed character spacing).
330
+
331
+ Args:
332
+ widget: PDF widget dictionary
333
+
334
+ Returns:
335
+ bool: True if field uses comb formatting
336
+ """
251
337
 
252
338
  return check_field_flag_bit(widget, COMB)
253
339
 
254
340
 
255
341
  def is_text_multiline(widget: dict) -> bool:
256
- """Returns true if a text field is a paragraph field."""
342
+ """Determines if a text field supports multiple lines/paragraphs.
343
+
344
+ Args:
345
+ widget: PDF widget dictionary
346
+
347
+ Returns:
348
+ bool: True if field supports multiline text
349
+ """
257
350
 
258
351
  return check_field_flag_bit(widget, MULTILINE)
259
352
 
260
353
 
261
354
  def get_dropdown_choices(widget: dict) -> Union[Tuple[str, ...], None]:
262
- """Returns string options of a dropdown field."""
355
+ """Extracts available choices from a dropdown widget.
356
+
357
+ Args:
358
+ widget: PDF widget dictionary
359
+
360
+ Returns:
361
+ Union[Tuple[str, ...], None]: Tuple of choice strings if found,
362
+ otherwise None
363
+ """
263
364
 
264
365
  return tuple(
265
366
  (each if isinstance(each, str) else str(each[1]))
@@ -270,14 +371,30 @@ def get_dropdown_choices(widget: dict) -> Union[Tuple[str, ...], None]:
270
371
 
271
372
 
272
373
  def get_char_rect_width(widget: dict, widget_middleware: Text) -> float:
273
- """Returns rectangular width of each character for combed text fields."""
374
+ """Calculates per-character width for comb text fields.
375
+
376
+ Args:
377
+ widget: PDF widget dictionary
378
+ widget_middleware: Text middleware instance
379
+
380
+ Returns:
381
+ float: Width in points allocated per character
382
+ """
274
383
 
275
384
  rect_width = abs(float(widget[Rect][0]) - float(widget[Rect][2]))
276
385
  return rect_width / widget_middleware.max_length
277
386
 
278
387
 
279
388
  def get_character_x_paddings(widget: dict, widget_middleware: Text) -> List[float]:
280
- """Returns paddings between characters for combed text fields."""
389
+ """Calculates horizontal positioning for each character in comb fields.
390
+
391
+ Args:
392
+ widget: PDF widget dictionary
393
+ widget_middleware: Text middleware instance
394
+
395
+ Returns:
396
+ List[float]: X-offsets for centering each character in its comb cell
397
+ """
281
398
 
282
399
  length = min(len(widget_middleware.value or ""), widget_middleware.max_length)
283
400
  char_rect_width = get_char_rect_width(widget, widget_middleware)
@@ -299,10 +416,15 @@ def get_character_x_paddings(widget: dict, widget_middleware: Text) -> List[floa
299
416
  def split_characters_into_lines(
300
417
  split_by_new_line_symbol: List[str], middleware: Text, width: float
301
418
  ) -> List[str]:
302
- """
303
- Given a long string meant to be filled for a paragraph widget
304
- split by the new line symbol already, splits it further into lines
305
- where each line would fit into the widget's width.
419
+ """Splits text into lines that fit within a paragraph field's width.
420
+
421
+ Args:
422
+ split_by_new_line_symbol: Text already split by newlines
423
+ middleware: Text middleware with font properties
424
+ width: Available width in points
425
+
426
+ Returns:
427
+ List[str]: Lines of text fitting within the specified width
306
428
  """
307
429
 
308
430
  lines = []
@@ -329,11 +451,15 @@ def split_characters_into_lines(
329
451
 
330
452
 
331
453
  def adjust_each_line(lines: List[str], middleware: Text, width: float) -> List[str]:
332
- """
333
- Given a list of strings which is the return value of
334
- `split_characters_into_lines`, further adjusts each line
335
- so that there is neither overflow nor over-splitting into
336
- unnecessary lines.
454
+ """Optimizes line breaks to minimize empty space in paragraph fields.
455
+
456
+ Args:
457
+ lines: Text lines from split_characters_into_lines()
458
+ middleware: Text middleware with font properties
459
+ width: Available width in points
460
+
461
+ Returns:
462
+ List[str]: Optimized lines with balanced word wrapping
337
463
  """
338
464
 
339
465
  result = []
@@ -373,7 +499,15 @@ def adjust_each_line(lines: List[str], middleware: Text, width: float) -> List[s
373
499
 
374
500
 
375
501
  def get_paragraph_lines(widget: dict, widget_middleware: Text) -> List[str]:
376
- """Splits the paragraph field's text to a list of lines."""
502
+ """Generates properly wrapped lines for a paragraph text field.
503
+
504
+ Args:
505
+ widget: PDF widget dictionary
506
+ widget_middleware: Text middleware instance
507
+
508
+ Returns:
509
+ List[str]: Wrapped lines fitting the paragraph field dimensions
510
+ """
377
511
 
378
512
  value = widget_middleware.value or ""
379
513
  if widget_middleware.max_length is not None:
@@ -390,7 +524,14 @@ def get_paragraph_lines(widget: dict, widget_middleware: Text) -> List[str]:
390
524
 
391
525
 
392
526
  def get_paragraph_auto_wrap_length(widget_middleware: Text) -> int:
393
- """Calculates the text wrap length of a paragraph field."""
527
+ """Determines optimal line length for paragraph text wrapping.
528
+
529
+ Args:
530
+ widget_middleware: Text middleware instance
531
+
532
+ Returns:
533
+ int: Suggested maximum characters per line
534
+ """
394
535
 
395
536
  result = maxsize
396
537
  for line in widget_middleware.text_lines:
@@ -406,7 +547,18 @@ def update_widget_keys(
406
547
  new_keys: List[str],
407
548
  indices: List[int],
408
549
  ) -> bytes:
409
- """Updates a list of keys of widgets."""
550
+ """Renames widget fields in a PDF template.
551
+
552
+ Args:
553
+ template: PDF form as bytes
554
+ widgets: Dictionary of widget middleware
555
+ old_keys: List of current field names
556
+ new_keys: List of new field names
557
+ indices: List of indices for handling radio button groups
558
+
559
+ Returns:
560
+ bytes: Modified PDF with updated field names
561
+ """
410
562
  # pylint: disable=R0801
411
563
 
412
564
  pdf = PdfReader(stream_to_io(template))
PyPDFForm/utils.py CHANGED
@@ -1,5 +1,14 @@
1
1
  # -*- coding: utf-8 -*-
2
- """Contains utility helpers."""
2
+ """Provides core utility functions for PDF form processing.
3
+
4
+ This module contains general-purpose utilities used throughout PyPDFForm:
5
+ - Stream/file handling conversions
6
+ - Color space transformations
7
+ - Widget preview generation
8
+ - PDF merging and splitting
9
+ - Pattern matching for PDF structures
10
+ - Unique ID generation
11
+ """
3
12
 
4
13
  from collections.abc import Callable
5
14
  from io import BytesIO
@@ -21,7 +30,14 @@ from .middleware.text import Text
21
30
 
22
31
 
23
32
  def stream_to_io(stream: bytes) -> BinaryIO:
24
- """Converts a byte stream to a binary io object."""
33
+ """Converts a byte stream to a seekable binary IO object.
34
+
35
+ Args:
36
+ stream: Input byte stream to convert
37
+
38
+ Returns:
39
+ BinaryIO: Seekable file-like object containing the stream data
40
+ """
25
41
 
26
42
  result = BytesIO()
27
43
  result.write(stream)
@@ -31,7 +47,19 @@ def stream_to_io(stream: bytes) -> BinaryIO:
31
47
 
32
48
 
33
49
  def handle_color(color: Union[list, ArrayObject]) -> Union[Color, CMYKColor, None]:
34
- """Converts a color array to an RGB or CMYK color."""
50
+ """Converts PDF color specifications to reportlab color objects.
51
+
52
+ Supports:
53
+ - Grayscale (1 component)
54
+ - RGB (3 components)
55
+ - CMYK (4 components)
56
+
57
+ Args:
58
+ color: Color array from PDF specification
59
+
60
+ Returns:
61
+ Union[Color, CMYKColor, None]: Color object or None if invalid format
62
+ """
35
63
 
36
64
  result = None
37
65
 
@@ -39,9 +67,10 @@ def handle_color(color: Union[list, ArrayObject]) -> Union[Color, CMYKColor, Non
39
67
  result = CMYKColor(black=1 - color[0])
40
68
  elif len(color) == 3:
41
69
  result = Color(red=color[0], green=color[1], blue=color[2])
42
- # write a test case for this
43
- # elif len(color) == 4:
44
- # result = CMYKColor(cyan=color[0], magenta=color[1], yellow=color[2], black=color[3])
70
+ elif len(color) == 4:
71
+ result = CMYKColor(
72
+ cyan=color[0], magenta=color[1], yellow=color[2], black=color[3]
73
+ )
45
74
 
46
75
  return result
47
76
 
@@ -49,7 +78,15 @@ def handle_color(color: Union[list, ArrayObject]) -> Union[Color, CMYKColor, Non
49
78
  def checkbox_radio_to_draw(
50
79
  widget: Union[Checkbox, Radio], font_size: Union[float, int]
51
80
  ) -> Text:
52
- """Converts a checkbox/radio widget to a drawable text widget."""
81
+ """Converts checkbox/radio widgets to text symbols for drawing.
82
+
83
+ Args:
84
+ widget: Checkbox or Radio widget to convert
85
+ font_size: Size for the drawn symbol
86
+
87
+ Returns:
88
+ Text: Text widget configured to draw the appropriate symbol
89
+ """
53
90
 
54
91
  new_widget = Text(
55
92
  name=widget.name,
@@ -66,7 +103,15 @@ def checkbox_radio_to_draw(
66
103
 
67
104
 
68
105
  def preview_widget_to_draw(widget: WIDGET_TYPES, with_preview_text: bool) -> Text:
69
- """Converts a widget to a preview text widget."""
106
+ """Creates preview version of a widget showing field name/location.
107
+
108
+ Args:
109
+ widget: Widget to generate preview for
110
+ with_preview_text: Whether to include field name in preview
111
+
112
+ Returns:
113
+ Text: Text widget configured for preview display
114
+ """
70
115
 
71
116
  new_widget = Text(
72
117
  name=widget.name,
@@ -84,7 +129,14 @@ def preview_widget_to_draw(widget: WIDGET_TYPES, with_preview_text: bool) -> Tex
84
129
 
85
130
 
86
131
  def remove_all_widgets(pdf: bytes) -> bytes:
87
- """Removes all widgets from a PDF form."""
132
+ """Removes all interactive form fields from a PDF document.
133
+
134
+ Args:
135
+ pdf: Input PDF as bytes
136
+
137
+ Returns:
138
+ bytes: Flattened PDF with form fields removed
139
+ """
88
140
 
89
141
  pdf_file = PdfReader(stream_to_io(pdf))
90
142
  result_stream = BytesIO()
@@ -100,7 +152,14 @@ def remove_all_widgets(pdf: bytes) -> bytes:
100
152
 
101
153
 
102
154
  def get_page_streams(pdf: bytes) -> List[bytes]:
103
- """Returns a list of streams where each is a page of the input PDF."""
155
+ """Splits a PDF into individual page streams.
156
+
157
+ Args:
158
+ pdf: Input PDF as bytes
159
+
160
+ Returns:
161
+ List[bytes]: List where each element contains a single PDF page
162
+ """
104
163
 
105
164
  pdf_file = PdfReader(stream_to_io(pdf))
106
165
  result = []
@@ -117,7 +176,15 @@ def get_page_streams(pdf: bytes) -> List[bytes]:
117
176
 
118
177
 
119
178
  def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
120
- """Merges two PDFs into one PDF."""
179
+ """Combines two PDF documents into a single multipage PDF.
180
+
181
+ Args:
182
+ pdf: First PDF as bytes
183
+ other: Second PDF as bytes
184
+
185
+ Returns:
186
+ bytes: Combined PDF containing all pages from both inputs
187
+ """
121
188
 
122
189
  output = PdfWriter()
123
190
  pdf_file = PdfReader(stream_to_io(pdf))
@@ -135,7 +202,15 @@ def merge_two_pdfs(pdf: bytes, other: bytes) -> bytes:
135
202
 
136
203
 
137
204
  def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) -> bool:
138
- """Checks if a PDF dict pattern exists in a PDF widget."""
205
+ """Tests whether a widget matches the specified PDF attribute pattern.
206
+
207
+ Args:
208
+ pattern: Dictionary of PDF attributes and expected values
209
+ widget: PDF widget to test against the pattern
210
+
211
+ Returns:
212
+ bool: True if widget matches all pattern criteria
213
+ """
139
214
 
140
215
  for key, value in widget.items():
141
216
  result = False
@@ -158,7 +233,15 @@ def find_pattern_match(pattern: dict, widget: Union[dict, DictionaryObject]) ->
158
233
  def traverse_pattern(
159
234
  pattern: dict, widget: Union[dict, DictionaryObject]
160
235
  ) -> Union[str, list, None]:
161
- """Traverses down a PDF dict pattern and find the value."""
236
+ """Recursively searches a widget for a matching pattern and returns its value.
237
+
238
+ Args:
239
+ pattern: Dictionary of PDF attributes specifying the search path
240
+ widget: PDF widget to search through
241
+
242
+ Returns:
243
+ Union[str, list, None]: Found value or None if not matched
244
+ """
162
245
 
163
246
  for key, value in widget.items():
164
247
  result = None
@@ -182,7 +265,17 @@ def extract_widget_property(
182
265
  default_value: Any,
183
266
  func_before_return: Union[Callable, None],
184
267
  ) -> Any:
185
- """Returns a property value given a PDF widget dict and a pattern."""
268
+ """Extracts a widget property using pattern matching with fallback.
269
+
270
+ Args:
271
+ widget: PDF widget dictionary to examine
272
+ patterns: List of patterns to try in order
273
+ default_value: Value to return if no patterns match
274
+ func_before_return: Optional function to transform the extracted value
275
+
276
+ Returns:
277
+ Any: Extracted property value or default_value
278
+ """
186
279
 
187
280
  result = default_value
188
281
 
@@ -196,7 +289,11 @@ def extract_widget_property(
196
289
 
197
290
 
198
291
  def generate_unique_suffix() -> str:
199
- """Generates a unique suffix string for widgets during form merging."""
292
+ """Generates a random string for disambiguating field names during merging.
293
+
294
+ Returns:
295
+ str: Random string containing letters, digits and symbols
296
+ """
200
297
 
201
298
  return "".join(
202
299
  [