PyPDFForm 3.5.3__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. PyPDFForm/__init__.py +5 -3
  2. PyPDFForm/adapter.py +33 -1
  3. PyPDFForm/ap.py +99 -0
  4. PyPDFForm/assets/__init__.py +0 -0
  5. PyPDFForm/assets/blank.py +100 -0
  6. PyPDFForm/constants.py +20 -2
  7. PyPDFForm/coordinate.py +7 -11
  8. PyPDFForm/deprecation.py +30 -0
  9. PyPDFForm/filler.py +17 -36
  10. PyPDFForm/font.py +16 -16
  11. PyPDFForm/hooks.py +153 -30
  12. PyPDFForm/image.py +0 -3
  13. PyPDFForm/middleware/__init__.py +35 -0
  14. PyPDFForm/middleware/base.py +24 -5
  15. PyPDFForm/middleware/checkbox.py +18 -1
  16. PyPDFForm/middleware/signature.py +0 -1
  17. PyPDFForm/patterns.py +44 -13
  18. PyPDFForm/raw/__init__.py +37 -0
  19. PyPDFForm/raw/circle.py +65 -0
  20. PyPDFForm/raw/ellipse.py +69 -0
  21. PyPDFForm/raw/image.py +79 -0
  22. PyPDFForm/raw/line.py +65 -0
  23. PyPDFForm/raw/rect.py +70 -0
  24. PyPDFForm/raw/text.py +73 -0
  25. PyPDFForm/template.py +114 -12
  26. PyPDFForm/types.py +49 -0
  27. PyPDFForm/utils.py +31 -41
  28. PyPDFForm/watermark.py +153 -44
  29. PyPDFForm/widgets/__init__.py +1 -0
  30. PyPDFForm/widgets/base.py +79 -59
  31. PyPDFForm/widgets/checkbox.py +30 -30
  32. PyPDFForm/widgets/dropdown.py +42 -40
  33. PyPDFForm/widgets/image.py +17 -16
  34. PyPDFForm/widgets/radio.py +27 -28
  35. PyPDFForm/widgets/signature.py +96 -60
  36. PyPDFForm/widgets/text.py +40 -40
  37. PyPDFForm/wrapper.py +256 -240
  38. {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/METADATA +33 -26
  39. pypdfform-4.2.0.dist-info/RECORD +47 -0
  40. {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/licenses/LICENSE +1 -1
  41. pypdfform-3.5.3.dist-info/RECORD +0 -35
  42. /PyPDFForm/{widgets → assets}/bedrock.py +0 -0
  43. {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/WHEEL +0 -0
  44. {pypdfform-3.5.3.dist-info → pypdfform-4.2.0.dist-info}/top_level.txt +0 -0
PyPDFForm/__init__.py CHANGED
@@ -20,10 +20,12 @@ The library supports various PDF form features, including:
20
20
  PyPDFForm aims to simplify PDF form manipulation, making it accessible to developers of all skill levels.
21
21
  """
22
22
 
23
- __version__ = "3.5.3"
23
+ __version__ = "4.2.0"
24
24
 
25
- from .middleware.text import Text # exposing for setting global font attrs
25
+ from .assets.blank import BlankPage
26
+ from .middleware import Widgets
27
+ from .raw import RawElements
26
28
  from .widgets import Fields
27
29
  from .wrapper import PdfWrapper
28
30
 
29
- __all__ = ["PdfWrapper", "Text", "Fields"]
31
+ __all__ = ["PdfWrapper", "Fields", "BlankPage", "RawElements", "Widgets"]
PyPDFForm/adapter.py CHANGED
@@ -9,7 +9,6 @@ filling operations, where the input PDF template can be provided in different
9
9
  forms. The module ensures that the input is properly converted into a byte
10
10
  stream before further processing.
11
11
  """
12
- # TODO: For large PDF files, reading the entire file into memory using `_file.read()` in `fp_or_f_obj_or_stream_to_stream` can be inefficient. Consider streaming or chunking if downstream processing allows.
13
12
 
14
13
  from os.path import isfile
15
14
  from typing import Any, BinaryIO, Union
@@ -67,3 +66,36 @@ def fp_or_f_obj_or_stream_to_stream(
67
66
  with open(fp_or_f_obj_or_stream, "rb") as _file:
68
67
  result = _file.read()
69
68
  return result
69
+
70
+
71
+ def fp_or_f_obj_or_f_content_to_content(
72
+ fp_or_f_obj_or_f_content: Union[str, BinaryIO],
73
+ ) -> str:
74
+ """
75
+ Adapt a file path, file object, or file content to file content.
76
+
77
+ This function takes a file path, a file object, or file content and adapts it to a consistent string.
78
+ It handles different input types, including:
79
+ - file content (str)
80
+ - file paths (str)
81
+ - file-like objects with a read() method (BinaryIO)
82
+
83
+ Args:
84
+ fp_or_f_obj_or_f_content (Union[str, BinaryIO]): The input to adapt.
85
+ It can be file content, a file path (string), or a file object.
86
+
87
+ Returns:
88
+ str: The file content representation of the input.
89
+ """
90
+ result = ""
91
+ if readable(fp_or_f_obj_or_f_content):
92
+ result = str(fp_or_f_obj_or_f_content.read())
93
+
94
+ elif isinstance(fp_or_f_obj_or_f_content, str):
95
+ if isfile(fp_or_f_obj_or_f_content):
96
+ with open(fp_or_f_obj_or_f_content, "r", encoding="utf-8") as _file:
97
+ result = _file.read()
98
+ else:
99
+ result = fp_or_f_obj_or_f_content
100
+
101
+ return result
PyPDFForm/ap.py ADDED
@@ -0,0 +1,99 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ A module for handling PDF appearance streams.
4
+
5
+ This module provides functionality to manage appearance streams in PDF forms,
6
+ which are necessary for form fields to display correctly after being filled.
7
+ It uses both pypdf and pikepdf for manipulation.
8
+ """
9
+
10
+ from functools import lru_cache
11
+ from io import BytesIO
12
+
13
+ from pikepdf import Pdf
14
+ from pypdf import PdfReader, PdfWriter
15
+
16
+ from .constants import XFA, AcroForm, Root
17
+ from .template import (get_on_open_javascript, get_pdf_title,
18
+ set_on_open_javascript, set_pdf_title)
19
+ from .utils import stream_to_io
20
+
21
+
22
+ @lru_cache
23
+ def appearance_streams_handler(pdf: bytes, generate_appearance_streams: bool) -> bytes:
24
+ """
25
+ Handles appearance streams and the /NeedAppearances flag for a PDF form.
26
+
27
+ This function prepares a PDF for form filling by:
28
+ 1. Removing the XFA dictionary if present, as it can interfere with standard
29
+ AcroForm processing.
30
+ 2. Setting the /NeedAppearances flag in the AcroForm dictionary, which instructs
31
+ PDF viewers to generate appearance streams for form fields.
32
+ 3. Optionally generating appearance streams explicitly using pikepdf if
33
+ `generate_appearance_streams` is True.
34
+ 4. Preserving the title from the original PDF.
35
+ 5. Preserving the on-open JavaScript from the original PDF.
36
+
37
+ The result is cached using lru_cache for performance.
38
+
39
+ Args:
40
+ pdf (bytes): The PDF file content as a bytes stream.
41
+ generate_appearance_streams (bool): Whether to explicitly generate appearance streams for all form fields.
42
+
43
+ Returns:
44
+ bytes: The modified PDF content as a bytes stream.
45
+ """
46
+ reader = PdfReader(stream_to_io(pdf))
47
+ writer = PdfWriter()
48
+
49
+ if AcroForm in reader.trailer[Root] and XFA in reader.trailer[Root][AcroForm]:
50
+ del reader.trailer[Root][AcroForm][XFA]
51
+
52
+ writer.append(reader)
53
+ writer.set_need_appearances_writer()
54
+
55
+ with BytesIO() as f:
56
+ writer.write(f)
57
+ f.seek(0)
58
+ result = f.read()
59
+
60
+ if generate_appearance_streams:
61
+ with Pdf.open(stream_to_io(result)) as f:
62
+ f.generate_appearance_streams()
63
+ with BytesIO() as r:
64
+ f.save(r)
65
+ r.seek(0)
66
+ result = r.read()
67
+
68
+ result = preserve_title(pdf, result)
69
+ return preserve_on_open_javascript(pdf, result)
70
+
71
+
72
+ def preserve_title(src: bytes, dest: bytes) -> bytes:
73
+ """
74
+ Preserves the title from the source PDF to the destination PDF.
75
+
76
+ Args:
77
+ src (bytes): The source PDF file content as a bytes stream.
78
+ dest (bytes): The destination PDF file content as a bytes stream.
79
+
80
+ Returns:
81
+ bytes: The modified destination PDF content as a bytes stream.
82
+ """
83
+ title = get_pdf_title(src)
84
+ return set_pdf_title(dest, title)
85
+
86
+
87
+ def preserve_on_open_javascript(src: bytes, dest: bytes) -> bytes:
88
+ """
89
+ Preserves the on-open JavaScript from the source PDF to the destination PDF.
90
+
91
+ Args:
92
+ src (bytes): The source PDF file content as a bytes stream.
93
+ dest (bytes): The destination PDF file content as a bytes stream.
94
+
95
+ Returns:
96
+ bytes: The modified destination PDF content as a bytes stream.
97
+ """
98
+ script = get_on_open_javascript(src)
99
+ return set_on_open_javascript(dest, script)
PyPDFForm/assets/__init__.py
File without changes
PyPDFForm/assets/blank.py ADDED
@@ -0,0 +1,100 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Module for creating and managing a blank PDF page asset.
4
+
5
+ This module provides the BlankPage class, which acts as a utility to generate
6
+ a simple, empty PDF page with customizable dimensions (width and height).
7
+ The primary use case is creating new PDF documents starting with a blank canvas
8
+ or adding blank pages to existing documents. It supports multiplication to
9
+ easily generate a PDF containing multiple identical blank pages.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from functools import cached_property
15
+ from io import BytesIO
16
+
17
+ from reportlab.pdfgen.canvas import Canvas
18
+
19
+ from ..constants import BLANK_PAGE_DEFAULT_HEIGHT, BLANK_PAGE_DEFAULT_WIDTH
20
+ from ..utils import merge_pdfs
21
+
22
+
23
+ class BlankPage:
24
+ """
25
+ Class for creating a blank PDF page asset.
26
+
27
+ This class manages the generation and representation of a single blank PDF page
28
+ using reportlab. It provides a simple interface to access the page content as
29
+ a byte stream and supports page duplication via the multiplication operator.
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ width: float = BLANK_PAGE_DEFAULT_WIDTH,
35
+ height: float = BLANK_PAGE_DEFAULT_HEIGHT,
36
+ ) -> None:
37
+ """
38
+ Initializes a BlankPage object.
39
+
40
+ Args:
41
+ width (float): The width of the blank page in points. Defaults to
42
+ BLANK_PAGE_DEFAULT_WIDTH (612 points).
43
+ height (float): The height of the blank page in points. Defaults to
44
+ BLANK_PAGE_DEFAULT_HEIGHT (792 points).
45
+ """
46
+ super().__init__()
47
+ self.width = width
48
+ self.height = height
49
+
50
+ def __mul__(self, count: int) -> bytes:
51
+ """
52
+ Multiplication operator to merge multiple blank pages into one PDF.
53
+
54
+ This allows syntax like `BlankPage() * 3` to create a 3-page PDF.
55
+ It merges copies of the current blank page asset using the efficient
56
+ pairwise merging strategy implemented in `merge_pdfs`.
57
+
58
+ Args:
59
+ count (int): The number of blank pages to merge. Must be an integer >= 1.
60
+
61
+ Returns:
62
+ bytes: The byte stream of the resulting PDF containing `count` blank pages.
63
+ """
64
+ if count == 1:
65
+ return self.read()
66
+
67
+ return merge_pdfs([self.read() for _ in range(count)])
68
+
69
+ def read(self) -> bytes:
70
+ """
71
+ Read the generated blank page PDF content.
72
+
73
+ This is a public interface to retrieve the cached byte stream of the single
74
+ blank PDF page created by this instance.
75
+
76
+ Returns:
77
+ bytes: The byte stream of the single blank PDF page.
78
+ """
79
+ return self._stream
80
+
81
+ @cached_property
82
+ def _stream(self) -> bytes:
83
+ """
84
+ Generates and returns the PDF byte stream of a single blank page.
85
+
86
+ This is a cached property that uses `reportlab.pdfgen.canvas.Canvas` to create
87
+ a minimal PDF document consisting of one blank page with the configured
88
+ dimensions (`self.width`, `self.height`). This generation occurs only once.
89
+
90
+ Returns:
91
+ bytes: The byte stream of the generated blank PDF page.
92
+ """
93
+ result = BytesIO()
94
+
95
+ canvas = Canvas(result, pagesize=(self.width, self.height))
96
+ canvas.showPage()
97
+ canvas.save()
98
+ result.seek(0)
99
+
100
+ return result.read()
PyPDFForm/constants.py CHANGED
@@ -41,9 +41,8 @@ WIDGET_TYPES = Union[Text, Checkbox, Radio, Dropdown, Signature, Image]
41
41
 
42
42
  DEPRECATION_NOTICE = "{} will be deprecated soon. Use {} instead."
43
43
 
44
+ Title = "/Title"
44
45
  Annots = "/Annots"
45
- A = "/A"
46
- JS = "/JS"
47
46
  T = "/T"
48
47
  TU = "/TU"
49
48
  Rect = "/Rect"
@@ -68,6 +67,21 @@ AS = "/AS"
68
67
  Yes = "/Yes"
69
68
  Off = "/Off"
70
69
 
70
+ # javascript
71
+ A = "/A"
72
+ AA = "/AA"
73
+ Action = "/Action"
74
+ S = "/S"
75
+ JavaScript = "/JavaScript"
76
+ JS = "/JS"
77
+ OpenAction = "/OpenAction"
78
+ E = "/E"
79
+ X = "/X"
80
+ D = "/D"
81
+ U = "/U"
82
+ Fo = "/Fo"
83
+ Bl = "/Bl"
84
+
71
85
  # Font dict
72
86
  Length = "/Length"
73
87
  Length1 = "/Length1"
@@ -129,3 +143,7 @@ COORDINATE_GRID_FONT_SIZE_MARGIN_RATIO = DEFAULT_FONT_SIZE / 100
129
143
  UNIQUE_SUFFIX_LENGTH = 20
130
144
 
131
145
  SLASH = "/"
146
+
147
+ # blank page
148
+ BLANK_PAGE_DEFAULT_WIDTH = 612
149
+ BLANK_PAGE_DEFAULT_HEIGHT = 792
PyPDFForm/coordinate.py CHANGED
@@ -6,8 +6,6 @@ This module provides functionality to generate coordinate grids on existing PDF
6
6
  It allows developers to visualize the coordinate system of each page in a PDF, which can be helpful
7
7
  for debugging and precisely positioning elements when filling or drawing on PDF forms.
8
8
  """
9
- # TODO: The `PdfReader` object is initialized twice (lines 42 and implicitly within `create_watermarks_and_draw` if it re-reads the PDF). Consider initializing it once and passing the object or its relevant parts to avoid redundant parsing, especially for large PDFs.
10
- # TODO: Drawing operations for lines and texts are performed and merged separately. It might be more efficient to combine all drawing operations for a page into a single `create_watermarks_and_draw` call or to merge all watermarks in one final step to reduce PDF processing overhead.
11
9
 
12
10
  from typing import Tuple
13
11
 
@@ -44,7 +42,6 @@ def generate_coordinate_grid(
44
42
  pdf_file = PdfReader(stream_to_io(pdf))
45
43
  lines_by_page = {}
46
44
  texts_by_page = {}
47
- watermarks = []
48
45
 
49
46
  for i, page in enumerate(pdf_file.pages):
50
47
  lines_by_page[i + 1] = []
@@ -98,16 +95,15 @@ def generate_coordinate_grid(
98
95
  y += margin
99
96
  x += margin
100
97
 
98
+ to_draw = []
99
+
101
100
  for page, lines in lines_by_page.items():
102
- watermarks.append(
103
- create_watermarks_and_draw(pdf, page, "line", lines)[page - 1]
101
+ to_draw.extend(
102
+ [{"page_number": page, "type": "line", **line} for line in lines]
104
103
  )
105
-
106
- result = merge_watermarks_with_pdf(pdf, watermarks)
107
- watermarks = []
108
104
  for page, texts in texts_by_page.items():
109
- watermarks.append(
110
- create_watermarks_and_draw(pdf, page, "text", texts)[page - 1]
105
+ to_draw.extend(
106
+ [{"page_number": page, "type": "text", **text} for text in texts]
111
107
  )
112
108
 
113
- return merge_watermarks_with_pdf(result, watermarks)
109
+ return merge_watermarks_with_pdf(pdf, create_watermarks_and_draw(pdf, to_draw))
PyPDFForm/deprecation.py ADDED
@@ -0,0 +1,30 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ A module for handling deprecation notices within the PyPDFForm library.
4
+
5
+ This module provides utility functions to issue standard DeprecationWarning
6
+ messages, ensuring consistency across the library when notifying users of
7
+ deprecated features.
8
+ """
9
+
10
+ from warnings import warn
11
+
12
+ from .constants import DEPRECATION_NOTICE
13
+
14
+
15
+ def deprecation_notice(to_deprecate: str, to_replace: str) -> None:
16
+ """
17
+ Issues a DeprecationWarning for a feature that is being deprecated.
18
+
19
+ Args:
20
+ to_deprecate (str): The name of the feature or function being deprecated.
21
+ to_replace (str): The name of the feature or function that should be used instead.
22
+ """
23
+ warn(
24
+ DEPRECATION_NOTICE.format(
25
+ to_deprecate,
26
+ to_replace,
27
+ ),
28
+ DeprecationWarning, # noqa: PT030
29
+ stacklevel=3,
30
+ )
PyPDFForm/filler.py CHANGED
@@ -7,11 +7,6 @@ It includes functions for handling various form field types, such as text fields
7
7
  checkboxes, radio buttons, dropdowns, images, and signatures. The module also
8
8
  supports flattening the filled form to prevent further modifications.
9
9
  """
10
- # TODO: In `fill` function, `PdfReader(stream_to_io(template))` and `out.append(pdf)` might involve re-parsing or copying the entire PDF. For very large PDFs, consider if `pypdf` offers more efficient ways to modify in-place or stream processing.
11
- # TODO: The `get_widget_key` function is called repeatedly in a loop. If its internal logic is complex, consider caching its results or optimizing its implementation to avoid redundant computations.
12
- # TODO: The `signature_image_handler` function involves `get_image_dimensions` and `get_draw_image_resolutions`. If image processing is a bottleneck, consider optimizing these image-related operations, perhaps by using faster image libraries or pre-calculating dimensions if images are reused.
13
- # TODO: Similar to `coordinate.py`, `get_drawn_stream` involves multiple `create_watermarks_and_draw` and `merge_watermarks_with_pdf` calls. Combining drawing operations or merging watermarks in a single pass could reduce overhead.
14
- # TODO: The `radio_button_tracker` logic involves iterating through all radio buttons. For forms with many radio buttons, consider optimizing the lookup or update mechanism if performance becomes an issue.
15
10
 
16
11
  from io import BytesIO
17
12
  from typing import Dict, Union, cast
@@ -75,37 +70,10 @@ def signature_image_handler(
75
70
  return any_image_to_draw
76
71
 
77
72
 
78
- def get_drawn_stream(to_draw: dict, stream: bytes, action: str) -> bytes:
79
- """Applies watermarks to specific pages of a PDF based on the provided drawing instructions.
80
-
81
- This function takes a dictionary of drawing instructions and applies watermarks to the
82
- specified pages of a PDF. It iterates through the drawing instructions, creates watermarks
83
- for each page, and merges the watermarks with the original PDF content. The function
84
- supports various drawing actions, such as adding images or text.
85
-
86
- Args:
87
- to_draw (dict): A dictionary containing page numbers as keys and lists of drawing instructions as values.
88
- Each drawing instruction specifies the type of drawing, position, dimensions, and content.
89
- stream (bytes): The PDF content as bytes.
90
- action (str): The type of action to perform (e.g., "image", "text").
91
-
92
- Returns:
93
- bytes: The modified PDF content with watermarks applied.
94
- """
95
- watermark_list = []
96
- for page, stuffs in to_draw.items():
97
- watermark_list.append(b"")
98
- watermarks = create_watermarks_and_draw(stream, page, action, stuffs)
99
- for i, watermark in enumerate(watermarks):
100
- if watermark:
101
- watermark_list[i] = watermark
102
-
103
- return merge_watermarks_with_pdf(stream, watermark_list)
104
-
105
-
106
73
  def fill(
107
74
  template: bytes,
108
75
  widgets: Dict[str, WIDGET_TYPES],
76
+ need_appearances: bool,
109
77
  use_full_widget_name: bool,
110
78
  flatten: bool = False,
111
79
  ) -> tuple:
@@ -121,6 +89,9 @@ def fill(
121
89
  template (bytes): The PDF template as bytes.
122
90
  widgets (Dict[str, WIDGET_TYPES]): A dictionary of widgets to fill, where the keys are the
123
91
  widget names and the values are the widget objects.
92
+ need_appearances (bool): If True, skips updating the appearance stream (AP) for
93
+ text and dropdown fields to maintain compatibility with Adobe Reader's
94
+ behavior for certain fields.
124
95
  use_full_widget_name (bool): Whether to use the full widget name when looking up widgets
125
96
  in the `widgets` dictionary.
126
97
  flatten (bool): Whether to flatten the filled PDF. Defaults to False.
@@ -130,6 +101,7 @@ def fill(
130
101
  The image drawn stream is only returned if there are any image or signature widgets
131
102
  in the form.
132
103
  """
104
+ # pylint: disable=R0912
133
105
  pdf = PdfReader(stream_to_io(template))
134
106
  out = PdfWriter()
135
107
  out.append(pdf)
@@ -169,15 +141,24 @@ def fill(
169
141
  if widget.value == radio_button_tracker[key] - 1:
170
142
  update_radio_value(annot)
171
143
  elif isinstance(widget, Dropdown):
172
- update_dropdown_value(annot, widget)
144
+ update_dropdown_value(annot, widget, need_appearances)
173
145
  elif isinstance(widget, Text):
174
- update_text_value(annot, widget)
146
+ update_text_value(annot, widget, need_appearances)
175
147
 
176
148
  with BytesIO() as f:
177
149
  out.write(f)
178
150
  f.seek(0)
179
151
  result = f.read()
180
152
 
153
+ if not any_image_to_draw:
154
+ return result, None
155
+
156
+ images = []
157
+ for page, elements in images_to_draw.items():
158
+ images.extend(
159
+ [{"page_number": page, "type": "image", **element} for element in elements]
160
+ )
161
+
181
162
  return result, (
182
- get_drawn_stream(images_to_draw, result, "image") if any_image_to_draw else None
163
+ merge_watermarks_with_pdf(result, create_watermarks_and_draw(result, images))
183
164
  )
PyPDFForm/font.py CHANGED
@@ -6,11 +6,6 @@ It includes functions for registering fonts with ReportLab and within the PDF's
6
6
  allowing these fonts to be used when filling form fields. The module also provides utilities
7
7
  for extracting font information from TTF streams and managing font names within a PDF.
8
8
  """
9
- # TODO: In `get_additional_font_params`, iterating through `reader.pages[0][Resources][Font].values()` can be inefficient for PDFs with many fonts. Consider building a font lookup dictionary once per PDF or caching results if this function is called frequently with the same PDF.
10
- # TODO: In `register_font_acroform`, `PdfReader(stream_to_io(pdf))` and `writer.append(reader)` involve re-parsing and appending the PDF. For large PDFs, passing `PdfReader` and `PdfWriter` objects directly could reduce overhead.
11
- # TODO: In `register_font_acroform`, `compress(ttf_stream)` can be CPU-intensive. If the same font stream is registered multiple times within a single PDF processing session, consider caching the compressed stream to avoid redundant compression.
12
- # TODO: In `get_new_font_name`, while `existing` is a set, if `n` needs to increment many times due to a dense range of existing font names, the `while` loop could be slow. However, this is likely a minor bottleneck in typical scenarios.
13
- # TODO: In `get_all_available_fonts`, the `replace("/", "")` operation on `BaseFont` could be avoided if font names are consistently handled with or without the leading slash to prevent string manipulation overhead in a loop.
14
9
 
15
10
  from functools import lru_cache
16
11
  from io import BytesIO
@@ -99,7 +94,7 @@ def get_additional_font_params(pdf: bytes, base_font_name: str) -> tuple:
99
94
  return font_descriptor_params, font_dict_params
100
95
 
101
96
 
102
- def compute_font_glyph_widths(ttf_file: BytesIO, missing_width: float):
97
+ def compute_font_glyph_widths(ttf_file: BytesIO, missing_width: float) -> list[float]:
103
98
  """
104
99
  Computes the advance widths for all glyphs in a TrueType font, scaled for PDF text space.
105
100
 
@@ -134,20 +129,23 @@ def compute_font_glyph_widths(ttf_file: BytesIO, missing_width: float):
134
129
  widths: list[float] = []
135
130
  if head_table and cmap_table and hmtx_table:
136
131
  cmap = cmap_table.getBestCmap()
137
- units_per_em: int = head_table.unitsPerEm or 1
138
-
139
- for codepoint in range(ENCODING_TABLE_SIZE):
140
- glyph_name: str = cmap.get(codepoint, FontNotdef)
141
- advance_width, _ = hmtx_table[glyph_name]
142
- pdf_width: float = (advance_width / units_per_em) * EM_TO_PDF_FACTOR
143
- widths.append(pdf_width)
132
+ if cmap:
133
+ units_per_em: int = head_table.unitsPerEm or 1
134
+
135
+ for codepoint in range(ENCODING_TABLE_SIZE):
136
+ glyph_name: str = cmap.get(codepoint, FontNotdef)
137
+ advance_width, _ = hmtx_table[glyph_name]
138
+ pdf_width: float = (advance_width / units_per_em) * EM_TO_PDF_FACTOR
139
+ widths.append(pdf_width)
144
140
  else:
145
141
  widths: list[float] = [missing_width] * ENCODING_TABLE_SIZE
146
142
 
147
143
  return widths
148
144
 
149
145
 
150
- def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> tuple:
146
+ def register_font_acroform(
147
+ pdf: bytes, ttf_stream: bytes, need_appearances: bool
148
+ ) -> tuple:
151
149
  """
152
150
  Registers a TrueType font within the PDF's AcroForm dictionary.
153
151
 
@@ -160,7 +158,9 @@ def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> t
160
158
  will be modified to include the new font.
161
159
  ttf_stream (bytes): The font file data in TTF format as bytes. This is the
162
160
  raw data of the TrueType font file.
163
- adobe_mode (bool): A flag indicating whether to use Adobe-specific font parameters.
161
+ need_appearances (bool): If True, attempts to retrieve existing font parameters
162
+ from the PDF's resources to ensure compatibility when appearance streams are
163
+ required.
164
164
 
165
165
  Returns:
166
166
  tuple: A tuple containing the modified PDF data as bytes and the new font name
@@ -173,7 +173,7 @@ def register_font_acroform(pdf: bytes, ttf_stream: bytes, adobe_mode: bool) -> t
173
173
 
174
174
  font_descriptor_params = {}
175
175
  font_dict_params = {}
176
- if adobe_mode:
176
+ if need_appearances:
177
177
  font_descriptor_params, font_dict_params = get_additional_font_params(
178
178
  pdf, base_font_name
179
179
  )