natural-pdf 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/api/index.md +386 -0
- docs/assets/favicon.png +3 -0
- docs/assets/favicon.svg +3 -0
- docs/assets/javascripts/custom.js +17 -0
- docs/assets/logo.svg +3 -0
- docs/assets/sample-screen.png +0 -0
- docs/assets/social-preview.png +17 -0
- docs/assets/social-preview.svg +17 -0
- docs/assets/stylesheets/custom.css +65 -0
- docs/document-qa/index.ipynb +435 -0
- docs/document-qa/index.md +79 -0
- docs/element-selection/index.ipynb +915 -0
- docs/element-selection/index.md +229 -0
- docs/index.md +170 -0
- docs/installation/index.md +69 -0
- docs/interactive-widget/index.ipynb +962 -0
- docs/interactive-widget/index.md +12 -0
- docs/layout-analysis/index.ipynb +818 -0
- docs/layout-analysis/index.md +185 -0
- docs/ocr/index.md +209 -0
- docs/pdf-navigation/index.ipynb +314 -0
- docs/pdf-navigation/index.md +97 -0
- docs/regions/index.ipynb +816 -0
- docs/regions/index.md +294 -0
- docs/tables/index.ipynb +658 -0
- docs/tables/index.md +144 -0
- docs/text-analysis/index.ipynb +370 -0
- docs/text-analysis/index.md +105 -0
- docs/text-extraction/index.ipynb +1478 -0
- docs/text-extraction/index.md +292 -0
- docs/tutorials/01-loading-and-extraction.ipynb +1710 -0
- docs/tutorials/01-loading-and-extraction.md +95 -0
- docs/tutorials/02-finding-elements.ipynb +340 -0
- docs/tutorials/02-finding-elements.md +149 -0
- docs/tutorials/03-extracting-blocks.ipynb +147 -0
- docs/tutorials/03-extracting-blocks.md +48 -0
- docs/tutorials/04-table-extraction.ipynb +114 -0
- docs/tutorials/04-table-extraction.md +50 -0
- docs/tutorials/05-excluding-content.ipynb +270 -0
- docs/tutorials/05-excluding-content.md +109 -0
- docs/tutorials/06-document-qa.ipynb +332 -0
- docs/tutorials/06-document-qa.md +91 -0
- docs/tutorials/07-layout-analysis.ipynb +288 -0
- docs/tutorials/07-layout-analysis.md +66 -0
- docs/tutorials/07-working-with-regions.ipynb +413 -0
- docs/tutorials/07-working-with-regions.md +151 -0
- docs/tutorials/08-spatial-navigation.ipynb +508 -0
- docs/tutorials/08-spatial-navigation.md +190 -0
- docs/tutorials/09-section-extraction.ipynb +2434 -0
- docs/tutorials/09-section-extraction.md +256 -0
- docs/tutorials/10-form-field-extraction.ipynb +512 -0
- docs/tutorials/10-form-field-extraction.md +201 -0
- docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
- docs/tutorials/11-enhanced-table-processing.md +9 -0
- docs/tutorials/12-ocr-integration.ipynb +604 -0
- docs/tutorials/12-ocr-integration.md +175 -0
- docs/tutorials/13-semantic-search.ipynb +1328 -0
- docs/tutorials/13-semantic-search.md +77 -0
- docs/visual-debugging/index.ipynb +2970 -0
- docs/visual-debugging/index.md +157 -0
- docs/visual-debugging/region.png +0 -0
- natural_pdf/__init__.py +50 -33
- natural_pdf/analyzers/__init__.py +2 -1
- natural_pdf/analyzers/layout/base.py +32 -24
- natural_pdf/analyzers/layout/docling.py +131 -72
- natural_pdf/analyzers/layout/gemini.py +264 -0
- natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
- natural_pdf/analyzers/layout/layout_manager.py +125 -58
- natural_pdf/analyzers/layout/layout_options.py +43 -17
- natural_pdf/analyzers/layout/paddle.py +152 -95
- natural_pdf/analyzers/layout/surya.py +164 -92
- natural_pdf/analyzers/layout/tatr.py +149 -84
- natural_pdf/analyzers/layout/yolo.py +89 -45
- natural_pdf/analyzers/text_options.py +22 -15
- natural_pdf/analyzers/text_structure.py +131 -85
- natural_pdf/analyzers/utils.py +30 -23
- natural_pdf/collections/pdf_collection.py +146 -97
- natural_pdf/core/__init__.py +1 -1
- natural_pdf/core/element_manager.py +419 -337
- natural_pdf/core/highlighting_service.py +268 -196
- natural_pdf/core/page.py +1044 -521
- natural_pdf/core/pdf.py +516 -313
- natural_pdf/elements/__init__.py +1 -1
- natural_pdf/elements/base.py +307 -225
- natural_pdf/elements/collections.py +805 -543
- natural_pdf/elements/line.py +39 -36
- natural_pdf/elements/rect.py +32 -30
- natural_pdf/elements/region.py +889 -879
- natural_pdf/elements/text.py +127 -99
- natural_pdf/exporters/__init__.py +0 -1
- natural_pdf/exporters/searchable_pdf.py +261 -102
- natural_pdf/ocr/__init__.py +57 -35
- natural_pdf/ocr/engine.py +150 -46
- natural_pdf/ocr/engine_easyocr.py +146 -150
- natural_pdf/ocr/engine_paddle.py +118 -175
- natural_pdf/ocr/engine_surya.py +78 -141
- natural_pdf/ocr/ocr_factory.py +114 -0
- natural_pdf/ocr/ocr_manager.py +122 -124
- natural_pdf/ocr/ocr_options.py +16 -20
- natural_pdf/ocr/utils.py +98 -0
- natural_pdf/qa/__init__.py +1 -1
- natural_pdf/qa/document_qa.py +119 -111
- natural_pdf/search/__init__.py +37 -31
- natural_pdf/search/haystack_search_service.py +312 -189
- natural_pdf/search/haystack_utils.py +186 -122
- natural_pdf/search/search_options.py +25 -14
- natural_pdf/search/search_service_protocol.py +12 -6
- natural_pdf/search/searchable_mixin.py +261 -176
- natural_pdf/selectors/__init__.py +2 -1
- natural_pdf/selectors/parser.py +159 -316
- natural_pdf/templates/__init__.py +1 -1
- natural_pdf/templates/spa/css/style.css +334 -0
- natural_pdf/templates/spa/index.html +31 -0
- natural_pdf/templates/spa/js/app.js +472 -0
- natural_pdf/templates/spa/words.txt +235976 -0
- natural_pdf/utils/debug.py +32 -0
- natural_pdf/utils/highlighting.py +8 -2
- natural_pdf/utils/identifiers.py +29 -0
- natural_pdf/utils/packaging.py +418 -0
- natural_pdf/utils/reading_order.py +65 -63
- natural_pdf/utils/text_extraction.py +195 -0
- natural_pdf/utils/visualization.py +70 -61
- natural_pdf/widgets/__init__.py +2 -3
- natural_pdf/widgets/viewer.py +749 -718
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/METADATA +53 -17
- natural_pdf-0.1.6.dist-info/RECORD +141 -0
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/WHEEL +1 -1
- natural_pdf-0.1.6.dist-info/top_level.txt +4 -0
- notebooks/Examples.ipynb +1293 -0
- pdfs/.gitkeep +0 -0
- pdfs/01-practice.pdf +543 -0
- pdfs/0500000US42001.pdf +0 -0
- pdfs/0500000US42007.pdf +0 -0
- pdfs/2014 Statistics.pdf +0 -0
- pdfs/2019 Statistics.pdf +0 -0
- pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- pdfs/needs-ocr.pdf +0 -0
- natural_pdf/templates/ocr_debug.html +0 -517
- natural_pdf-0.1.4.dist-info/RECORD +0 -61
- natural_pdf-0.1.4.dist-info/top_level.txt +0 -1
- {natural_pdf-0.1.4.dist-info → natural_pdf-0.1.6.dist-info}/licenses/LICENSE +0 -0
natural_pdf/elements/text.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
"""
|
2
2
|
Text element classes for natural-pdf.
|
3
3
|
"""
|
4
|
-
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING, Any, Dict, Optional
|
5
6
|
|
6
7
|
from natural_pdf.elements.base import Element
|
7
8
|
|
@@ -12,117 +13,133 @@ if TYPE_CHECKING:
|
|
12
13
|
class TextElement(Element):
|
13
14
|
"""
|
14
15
|
Represents a text element in a PDF.
|
15
|
-
|
16
|
+
|
16
17
|
This class is a wrapper around pdfplumber's character objects,
|
17
18
|
providing additional functionality for text extraction and analysis.
|
18
19
|
"""
|
19
|
-
|
20
|
-
def __init__(self, obj: Dict[str, Any], page:
|
20
|
+
|
21
|
+
def __init__(self, obj: Dict[str, Any], page: "Page"):
|
21
22
|
"""
|
22
23
|
Initialize a text element.
|
23
|
-
|
24
|
+
|
24
25
|
Args:
|
25
26
|
obj: The underlying pdfplumber object. For OCR text elements,
|
26
27
|
should include 'text', 'bbox', 'source', and 'confidence'
|
27
28
|
page: The parent Page object
|
28
29
|
"""
|
29
30
|
# Add object_type if not present
|
30
|
-
if
|
31
|
-
obj[
|
32
|
-
|
31
|
+
if "object_type" not in obj:
|
32
|
+
obj["object_type"] = "text"
|
33
|
+
|
33
34
|
super().__init__(obj, page)
|
34
|
-
|
35
|
+
# Explicitly store constituent characters if provided
|
36
|
+
# (Pop from obj to avoid storing it twice if super() stores _obj by ref)
|
37
|
+
self._char_dicts = obj.pop("_char_dicts", [])
|
38
|
+
|
35
39
|
@property
|
36
40
|
def text(self) -> str:
|
37
41
|
"""Get the text content."""
|
38
|
-
return self._obj.get(
|
39
|
-
|
42
|
+
return self._obj.get("text", "")
|
43
|
+
|
44
|
+
@text.setter
|
45
|
+
def text(self, value: str):
|
46
|
+
"""Set the text content."""
|
47
|
+
self._obj["text"] = value
|
48
|
+
|
40
49
|
@property
|
41
50
|
def source(self) -> str:
|
42
51
|
"""Get the source of this text element (pdf or ocr)."""
|
43
|
-
return self._obj.get(
|
44
|
-
|
52
|
+
return self._obj.get("source", "pdf")
|
53
|
+
|
45
54
|
@property
|
46
55
|
def confidence(self) -> float:
|
47
56
|
"""Get the confidence score for OCR text elements."""
|
48
|
-
return self._obj.get(
|
49
|
-
|
57
|
+
return self._obj.get("confidence", 1.0)
|
58
|
+
|
50
59
|
@property
|
51
60
|
def fontname(self) -> str:
|
52
61
|
"""Get the font name."""
|
53
62
|
# First check if we have a real fontname from PDF resources
|
54
|
-
if
|
55
|
-
return self._obj[
|
63
|
+
if "real_fontname" in self._obj:
|
64
|
+
return self._obj["real_fontname"]
|
56
65
|
# Otherwise use standard fontname
|
57
|
-
return self._obj.get(
|
58
|
-
|
66
|
+
return self._obj.get("fontname", "") or self._obj.get("font", "")
|
67
|
+
|
59
68
|
@property
|
60
69
|
def font_family(self) -> str:
|
61
70
|
"""
|
62
71
|
Get a cleaner font family name by stripping PDF-specific prefixes.
|
63
|
-
|
72
|
+
|
64
73
|
PDF font names often include prefixes like 'ABCDEF+' followed by the font name
|
65
74
|
or unique identifiers. This method attempts to extract a more readable font name.
|
66
75
|
"""
|
67
76
|
font = self.fontname
|
68
|
-
|
77
|
+
|
69
78
|
# Remove common PDF font prefixes (e.g., 'ABCDEF+')
|
70
|
-
if
|
71
|
-
font = font.split(
|
72
|
-
|
79
|
+
if "+" in font:
|
80
|
+
font = font.split("+", 1)[1]
|
81
|
+
|
73
82
|
# Try to extract common font family names
|
74
83
|
common_fonts = [
|
75
|
-
|
76
|
-
|
84
|
+
"Arial",
|
85
|
+
"Helvetica",
|
86
|
+
"Times",
|
87
|
+
"Courier",
|
88
|
+
"Calibri",
|
89
|
+
"Cambria",
|
90
|
+
"Georgia",
|
91
|
+
"Verdana",
|
92
|
+
"Tahoma",
|
93
|
+
"Trebuchet",
|
77
94
|
]
|
78
|
-
|
95
|
+
|
79
96
|
for common in common_fonts:
|
80
97
|
if common.lower() in font.lower():
|
81
98
|
return common
|
82
|
-
|
99
|
+
|
83
100
|
return font
|
84
|
-
|
101
|
+
|
85
102
|
@property
|
86
103
|
def font_variant(self) -> str:
|
87
104
|
"""
|
88
105
|
Get the font variant identifier (prefix before the '+' in PDF font names).
|
89
|
-
|
106
|
+
|
90
107
|
PDF embeds font subsets with unique identifiers like 'AAAAAB+FontName'.
|
91
108
|
Different variants of the same base font will have different prefixes.
|
92
|
-
This can be used to differentiate text that looks different despite
|
109
|
+
This can be used to differentiate text that looks different despite
|
93
110
|
having the same font name and size.
|
94
|
-
|
111
|
+
|
95
112
|
Returns:
|
96
113
|
The font variant prefix, or empty string if no variant is present
|
97
114
|
"""
|
98
115
|
font = self.fontname
|
99
|
-
|
116
|
+
|
100
117
|
# Extract the prefix before '+' if it exists
|
101
|
-
if
|
102
|
-
return font.split(
|
103
|
-
|
118
|
+
if "+" in font:
|
119
|
+
return font.split("+", 1)[0]
|
120
|
+
|
104
121
|
return ""
|
105
|
-
|
122
|
+
|
106
123
|
@property
|
107
124
|
def size(self) -> float:
|
108
125
|
"""Get the font size."""
|
109
|
-
return self._obj.get(
|
110
|
-
|
126
|
+
return self._obj.get("size", 0)
|
127
|
+
|
111
128
|
@property
|
112
129
|
def color(self) -> tuple:
|
113
130
|
"""Get the text color (RGB tuple)."""
|
114
131
|
# PDFs often use non-RGB values, so we handle different formats
|
115
132
|
# In pdfplumber, colors can be in various formats depending on the PDF
|
116
|
-
color = self._obj.get(
|
117
|
-
|
133
|
+
color = self._obj.get("non_stroking_color", (0, 0, 0))
|
134
|
+
|
118
135
|
# If it's a single value, treat as grayscale
|
119
136
|
if isinstance(color, (int, float)):
|
120
137
|
return (color, color, color)
|
121
|
-
|
138
|
+
|
122
139
|
# If it's a tuple of 3 values, treat as RGB
|
123
140
|
if isinstance(color, tuple) and len(color) == 3:
|
124
141
|
return color
|
125
|
-
|
142
|
+
|
126
143
|
# If it's a tuple of 4 values, treat as CMYK and convert to approximate RGB
|
127
144
|
if isinstance(color, tuple) and len(color) == 4:
|
128
145
|
c, m, y, k = color
|
@@ -130,33 +147,33 @@ class TextElement(Element):
|
|
130
147
|
g = 1 - min(1, m + k)
|
131
148
|
b = 1 - min(1, y + k)
|
132
149
|
return (r, g, b)
|
133
|
-
|
150
|
+
|
134
151
|
# Default to black
|
135
152
|
return (0, 0, 0)
|
136
|
-
|
153
|
+
|
137
154
|
def extract_text(self, keep_blank_chars=True, **kwargs) -> str:
|
138
155
|
"""
|
139
156
|
Extract text from this element.
|
140
|
-
|
157
|
+
|
141
158
|
Args:
|
142
159
|
keep_blank_chars: Whether to keep blank characters (default: True)
|
143
160
|
**kwargs: Additional extraction parameters
|
144
|
-
|
161
|
+
|
145
162
|
Returns:
|
146
163
|
Text content
|
147
164
|
"""
|
148
165
|
# For text elements, keep_blank_chars doesn't affect anything as we're
|
149
166
|
# simply returning the text property. Included for API consistency.
|
150
167
|
return self.text
|
151
|
-
|
168
|
+
|
152
169
|
def contains(self, substring: str, case_sensitive: bool = True) -> bool:
|
153
170
|
"""
|
154
171
|
Check if this text element contains a substring.
|
155
|
-
|
172
|
+
|
156
173
|
Args:
|
157
174
|
substring: The substring to check for
|
158
175
|
case_sensitive: Whether the check is case-sensitive
|
159
|
-
|
176
|
+
|
160
177
|
Returns:
|
161
178
|
True if the text contains the substring
|
162
179
|
"""
|
@@ -164,25 +181,26 @@ class TextElement(Element):
|
|
164
181
|
return substring in self.text
|
165
182
|
else:
|
166
183
|
return substring.lower() in self.text.lower()
|
167
|
-
|
184
|
+
|
168
185
|
def matches(self, pattern: str) -> bool:
|
169
186
|
"""
|
170
187
|
Check if this text element matches a regular expression pattern.
|
171
|
-
|
188
|
+
|
172
189
|
Args:
|
173
190
|
pattern: Regular expression pattern
|
174
|
-
|
191
|
+
|
175
192
|
Returns:
|
176
193
|
True if the text matches the pattern
|
177
194
|
"""
|
178
195
|
import re
|
196
|
+
|
179
197
|
return bool(re.search(pattern, self.text))
|
180
|
-
|
198
|
+
|
181
199
|
@property
|
182
200
|
def bold(self) -> bool:
|
183
201
|
"""
|
184
202
|
Check if the text is bold based on multiple indicators in the PDF.
|
185
|
-
|
203
|
+
|
186
204
|
PDFs encode boldness in several ways:
|
187
205
|
1. Font name containing 'bold' or 'black'
|
188
206
|
2. Font descriptor flags (bit 2 indicates bold)
|
@@ -192,43 +210,43 @@ class TextElement(Element):
|
|
192
210
|
"""
|
193
211
|
# Check font name (original method)
|
194
212
|
fontname = self.fontname.lower()
|
195
|
-
if
|
213
|
+
if "bold" in fontname or "black" in fontname or self.fontname.endswith("-B"):
|
196
214
|
return True
|
197
|
-
|
215
|
+
|
198
216
|
# Check font descriptor flags if available (bit 2 = bold)
|
199
|
-
flags = self._obj.get(
|
217
|
+
flags = self._obj.get("flags")
|
200
218
|
if flags is not None and (flags & 4) != 0: # Check if bit 2 is set
|
201
219
|
return True
|
202
|
-
|
220
|
+
|
203
221
|
# Check StemV (vertical stem width) if available
|
204
222
|
# Higher StemV values indicate bolder fonts
|
205
|
-
stemv = self._obj.get(
|
223
|
+
stemv = self._obj.get("stemv") or self._obj.get("StemV")
|
206
224
|
if stemv is not None and isinstance(stemv, (int, float)) and stemv > 120:
|
207
225
|
return True
|
208
|
-
|
226
|
+
|
209
227
|
# Check font weight if available (700+ is typically bold)
|
210
|
-
weight = self._obj.get(
|
228
|
+
weight = self._obj.get("weight") or self._obj.get("FontWeight")
|
211
229
|
if weight is not None and isinstance(weight, (int, float)) and weight >= 700:
|
212
230
|
return True
|
213
|
-
|
231
|
+
|
214
232
|
# Check text rendering mode (mode 2 = fill and stroke, can make text appear bold)
|
215
|
-
render_mode = self._obj.get(
|
233
|
+
render_mode = self._obj.get("render_mode")
|
216
234
|
if render_mode is not None and render_mode == 2:
|
217
235
|
return True
|
218
|
-
|
236
|
+
|
219
237
|
# Additional check: if we have text with the same font but different paths/strokes
|
220
238
|
# Path widths or stroke widths can indicate boldness
|
221
|
-
stroke_width = self._obj.get(
|
239
|
+
stroke_width = self._obj.get("stroke_width") or self._obj.get("lineWidth")
|
222
240
|
if stroke_width is not None and isinstance(stroke_width, (int, float)) and stroke_width > 0:
|
223
241
|
return True
|
224
|
-
|
242
|
+
|
225
243
|
return False
|
226
|
-
|
244
|
+
|
227
245
|
@property
|
228
246
|
def italic(self) -> bool:
|
229
247
|
"""
|
230
248
|
Check if the text is italic based on multiple indicators in the PDF.
|
231
|
-
|
249
|
+
|
232
250
|
PDFs encode italic (oblique) text in several ways:
|
233
251
|
1. Font name containing 'italic' or 'oblique'
|
234
252
|
2. Font descriptor flags (bit 6 indicates italic)
|
@@ -236,69 +254,79 @@ class TextElement(Element):
|
|
236
254
|
"""
|
237
255
|
# Check font name (original method)
|
238
256
|
fontname = self.fontname.lower()
|
239
|
-
if
|
257
|
+
if "italic" in fontname or "oblique" in fontname or self.fontname.endswith("-I"):
|
240
258
|
return True
|
241
|
-
|
259
|
+
|
242
260
|
# Check font descriptor flags if available (bit 6 = italic)
|
243
|
-
flags = self._obj.get(
|
261
|
+
flags = self._obj.get("flags")
|
244
262
|
if flags is not None and (flags & 64) != 0: # Check if bit 6 is set
|
245
263
|
return True
|
246
|
-
|
264
|
+
|
247
265
|
# Check italic angle if available
|
248
266
|
# Non-zero italic angle indicates italic font
|
249
|
-
italic_angle = self._obj.get(
|
250
|
-
if
|
267
|
+
italic_angle = self._obj.get("italic_angle") or self._obj.get("ItalicAngle")
|
268
|
+
if (
|
269
|
+
italic_angle is not None
|
270
|
+
and isinstance(italic_angle, (int, float))
|
271
|
+
and italic_angle != 0
|
272
|
+
):
|
251
273
|
return True
|
252
|
-
|
274
|
+
|
253
275
|
return False
|
254
|
-
|
276
|
+
|
255
277
|
def __repr__(self) -> str:
|
256
278
|
"""String representation of the text element."""
|
257
|
-
preview = self.text[:10] +
|
279
|
+
preview = self.text[:10] + "..." if len(self.text) > 10 else self.text
|
258
280
|
font_style = []
|
259
281
|
if self.bold:
|
260
282
|
font_style.append("bold")
|
261
283
|
if self.italic:
|
262
284
|
font_style.append("italic")
|
263
285
|
style_str = f", style={font_style}" if font_style else ""
|
264
|
-
|
286
|
+
|
265
287
|
# Use font_family for display but include raw fontname and variant
|
266
288
|
font_display = self.font_family
|
267
289
|
variant = self.font_variant
|
268
290
|
variant_str = f", variant='{variant}'" if variant else ""
|
269
|
-
|
270
|
-
if font_display != self.fontname and
|
271
|
-
base_font = self.fontname.split(
|
291
|
+
|
292
|
+
if font_display != self.fontname and "+" in self.fontname:
|
293
|
+
base_font = self.fontname.split("+", 1)[1]
|
272
294
|
font_display = f"{font_display} ({base_font})"
|
273
|
-
|
295
|
+
|
274
296
|
return f"<TextElement text='{preview}' font='{font_display}'{variant_str} size={self.size}{style_str} bbox={self.bbox}>"
|
275
|
-
|
297
|
+
|
276
298
|
def font_info(self) -> dict:
|
277
299
|
"""
|
278
300
|
Get detailed font information for this text element.
|
279
|
-
|
301
|
+
|
280
302
|
Returns a dictionary with all available font-related properties,
|
281
303
|
useful for debugging font detection issues.
|
282
304
|
"""
|
283
305
|
info = {
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
306
|
+
"text": self.text,
|
307
|
+
"fontname": self.fontname,
|
308
|
+
"font_family": self.font_family,
|
309
|
+
"font_variant": self.font_variant,
|
310
|
+
"size": self.size,
|
311
|
+
"bold": self.bold,
|
312
|
+
"italic": self.italic,
|
313
|
+
"color": self.color,
|
292
314
|
}
|
293
|
-
|
315
|
+
|
294
316
|
# Include raw font properties from the PDF
|
295
317
|
font_props = [
|
296
|
-
|
297
|
-
|
318
|
+
"flags",
|
319
|
+
"stemv",
|
320
|
+
"StemV",
|
321
|
+
"weight",
|
322
|
+
"FontWeight",
|
323
|
+
"render_mode",
|
324
|
+
"stroke_width",
|
325
|
+
"lineWidth",
|
298
326
|
]
|
299
|
-
|
327
|
+
|
300
328
|
for prop in font_props:
|
301
329
|
if prop in self._obj:
|
302
330
|
info[f"raw_{prop}"] = self._obj[prop]
|
303
|
-
|
304
|
-
return info
|
331
|
+
|
332
|
+
return info
|
@@ -1 +0,0 @@
|
|
1
|
-
|