natural-pdf 0.1.38__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +11 -6
- natural_pdf/analyzers/__init__.py +6 -1
- natural_pdf/analyzers/guides.py +354 -258
- natural_pdf/analyzers/layout/layout_analyzer.py +2 -3
- natural_pdf/analyzers/layout/layout_manager.py +18 -4
- natural_pdf/analyzers/layout/paddle.py +11 -0
- natural_pdf/analyzers/layout/surya.py +2 -3
- natural_pdf/analyzers/shape_detection_mixin.py +25 -34
- natural_pdf/analyzers/text_structure.py +2 -2
- natural_pdf/classification/manager.py +1 -1
- natural_pdf/collections/mixins.py +3 -2
- natural_pdf/core/highlighting_service.py +743 -32
- natural_pdf/core/page.py +252 -399
- natural_pdf/core/page_collection.py +1249 -0
- natural_pdf/core/pdf.py +231 -89
- natural_pdf/{collections → core}/pdf_collection.py +18 -11
- natural_pdf/core/render_spec.py +335 -0
- natural_pdf/describe/base.py +1 -1
- natural_pdf/elements/__init__.py +1 -0
- natural_pdf/elements/base.py +108 -83
- natural_pdf/elements/{collections.py → element_collection.py} +575 -1372
- natural_pdf/elements/line.py +0 -1
- natural_pdf/elements/rect.py +0 -1
- natural_pdf/elements/region.py +405 -280
- natural_pdf/elements/text.py +9 -7
- natural_pdf/exporters/base.py +2 -2
- natural_pdf/exporters/original_pdf.py +1 -1
- natural_pdf/exporters/paddleocr.py +2 -4
- natural_pdf/exporters/searchable_pdf.py +3 -2
- natural_pdf/extraction/mixin.py +1 -3
- natural_pdf/flows/collections.py +1 -69
- natural_pdf/flows/element.py +25 -0
- natural_pdf/flows/flow.py +1658 -19
- natural_pdf/flows/region.py +757 -263
- natural_pdf/ocr/ocr_options.py +0 -2
- natural_pdf/ocr/utils.py +2 -1
- natural_pdf/qa/document_qa.py +21 -5
- natural_pdf/search/search_service_protocol.py +1 -1
- natural_pdf/selectors/parser.py +35 -2
- natural_pdf/tables/result.py +35 -1
- natural_pdf/text_mixin.py +101 -0
- natural_pdf/utils/debug.py +2 -1
- natural_pdf/utils/highlighting.py +1 -0
- natural_pdf/utils/layout.py +2 -2
- natural_pdf/utils/packaging.py +4 -3
- natural_pdf/utils/text_extraction.py +15 -12
- natural_pdf/utils/visualization.py +385 -0
- {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/METADATA +7 -3
- {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/RECORD +55 -52
- optimization/memory_comparison.py +1 -1
- optimization/pdf_analyzer.py +2 -2
- {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/entry_points.txt +0 -0
- {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.38.dist-info → natural_pdf-0.2.0.dist-info}/top_level.txt +0 -0
@@ -34,6 +34,10 @@ _BASE_HIGHLIGHT_COLORS = [
|
|
34
34
|
# Default Alpha for highlight fills
|
35
35
|
DEFAULT_FILL_ALPHA = 100
|
36
36
|
|
37
|
+
# Add quantitative color mapping functionality
|
38
|
+
import matplotlib.cm as cm
|
39
|
+
import matplotlib.pyplot as plt
|
40
|
+
|
37
41
|
|
38
42
|
class ColorManager:
|
39
43
|
"""
|
@@ -176,6 +180,208 @@ def create_legend(
|
|
176
180
|
return legend
|
177
181
|
|
178
182
|
|
183
|
+
def _colorbar_font(size=16):
    """Return the best available font for colorbar labels (best effort)."""
    for face in ("DejaVuSans.ttf", "Arial.ttf"):
        try:
            return ImageFont.truetype(face, size)
        except IOError:
            continue
    try:
        # Newer Pillow releases accept a size for the built-in font.
        return ImageFont.load_default(size=size)
    except Exception:
        # Older Pillow (no ``size`` argument) or a build without FreeType.
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are never swallowed here.
        return ImageFont.load_default()


def _colorbar_ticks(bins, vmin, vmax):
    """Return the tick values described by *bins* that lie within [vmin, vmax]."""
    if isinstance(bins, int):
        if bins < 1:
            # Nothing sensible to draw (and avoids dividing by zero).
            return []
        step = (vmax - vmin) / bins
        # Pin the final tick to vmax: ``vmin + bins * step`` can overshoot
        # vmax by one ulp and would then be rejected by the range filter.
        candidates = [vmin + i * step for i in range(bins)] + [vmax]
    else:
        candidates = bins
    return [tick for tick in candidates if vmin <= tick <= vmax]


def _colorbar_label(value):
    """Format *value* with at most two decimals, dropping trailing zeros."""
    return f"{value:.2f}".rstrip("0").rstrip(".")


def create_colorbar(
    values: List[float],
    colormap: str = "viridis",
    bins: Optional[Union[int, List[float]]] = None,
    width: int = 80,
    height: int = 20,
    orientation: str = "horizontal",
) -> Image.Image:
    """
    Create a color bar for quantitative data visualization.

    The bar is drawn as five discrete color blocks sampled at the block
    centers of the value range. Labels are either the bin boundaries (when
    ``bins`` is given) or just the minimum and maximum value.

    Args:
        values: List of numeric values to create color bar for (must not be empty)
        colormap: Name of the matplotlib colormap to use
        bins: Optional binning specification (int for equal bins, list for custom
            bins). A non-positive integer draws no tick labels.
        width: Width of the color bar
        height: Height of the color bar
        orientation: 'horizontal' or 'vertical' (anything other than
            'horizontal' is rendered vertically)

    Returns:
        PIL Image with the color bar

    Raises:
        ValueError: If ``values`` is empty.
    """
    if len(values) == 0:
        raise ValueError("create_colorbar() requires at least one value")

    # Get value range
    vmin = min(values)
    vmax = max(values)
    if vmin == vmax:
        # Handle edge case where all values are the same
        vmax = vmin + 1
    span = vmax - vmin

    horizontal = orientation == "horizontal"
    if horizontal:
        bar_width = width - 40  # Leave space for labels
        bar_height = height
        total_width = width
        total_height = height + 40  # Extra space for labels
    else:
        bar_width = width
        bar_height = max(height, 120)  # Ensure minimum height for vertical colorbar
        total_width = width + 80  # Extra space for labels
        total_height = bar_height + 60  # Extra space for labels

    # Create base image
    img = Image.new("RGBA", (total_width, total_height), (255, 255, 255, 255))
    draw = ImageDraw.Draw(img)
    font = _colorbar_font()

    black = (0, 0, 0, 255)
    num_blocks = 5
    ticks = None if bins is None else _colorbar_ticks(bins, vmin, vmax)

    if horizontal:
        left, top = 20, 10
        bottom = top + bar_height
        label_y = bottom + 8
        block_width = bar_width // num_blocks

        for i in range(num_blocks):
            # Sample the colormap at the center of each block (left = min).
            value = vmin + (i + 0.5) / num_blocks * span
            color = (*get_colormap_color(colormap, value, vmin, vmax), 255)
            draw.rectangle(
                [(left + i * block_width, top), (left + (i + 1) * block_width, bottom)],
                fill=color,
                outline=black,
                width=1,
            )

        if ticks is not None:
            # Show bin boundaries: a short tick below the bar plus a centered label.
            for tick in ticks:
                x_pos = int(left + (tick - vmin) / span * bar_width)
                draw.line([(x_pos, bottom), (x_pos, bottom + 5)], fill=black, width=1)
                label_text = _colorbar_label(tick)
                text_bbox = draw.textbbox((0, 0), label_text, font=font)
                text_width = text_bbox[2] - text_bbox[0]
                draw.text((x_pos - text_width // 2, label_y), label_text, fill=black, font=font)
        else:
            # Show min (left-aligned) and max (right-aligned) values
            draw.text((left, label_y), _colorbar_label(vmin), fill=black, font=font)
            max_text = _colorbar_label(vmax)
            text_bbox = draw.textbbox((0, 0), max_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            draw.text((left + bar_width - text_width, label_y), max_text, fill=black, font=font)
    else:
        left, top = 10, 30
        right = left + bar_width
        label_x = right + 8
        block_height = bar_height // num_blocks

        for i in range(num_blocks):
            # Sample the colormap at the center of each block (top = max, bottom = min).
            value = vmax - (i + 0.5) / num_blocks * span
            color = (*get_colormap_color(colormap, value, vmin, vmax), 255)
            draw.rectangle(
                [(left, top + i * block_height), (right, top + (i + 1) * block_height)],
                fill=color,
                outline=black,
                width=1,
            )

        if ticks is not None:
            # Show bin boundaries: a short tick right of the bar plus a label.
            for tick in ticks:
                y_pos = int(top + (vmax - tick) / span * bar_height)
                draw.line([(right, y_pos), (right + 5, y_pos)], fill=black, width=1)
                draw.text((label_x, y_pos - 6), _colorbar_label(tick), fill=black, font=font)
        else:
            # Show max (top) and min (bottom) values
            draw.text((label_x, top - 6), _colorbar_label(vmax), fill=black, font=font)
            draw.text(
                (label_x, top + bar_height - 6), _colorbar_label(vmin), fill=black, font=font
            )

    return img
|
383
|
+
|
384
|
+
|
179
385
|
def merge_images_with_legend(
|
180
386
|
image: Image.Image, legend: Image.Image, position: str = "right"
|
181
387
|
) -> Image.Image:
|
@@ -262,3 +468,182 @@ def render_plain_page(page, resolution):
|
|
262
468
|
doc.close()
|
263
469
|
|
264
470
|
return image
|
471
|
+
|
472
|
+
|
473
|
+
def detect_quantitative_data(values: List[Any]) -> bool:
    """
    Detect if a list of values represents quantitative data suitable for gradient coloring.

    A value counts as numeric when ``float(value)`` succeeds and the result is
    finite. ``None``, non-convertible values, NaN and +/-inf are treated as
    non-numeric; they still count towards the total when the numeric ratio
    is computed.

    Args:
        values: List of attribute values from elements

    Returns:
        True if more than 80% of ``values`` are numeric and there are more
        than 8 distinct numeric values, False otherwise
    """
    import math

    numeric_values = []
    for v in values:
        if v is None:
            continue
        try:
            number = float(v)
        except (ValueError, TypeError, OverflowError):
            # Not numeric (or too large for a float), likely categorical
            continue
        # Every float("nan") is a distinct object that compares unequal to
        # itself, so NaNs would each be counted as a "unique" value below.
        if math.isfinite(number):
            numeric_values.append(number)

    # If we have fewer than 2 numeric values, treat as categorical
    if len(numeric_values) < 2:
        return False

    # More than 80% numeric and >8 unique values => quantitative
    numeric_ratio = len(numeric_values) / len(values)
    unique_values = len(set(numeric_values))

    return numeric_ratio > 0.8 and unique_values > 8
|
503
|
+
|
504
|
+
|
505
|
+
def _resolve_colormap(colormap):
    """Return a callable matplotlib colormap for *colormap*, defaulting to viridis."""
    if callable(colormap):
        # Already a colormap object; use it as-is.
        return colormap

    try:
        # The registry replaces cm.get_cmap(), which newer matplotlib removed.
        from matplotlib import colormaps as registry
    except ImportError:
        registry = None

    for candidate in (colormap, "viridis"):
        try:
            if registry is not None:
                return registry[candidate]
            return cm.get_cmap(candidate)  # legacy matplotlib without the registry
        except (ValueError, KeyError, TypeError):
            # Unknown name: fall through to the viridis fallback
            continue
    raise ValueError(f"Could not resolve colormap {colormap!r}")


def get_colormap_color(
    colormap_name: str, value: float, vmin: float, vmax: float
) -> Tuple[int, int, int]:
    """
    Get a color from a matplotlib colormap based on a normalized value.

    ``value`` is linearly rescaled from ``[vmin, vmax]`` to ``[0, 1]`` and
    clamped to that interval before the colormap is sampled. When
    ``vmax == vmin`` the colormap is sampled at 0.

    Args:
        colormap_name: Name of the colormap ('viridis', 'plasma', etc.).
            Unknown names fall back to 'viridis'. A callable colormap object
            is used directly.
        value: The value to map to a color
        vmin: Minimum value in the data range
        vmax: Maximum value in the data range

    Returns:
        RGB color tuple (0-255)
    """
    cmap = _resolve_colormap(colormap_name)

    # Normalize value to [0, 1]
    if vmax == vmin:
        t = 0.0
    else:
        t = (value - vmin) / (vmax - vmin)

    # Clamp to [0, 1]
    t = max(0.0, min(1.0, t))

    # The colormap returns RGBA components in the 0-1 range; alpha is dropped.
    rgba = cmap(t)
    r, g, b = (int(channel * 255) for channel in rgba[:3])

    return (r, g, b)
|
545
|
+
|
546
|
+
|
547
|
+
def apply_bins_to_values(
    values: List[float], bins: Union[int, List[float]]
) -> Tuple[List[str], List[float]]:
    """
    Apply binning to quantitative values.

    Args:
        values: List of numeric values. Only used (for its min/max) when
            ``bins`` is an integer.
        bins: Either number of bins (int) or list of bin edges (List[float]).
            Custom edges are sorted before use.

    Returns:
        Tuple of (bin_labels, bin_values) where bin_labels are
        "start-end" strings with two decimals and bin_values are the
        centers of the bins

    Raises:
        ValueError: If ``bins`` is an integer below 1, if fewer than two
            custom edges are given, or if ``values`` is empty while an
            integer bin count is requested.
    """
    if isinstance(bins, int):
        if bins < 1:
            raise ValueError(f"bins must be a positive integer, got {bins}")
        if len(values) == 0:
            raise ValueError("values must not be empty when bins is an integer")
        # Equal-width bins
        min_val = min(values)
        max_val = max(values)
        bin_edges = [min_val + i * (max_val - min_val) / bins for i in range(bins)]
        # Use the exact maximum as the last edge instead of a recomputed one
        # that may differ from it by floating-point rounding.
        bin_edges.append(max_val)
    else:
        # Custom bin edges
        bin_edges = sorted(bins)
        if len(bin_edges) < 2:
            raise ValueError("custom bins need at least two edges")

    # Create bin labels and centers from consecutive edge pairs
    bin_labels = []
    bin_centers = []
    for start, end in zip(bin_edges, bin_edges[1:]):
        bin_labels.append(f"{start:.2f}-{end:.2f}")
        bin_centers.append((start + end) / 2)

    return bin_labels, bin_centers
|
579
|
+
|
580
|
+
|
581
|
+
def create_quantitative_color_mapping(
    values: List[Any], colormap: str = "viridis", bins: Optional[Union[int, List[float]]] = None
) -> Dict[Any, Tuple[int, int, int, int]]:
    """
    Create a color mapping for quantitative data using matplotlib colormaps.

    Values that are ``None``, cannot be converted with ``float()``, or
    convert to NaN/inf are left out of the result. Without ``bins`` every
    value gets the color at its own position in the data range; with
    ``bins`` every value gets the color of the center of the bin it falls
    into.

    Args:
        values: List of values to map to colors
        colormap: Name of any matplotlib colormap (e.g., 'viridis', 'plasma', 'inferno',
            'magma', 'coolwarm', 'RdBu', 'tab10', etc.). See matplotlib.cm for full list.
        bins: Optional binning specification (int for equal-width bins, list for custom
            bins). Custom edges may be given in any order; a value equal to an
            edge belongs to the bin below it, and values outside the edges are
            assigned to the first/last bin.

    Returns:
        Dictionary mapping values to RGBA colors (alpha is DEFAULT_FILL_ALPHA);
        empty if no value is numeric

    Raises:
        ValueError: If ``bins`` is an integer below 1 or a list with fewer
            than two edges.
    """
    import bisect
    import math

    # Convert to numeric values, filtering out None/non-numeric/non-finite
    value_to_numeric = {}
    for v in values:
        if v is None:
            continue
        try:
            numeric_val = float(v)
            # NaN/inf would poison min()/max() and crash int() in the binning below.
            if math.isfinite(numeric_val):
                value_to_numeric[v] = numeric_val
        except (ValueError, TypeError, OverflowError):
            # Not numeric (or not usable as a dict key): no color for this value
            continue

    if not value_to_numeric:
        # Fallback to categorical if no numeric values
        return {}

    # Determine min/max for normalization
    vmin = min(value_to_numeric.values())
    vmax = max(value_to_numeric.values())

    if bins is None:
        # Continuous gradient mapping
        return {
            orig_val: (*get_colormap_color(colormap, numeric_val, vmin, vmax), DEFAULT_FILL_ALPHA)
            for orig_val, numeric_val in value_to_numeric.items()
        }

    if isinstance(bins, int):
        if bins < 1:
            raise ValueError(f"bins must be a positive integer, got {bins}")
        edges = None
        bin_width = (vmax - vmin) / bins
    else:
        # Search the same sorted edges that apply_bins_to_values() builds its
        # centers from, so unsorted input cannot select the wrong bin.
        edges = sorted(bins)
        if len(edges) < 2:
            raise ValueError("custom bins need at least two edges")
        bin_width = None

    _, bin_centers = apply_bins_to_values(list(value_to_numeric.values()), bins)

    result = {}
    for orig_val, numeric_val in value_to_numeric.items():
        # Find which bin this value belongs to
        if edges is None:
            if bin_width == 0:
                # All values identical: everything lands in the first bin.
                bin_idx = 0
            else:
                bin_idx = min(int((numeric_val - vmin) / bin_width), bins - 1)
        else:
            # First upper edge >= value (searching from edge 1), clamped to the last bin.
            upper = bisect.bisect_left(edges, numeric_val, 1)
            bin_idx = min(upper, len(edges) - 1) - 1

        # Get color for this bin center
        rgb = get_colormap_color(colormap, bin_centers[bin_idx], vmin, vmax)
        result[orig_val] = (*rgb, DEFAULT_FILL_ALPHA)

    return result
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.38
|
3
|
+
Version: 0.2.0
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -26,6 +26,7 @@ Requires-Dist: jenkspy
|
|
26
26
|
Requires-Dist: scipy
|
27
27
|
Requires-Dist: ipywidgets>=7.0.0
|
28
28
|
Requires-Dist: python-bidi
|
29
|
+
Requires-Dist: matplotlib
|
29
30
|
Provides-Extra: test
|
30
31
|
Requires-Dist: pytest; extra == "test"
|
31
32
|
Requires-Dist: pytest-xdist; extra == "test"
|
@@ -43,7 +44,8 @@ Requires-Dist: uv; extra == "dev"
|
|
43
44
|
Requires-Dist: pipdeptree; extra == "dev"
|
44
45
|
Requires-Dist: nbformat; extra == "dev"
|
45
46
|
Requires-Dist: jupytext; extra == "dev"
|
46
|
-
Requires-Dist: nbclient; extra == "dev"
|
47
|
+
Requires-Dist: nbclient==0.10.2; extra == "dev"
|
48
|
+
Requires-Dist: jupyter_core==5.7.2; extra == "dev"
|
47
49
|
Requires-Dist: ipykernel; extra == "dev"
|
48
50
|
Requires-Dist: pre-commit; extra == "dev"
|
49
51
|
Requires-Dist: setuptools; extra == "dev"
|
@@ -52,7 +54,6 @@ Requires-Dist: natural-pdf[ocr-export]; extra == "all"
|
|
52
54
|
Requires-Dist: natural-pdf[deskew]; extra == "all"
|
53
55
|
Requires-Dist: natural-pdf[test]; extra == "all"
|
54
56
|
Requires-Dist: natural-pdf[search]; extra == "all"
|
55
|
-
Requires-Dist: natural-pdf[favorites]; extra == "all"
|
56
57
|
Requires-Dist: natural-pdf[export-extras]; extra == "all"
|
57
58
|
Requires-Dist: natural-pdf[ai]; extra == "all"
|
58
59
|
Provides-Extra: deskew
|
@@ -75,10 +76,13 @@ Requires-Dist: huggingface_hub>=0.29.3; extra == "ai"
|
|
75
76
|
Requires-Dist: timm; extra == "ai"
|
76
77
|
Requires-Dist: doclayout_yolo; extra == "ai"
|
77
78
|
Requires-Dist: easyocr; extra == "ai"
|
79
|
+
Requires-Dist: openai; extra == "ai"
|
78
80
|
Dynamic: license-file
|
79
81
|
|
80
82
|
# Natural PDF
|
81
83
|
|
84
|
+
[![CI](https://github.com/jsoma/natural-pdf/actions/workflows/ci.yml/badge.svg)](https://github.com/jsoma/natural-pdf/actions/workflows/ci.yml)
|
85
|
+
|
82
86
|
A friendly library for working with PDFs, built on top of [pdfplumber](https://github.com/jsvine/pdfplumber).
|
83
87
|
|
84
88
|
Natural PDF lets you find and extract content from PDFs using simple code that makes sense.
|
@@ -1,66 +1,69 @@
|
|
1
|
-
natural_pdf/__init__.py,sha256=
|
1
|
+
natural_pdf/__init__.py,sha256=N4pR0LbuPEnUYFZqbdVqc_FGKldgwPQc1wjJhYKTBBM,3417
|
2
2
|
natural_pdf/cli.py,sha256=SkPwhhMM-GhLsj3O1n1Agxz4KOxcZ08sj8hVQSFJB5c,4064
|
3
|
-
natural_pdf/
|
4
|
-
natural_pdf/analyzers/
|
5
|
-
natural_pdf/analyzers/
|
3
|
+
natural_pdf/text_mixin.py,sha256=eFCiHj6Okcw3aum4955BepcI2NPRalkf9UFFVTc_H30,4012
|
4
|
+
natural_pdf/analyzers/__init__.py,sha256=3XGoNq3OgiVkZP7tOdeP5XVUl7fDgyztdA8DlOcMLXg,1138
|
5
|
+
natural_pdf/analyzers/guides.py,sha256=N8fetR3jrDXzeHtIlbxg8BEbthB_lS0L8yhzVXHqiGQ,143245
|
6
|
+
natural_pdf/analyzers/shape_detection_mixin.py,sha256=mgpyJ4jIulz9l9HCqThabJIsLSrXh9BB2AmLxUoHmw0,62584
|
6
7
|
natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
|
7
|
-
natural_pdf/analyzers/text_structure.py,sha256=
|
8
|
+
natural_pdf/analyzers/text_structure.py,sha256=3WWusi-BI0krUnJxB05DD6XmKj5qRNvQBqH7zOQGm1M,28451
|
8
9
|
natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
|
9
10
|
natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
|
10
11
|
natural_pdf/analyzers/layout/base.py,sha256=F5xPOJcI65N4nxwm0szvhtbDD6lVMqWDut8PSkTCobU,8349
|
11
12
|
natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
|
12
13
|
natural_pdf/analyzers/layout/gemini.py,sha256=ldECVCQ5HNQA3Omjg2NOsTrJXslyYb0vErDncmLIiuE,10510
|
13
|
-
natural_pdf/analyzers/layout/layout_analyzer.py,sha256=
|
14
|
-
natural_pdf/analyzers/layout/layout_manager.py,sha256=
|
14
|
+
natural_pdf/analyzers/layout/layout_analyzer.py,sha256=taYM6uE-Lvp35hwv6XvPzrUIjAs9DfroA9Ndjy9bWsg,15553
|
15
|
+
natural_pdf/analyzers/layout/layout_manager.py,sha256=j6joIgOkeooxv9Kr9ELomm9-5dWlPvNTMzYa_-pwUAQ,10825
|
15
16
|
natural_pdf/analyzers/layout/layout_options.py,sha256=2JENtBMHhP3hP0zpFI5-UP3-t1y49E7oLZnjd9d1eB0,7704
|
16
|
-
natural_pdf/analyzers/layout/paddle.py,sha256=
|
17
|
+
natural_pdf/analyzers/layout/paddle.py,sha256=WDXq-J2XXC0DW2LSe7sDtQstgPIGODnLjv8FCiWDXt0,23604
|
17
18
|
natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh6RoezjdepTnMl90SaNIrP29Pwc,5902
|
18
|
-
natural_pdf/analyzers/layout/surya.py,sha256=
|
19
|
+
natural_pdf/analyzers/layout/surya.py,sha256=lXgn5bGVv-YIDTQod5NYViuJjYa5tM_qlEUc3dzWe28,9786
|
19
20
|
natural_pdf/analyzers/layout/table_structure_utils.py,sha256=_sugFWvVpRK3EimOCrikTDAalGnSaWqiqFbtJw8t-lg,2770
|
20
21
|
natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
|
21
22
|
natural_pdf/analyzers/layout/yolo.py,sha256=2Iz2-WsMy--ftkZQ8j5PGqp_1fTD7Mskl2kNnMUuwCU,8286
|
22
|
-
natural_pdf/classification/manager.py,sha256=
|
23
|
+
natural_pdf/classification/manager.py,sha256=BaqBL9GeMvYgoJsiQeI2J8aUKQ5Qxu_ELRvmCWquld8,22172
|
23
24
|
natural_pdf/classification/mixin.py,sha256=CXygXXhe_qx1563SmIjiu4uSnZkxCkuRR4fGvLokS2w,9416
|
24
25
|
natural_pdf/classification/results.py,sha256=5ha77CxK0GYwkBMJbvUBZkBjsL5GpOveIZDK9nO4j8I,3239
|
25
|
-
natural_pdf/collections/mixins.py,sha256=
|
26
|
-
natural_pdf/collections/pdf_collection.py,sha256=sDVEbFMNME_2OaHIsCoR_W7V1cAATNw4ZRqKWa6nbqA,30131
|
26
|
+
natural_pdf/collections/mixins.py,sha256=u4KtnlUZZYQ74e0OXAniOv9RtuA6FhwBxsLMJLjdbpQ,5169
|
27
27
|
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
28
28
|
natural_pdf/core/element_manager.py,sha256=DRZvntd99wjXy6KeDjCq5uRhjMftZop9QklOZqlUH8M,55349
|
29
|
-
natural_pdf/core/highlighting_service.py,sha256=
|
30
|
-
natural_pdf/core/page.py,sha256=
|
31
|
-
natural_pdf/core/
|
29
|
+
natural_pdf/core/highlighting_service.py,sha256=k_SMCINeK4aUwfQLmaiyipCPL8vv33ibrCyqtlni8Bc,67921
|
30
|
+
natural_pdf/core/page.py,sha256=nQDUR4eKsUhPmEnofjmJRPITQ1RJoK3ITC0Lrtt4AHw,135510
|
31
|
+
natural_pdf/core/page_collection.py,sha256=9ff7IfO04bUkJCBZv__Z9G8A-NY7mR3OujVl54lH-FE,50985
|
32
|
+
natural_pdf/core/pdf.py,sha256=q54DyhXwAS_zAmsBd3PsCezu1wyQOYmGmB3iKfP8gAM,101884
|
33
|
+
natural_pdf/core/pdf_collection.py,sha256=8tM0qVWS1L5Hwv5cXuZ2X8znAYOjKmlERX62bksDlJU,30144
|
34
|
+
natural_pdf/core/render_spec.py,sha256=j77UrHA_g_e0RbAyn-4hkjFtqm_oaTe5KRd_Ii9izf4,12243
|
32
35
|
natural_pdf/describe/__init__.py,sha256=kIV7ORmWWB1SAur7nK2aAwR-wHqSedhKfUsaUl4hG0A,586
|
33
|
-
natural_pdf/describe/base.py,sha256=
|
36
|
+
natural_pdf/describe/base.py,sha256=Of9WVo9XuShXoeyJr0RN2CpLhF_CeiOjazl-or53RKU,18173
|
34
37
|
natural_pdf/describe/elements.py,sha256=JicXC9SJmmasqxalpCXA47-kVwv-6JnR3Xiu778aNHM,12634
|
35
38
|
natural_pdf/describe/mixin.py,sha256=rkX14aGrSz7Jvxx8Rbxv3eSfbO-_29DipwpstrV2pDQ,3109
|
36
39
|
natural_pdf/describe/summary.py,sha256=cfT4ZQkeatCDAOwWPwhtEVXisNgk6E57fAXAnoRysSU,7645
|
37
|
-
natural_pdf/elements/__init__.py,sha256=
|
38
|
-
natural_pdf/elements/base.py,sha256
|
39
|
-
natural_pdf/elements/
|
40
|
+
natural_pdf/elements/__init__.py,sha256=ICNikmLeIEuSYypz-KnkBn8xR1hR7rge4hsa1KLkyWY,42
|
41
|
+
natural_pdf/elements/base.py,sha256=jEBw5cq4mzgOYeEBrWPml2RBuVmOnwBNA4nTd7pLmMI,52292
|
42
|
+
natural_pdf/elements/element_collection.py,sha256=av2YKTxEB5lHYqw1A6aYoN-Uef2qzT9z6ibBAbJMPo4,101322
|
40
43
|
natural_pdf/elements/image.py,sha256=zu-P2Y8fRoEXf6IeZU0EYRWsgZ6I_a5vy1FA3VXTGkQ,1424
|
41
|
-
natural_pdf/elements/line.py,sha256=
|
42
|
-
natural_pdf/elements/rect.py,sha256=
|
43
|
-
natural_pdf/elements/region.py,sha256=
|
44
|
-
natural_pdf/elements/text.py,sha256=
|
44
|
+
natural_pdf/elements/line.py,sha256=TFn7KXjPT_jUQyQyabU0F7XYU4dC-qadwodJMZF4DCU,3844
|
45
|
+
natural_pdf/elements/rect.py,sha256=0lNkVkPkvbRbrFED856RXoUcTcDkeeOIs5xldKGAQT8,3324
|
46
|
+
natural_pdf/elements/region.py,sha256=PoT4e2s0gPkMa2Px0LjkThi-Jc8O0_ebl6U7UYADAQk,155289
|
47
|
+
natural_pdf/elements/text.py,sha256=IyyU3G4F3OzNZ4Oo0BTK_Wq0p0xFj5EYBWNVL4SZ-BQ,20492
|
45
48
|
natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
|
46
49
|
natural_pdf/exporters/__init__.py,sha256=QffoARekR6WzXEd05oxOytly4qPdBizuIF-SUkeFpig,643
|
47
|
-
natural_pdf/exporters/base.py,sha256=
|
50
|
+
natural_pdf/exporters/base.py,sha256=379sioW_hbkGb21sEVuJhbkkDO5MFsFtTUNO5TgG2YU,2101
|
48
51
|
natural_pdf/exporters/hocr.py,sha256=wksvJvWLSxuAfhYzg_0T2_W8eqDoMgAVC-gwZ9FoO_k,19969
|
49
52
|
natural_pdf/exporters/hocr_font.py,sha256=1wsGOMj6zoaRN2rxCwrv4MMLGawpNz984WgXpmWekgw,4574
|
50
|
-
natural_pdf/exporters/original_pdf.py,sha256=
|
51
|
-
natural_pdf/exporters/paddleocr.py,sha256=
|
52
|
-
natural_pdf/exporters/searchable_pdf.py,sha256=
|
53
|
+
natural_pdf/exporters/original_pdf.py,sha256=sJOSq3JmQa3mHK0Nn9w-seO5cY524tIkqV9iXHOMcCE,6782
|
54
|
+
natural_pdf/exporters/paddleocr.py,sha256=CyXDy6kIoRu5iquOkn1voVrHiYqCfocqPAIGkRemMJg,19395
|
55
|
+
natural_pdf/exporters/searchable_pdf.py,sha256=7RDNTV2jK5b5PhZz-v-kpYGTDCXu8FBgX-Mp9PZAoyg,16328
|
53
56
|
natural_pdf/exporters/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
54
57
|
natural_pdf/exporters/data/pdf.ttf,sha256=x4RUIJJaI9iO2DCmOVe4r4Wmao2vjZ_JDoQ2c7LvGlk,572
|
55
58
|
natural_pdf/exporters/data/sRGB.icc,sha256=KpLUuuRQt22LCqQhk9-XTXX2Jzjs6_dPAcXnWxKpV5Y,6922
|
56
59
|
natural_pdf/extraction/manager.py,sha256=sASPJZ5cWFsl8A4PyTjg2yqkyC00tRl6glfoFA6HcsM,4979
|
57
|
-
natural_pdf/extraction/mixin.py,sha256=
|
60
|
+
natural_pdf/extraction/mixin.py,sha256=ck2e48BYZg5RNderNE0QST6RSn2D6mIZYBw91nMSgp8,24970
|
58
61
|
natural_pdf/extraction/result.py,sha256=PDaCCN2LQBbHsZy0_lrQ0ROeMsnmH1WRoXWOjk9M2o4,1825
|
59
62
|
natural_pdf/flows/__init__.py,sha256=cUN4A8hTDLZSRr4PO2W_lR4z6hWpbNG8Seox-IIcrLU,277
|
60
|
-
natural_pdf/flows/collections.py,sha256=
|
61
|
-
natural_pdf/flows/element.py,sha256=
|
62
|
-
natural_pdf/flows/flow.py,sha256=
|
63
|
-
natural_pdf/flows/region.py,sha256=
|
63
|
+
natural_pdf/flows/collections.py,sha256=ErkHWdX6W_y1SjkcA_bGM0uUYRGPWWpRkHip6LHpej0,25740
|
64
|
+
natural_pdf/flows/element.py,sha256=T-9uXsIBe7mIim-mQQMep6Ja5dRfWaYIj8g1ak_Bv8c,24892
|
65
|
+
natural_pdf/flows/flow.py,sha256=If4G0feMsO3jZO5T2YXvGhb2UygMgwK3567JD42bSl0,85975
|
66
|
+
natural_pdf/flows/region.py,sha256=r_cFtBlmPi7ADN3k8oYA1s_vyz8GeQLCnYcv58Zt5eM,52263
|
64
67
|
natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
|
65
68
|
natural_pdf/ocr/engine.py,sha256=SwNlWydtHbrIghV5JD_j5B4-rnjCMYIWUIEARag-zHw,11839
|
66
69
|
natural_pdf/ocr/engine_doctr.py,sha256=ptKrupMWoulZb-R93zr9btoe94JPWU7vlJuN7OBJEIM,17740
|
@@ -69,38 +72,38 @@ natural_pdf/ocr/engine_paddle.py,sha256=OmZlXVh2SSgNePqb6sMo2Mg5boX7REA4MUY25O7h
|
|
69
72
|
natural_pdf/ocr/engine_surya.py,sha256=lOvSbZk53VKFVxRmqcQzM_0dHVdwTkRGiDZ9AWCgL1Q,5951
|
70
73
|
natural_pdf/ocr/ocr_factory.py,sha256=Ix-p1SrV6dchq6YcbbCTf2BPBHSGwu9KBnwnZ_ohOuw,5282
|
71
74
|
natural_pdf/ocr/ocr_manager.py,sha256=U8EVzNgeRQxxAbMpCEZhkF7nr_R8Fcvtp28oeV_D-Ms,16229
|
72
|
-
natural_pdf/ocr/ocr_options.py,sha256=
|
73
|
-
natural_pdf/ocr/utils.py,sha256=
|
75
|
+
natural_pdf/ocr/ocr_options.py,sha256=dX0b8pb6rllKWpweZuIqWBoN16mmibycrUhUTsOcCw0,5144
|
76
|
+
natural_pdf/ocr/utils.py,sha256=Nks2t8WQ2n4zSRc8ugbvnqNQEYfahVwCbFKkjUI2l2Y,4427
|
74
77
|
natural_pdf/qa/__init__.py,sha256=2u2KJcA71g1I0HnLD-j6yvDw1moAjo9kkLhhfoYRURM,166
|
75
|
-
natural_pdf/qa/document_qa.py,sha256=
|
78
|
+
natural_pdf/qa/document_qa.py,sha256=z3ACNdjFA5Il-I2QkxIhZRny030TBKrmcRbUcyVU4IA,20606
|
76
79
|
natural_pdf/qa/qa_result.py,sha256=8_jL5MJAHR4LcjGVe5lVsFizxWieF6VI86DWaqetYxs,2167
|
77
80
|
natural_pdf/search/__init__.py,sha256=araouqM-l_m0VlluKf6i9BybAsHnfCuh39M0-xEI3jA,4273
|
78
81
|
natural_pdf/search/lancedb_search_service.py,sha256=dfz5IiMIcAc3KFzkBDF6Ab_JDLpLHqW6DO1JDkPPu1k,14458
|
79
82
|
natural_pdf/search/numpy_search_service.py,sha256=GwPwnX_wxBPFHe-bKS5upMRZLHj8PjLQ2d84lZygzHg,10331
|
80
83
|
natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
|
81
|
-
natural_pdf/search/search_service_protocol.py,sha256=
|
84
|
+
natural_pdf/search/search_service_protocol.py,sha256=u8pbuWP96fnQEe6mnreY9DrdiDAHP6ZCY7phvSbFlP8,6697
|
82
85
|
natural_pdf/search/searchable_mixin.py,sha256=hqQ_AuID5eTGRCtKYdFLZ1zF35y73uk3x1M1VW9Il8U,23514
|
83
86
|
natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
|
84
|
-
natural_pdf/selectors/parser.py,sha256=
|
87
|
+
natural_pdf/selectors/parser.py,sha256=uWo0K4uWJFbD4kTXz9fOcPwEjs7cGR9Mfpm1jm7qKUM,38824
|
85
88
|
natural_pdf/tables/__init__.py,sha256=sCvCGbGsL6BiqlNxAYfVv003bIDLI11FmjHhaWfcU6w,104
|
86
|
-
natural_pdf/tables/result.py,sha256=
|
89
|
+
natural_pdf/tables/result.py,sha256=lfhLs5OxZ2IRLNndb8zjOQBk1SPjHx4KePzI7GlRkMg,5478
|
87
90
|
natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
|
88
91
|
natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
|
89
92
|
natural_pdf/utils/bidi_mirror.py,sha256=jJEES0xDrMfo5Me8kHMxHv4COS51PitnYi2EvKv3HCE,1151
|
90
|
-
natural_pdf/utils/debug.py,sha256=
|
91
|
-
natural_pdf/utils/highlighting.py,sha256=
|
93
|
+
natural_pdf/utils/debug.py,sha256=Epwie_jmRgknUSaEoxEyvr1lBXpfYTFOe2UQh_zSj_0,1026
|
94
|
+
natural_pdf/utils/highlighting.py,sha256=c9SvvPaJDI9bWXzq1A7zdh_0s3C4GCMngrJdkL2AMeM,719
|
92
95
|
natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2bWXo,1117
|
93
|
-
natural_pdf/utils/layout.py,sha256=
|
96
|
+
natural_pdf/utils/layout.py,sha256=tJRRzwUVP0EeqqbGzr9yOuE5qFvhjZ9A44BuItmKGaU,753
|
94
97
|
natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,215
|
95
|
-
natural_pdf/utils/packaging.py,sha256=
|
98
|
+
natural_pdf/utils/packaging.py,sha256=TM0jafwS5yVbTGC-RMi4TyWunf9cUUo9h5J6rMzkT-o,22444
|
96
99
|
natural_pdf/utils/reading_order.py,sha256=u7XyVZdKMPMK0CL1C7xFogKnZ92b0JKT068KFjQWe18,7437
|
97
|
-
natural_pdf/utils/text_extraction.py,sha256=
|
98
|
-
natural_pdf/utils/visualization.py,sha256=
|
100
|
+
natural_pdf/utils/text_extraction.py,sha256=CCwPTmMoTgtQt2P00X_ADIf6ZGNfxvjCO9FO0_HqG40,13900
|
101
|
+
natural_pdf/utils/visualization.py,sha256=zhZEHgYnZFuX7YxTHXF8Y3D97uHp2beTKMaC-JkCFwk,22364
|
99
102
|
natural_pdf/widgets/__init__.py,sha256=QTVaUmsw__FCweFYZebwPssQxxUFUMd0wpm_cUbGZJY,181
|
100
103
|
natural_pdf/widgets/viewer.py,sha256=KW3JogdR2TMg2ECUMYp8hwd060hfg8EsYBWxb5IEzBY,24942
|
101
|
-
natural_pdf-0.
|
102
|
-
optimization/memory_comparison.py,sha256=
|
103
|
-
optimization/pdf_analyzer.py,sha256=
|
104
|
+
natural_pdf-0.2.0.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
105
|
+
optimization/memory_comparison.py,sha256=0i_foFSRmppj-fY069qjwH36s_zkx-1L2ASAAlepWzA,6541
|
106
|
+
optimization/pdf_analyzer.py,sha256=HjrmTgu2qchxPeDckc5kjgxppGwd40UESrYS9Myj7pY,19352
|
104
107
|
optimization/performance_analysis.py,sha256=RjAqeE3YS1r_7qTWkY6Ng5YMbb6MXJXfXX6LoVjg_xQ,13035
|
105
108
|
optimization/test_cleanup_methods.py,sha256=PmLOL4MRgvV0j_DW9W1TS8MsGGgu57QCuq6_5y7zK3s,6209
|
106
109
|
optimization/test_memory_fix.py,sha256=A3knK74fNhvHknDbLhbTmA276x1ifl-3ivJ_7BhVSTI,6170
|
@@ -115,8 +118,8 @@ tools/bad_pdf_eval/llm_enrich.py,sha256=mCh4KGi1HmIkzGjj5rrHz1Osd7sEX1IZ_FW08H1t
|
|
115
118
|
tools/bad_pdf_eval/llm_enrich_with_retry.py,sha256=XUtPF1hUvqd3frDXT0wDTXoonuAivhjM5vgFdZ-tm0A,9373
|
116
119
|
tools/bad_pdf_eval/reporter.py,sha256=e1g__mkSB4q02p3mGWOwMhvFs7F2HJosNBxup0-LkyU,400
|
117
120
|
tools/bad_pdf_eval/utils.py,sha256=hR95XQ7qf7Cu6BdyX0L7ggGVx-ah5sK0jHWblTJUUic,4896
|
118
|
-
natural_pdf-0.
|
119
|
-
natural_pdf-0.
|
120
|
-
natural_pdf-0.
|
121
|
-
natural_pdf-0.
|
122
|
-
natural_pdf-0.
|
121
|
+
natural_pdf-0.2.0.dist-info/METADATA,sha256=XQDU_jfYdUorv20eFGGyVLl48REcto9zLXWzG-gLtxw,6951
|
122
|
+
natural_pdf-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
123
|
+
natural_pdf-0.2.0.dist-info/entry_points.txt,sha256=1R_KMv7g60UBBpRqGfw7bppsMNGdayR-iJlb9ohEk_8,81
|
124
|
+
natural_pdf-0.2.0.dist-info/top_level.txt,sha256=oZlRzSc3nZ9sV3L6kD_Di734Pp62ANrm46imFVa51qQ,58
|
125
|
+
natural_pdf-0.2.0.dist-info/RECORD,,
|
@@ -4,7 +4,7 @@ Memory comparison script to measure the effectiveness of the character duplicati
|
|
4
4
|
|
5
5
|
This script compares memory usage before and after the optimization by:
|
6
6
|
1. Testing with a text-heavy PDF
|
7
|
-
2. Measuring detailed memory usage patterns
|
7
|
+
2. Measuring detailed memory usage patterns
|
8
8
|
3. Calculating memory savings
|
9
9
|
"""
|
10
10
|
|
optimization/pdf_analyzer.py
CHANGED
@@ -14,7 +14,7 @@ import sys
|
|
14
14
|
from pathlib import Path
|
15
15
|
|
16
16
|
import natural_pdf as npdf
|
17
|
-
from natural_pdf.elements.collections import ElementCollection
|
17
|
+
from natural_pdf.elements.element_collection import ElementCollection
|
18
18
|
|
19
19
|
|
20
20
|
def analyze_pdf(
|
@@ -386,7 +386,7 @@ def analyze_pdf(
|
|
386
386
|
# 7. Render page as image
|
387
387
|
print(f"\n🖼️ RENDERING PAGE AS IMAGE:")
|
388
388
|
try:
|
389
|
-
img = page.
|
389
|
+
img = page.render(resolution=144)
|
390
390
|
print(f"Image: {img.width}x{img.height} pixels")
|
391
391
|
|
392
392
|
# Save image in output folder
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|