natural-pdf 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- natural_pdf/__init__.py +1 -0
- natural_pdf/analyzers/layout/base.py +1 -5
- natural_pdf/analyzers/layout/gemini.py +61 -51
- natural_pdf/analyzers/layout/layout_analyzer.py +40 -11
- natural_pdf/analyzers/layout/layout_manager.py +26 -84
- natural_pdf/analyzers/layout/layout_options.py +7 -0
- natural_pdf/analyzers/layout/pdfplumber_table_finder.py +142 -0
- natural_pdf/analyzers/layout/surya.py +46 -123
- natural_pdf/analyzers/layout/tatr.py +51 -4
- natural_pdf/analyzers/text_structure.py +3 -5
- natural_pdf/analyzers/utils.py +3 -3
- natural_pdf/classification/manager.py +230 -151
- natural_pdf/classification/mixin.py +49 -35
- natural_pdf/classification/results.py +64 -46
- natural_pdf/collections/mixins.py +68 -20
- natural_pdf/collections/pdf_collection.py +177 -64
- natural_pdf/core/element_manager.py +30 -14
- natural_pdf/core/highlighting_service.py +13 -22
- natural_pdf/core/page.py +423 -101
- natural_pdf/core/pdf.py +633 -190
- natural_pdf/elements/base.py +134 -40
- natural_pdf/elements/collections.py +503 -131
- natural_pdf/elements/region.py +659 -90
- natural_pdf/elements/text.py +1 -1
- natural_pdf/export/mixin.py +137 -0
- natural_pdf/exporters/base.py +3 -3
- natural_pdf/exporters/paddleocr.py +4 -3
- natural_pdf/extraction/manager.py +50 -49
- natural_pdf/extraction/mixin.py +90 -57
- natural_pdf/extraction/result.py +9 -23
- natural_pdf/ocr/__init__.py +5 -5
- natural_pdf/ocr/engine_doctr.py +346 -0
- natural_pdf/ocr/ocr_factory.py +24 -4
- natural_pdf/ocr/ocr_manager.py +61 -25
- natural_pdf/ocr/ocr_options.py +70 -10
- natural_pdf/ocr/utils.py +6 -4
- natural_pdf/search/__init__.py +20 -34
- natural_pdf/search/haystack_search_service.py +309 -265
- natural_pdf/search/haystack_utils.py +99 -75
- natural_pdf/search/search_service_protocol.py +11 -12
- natural_pdf/selectors/parser.py +219 -143
- natural_pdf/utils/debug.py +3 -3
- natural_pdf/utils/identifiers.py +1 -1
- natural_pdf/utils/locks.py +1 -1
- natural_pdf/utils/packaging.py +8 -6
- natural_pdf/utils/text_extraction.py +24 -16
- natural_pdf/utils/tqdm_utils.py +18 -10
- natural_pdf/utils/visualization.py +18 -0
- natural_pdf/widgets/viewer.py +4 -25
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/METADATA +12 -3
- natural_pdf-0.1.9.dist-info/RECORD +80 -0
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/WHEEL +1 -1
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/top_level.txt +0 -2
- docs/api/index.md +0 -386
- docs/assets/favicon.png +0 -3
- docs/assets/favicon.svg +0 -3
- docs/assets/javascripts/custom.js +0 -17
- docs/assets/logo.svg +0 -3
- docs/assets/sample-screen.png +0 -0
- docs/assets/social-preview.png +0 -17
- docs/assets/social-preview.svg +0 -17
- docs/assets/stylesheets/custom.css +0 -65
- docs/categorizing-documents/index.md +0 -168
- docs/data-extraction/index.md +0 -87
- docs/document-qa/index.ipynb +0 -435
- docs/document-qa/index.md +0 -79
- docs/element-selection/index.ipynb +0 -969
- docs/element-selection/index.md +0 -249
- docs/finetuning/index.md +0 -176
- docs/index.md +0 -189
- docs/installation/index.md +0 -69
- docs/interactive-widget/index.ipynb +0 -962
- docs/interactive-widget/index.md +0 -12
- docs/layout-analysis/index.ipynb +0 -818
- docs/layout-analysis/index.md +0 -185
- docs/ocr/index.md +0 -256
- docs/pdf-navigation/index.ipynb +0 -314
- docs/pdf-navigation/index.md +0 -97
- docs/regions/index.ipynb +0 -816
- docs/regions/index.md +0 -294
- docs/tables/index.ipynb +0 -658
- docs/tables/index.md +0 -144
- docs/text-analysis/index.ipynb +0 -370
- docs/text-analysis/index.md +0 -105
- docs/text-extraction/index.ipynb +0 -1478
- docs/text-extraction/index.md +0 -292
- docs/tutorials/01-loading-and-extraction.ipynb +0 -1873
- docs/tutorials/01-loading-and-extraction.md +0 -95
- docs/tutorials/02-finding-elements.ipynb +0 -417
- docs/tutorials/02-finding-elements.md +0 -149
- docs/tutorials/03-extracting-blocks.ipynb +0 -152
- docs/tutorials/03-extracting-blocks.md +0 -48
- docs/tutorials/04-table-extraction.ipynb +0 -119
- docs/tutorials/04-table-extraction.md +0 -50
- docs/tutorials/05-excluding-content.ipynb +0 -275
- docs/tutorials/05-excluding-content.md +0 -109
- docs/tutorials/06-document-qa.ipynb +0 -337
- docs/tutorials/06-document-qa.md +0 -91
- docs/tutorials/07-layout-analysis.ipynb +0 -293
- docs/tutorials/07-layout-analysis.md +0 -66
- docs/tutorials/07-working-with-regions.ipynb +0 -414
- docs/tutorials/07-working-with-regions.md +0 -151
- docs/tutorials/08-spatial-navigation.ipynb +0 -513
- docs/tutorials/08-spatial-navigation.md +0 -190
- docs/tutorials/09-section-extraction.ipynb +0 -2439
- docs/tutorials/09-section-extraction.md +0 -256
- docs/tutorials/10-form-field-extraction.ipynb +0 -517
- docs/tutorials/10-form-field-extraction.md +0 -201
- docs/tutorials/11-enhanced-table-processing.ipynb +0 -59
- docs/tutorials/11-enhanced-table-processing.md +0 -9
- docs/tutorials/12-ocr-integration.ipynb +0 -3712
- docs/tutorials/12-ocr-integration.md +0 -137
- docs/tutorials/13-semantic-search.ipynb +0 -1718
- docs/tutorials/13-semantic-search.md +0 -77
- docs/visual-debugging/index.ipynb +0 -2970
- docs/visual-debugging/index.md +0 -157
- docs/visual-debugging/region.png +0 -0
- natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -420
- natural_pdf/templates/spa/css/style.css +0 -334
- natural_pdf/templates/spa/index.html +0 -31
- natural_pdf/templates/spa/js/app.js +0 -472
- natural_pdf/templates/spa/words.txt +0 -235976
- natural_pdf/widgets/frontend/viewer.js +0 -88
- natural_pdf-0.1.8.dist-info/RECORD +0 -156
- notebooks/Examples.ipynb +0 -1293
- pdfs/.gitkeep +0 -0
- pdfs/01-practice.pdf +0 -543
- pdfs/0500000US42001.pdf +0 -0
- pdfs/0500000US42007.pdf +0 -0
- pdfs/2014 Statistics.pdf +0 -0
- pdfs/2019 Statistics.pdf +0 -0
- pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- pdfs/needs-ocr.pdf +0 -0
- {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,12 @@
|
|
2
2
|
import logging
|
3
3
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
4
4
|
|
5
|
-
from pdfplumber.utils.geometry import
|
5
|
+
from pdfplumber.utils.geometry import (
|
6
|
+
cluster_objects,
|
7
|
+
get_bbox_overlap,
|
8
|
+
merge_bboxes,
|
9
|
+
objects_to_bbox,
|
10
|
+
)
|
6
11
|
from pdfplumber.utils.text import TEXTMAP_KWARGS, WORD_EXTRACTOR_KWARGS, chars_to_textmap
|
7
12
|
|
8
13
|
if TYPE_CHECKING:
|
@@ -19,23 +24,25 @@ def _get_layout_kwargs(
|
|
19
24
|
Prepares the keyword arguments for pdfplumber's chars_to_textmap based
|
20
25
|
on defaults, context bbox, and allowed user overrides.
|
21
26
|
"""
|
22
|
-
# 1. Start with an empty dict for layout kwargs
|
27
|
+
# 1. Start with an empty dict for layout kwargs
|
23
28
|
layout_kwargs = {}
|
24
|
-
|
29
|
+
|
25
30
|
# Build allowed keys set without trying to copy the constants
|
26
31
|
allowed_keys = set(TEXTMAP_KWARGS) | set(WORD_EXTRACTOR_KWARGS)
|
27
32
|
|
28
33
|
# Add common, well-known default values
|
29
|
-
layout_kwargs.update(
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
34
|
+
layout_kwargs.update(
|
35
|
+
{
|
36
|
+
"x_tolerance": 5,
|
37
|
+
"y_tolerance": 5,
|
38
|
+
"x_density": 7.25,
|
39
|
+
"y_density": 13,
|
40
|
+
"mode": "box",
|
41
|
+
"min_words_vertical": 1,
|
42
|
+
"min_words_horizontal": 1,
|
43
|
+
}
|
44
|
+
)
|
45
|
+
|
39
46
|
# 2. Apply context if provided
|
40
47
|
if layout_context_bbox:
|
41
48
|
ctx_x0, ctx_top, ctx_x1, ctx_bottom = layout_context_bbox
|
@@ -51,17 +58,18 @@ def _get_layout_kwargs(
|
|
51
58
|
for key, value in user_kwargs.items():
|
52
59
|
if key in allowed_keys:
|
53
60
|
layout_kwargs[key] = value
|
54
|
-
elif key ==
|
61
|
+
elif key == "layout": # Always allow layout flag
|
55
62
|
layout_kwargs[key] = value
|
56
63
|
else:
|
57
64
|
logger.warning(f"Ignoring unsupported layout keyword argument: '{key}'")
|
58
65
|
|
59
66
|
# 4. Ensure layout flag is present, defaulting to True
|
60
|
-
if
|
61
|
-
layout_kwargs[
|
67
|
+
if "layout" not in layout_kwargs:
|
68
|
+
layout_kwargs["layout"] = True
|
62
69
|
|
63
70
|
return layout_kwargs
|
64
71
|
|
72
|
+
|
65
73
|
def filter_chars_spatially(
|
66
74
|
char_dicts: List[Dict[str, Any]],
|
67
75
|
exclusion_regions: List["Region"],
|
natural_pdf/utils/tqdm_utils.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
import sys
|
2
1
|
import os
|
2
|
+
import sys
|
3
3
|
|
4
4
|
# Default to standard tqdm
|
5
5
|
try:
|
@@ -10,34 +10,42 @@ except ImportError:
|
|
10
10
|
iterable = args[0] if args else None
|
11
11
|
if iterable:
|
12
12
|
return iterable
|
13
|
-
return None
|
13
|
+
return None # Simple passthrough if no iterable
|
14
|
+
|
14
15
|
|
15
16
|
# Try to detect notebook environment
|
16
17
|
try:
|
17
18
|
# Check 1: Are we running in an IPython kernel?
|
18
19
|
from IPython import get_ipython
|
20
|
+
|
19
21
|
ipython = get_ipython()
|
20
|
-
if ipython and
|
22
|
+
if ipython and "IPKernelApp" in ipython.config:
|
21
23
|
# Check 2: Is it likely a notebook UI (Jupyter Notebook/Lab, VSCode, etc.)?
|
22
24
|
# This checks for common indicators. Might not be foolproof.
|
23
|
-
if
|
24
|
-
|
25
|
+
if "VSCODE_PID" in os.environ or (
|
26
|
+
"ipykernel" in sys.modules and "spyder" not in sys.modules
|
27
|
+
):
|
28
|
+
# Check 3: Can we import notebook version?
|
25
29
|
try:
|
26
30
|
from tqdm.notebook import tqdm as notebook_tqdm
|
27
|
-
|
31
|
+
|
32
|
+
selected_tqdm = notebook_tqdm # Use notebook version
|
28
33
|
except ImportError:
|
29
|
-
pass
|
34
|
+
pass # Stick with std if notebook version missing
|
30
35
|
except ImportError:
|
31
|
-
pass
|
36
|
+
pass # Stick with std if IPython not available
|
37
|
+
|
32
38
|
|
33
39
|
def get_tqdm():
|
34
40
|
"""Returns the tqdm class best suited for the detected environment."""
|
35
41
|
return selected_tqdm
|
36
42
|
|
43
|
+
|
37
44
|
# Example usage (for testing):
|
38
|
-
if __name__ ==
|
45
|
+
if __name__ == "__main__":
|
39
46
|
import time
|
47
|
+
|
40
48
|
tqdm_instance = get_tqdm()
|
41
49
|
print(f"Using tqdm class: {tqdm_instance}")
|
42
50
|
for i in tqdm_instance(range(10), desc="Testing tqdm"):
|
43
|
-
time.sleep(0.1)
|
51
|
+
time.sleep(0.1)
|
@@ -8,6 +8,7 @@ import math
|
|
8
8
|
import random
|
9
9
|
from typing import Any, Dict, List, Optional, Set, Tuple, Union
|
10
10
|
|
11
|
+
import pypdfium2
|
11
12
|
from PIL import Image, ImageDraw, ImageFont
|
12
13
|
|
13
14
|
# Define a base list of visually distinct colors for highlighting
|
@@ -193,6 +194,7 @@ def merge_images_with_legend(
|
|
193
194
|
return image # Return original image if legend is None or empty
|
194
195
|
|
195
196
|
bg_color = (255, 255, 255, 255) # Always use white for the merged background
|
197
|
+
bg_color = (255, 255, 255, 255) # Always use white for the merged background
|
196
198
|
|
197
199
|
if position == "right":
|
198
200
|
# Create a new image with extra width for the legend
|
@@ -230,3 +232,19 @@ def merge_images_with_legend(
|
|
230
232
|
merged = image
|
231
233
|
|
232
234
|
return merged
|
235
|
+
|
236
|
+
|
237
|
+
def render_plain_page(page, resolution):
|
238
|
+
doc = pypdfium2.PdfDocument(page._page.pdf.stream)
|
239
|
+
|
240
|
+
pdf_page = doc[page.index]
|
241
|
+
|
242
|
+
bitmap = pdf_page.render(
|
243
|
+
scale=resolution / 72,
|
244
|
+
)
|
245
|
+
image = bitmap.to_pil().convert("RGB")
|
246
|
+
|
247
|
+
pdf_page.close()
|
248
|
+
doc.close()
|
249
|
+
|
250
|
+
return image
|
natural_pdf/widgets/viewer.py
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
import logging
|
4
4
|
import os
|
5
5
|
|
6
|
+
from natural_pdf.utils.visualization import render_plain_page
|
7
|
+
|
6
8
|
logger = logging.getLogger(__name__)
|
7
9
|
|
8
10
|
# Initialize flag and module/class variables to None
|
@@ -615,31 +617,7 @@ try:
|
|
615
617
|
|
616
618
|
from PIL import Image # Ensure Image is imported
|
617
619
|
|
618
|
-
|
619
|
-
scale = 1.0 # Define scale factor used for rendering
|
620
|
-
try:
|
621
|
-
img_object = page.to_image(resolution=int(72 * scale)) # Call to_image
|
622
|
-
# Check if .original attribute exists, otherwise assume img_object is the PIL Image
|
623
|
-
if hasattr(img_object, "original") and isinstance(img_object.original, Image.Image):
|
624
|
-
img = img_object.original
|
625
|
-
elif isinstance(img_object, Image.Image):
|
626
|
-
img = img_object
|
627
|
-
else:
|
628
|
-
# If it's neither, maybe it's the raw bytes? Try opening it.
|
629
|
-
try:
|
630
|
-
img = Image.open(BytesIO(img_object)).convert("RGB")
|
631
|
-
except Exception:
|
632
|
-
raise TypeError(
|
633
|
-
f"page.to_image() returned unexpected type: {type(img_object)}"
|
634
|
-
)
|
635
|
-
logger.debug(f"Successfully rendered page {page.index} using to_image()")
|
636
|
-
except Exception as render_err:
|
637
|
-
logger.error(
|
638
|
-
f"Error rendering page {page.index} image for widget: {render_err}",
|
639
|
-
exc_info=True,
|
640
|
-
)
|
641
|
-
# Return None or raise the error? Let's raise for now to make it clear.
|
642
|
-
raise ValueError(f"Failed to render page image: {render_err}") from render_err
|
620
|
+
img = render_plain_page(page, resolution=72)
|
643
621
|
|
644
622
|
buffered = BytesIO()
|
645
623
|
img.save(buffered, format="PNG")
|
@@ -687,6 +665,7 @@ try:
|
|
687
665
|
original_y1 = element.bottom
|
688
666
|
width = element.width
|
689
667
|
height = element.height
|
668
|
+
scale = 1.0
|
690
669
|
|
691
670
|
# Base element dict with required info
|
692
671
|
elem_dict = {
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.8
|
3
|
+
Version: 0.1.9
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -17,11 +17,13 @@ Requires-Dist: colour
|
|
17
17
|
Requires-Dist: numpy
|
18
18
|
Requires-Dist: urllib3
|
19
19
|
Requires-Dist: tqdm
|
20
|
+
Requires-Dist: pydantic
|
20
21
|
Provides-Extra: interactive
|
21
22
|
Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
|
22
23
|
Provides-Extra: haystack
|
23
24
|
Requires-Dist: haystack-ai; extra == "haystack"
|
24
|
-
Requires-Dist:
|
25
|
+
Requires-Dist: lancedb-haystack; extra == "haystack"
|
26
|
+
Requires-Dist: lancedb; extra == "haystack"
|
25
27
|
Requires-Dist: sentence-transformers; extra == "haystack"
|
26
28
|
Requires-Dist: natural-pdf[core-ml]; extra == "haystack"
|
27
29
|
Provides-Extra: easyocr
|
@@ -36,6 +38,9 @@ Requires-Dist: natural-pdf[core-ml]; extra == "layout-yolo"
|
|
36
38
|
Provides-Extra: surya
|
37
39
|
Requires-Dist: surya-ocr; extra == "surya"
|
38
40
|
Requires-Dist: natural-pdf[core-ml]; extra == "surya"
|
41
|
+
Provides-Extra: doctr
|
42
|
+
Requires-Dist: python-doctr[torch]; extra == "doctr"
|
43
|
+
Requires-Dist: natural-pdf[core-ml]; extra == "doctr"
|
39
44
|
Provides-Extra: qa
|
40
45
|
Requires-Dist: natural-pdf[core-ml]; extra == "qa"
|
41
46
|
Provides-Extra: docling
|
@@ -43,7 +48,6 @@ Requires-Dist: docling; extra == "docling"
|
|
43
48
|
Requires-Dist: natural-pdf[core-ml]; extra == "docling"
|
44
49
|
Provides-Extra: llm
|
45
50
|
Requires-Dist: openai>=1.0; extra == "llm"
|
46
|
-
Requires-Dist: pydantic; extra == "llm"
|
47
51
|
Provides-Extra: classification
|
48
52
|
Requires-Dist: sentence-transformers; extra == "classification"
|
49
53
|
Requires-Dist: timm; extra == "classification"
|
@@ -63,6 +67,9 @@ Requires-Dist: pipdeptree; extra == "dev"
|
|
63
67
|
Requires-Dist: nbformat; extra == "dev"
|
64
68
|
Requires-Dist: jupytext; extra == "dev"
|
65
69
|
Requires-Dist: nbclient; extra == "dev"
|
70
|
+
Provides-Extra: deskew
|
71
|
+
Requires-Dist: deskew>=1.5; extra == "deskew"
|
72
|
+
Requires-Dist: img2pdf; extra == "deskew"
|
66
73
|
Provides-Extra: all
|
67
74
|
Requires-Dist: natural-pdf[interactive]; extra == "all"
|
68
75
|
Requires-Dist: natural-pdf[haystack]; extra == "all"
|
@@ -70,11 +77,13 @@ Requires-Dist: natural-pdf[easyocr]; extra == "all"
|
|
70
77
|
Requires-Dist: natural-pdf[paddle]; extra == "all"
|
71
78
|
Requires-Dist: natural-pdf[layout_yolo]; extra == "all"
|
72
79
|
Requires-Dist: natural-pdf[surya]; extra == "all"
|
80
|
+
Requires-Dist: natural-pdf[doctr]; extra == "all"
|
73
81
|
Requires-Dist: natural-pdf[qa]; extra == "all"
|
74
82
|
Requires-Dist: natural-pdf[ocr-export]; extra == "all"
|
75
83
|
Requires-Dist: natural-pdf[docling]; extra == "all"
|
76
84
|
Requires-Dist: natural-pdf[llm]; extra == "all"
|
77
85
|
Requires-Dist: natural-pdf[classification]; extra == "all"
|
86
|
+
Requires-Dist: natural-pdf[deskew]; extra == "all"
|
78
87
|
Requires-Dist: natural-pdf[test]; extra == "all"
|
79
88
|
Provides-Extra: core-ml
|
80
89
|
Requires-Dist: torch; extra == "core-ml"
|
@@ -0,0 +1,80 @@
|
|
1
|
+
natural_pdf/__init__.py,sha256=LBrQcFOGooaUsTSAk6zrPCQqu0IM-ClvJLasexEk64k,2728
|
2
|
+
natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
|
3
|
+
natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
|
4
|
+
natural_pdf/analyzers/text_structure.py,sha256=Uhxc7aYB1jddkiwRTEPOg_Te2HfOua4z_OtgP1m3org,12794
|
5
|
+
natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
|
6
|
+
natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
|
7
|
+
natural_pdf/analyzers/layout/base.py,sha256=bYawhmc_0xqKG-xbxUSiazIU1om-aBox5Jh8qDqv-eM,6451
|
8
|
+
natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
|
9
|
+
natural_pdf/analyzers/layout/gemini.py,sha256=iuq-zZYkTS7fdAjD3ULRhqYTP9Ky2NgVHaXSLppDidw,11751
|
10
|
+
natural_pdf/analyzers/layout/layout_analyzer.py,sha256=n327Zjuf7aSzKQKChPHeiCVHinzeDGaWNyKiwQ-DkJk,15571
|
11
|
+
natural_pdf/analyzers/layout/layout_manager.py,sha256=RiVq6gUA8t9OLj-HojdzQkJtabM32iBWEBoLtS7_TjY,8115
|
12
|
+
natural_pdf/analyzers/layout/layout_options.py,sha256=Jsm4MfD_vedXvS7NCpVmuIRsIuyNyKOjvdgoRYOKZpI,4133
|
13
|
+
natural_pdf/analyzers/layout/paddle.py,sha256=gTI9ZqNd5-t4H5IByGfL32WgcE6JrdchW6jRiGI6ulM,13375
|
14
|
+
natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh6RoezjdepTnMl90SaNIrP29Pwc,5902
|
15
|
+
natural_pdf/analyzers/layout/surya.py,sha256=4RdnhRxSS3i3Ns5mFhOA9-P0xd7Ms19uZuKvUGQfEBI,9789
|
16
|
+
natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
|
17
|
+
natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
|
18
|
+
natural_pdf/classification/manager.py,sha256=CvZd3-lN3fEhcaLXr8gYfrdBGoBgzkIeE14EqjrOAzU,17730
|
19
|
+
natural_pdf/classification/mixin.py,sha256=llari9AIMNGy9sTaR7y1g5vtVNUwuCutbKnjbJRMYx4,6903
|
20
|
+
natural_pdf/classification/results.py,sha256=Ia26BQxObL5sURpFmg66bfjFPCxjcO_jeP2G-S9wRgo,2289
|
21
|
+
natural_pdf/collections/mixins.py,sha256=ufetdzHmd2_WLGBPW4eBQrzZTFpjXyVsVwBquIE47zw,4476
|
22
|
+
natural_pdf/collections/pdf_collection.py,sha256=JnsJugE-vxYsW1ZJWmMlVv_jbyG37X-9rZK1RQyKWAY,30020
|
23
|
+
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
24
|
+
natural_pdf/core/element_manager.py,sha256=knRN6qXxV-6KZCj2GUOyiqRi83DjJzL77TmKGeiD08Y,25144
|
25
|
+
natural_pdf/core/highlighting_service.py,sha256=wINdRxq63_CYYA81EwuCRqhNKimn0dNKyoKWuzkirc0,31959
|
26
|
+
natural_pdf/core/page.py,sha256=icJLu6jRbkD3iOE8r60XPkQZ8FN3ZcKo5TT5MVGkGl0,105122
|
27
|
+
natural_pdf/core/pdf.py,sha256=Vw-L5149wO6RSfvb9sAfPDLqd9M1TdYoPHNEePh65y8,61201
|
28
|
+
natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
|
29
|
+
natural_pdf/elements/base.py,sha256=7vVCPQyEHifh4LyBuv0kLTqr_gNbbEMc4SoiJmLfEUQ,37585
|
30
|
+
natural_pdf/elements/collections.py,sha256=YRaJxNbJrBjgwzwuSoOtEotOKh6RaTi7NRCqKiGl514,92955
|
31
|
+
natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
|
32
|
+
natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
|
33
|
+
natural_pdf/elements/region.py,sha256=LfyB_9DCw5Tzn_G9xsjFz2FfKBOHRqGIND4DQWoA7KM,97324
|
34
|
+
natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
|
35
|
+
natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
|
36
|
+
natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
|
37
|
+
natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
|
38
|
+
natural_pdf/exporters/paddleocr.py,sha256=BYpdtJI7S8rBkI2dkRESx2epVAZOTfzqU-rjJnUQ5jQ,16249
|
39
|
+
natural_pdf/exporters/searchable_pdf.py,sha256=qsaPsnbOOaZHA_aplfZbwQnBoK9KghWm-wzbyRRomeY,16859
|
40
|
+
natural_pdf/extraction/manager.py,sha256=mUBbfgLG5Pl31wmajXwyipdEJb_dZ5I-y8GnWw7IzGo,4969
|
41
|
+
natural_pdf/extraction/mixin.py,sha256=eKbr70VibpbtfjvCE80lTFuYHzq_BoVtOHjznL_GMRA,11719
|
42
|
+
natural_pdf/extraction/result.py,sha256=c1vLguCR6l95cvg-BJJmZvL_MPg2McJaczge55bKZMg,934
|
43
|
+
natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
|
44
|
+
natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
|
45
|
+
natural_pdf/ocr/engine_doctr.py,sha256=519WpvSHgwP6Hv24tci_YHFX7XPlaxOnlREN_YG-Yys,16331
|
46
|
+
natural_pdf/ocr/engine_easyocr.py,sha256=9TbxJjmhWFrzM8mcNnZjoRtIDr6gwpuwKm4-Zfub2-8,9281
|
47
|
+
natural_pdf/ocr/engine_paddle.py,sha256=2nIrvLBBAiZG1BxVo3eFVJulA6YGoOTXw_RN98p_BUk,6184
|
48
|
+
natural_pdf/ocr/engine_surya.py,sha256=iySjG-Dahgh0cLICfbMtOcwUpRFcZjo-5Ed5Zwz-o5Y,4805
|
49
|
+
natural_pdf/ocr/ocr_factory.py,sha256=gBFXdFs7E4aCynHz06sQsAhaO3s8yhgoFgN5nyxtg9c,5221
|
50
|
+
natural_pdf/ocr/ocr_manager.py,sha256=f0q68ynGYVPkF4D3WnufxmHWD5R1jW5Z_1czTEi9JVU,13931
|
51
|
+
natural_pdf/ocr/ocr_options.py,sha256=ZvtnFn1kPkFEoWveQ13uy6B-ofquP0gHEi4tBHrjqCE,6438
|
52
|
+
natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
|
53
|
+
natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
|
54
|
+
natural_pdf/qa/document_qa.py,sha256=Jw4yyq3Vifn57D0ANmOfUlZeG8CJjBkItZBV-8ZAmos,15111
|
55
|
+
natural_pdf/search/__init__.py,sha256=gdGlW3kTCw87iXMwcIesbLkUsnv5UKJmF-_1ZMR0pfQ,3339
|
56
|
+
natural_pdf/search/haystack_search_service.py,sha256=UHr2UWNBetG3MZ1n_1LnV9oUe5fC-rY9p-V0j00JjQM,30339
|
57
|
+
natural_pdf/search/haystack_utils.py,sha256=6Hv5DeLSF4AVDrB_aFJZGB3XpSCLQ45dXLKEd4yG2tU,18978
|
58
|
+
natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
|
59
|
+
natural_pdf/search/search_service_protocol.py,sha256=Dl-Q-CrutkhZwI69scbW9EWPeYM63qxB60_EA7YqIYo,6699
|
60
|
+
natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcfft4OOBDxY,26390
|
61
|
+
natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
|
62
|
+
natural_pdf/selectors/parser.py,sha256=oI3ezkB6sIyrq_nLJrbaBaBZktXwEp_HG_gKQlVSVcs,24447
|
63
|
+
natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
|
64
|
+
natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
|
65
|
+
natural_pdf/utils/debug.py,sha256=RN7H3E6ph-GtxubCW6psW7TO8o2BxcNLiEzByTVR9fk,995
|
66
|
+
natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
|
67
|
+
natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2bWXo,1117
|
68
|
+
natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,215
|
69
|
+
natural_pdf/utils/packaging.py,sha256=Jshxp6S1zfcqoZmFhdd7WOpL--b6rBSz-Y9mYqELXIY,21581
|
70
|
+
natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
|
71
|
+
natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9YDmfXWL4,9605
|
72
|
+
natural_pdf/utils/tqdm_utils.py,sha256=wV3RXvqog26eWEFEqjt2LkGnLswmO1GXaVGSqgS7tAY,1601
|
73
|
+
natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
|
74
|
+
natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
|
75
|
+
natural_pdf/widgets/viewer.py,sha256=dC_hlPlosc08gsDc3bdAa8chOKtAoH9QFU6mrGOG9vE,39532
|
76
|
+
natural_pdf-0.1.9.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
77
|
+
natural_pdf-0.1.9.dist-info/METADATA,sha256=10GX2Qesem-n8sPem4lls2EEQen4KyJVdcmQf1mt9mI,7400
|
78
|
+
natural_pdf-0.1.9.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
|
79
|
+
natural_pdf-0.1.9.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
|
80
|
+
natural_pdf-0.1.9.dist-info/RECORD,,
|