natural-pdf 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. natural_pdf/__init__.py +3 -0
  2. natural_pdf/analyzers/layout/base.py +1 -5
  3. natural_pdf/analyzers/layout/gemini.py +61 -51
  4. natural_pdf/analyzers/layout/layout_analyzer.py +40 -11
  5. natural_pdf/analyzers/layout/layout_manager.py +26 -84
  6. natural_pdf/analyzers/layout/layout_options.py +7 -0
  7. natural_pdf/analyzers/layout/pdfplumber_table_finder.py +142 -0
  8. natural_pdf/analyzers/layout/surya.py +46 -123
  9. natural_pdf/analyzers/layout/tatr.py +51 -4
  10. natural_pdf/analyzers/text_structure.py +3 -5
  11. natural_pdf/analyzers/utils.py +3 -3
  12. natural_pdf/classification/manager.py +422 -0
  13. natural_pdf/classification/mixin.py +163 -0
  14. natural_pdf/classification/results.py +80 -0
  15. natural_pdf/collections/mixins.py +111 -0
  16. natural_pdf/collections/pdf_collection.py +434 -15
  17. natural_pdf/core/element_manager.py +83 -0
  18. natural_pdf/core/highlighting_service.py +13 -22
  19. natural_pdf/core/page.py +578 -93
  20. natural_pdf/core/pdf.py +912 -460
  21. natural_pdf/elements/base.py +134 -40
  22. natural_pdf/elements/collections.py +712 -109
  23. natural_pdf/elements/region.py +722 -69
  24. natural_pdf/elements/text.py +4 -1
  25. natural_pdf/export/mixin.py +137 -0
  26. natural_pdf/exporters/base.py +3 -3
  27. natural_pdf/exporters/paddleocr.py +5 -4
  28. natural_pdf/extraction/manager.py +135 -0
  29. natural_pdf/extraction/mixin.py +279 -0
  30. natural_pdf/extraction/result.py +23 -0
  31. natural_pdf/ocr/__init__.py +5 -5
  32. natural_pdf/ocr/engine_doctr.py +346 -0
  33. natural_pdf/ocr/engine_easyocr.py +6 -3
  34. natural_pdf/ocr/ocr_factory.py +24 -4
  35. natural_pdf/ocr/ocr_manager.py +122 -26
  36. natural_pdf/ocr/ocr_options.py +94 -11
  37. natural_pdf/ocr/utils.py +19 -6
  38. natural_pdf/qa/document_qa.py +0 -4
  39. natural_pdf/search/__init__.py +20 -34
  40. natural_pdf/search/haystack_search_service.py +309 -265
  41. natural_pdf/search/haystack_utils.py +99 -75
  42. natural_pdf/search/search_service_protocol.py +11 -12
  43. natural_pdf/selectors/parser.py +431 -230
  44. natural_pdf/utils/debug.py +3 -3
  45. natural_pdf/utils/identifiers.py +1 -1
  46. natural_pdf/utils/locks.py +8 -0
  47. natural_pdf/utils/packaging.py +8 -6
  48. natural_pdf/utils/text_extraction.py +60 -1
  49. natural_pdf/utils/tqdm_utils.py +51 -0
  50. natural_pdf/utils/visualization.py +18 -0
  51. natural_pdf/widgets/viewer.py +4 -25
  52. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/METADATA +17 -3
  53. natural_pdf-0.1.9.dist-info/RECORD +80 -0
  54. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/WHEEL +1 -1
  55. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/top_level.txt +0 -2
  56. docs/api/index.md +0 -386
  57. docs/assets/favicon.png +0 -3
  58. docs/assets/favicon.svg +0 -3
  59. docs/assets/javascripts/custom.js +0 -17
  60. docs/assets/logo.svg +0 -3
  61. docs/assets/sample-screen.png +0 -0
  62. docs/assets/social-preview.png +0 -17
  63. docs/assets/social-preview.svg +0 -17
  64. docs/assets/stylesheets/custom.css +0 -65
  65. docs/document-qa/index.ipynb +0 -435
  66. docs/document-qa/index.md +0 -79
  67. docs/element-selection/index.ipynb +0 -915
  68. docs/element-selection/index.md +0 -229
  69. docs/finetuning/index.md +0 -176
  70. docs/index.md +0 -170
  71. docs/installation/index.md +0 -69
  72. docs/interactive-widget/index.ipynb +0 -962
  73. docs/interactive-widget/index.md +0 -12
  74. docs/layout-analysis/index.ipynb +0 -818
  75. docs/layout-analysis/index.md +0 -185
  76. docs/ocr/index.md +0 -209
  77. docs/pdf-navigation/index.ipynb +0 -314
  78. docs/pdf-navigation/index.md +0 -97
  79. docs/regions/index.ipynb +0 -816
  80. docs/regions/index.md +0 -294
  81. docs/tables/index.ipynb +0 -658
  82. docs/tables/index.md +0 -144
  83. docs/text-analysis/index.ipynb +0 -370
  84. docs/text-analysis/index.md +0 -105
  85. docs/text-extraction/index.ipynb +0 -1478
  86. docs/text-extraction/index.md +0 -292
  87. docs/tutorials/01-loading-and-extraction.ipynb +0 -194
  88. docs/tutorials/01-loading-and-extraction.md +0 -95
  89. docs/tutorials/02-finding-elements.ipynb +0 -340
  90. docs/tutorials/02-finding-elements.md +0 -149
  91. docs/tutorials/03-extracting-blocks.ipynb +0 -147
  92. docs/tutorials/03-extracting-blocks.md +0 -48
  93. docs/tutorials/04-table-extraction.ipynb +0 -114
  94. docs/tutorials/04-table-extraction.md +0 -50
  95. docs/tutorials/05-excluding-content.ipynb +0 -270
  96. docs/tutorials/05-excluding-content.md +0 -109
  97. docs/tutorials/06-document-qa.ipynb +0 -332
  98. docs/tutorials/06-document-qa.md +0 -91
  99. docs/tutorials/07-layout-analysis.ipynb +0 -288
  100. docs/tutorials/07-layout-analysis.md +0 -66
  101. docs/tutorials/07-working-with-regions.ipynb +0 -413
  102. docs/tutorials/07-working-with-regions.md +0 -151
  103. docs/tutorials/08-spatial-navigation.ipynb +0 -508
  104. docs/tutorials/08-spatial-navigation.md +0 -190
  105. docs/tutorials/09-section-extraction.ipynb +0 -2434
  106. docs/tutorials/09-section-extraction.md +0 -256
  107. docs/tutorials/10-form-field-extraction.ipynb +0 -512
  108. docs/tutorials/10-form-field-extraction.md +0 -201
  109. docs/tutorials/11-enhanced-table-processing.ipynb +0 -54
  110. docs/tutorials/11-enhanced-table-processing.md +0 -9
  111. docs/tutorials/12-ocr-integration.ipynb +0 -604
  112. docs/tutorials/12-ocr-integration.md +0 -175
  113. docs/tutorials/13-semantic-search.ipynb +0 -1328
  114. docs/tutorials/13-semantic-search.md +0 -77
  115. docs/visual-debugging/index.ipynb +0 -2970
  116. docs/visual-debugging/index.md +0 -157
  117. docs/visual-debugging/region.png +0 -0
  118. natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -415
  119. natural_pdf/templates/spa/css/style.css +0 -334
  120. natural_pdf/templates/spa/index.html +0 -31
  121. natural_pdf/templates/spa/js/app.js +0 -472
  122. natural_pdf/templates/spa/words.txt +0 -235976
  123. natural_pdf/widgets/frontend/viewer.js +0 -88
  124. natural_pdf-0.1.7.dist-info/RECORD +0 -145
  125. notebooks/Examples.ipynb +0 -1293
  126. pdfs/.gitkeep +0 -0
  127. pdfs/01-practice.pdf +0 -543
  128. pdfs/0500000US42001.pdf +0 -0
  129. pdfs/0500000US42007.pdf +0 -0
  130. pdfs/2014 Statistics.pdf +0 -0
  131. pdfs/2019 Statistics.pdf +0 -0
  132. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  133. pdfs/needs-ocr.pdf +0 -0
  134. {natural_pdf-0.1.7.dist-info → natural_pdf-0.1.9.dist-info}/licenses/LICENSE +0 -0
@@ -3,13 +3,13 @@ OCR debug utilities for natural-pdf.
3
3
  """
4
4
 
5
5
  import base64
6
+ import importlib.resources
7
+ import importlib.util
6
8
  import io
7
9
  import json
8
10
  import os
9
- import importlib.util
10
- import importlib.resources
11
11
  import webbrowser
12
- from typing import Dict, List, Any, Optional, Union, Tuple
12
+ from typing import Any, Dict, List, Optional, Tuple, Union
13
13
 
14
14
  from PIL import Image
15
15
 
@@ -2,8 +2,8 @@
2
2
  Utilities for generating consistent identifiers.
3
3
  """
4
4
 
5
- import hashlib
6
5
  import base64
6
+ import hashlib
7
7
  import os
8
8
 
9
9
 
@@ -0,0 +1,8 @@
1
"""
Locks shared by every part of the natural-pdf library that needs thread
synchronization.
"""

import threading

# Single process-wide lock for PDF rendering operations, intended to prevent
# PDFium concurrency issues. It is an RLock, so the thread that already holds
# it may acquire it again without deadlocking.
pdf_render_lock = threading.RLock()
@@ -2,23 +2,25 @@
2
2
  Utilities for packaging data for external processes, like correction tasks.
3
3
  """
4
4
 
5
- import os
6
5
  import base64
7
6
  import io
8
7
  import json
9
- import zipfile
10
- import tempfile
11
8
  import logging
9
+ import os
12
10
  import shutil
13
- from typing import Any, List, Union, Iterable, TYPE_CHECKING, Dict
11
+ import tempfile
12
+ import zipfile
13
+ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Union
14
+
14
15
  from tqdm import tqdm
16
+
15
17
  from natural_pdf.elements.text import TextElement
16
18
 
17
19
  # Import the specific PDF/Page types if possible, otherwise use Any
18
20
  if TYPE_CHECKING:
19
- from natural_pdf.core.pdf import PDF
20
- from natural_pdf.core.page import Page
21
21
  from natural_pdf.collections.pdf_collection import PDFCollection
22
+ from natural_pdf.core.page import Page
23
+ from natural_pdf.core.pdf import PDF
22
24
  else:
23
25
  PDF = Any
24
26
  Page = Any
@@ -2,7 +2,12 @@
2
2
  import logging
3
3
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
4
4
 
5
- from pdfplumber.utils.geometry import get_bbox_overlap, merge_bboxes, objects_to_bbox
5
+ from pdfplumber.utils.geometry import (
6
+ cluster_objects,
7
+ get_bbox_overlap,
8
+ merge_bboxes,
9
+ objects_to_bbox,
10
+ )
6
11
  from pdfplumber.utils.text import TEXTMAP_KWARGS, WORD_EXTRACTOR_KWARGS, chars_to_textmap
7
12
 
8
13
  if TYPE_CHECKING:
@@ -11,6 +16,60 @@ if TYPE_CHECKING:
11
16
  logger = logging.getLogger(__name__)
12
17
 
13
18
 
19
def _get_layout_kwargs(
    layout_context_bbox: Optional[Tuple[float, float, float, float]] = None,
    user_kwargs: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build the keyword arguments handed to pdfplumber's chars_to_textmap.

    The result is layered in this order: built-in defaults, values derived
    from ``layout_context_bbox`` (when given), then caller overrides.

    Args:
        layout_context_bbox: Optional ``(x0, top, x1, bottom)`` box. When
            truthy it supplies ``layout_width``, ``layout_height``,
            ``x_shift``, ``y_shift`` and ``layout_bbox``.
        user_kwargs: Optional caller overrides. Only names found in
            ``TEXTMAP_KWARGS`` or ``WORD_EXTRACTOR_KWARGS``, plus the
            ``"layout"`` flag, are applied; anything else is logged with a
            warning and dropped.

    Returns:
        The merged keyword dictionary. ``"layout"`` is always present and
        defaults to ``True`` when the caller did not set it.
    """
    # Names a caller may override; "layout" is always permitted.
    permitted = set(TEXTMAP_KWARGS) | set(WORD_EXTRACTOR_KWARGS)
    permitted.add("layout")

    # Common, well-known defaults.
    merged: Dict[str, Any] = {
        "x_tolerance": 5,
        "y_tolerance": 5,
        "x_density": 7.25,
        "y_density": 13,
        "mode": "box",
        "min_words_vertical": 1,
        "min_words_horizontal": 1,
    }

    # Size and shift values taken from the context box, plus the box itself.
    if layout_context_bbox:
        left, top, right, bottom = layout_context_bbox
        merged["layout_width"] = right - left
        merged["layout_height"] = bottom - top
        merged["x_shift"] = left
        merged["y_shift"] = top
        merged["layout_bbox"] = layout_context_bbox

    # Caller overrides win over defaults and context-derived values.
    for name, value in (user_kwargs or {}).items():
        if name in permitted:
            merged[name] = value
        else:
            logger.warning(f"Ignoring unsupported layout keyword argument: '{name}'")

    # The layout flag must always be present.
    merged.setdefault("layout", True)
    return merged
71
+
72
+
14
73
  def filter_chars_spatially(
15
74
  char_dicts: List[Dict[str, Any]],
16
75
  exclusion_regions: List["Region"],
@@ -0,0 +1,51 @@
1
+ import os
2
+ import sys
3
+
4
# Default to standard tqdm
try:
    from tqdm.std import tqdm as selected_tqdm
except ImportError:

    def selected_tqdm(*args, **kwargs):
        """Minimal stand-in used only when tqdm cannot be imported.

        Returns the iterable unchanged so ``for x in selected_tqdm(items)``
        still works without a progress bar. The iterable may be given as the
        first positional argument or as the ``iterable=`` keyword; every
        other argument (``desc``, ``total``, ...) is accepted and ignored.

        Returns ``None`` when no iterable was supplied.
        """
        # Do not test truthiness here: an empty list is a valid iterable and
        # must be handed back as-is, otherwise the caller's loop would try to
        # iterate over None.
        if args:
            return args[0]
        return kwargs.get("iterable")
14
+
15
+
16
# Switch to the notebook flavour of tqdm when a notebook front end is likely.
# The condition below checks, in order and with short-circuiting:
#   1. an IPython shell exists and its config has an "IPKernelApp" entry;
#   2. a notebook-style UI is probable: VSCODE_PID is set in the environment,
#      or ipykernel is loaded while spyder is not. This is a heuristic and
#      may not be foolproof.
# If tqdm.notebook itself cannot be imported, the current selection is kept.
try:
    from IPython import get_ipython

    ipython = get_ipython()
    if (
        ipython
        and "IPKernelApp" in ipython.config
        and (
            "VSCODE_PID" in os.environ
            or ("ipykernel" in sys.modules and "spyder" not in sys.modules)
        )
    ):
        try:
            from tqdm.notebook import tqdm as notebook_tqdm
        except ImportError:
            pass  # Notebook variant missing: keep the current selection
        else:
            selected_tqdm = notebook_tqdm
except ImportError:
    pass  # IPython not available: keep the current selection
37
+
38
+
39
def get_tqdm():
    """Return the module-level ``selected_tqdm`` chosen when this module was imported."""
    return selected_tqdm
42
+
43
+
44
# Manual smoke test: run this module directly to see which tqdm was selected
# and to watch a short progress bar.
if __name__ == "__main__":
    import time

    progress_cls = get_tqdm()
    print(f"Using tqdm class: {progress_cls}")
    for _ in progress_cls(range(10), desc="Testing tqdm"):
        time.sleep(0.1)
@@ -8,6 +8,7 @@ import math
8
8
  import random
9
9
  from typing import Any, Dict, List, Optional, Set, Tuple, Union
10
10
 
11
+ import pypdfium2
11
12
  from PIL import Image, ImageDraw, ImageFont
12
13
 
13
14
  # Define a base list of visually distinct colors for highlighting
@@ -193,6 +194,7 @@ def merge_images_with_legend(
193
194
  return image # Return original image if legend is None or empty
194
195
 
195
196
  bg_color = (255, 255, 255, 255) # Always use white for the merged background
197
+ bg_color = (255, 255, 255, 255) # Always use white for the merged background
196
198
 
197
199
  if position == "right":
198
200
  # Create a new image with extra width for the legend
@@ -230,3 +232,19 @@ def merge_images_with_legend(
230
232
  merged = image
231
233
 
232
234
  return merged
235
+
236
+
237
def render_plain_page(page, resolution):
    """Render a page to an RGB PIL image using pypdfium2.

    A fresh ``pypdfium2.PdfDocument`` is opened from ``page._page.pdf.stream``
    and the page at ``page.index`` is rendered from it.

    Args:
        page: Object exposing ``_page.pdf.stream`` (the PDF data source) and
            ``index`` (position of the page within that document).
        resolution: Divided by 72 to obtain the pypdfium2 render scale.
            Presumably a DPI value (72 PDF points per inch) - confirm
            against callers.

    Returns:
        The rendered page as a ``PIL.Image.Image`` in ``"RGB"`` mode.
    """
    doc = pypdfium2.PdfDocument(page._page.pdf.stream)
    try:
        pdf_page = doc[page.index]
        try:
            bitmap = pdf_page.render(
                scale=resolution / 72,
            )
            return bitmap.to_pil().convert("RGB")
        finally:
            # Release the page handle even if rendering or conversion fails.
            pdf_page.close()
    finally:
        # Always release the document handle; the page is closed first.
        doc.close()
@@ -3,6 +3,8 @@
3
3
  import logging
4
4
  import os
5
5
 
6
+ from natural_pdf.utils.visualization import render_plain_page
7
+
6
8
  logger = logging.getLogger(__name__)
7
9
 
8
10
  # Initialize flag and module/class variables to None
@@ -615,31 +617,7 @@ try:
615
617
 
616
618
  from PIL import Image # Ensure Image is imported
617
619
 
618
- # Render page to image using the correct method and parameter
619
- scale = 1.0 # Define scale factor used for rendering
620
- try:
621
- img_object = page.to_image(resolution=int(72 * scale)) # Call to_image
622
- # Check if .original attribute exists, otherwise assume img_object is the PIL Image
623
- if hasattr(img_object, "original") and isinstance(img_object.original, Image.Image):
624
- img = img_object.original
625
- elif isinstance(img_object, Image.Image):
626
- img = img_object
627
- else:
628
- # If it's neither, maybe it's the raw bytes? Try opening it.
629
- try:
630
- img = Image.open(BytesIO(img_object)).convert("RGB")
631
- except Exception:
632
- raise TypeError(
633
- f"page.to_image() returned unexpected type: {type(img_object)}"
634
- )
635
- logger.debug(f"Successfully rendered page {page.index} using to_image()")
636
- except Exception as render_err:
637
- logger.error(
638
- f"Error rendering page {page.index} image for widget: {render_err}",
639
- exc_info=True,
640
- )
641
- # Return None or raise the error? Let's raise for now to make it clear.
642
- raise ValueError(f"Failed to render page image: {render_err}") from render_err
620
+ img = render_plain_page(page, resolution=72)
643
621
 
644
622
  buffered = BytesIO()
645
623
  img.save(buffered, format="PNG")
@@ -687,6 +665,7 @@ try:
687
665
  original_y1 = element.bottom
688
666
  width = element.width
689
667
  height = element.height
668
+ scale = 1.0
690
669
 
691
670
  # Base element dict with required info
692
671
  elem_dict = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -17,11 +17,13 @@ Requires-Dist: colour
17
17
  Requires-Dist: numpy
18
18
  Requires-Dist: urllib3
19
19
  Requires-Dist: tqdm
20
+ Requires-Dist: pydantic
20
21
  Provides-Extra: interactive
21
22
  Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
22
23
  Provides-Extra: haystack
23
24
  Requires-Dist: haystack-ai; extra == "haystack"
24
- Requires-Dist: chroma-haystack; extra == "haystack"
25
+ Requires-Dist: lancedb-haystack; extra == "haystack"
26
+ Requires-Dist: lancedb; extra == "haystack"
25
27
  Requires-Dist: sentence-transformers; extra == "haystack"
26
28
  Requires-Dist: natural-pdf[core-ml]; extra == "haystack"
27
29
  Provides-Extra: easyocr
@@ -36,6 +38,9 @@ Requires-Dist: natural-pdf[core-ml]; extra == "layout-yolo"
36
38
  Provides-Extra: surya
37
39
  Requires-Dist: surya-ocr; extra == "surya"
38
40
  Requires-Dist: natural-pdf[core-ml]; extra == "surya"
41
+ Provides-Extra: doctr
42
+ Requires-Dist: python-doctr[torch]; extra == "doctr"
43
+ Requires-Dist: natural-pdf[core-ml]; extra == "doctr"
39
44
  Provides-Extra: qa
40
45
  Requires-Dist: natural-pdf[core-ml]; extra == "qa"
41
46
  Provides-Extra: docling
@@ -43,7 +48,10 @@ Requires-Dist: docling; extra == "docling"
43
48
  Requires-Dist: natural-pdf[core-ml]; extra == "docling"
44
49
  Provides-Extra: llm
45
50
  Requires-Dist: openai>=1.0; extra == "llm"
46
- Requires-Dist: pydantic; extra == "llm"
51
+ Provides-Extra: classification
52
+ Requires-Dist: sentence-transformers; extra == "classification"
53
+ Requires-Dist: timm; extra == "classification"
54
+ Requires-Dist: natural-pdf[core-ml]; extra == "classification"
47
55
  Provides-Extra: test
48
56
  Requires-Dist: pytest; extra == "test"
49
57
  Provides-Extra: dev
@@ -59,6 +67,9 @@ Requires-Dist: pipdeptree; extra == "dev"
59
67
  Requires-Dist: nbformat; extra == "dev"
60
68
  Requires-Dist: jupytext; extra == "dev"
61
69
  Requires-Dist: nbclient; extra == "dev"
70
+ Provides-Extra: deskew
71
+ Requires-Dist: deskew>=1.5; extra == "deskew"
72
+ Requires-Dist: img2pdf; extra == "deskew"
62
73
  Provides-Extra: all
63
74
  Requires-Dist: natural-pdf[interactive]; extra == "all"
64
75
  Requires-Dist: natural-pdf[haystack]; extra == "all"
@@ -66,10 +77,13 @@ Requires-Dist: natural-pdf[easyocr]; extra == "all"
66
77
  Requires-Dist: natural-pdf[paddle]; extra == "all"
67
78
  Requires-Dist: natural-pdf[layout_yolo]; extra == "all"
68
79
  Requires-Dist: natural-pdf[surya]; extra == "all"
80
+ Requires-Dist: natural-pdf[doctr]; extra == "all"
69
81
  Requires-Dist: natural-pdf[qa]; extra == "all"
70
82
  Requires-Dist: natural-pdf[ocr-export]; extra == "all"
71
83
  Requires-Dist: natural-pdf[docling]; extra == "all"
72
84
  Requires-Dist: natural-pdf[llm]; extra == "all"
85
+ Requires-Dist: natural-pdf[classification]; extra == "all"
86
+ Requires-Dist: natural-pdf[deskew]; extra == "all"
73
87
  Requires-Dist: natural-pdf[test]; extra == "all"
74
88
  Provides-Extra: core-ml
75
89
  Requires-Dist: torch; extra == "core-ml"
@@ -0,0 +1,80 @@
1
+ natural_pdf/__init__.py,sha256=LBrQcFOGooaUsTSAk6zrPCQqu0IM-ClvJLasexEk64k,2728
2
+ natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
3
+ natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
4
+ natural_pdf/analyzers/text_structure.py,sha256=Uhxc7aYB1jddkiwRTEPOg_Te2HfOua4z_OtgP1m3org,12794
5
+ natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
6
+ natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
7
+ natural_pdf/analyzers/layout/base.py,sha256=bYawhmc_0xqKG-xbxUSiazIU1om-aBox5Jh8qDqv-eM,6451
8
+ natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
9
+ natural_pdf/analyzers/layout/gemini.py,sha256=iuq-zZYkTS7fdAjD3ULRhqYTP9Ky2NgVHaXSLppDidw,11751
10
+ natural_pdf/analyzers/layout/layout_analyzer.py,sha256=n327Zjuf7aSzKQKChPHeiCVHinzeDGaWNyKiwQ-DkJk,15571
11
+ natural_pdf/analyzers/layout/layout_manager.py,sha256=RiVq6gUA8t9OLj-HojdzQkJtabM32iBWEBoLtS7_TjY,8115
12
+ natural_pdf/analyzers/layout/layout_options.py,sha256=Jsm4MfD_vedXvS7NCpVmuIRsIuyNyKOjvdgoRYOKZpI,4133
13
+ natural_pdf/analyzers/layout/paddle.py,sha256=gTI9ZqNd5-t4H5IByGfL32WgcE6JrdchW6jRiGI6ulM,13375
14
+ natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh6RoezjdepTnMl90SaNIrP29Pwc,5902
15
+ natural_pdf/analyzers/layout/surya.py,sha256=4RdnhRxSS3i3Ns5mFhOA9-P0xd7Ms19uZuKvUGQfEBI,9789
16
+ natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
17
+ natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
18
+ natural_pdf/classification/manager.py,sha256=CvZd3-lN3fEhcaLXr8gYfrdBGoBgzkIeE14EqjrOAzU,17730
19
+ natural_pdf/classification/mixin.py,sha256=llari9AIMNGy9sTaR7y1g5vtVNUwuCutbKnjbJRMYx4,6903
20
+ natural_pdf/classification/results.py,sha256=Ia26BQxObL5sURpFmg66bfjFPCxjcO_jeP2G-S9wRgo,2289
21
+ natural_pdf/collections/mixins.py,sha256=ufetdzHmd2_WLGBPW4eBQrzZTFpjXyVsVwBquIE47zw,4476
22
+ natural_pdf/collections/pdf_collection.py,sha256=JnsJugE-vxYsW1ZJWmMlVv_jbyG37X-9rZK1RQyKWAY,30020
23
+ natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
24
+ natural_pdf/core/element_manager.py,sha256=knRN6qXxV-6KZCj2GUOyiqRi83DjJzL77TmKGeiD08Y,25144
25
+ natural_pdf/core/highlighting_service.py,sha256=wINdRxq63_CYYA81EwuCRqhNKimn0dNKyoKWuzkirc0,31959
26
+ natural_pdf/core/page.py,sha256=icJLu6jRbkD3iOE8r60XPkQZ8FN3ZcKo5TT5MVGkGl0,105122
27
+ natural_pdf/core/pdf.py,sha256=Vw-L5149wO6RSfvb9sAfPDLqd9M1TdYoPHNEePh65y8,61201
28
+ natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
29
+ natural_pdf/elements/base.py,sha256=7vVCPQyEHifh4LyBuv0kLTqr_gNbbEMc4SoiJmLfEUQ,37585
30
+ natural_pdf/elements/collections.py,sha256=YRaJxNbJrBjgwzwuSoOtEotOKh6RaTi7NRCqKiGl514,92955
31
+ natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
32
+ natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
33
+ natural_pdf/elements/region.py,sha256=LfyB_9DCw5Tzn_G9xsjFz2FfKBOHRqGIND4DQWoA7KM,97324
34
+ natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
35
+ natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
36
+ natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
37
+ natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
38
+ natural_pdf/exporters/paddleocr.py,sha256=BYpdtJI7S8rBkI2dkRESx2epVAZOTfzqU-rjJnUQ5jQ,16249
39
+ natural_pdf/exporters/searchable_pdf.py,sha256=qsaPsnbOOaZHA_aplfZbwQnBoK9KghWm-wzbyRRomeY,16859
40
+ natural_pdf/extraction/manager.py,sha256=mUBbfgLG5Pl31wmajXwyipdEJb_dZ5I-y8GnWw7IzGo,4969
41
+ natural_pdf/extraction/mixin.py,sha256=eKbr70VibpbtfjvCE80lTFuYHzq_BoVtOHjznL_GMRA,11719
42
+ natural_pdf/extraction/result.py,sha256=c1vLguCR6l95cvg-BJJmZvL_MPg2McJaczge55bKZMg,934
43
+ natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
44
+ natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
45
+ natural_pdf/ocr/engine_doctr.py,sha256=519WpvSHgwP6Hv24tci_YHFX7XPlaxOnlREN_YG-Yys,16331
46
+ natural_pdf/ocr/engine_easyocr.py,sha256=9TbxJjmhWFrzM8mcNnZjoRtIDr6gwpuwKm4-Zfub2-8,9281
47
+ natural_pdf/ocr/engine_paddle.py,sha256=2nIrvLBBAiZG1BxVo3eFVJulA6YGoOTXw_RN98p_BUk,6184
48
+ natural_pdf/ocr/engine_surya.py,sha256=iySjG-Dahgh0cLICfbMtOcwUpRFcZjo-5Ed5Zwz-o5Y,4805
49
+ natural_pdf/ocr/ocr_factory.py,sha256=gBFXdFs7E4aCynHz06sQsAhaO3s8yhgoFgN5nyxtg9c,5221
50
+ natural_pdf/ocr/ocr_manager.py,sha256=f0q68ynGYVPkF4D3WnufxmHWD5R1jW5Z_1czTEi9JVU,13931
51
+ natural_pdf/ocr/ocr_options.py,sha256=ZvtnFn1kPkFEoWveQ13uy6B-ofquP0gHEi4tBHrjqCE,6438
52
+ natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
53
+ natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
54
+ natural_pdf/qa/document_qa.py,sha256=Jw4yyq3Vifn57D0ANmOfUlZeG8CJjBkItZBV-8ZAmos,15111
55
+ natural_pdf/search/__init__.py,sha256=gdGlW3kTCw87iXMwcIesbLkUsnv5UKJmF-_1ZMR0pfQ,3339
56
+ natural_pdf/search/haystack_search_service.py,sha256=UHr2UWNBetG3MZ1n_1LnV9oUe5fC-rY9p-V0j00JjQM,30339
57
+ natural_pdf/search/haystack_utils.py,sha256=6Hv5DeLSF4AVDrB_aFJZGB3XpSCLQ45dXLKEd4yG2tU,18978
58
+ natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
59
+ natural_pdf/search/search_service_protocol.py,sha256=Dl-Q-CrutkhZwI69scbW9EWPeYM63qxB60_EA7YqIYo,6699
60
+ natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcfft4OOBDxY,26390
61
+ natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
62
+ natural_pdf/selectors/parser.py,sha256=oI3ezkB6sIyrq_nLJrbaBaBZktXwEp_HG_gKQlVSVcs,24447
63
+ natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
64
+ natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
65
+ natural_pdf/utils/debug.py,sha256=RN7H3E6ph-GtxubCW6psW7TO8o2BxcNLiEzByTVR9fk,995
66
+ natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
67
+ natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2bWXo,1117
68
+ natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,215
69
+ natural_pdf/utils/packaging.py,sha256=Jshxp6S1zfcqoZmFhdd7WOpL--b6rBSz-Y9mYqELXIY,21581
70
+ natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
71
+ natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9YDmfXWL4,9605
72
+ natural_pdf/utils/tqdm_utils.py,sha256=wV3RXvqog26eWEFEqjt2LkGnLswmO1GXaVGSqgS7tAY,1601
73
+ natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
74
+ natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
75
+ natural_pdf/widgets/viewer.py,sha256=dC_hlPlosc08gsDc3bdAa8chOKtAoH9QFU6mrGOG9vE,39532
76
+ natural_pdf-0.1.9.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
77
+ natural_pdf-0.1.9.dist-info/METADATA,sha256=10GX2Qesem-n8sPem4lls2EEQen4KyJVdcmQf1mt9mI,7400
78
+ natural_pdf-0.1.9.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
79
+ natural_pdf-0.1.9.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
80
+ natural_pdf-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,4 +1,2 @@
1
- docs
2
1
  natural_pdf
3
- notebooks
4
2
  pdfs