natural-pdf 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. natural_pdf/__init__.py +1 -0
  2. natural_pdf/analyzers/layout/base.py +1 -5
  3. natural_pdf/analyzers/layout/gemini.py +61 -51
  4. natural_pdf/analyzers/layout/layout_analyzer.py +40 -11
  5. natural_pdf/analyzers/layout/layout_manager.py +26 -84
  6. natural_pdf/analyzers/layout/layout_options.py +7 -0
  7. natural_pdf/analyzers/layout/pdfplumber_table_finder.py +142 -0
  8. natural_pdf/analyzers/layout/surya.py +46 -123
  9. natural_pdf/analyzers/layout/tatr.py +51 -4
  10. natural_pdf/analyzers/text_structure.py +3 -5
  11. natural_pdf/analyzers/utils.py +3 -3
  12. natural_pdf/classification/manager.py +230 -151
  13. natural_pdf/classification/mixin.py +49 -35
  14. natural_pdf/classification/results.py +64 -46
  15. natural_pdf/collections/mixins.py +68 -20
  16. natural_pdf/collections/pdf_collection.py +177 -64
  17. natural_pdf/core/element_manager.py +30 -14
  18. natural_pdf/core/highlighting_service.py +13 -22
  19. natural_pdf/core/page.py +423 -101
  20. natural_pdf/core/pdf.py +633 -190
  21. natural_pdf/elements/base.py +134 -40
  22. natural_pdf/elements/collections.py +503 -131
  23. natural_pdf/elements/region.py +659 -90
  24. natural_pdf/elements/text.py +1 -1
  25. natural_pdf/export/mixin.py +137 -0
  26. natural_pdf/exporters/base.py +3 -3
  27. natural_pdf/exporters/paddleocr.py +4 -3
  28. natural_pdf/extraction/manager.py +50 -49
  29. natural_pdf/extraction/mixin.py +90 -57
  30. natural_pdf/extraction/result.py +9 -23
  31. natural_pdf/ocr/__init__.py +5 -5
  32. natural_pdf/ocr/engine_doctr.py +346 -0
  33. natural_pdf/ocr/ocr_factory.py +24 -4
  34. natural_pdf/ocr/ocr_manager.py +61 -25
  35. natural_pdf/ocr/ocr_options.py +70 -10
  36. natural_pdf/ocr/utils.py +6 -4
  37. natural_pdf/search/__init__.py +20 -34
  38. natural_pdf/search/haystack_search_service.py +309 -265
  39. natural_pdf/search/haystack_utils.py +99 -75
  40. natural_pdf/search/search_service_protocol.py +11 -12
  41. natural_pdf/selectors/parser.py +219 -143
  42. natural_pdf/utils/debug.py +3 -3
  43. natural_pdf/utils/identifiers.py +1 -1
  44. natural_pdf/utils/locks.py +1 -1
  45. natural_pdf/utils/packaging.py +8 -6
  46. natural_pdf/utils/text_extraction.py +24 -16
  47. natural_pdf/utils/tqdm_utils.py +18 -10
  48. natural_pdf/utils/visualization.py +18 -0
  49. natural_pdf/widgets/viewer.py +4 -25
  50. {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/METADATA +12 -3
  51. natural_pdf-0.1.9.dist-info/RECORD +80 -0
  52. {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/WHEEL +1 -1
  53. {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/top_level.txt +0 -2
  54. docs/api/index.md +0 -386
  55. docs/assets/favicon.png +0 -3
  56. docs/assets/favicon.svg +0 -3
  57. docs/assets/javascripts/custom.js +0 -17
  58. docs/assets/logo.svg +0 -3
  59. docs/assets/sample-screen.png +0 -0
  60. docs/assets/social-preview.png +0 -17
  61. docs/assets/social-preview.svg +0 -17
  62. docs/assets/stylesheets/custom.css +0 -65
  63. docs/categorizing-documents/index.md +0 -168
  64. docs/data-extraction/index.md +0 -87
  65. docs/document-qa/index.ipynb +0 -435
  66. docs/document-qa/index.md +0 -79
  67. docs/element-selection/index.ipynb +0 -969
  68. docs/element-selection/index.md +0 -249
  69. docs/finetuning/index.md +0 -176
  70. docs/index.md +0 -189
  71. docs/installation/index.md +0 -69
  72. docs/interactive-widget/index.ipynb +0 -962
  73. docs/interactive-widget/index.md +0 -12
  74. docs/layout-analysis/index.ipynb +0 -818
  75. docs/layout-analysis/index.md +0 -185
  76. docs/ocr/index.md +0 -256
  77. docs/pdf-navigation/index.ipynb +0 -314
  78. docs/pdf-navigation/index.md +0 -97
  79. docs/regions/index.ipynb +0 -816
  80. docs/regions/index.md +0 -294
  81. docs/tables/index.ipynb +0 -658
  82. docs/tables/index.md +0 -144
  83. docs/text-analysis/index.ipynb +0 -370
  84. docs/text-analysis/index.md +0 -105
  85. docs/text-extraction/index.ipynb +0 -1478
  86. docs/text-extraction/index.md +0 -292
  87. docs/tutorials/01-loading-and-extraction.ipynb +0 -1873
  88. docs/tutorials/01-loading-and-extraction.md +0 -95
  89. docs/tutorials/02-finding-elements.ipynb +0 -417
  90. docs/tutorials/02-finding-elements.md +0 -149
  91. docs/tutorials/03-extracting-blocks.ipynb +0 -152
  92. docs/tutorials/03-extracting-blocks.md +0 -48
  93. docs/tutorials/04-table-extraction.ipynb +0 -119
  94. docs/tutorials/04-table-extraction.md +0 -50
  95. docs/tutorials/05-excluding-content.ipynb +0 -275
  96. docs/tutorials/05-excluding-content.md +0 -109
  97. docs/tutorials/06-document-qa.ipynb +0 -337
  98. docs/tutorials/06-document-qa.md +0 -91
  99. docs/tutorials/07-layout-analysis.ipynb +0 -293
  100. docs/tutorials/07-layout-analysis.md +0 -66
  101. docs/tutorials/07-working-with-regions.ipynb +0 -414
  102. docs/tutorials/07-working-with-regions.md +0 -151
  103. docs/tutorials/08-spatial-navigation.ipynb +0 -513
  104. docs/tutorials/08-spatial-navigation.md +0 -190
  105. docs/tutorials/09-section-extraction.ipynb +0 -2439
  106. docs/tutorials/09-section-extraction.md +0 -256
  107. docs/tutorials/10-form-field-extraction.ipynb +0 -517
  108. docs/tutorials/10-form-field-extraction.md +0 -201
  109. docs/tutorials/11-enhanced-table-processing.ipynb +0 -59
  110. docs/tutorials/11-enhanced-table-processing.md +0 -9
  111. docs/tutorials/12-ocr-integration.ipynb +0 -3712
  112. docs/tutorials/12-ocr-integration.md +0 -137
  113. docs/tutorials/13-semantic-search.ipynb +0 -1718
  114. docs/tutorials/13-semantic-search.md +0 -77
  115. docs/visual-debugging/index.ipynb +0 -2970
  116. docs/visual-debugging/index.md +0 -157
  117. docs/visual-debugging/region.png +0 -0
  118. natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -420
  119. natural_pdf/templates/spa/css/style.css +0 -334
  120. natural_pdf/templates/spa/index.html +0 -31
  121. natural_pdf/templates/spa/js/app.js +0 -472
  122. natural_pdf/templates/spa/words.txt +0 -235976
  123. natural_pdf/widgets/frontend/viewer.js +0 -88
  124. natural_pdf-0.1.8.dist-info/RECORD +0 -156
  125. notebooks/Examples.ipynb +0 -1293
  126. pdfs/.gitkeep +0 -0
  127. pdfs/01-practice.pdf +0 -543
  128. pdfs/0500000US42001.pdf +0 -0
  129. pdfs/0500000US42007.pdf +0 -0
  130. pdfs/2014 Statistics.pdf +0 -0
  131. pdfs/2019 Statistics.pdf +0 -0
  132. pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  133. pdfs/needs-ocr.pdf +0 -0
  134. {natural_pdf-0.1.8.dist-info → natural_pdf-0.1.9.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,12 @@
2
2
  import logging
3
3
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
4
4
 
5
- from pdfplumber.utils.geometry import get_bbox_overlap, merge_bboxes, objects_to_bbox, cluster_objects
5
+ from pdfplumber.utils.geometry import (
6
+ cluster_objects,
7
+ get_bbox_overlap,
8
+ merge_bboxes,
9
+ objects_to_bbox,
10
+ )
6
11
  from pdfplumber.utils.text import TEXTMAP_KWARGS, WORD_EXTRACTOR_KWARGS, chars_to_textmap
7
12
 
8
13
  if TYPE_CHECKING:
@@ -19,23 +24,25 @@ def _get_layout_kwargs(
19
24
  Prepares the keyword arguments for pdfplumber's chars_to_textmap based
20
25
  on defaults, context bbox, and allowed user overrides.
21
26
  """
22
- # 1. Start with an empty dict for layout kwargs
27
+ # 1. Start with an empty dict for layout kwargs
23
28
  layout_kwargs = {}
24
-
29
+
25
30
  # Build allowed keys set without trying to copy the constants
26
31
  allowed_keys = set(TEXTMAP_KWARGS) | set(WORD_EXTRACTOR_KWARGS)
27
32
 
28
33
  # Add common, well-known default values
29
- layout_kwargs.update({
30
- 'x_tolerance': 5,
31
- 'y_tolerance': 5,
32
- 'x_density': 7.25,
33
- 'y_density': 13,
34
- 'mode': 'box',
35
- 'min_words_vertical': 1,
36
- 'min_words_horizontal': 1,
37
- })
38
-
34
+ layout_kwargs.update(
35
+ {
36
+ "x_tolerance": 5,
37
+ "y_tolerance": 5,
38
+ "x_density": 7.25,
39
+ "y_density": 13,
40
+ "mode": "box",
41
+ "min_words_vertical": 1,
42
+ "min_words_horizontal": 1,
43
+ }
44
+ )
45
+
39
46
  # 2. Apply context if provided
40
47
  if layout_context_bbox:
41
48
  ctx_x0, ctx_top, ctx_x1, ctx_bottom = layout_context_bbox
@@ -51,17 +58,18 @@ def _get_layout_kwargs(
51
58
  for key, value in user_kwargs.items():
52
59
  if key in allowed_keys:
53
60
  layout_kwargs[key] = value
54
- elif key == 'layout': # Always allow layout flag
61
+ elif key == "layout": # Always allow layout flag
55
62
  layout_kwargs[key] = value
56
63
  else:
57
64
  logger.warning(f"Ignoring unsupported layout keyword argument: '{key}'")
58
65
 
59
66
  # 4. Ensure layout flag is present, defaulting to True
60
- if 'layout' not in layout_kwargs:
61
- layout_kwargs['layout'] = True
67
+ if "layout" not in layout_kwargs:
68
+ layout_kwargs["layout"] = True
62
69
 
63
70
  return layout_kwargs
64
71
 
72
+
65
73
  def filter_chars_spatially(
66
74
  char_dicts: List[Dict[str, Any]],
67
75
  exclusion_regions: List["Region"],
@@ -1,5 +1,5 @@
1
- import sys
2
1
  import os
2
+ import sys
3
3
 
4
4
  # Default to standard tqdm
5
5
  try:
@@ -10,34 +10,42 @@ except ImportError:
10
10
  iterable = args[0] if args else None
11
11
  if iterable:
12
12
  return iterable
13
- return None # Simple passthrough if no iterable
13
+ return None # Simple passthrough if no iterable
14
+
14
15
 
15
16
  # Try to detect notebook environment
16
17
  try:
17
18
  # Check 1: Are we running in an IPython kernel?
18
19
  from IPython import get_ipython
20
+
19
21
  ipython = get_ipython()
20
- if ipython and 'IPKernelApp' in ipython.config:
22
+ if ipython and "IPKernelApp" in ipython.config:
21
23
  # Check 2: Is it likely a notebook UI (Jupyter Notebook/Lab, VSCode, etc.)?
22
24
  # This checks for common indicators. Might not be foolproof.
23
- if 'VSCODE_PID' in os.environ or ('ipykernel' in sys.modules and 'spyder' not in sys.modules):
24
- # Check 3: Can we import notebook version?
25
+ if "VSCODE_PID" in os.environ or (
26
+ "ipykernel" in sys.modules and "spyder" not in sys.modules
27
+ ):
28
+ # Check 3: Can we import notebook version?
25
29
  try:
26
30
  from tqdm.notebook import tqdm as notebook_tqdm
27
- selected_tqdm = notebook_tqdm # Use notebook version
31
+
32
+ selected_tqdm = notebook_tqdm # Use notebook version
28
33
  except ImportError:
29
- pass # Stick with std if notebook version missing
34
+ pass # Stick with std if notebook version missing
30
35
  except ImportError:
31
- pass # Stick with std if IPython not available
36
+ pass # Stick with std if IPython not available
37
+
32
38
 
33
39
  def get_tqdm():
34
40
  """Returns the tqdm class best suited for the detected environment."""
35
41
  return selected_tqdm
36
42
 
43
+
37
44
  # Example usage (for testing):
38
- if __name__ == '__main__':
45
+ if __name__ == "__main__":
39
46
  import time
47
+
40
48
  tqdm_instance = get_tqdm()
41
49
  print(f"Using tqdm class: {tqdm_instance}")
42
50
  for i in tqdm_instance(range(10), desc="Testing tqdm"):
43
- time.sleep(0.1)
51
+ time.sleep(0.1)
@@ -8,6 +8,7 @@ import math
8
8
  import random
9
9
  from typing import Any, Dict, List, Optional, Set, Tuple, Union
10
10
 
11
+ import pypdfium2
11
12
  from PIL import Image, ImageDraw, ImageFont
12
13
 
13
14
  # Define a base list of visually distinct colors for highlighting
@@ -193,6 +194,7 @@ def merge_images_with_legend(
193
194
  return image # Return original image if legend is None or empty
194
195
 
195
196
  bg_color = (255, 255, 255, 255) # Always use white for the merged background
197
+ bg_color = (255, 255, 255, 255) # Always use white for the merged background
196
198
 
197
199
  if position == "right":
198
200
  # Create a new image with extra width for the legend
@@ -230,3 +232,19 @@ def merge_images_with_legend(
230
232
  merged = image
231
233
 
232
234
  return merged
235
+
236
+
237
+ def render_plain_page(page, resolution):
238
+ doc = pypdfium2.PdfDocument(page._page.pdf.stream)
239
+
240
+ pdf_page = doc[page.index]
241
+
242
+ bitmap = pdf_page.render(
243
+ scale=resolution / 72,
244
+ )
245
+ image = bitmap.to_pil().convert("RGB")
246
+
247
+ pdf_page.close()
248
+ doc.close()
249
+
250
+ return image
@@ -3,6 +3,8 @@
3
3
  import logging
4
4
  import os
5
5
 
6
+ from natural_pdf.utils.visualization import render_plain_page
7
+
6
8
  logger = logging.getLogger(__name__)
7
9
 
8
10
  # Initialize flag and module/class variables to None
@@ -615,31 +617,7 @@ try:
615
617
 
616
618
  from PIL import Image # Ensure Image is imported
617
619
 
618
- # Render page to image using the correct method and parameter
619
- scale = 1.0 # Define scale factor used for rendering
620
- try:
621
- img_object = page.to_image(resolution=int(72 * scale)) # Call to_image
622
- # Check if .original attribute exists, otherwise assume img_object is the PIL Image
623
- if hasattr(img_object, "original") and isinstance(img_object.original, Image.Image):
624
- img = img_object.original
625
- elif isinstance(img_object, Image.Image):
626
- img = img_object
627
- else:
628
- # If it's neither, maybe it's the raw bytes? Try opening it.
629
- try:
630
- img = Image.open(BytesIO(img_object)).convert("RGB")
631
- except Exception:
632
- raise TypeError(
633
- f"page.to_image() returned unexpected type: {type(img_object)}"
634
- )
635
- logger.debug(f"Successfully rendered page {page.index} using to_image()")
636
- except Exception as render_err:
637
- logger.error(
638
- f"Error rendering page {page.index} image for widget: {render_err}",
639
- exc_info=True,
640
- )
641
- # Return None or raise the error? Let's raise for now to make it clear.
642
- raise ValueError(f"Failed to render page image: {render_err}") from render_err
620
+ img = render_plain_page(page, resolution=72)
643
621
 
644
622
  buffered = BytesIO()
645
623
  img.save(buffered, format="PNG")
@@ -687,6 +665,7 @@ try:
687
665
  original_y1 = element.bottom
688
666
  width = element.width
689
667
  height = element.height
668
+ scale = 1.0
690
669
 
691
670
  # Base element dict with required info
692
671
  elem_dict = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -17,11 +17,13 @@ Requires-Dist: colour
17
17
  Requires-Dist: numpy
18
18
  Requires-Dist: urllib3
19
19
  Requires-Dist: tqdm
20
+ Requires-Dist: pydantic
20
21
  Provides-Extra: interactive
21
22
  Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
22
23
  Provides-Extra: haystack
23
24
  Requires-Dist: haystack-ai; extra == "haystack"
24
- Requires-Dist: chroma-haystack; extra == "haystack"
25
+ Requires-Dist: lancedb-haystack; extra == "haystack"
26
+ Requires-Dist: lancedb; extra == "haystack"
25
27
  Requires-Dist: sentence-transformers; extra == "haystack"
26
28
  Requires-Dist: natural-pdf[core-ml]; extra == "haystack"
27
29
  Provides-Extra: easyocr
@@ -36,6 +38,9 @@ Requires-Dist: natural-pdf[core-ml]; extra == "layout-yolo"
36
38
  Provides-Extra: surya
37
39
  Requires-Dist: surya-ocr; extra == "surya"
38
40
  Requires-Dist: natural-pdf[core-ml]; extra == "surya"
41
+ Provides-Extra: doctr
42
+ Requires-Dist: python-doctr[torch]; extra == "doctr"
43
+ Requires-Dist: natural-pdf[core-ml]; extra == "doctr"
39
44
  Provides-Extra: qa
40
45
  Requires-Dist: natural-pdf[core-ml]; extra == "qa"
41
46
  Provides-Extra: docling
@@ -43,7 +48,6 @@ Requires-Dist: docling; extra == "docling"
43
48
  Requires-Dist: natural-pdf[core-ml]; extra == "docling"
44
49
  Provides-Extra: llm
45
50
  Requires-Dist: openai>=1.0; extra == "llm"
46
- Requires-Dist: pydantic; extra == "llm"
47
51
  Provides-Extra: classification
48
52
  Requires-Dist: sentence-transformers; extra == "classification"
49
53
  Requires-Dist: timm; extra == "classification"
@@ -63,6 +67,9 @@ Requires-Dist: pipdeptree; extra == "dev"
63
67
  Requires-Dist: nbformat; extra == "dev"
64
68
  Requires-Dist: jupytext; extra == "dev"
65
69
  Requires-Dist: nbclient; extra == "dev"
70
+ Provides-Extra: deskew
71
+ Requires-Dist: deskew>=1.5; extra == "deskew"
72
+ Requires-Dist: img2pdf; extra == "deskew"
66
73
  Provides-Extra: all
67
74
  Requires-Dist: natural-pdf[interactive]; extra == "all"
68
75
  Requires-Dist: natural-pdf[haystack]; extra == "all"
@@ -70,11 +77,13 @@ Requires-Dist: natural-pdf[easyocr]; extra == "all"
70
77
  Requires-Dist: natural-pdf[paddle]; extra == "all"
71
78
  Requires-Dist: natural-pdf[layout_yolo]; extra == "all"
72
79
  Requires-Dist: natural-pdf[surya]; extra == "all"
80
+ Requires-Dist: natural-pdf[doctr]; extra == "all"
73
81
  Requires-Dist: natural-pdf[qa]; extra == "all"
74
82
  Requires-Dist: natural-pdf[ocr-export]; extra == "all"
75
83
  Requires-Dist: natural-pdf[docling]; extra == "all"
76
84
  Requires-Dist: natural-pdf[llm]; extra == "all"
77
85
  Requires-Dist: natural-pdf[classification]; extra == "all"
86
+ Requires-Dist: natural-pdf[deskew]; extra == "all"
78
87
  Requires-Dist: natural-pdf[test]; extra == "all"
79
88
  Provides-Extra: core-ml
80
89
  Requires-Dist: torch; extra == "core-ml"
@@ -0,0 +1,80 @@
1
+ natural_pdf/__init__.py,sha256=LBrQcFOGooaUsTSAk6zrPCQqu0IM-ClvJLasexEk64k,2728
2
+ natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
3
+ natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
4
+ natural_pdf/analyzers/text_structure.py,sha256=Uhxc7aYB1jddkiwRTEPOg_Te2HfOua4z_OtgP1m3org,12794
5
+ natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
6
+ natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
7
+ natural_pdf/analyzers/layout/base.py,sha256=bYawhmc_0xqKG-xbxUSiazIU1om-aBox5Jh8qDqv-eM,6451
8
+ natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
9
+ natural_pdf/analyzers/layout/gemini.py,sha256=iuq-zZYkTS7fdAjD3ULRhqYTP9Ky2NgVHaXSLppDidw,11751
10
+ natural_pdf/analyzers/layout/layout_analyzer.py,sha256=n327Zjuf7aSzKQKChPHeiCVHinzeDGaWNyKiwQ-DkJk,15571
11
+ natural_pdf/analyzers/layout/layout_manager.py,sha256=RiVq6gUA8t9OLj-HojdzQkJtabM32iBWEBoLtS7_TjY,8115
12
+ natural_pdf/analyzers/layout/layout_options.py,sha256=Jsm4MfD_vedXvS7NCpVmuIRsIuyNyKOjvdgoRYOKZpI,4133
13
+ natural_pdf/analyzers/layout/paddle.py,sha256=gTI9ZqNd5-t4H5IByGfL32WgcE6JrdchW6jRiGI6ulM,13375
14
+ natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh6RoezjdepTnMl90SaNIrP29Pwc,5902
15
+ natural_pdf/analyzers/layout/surya.py,sha256=4RdnhRxSS3i3Ns5mFhOA9-P0xd7Ms19uZuKvUGQfEBI,9789
16
+ natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
17
+ natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
18
+ natural_pdf/classification/manager.py,sha256=CvZd3-lN3fEhcaLXr8gYfrdBGoBgzkIeE14EqjrOAzU,17730
19
+ natural_pdf/classification/mixin.py,sha256=llari9AIMNGy9sTaR7y1g5vtVNUwuCutbKnjbJRMYx4,6903
20
+ natural_pdf/classification/results.py,sha256=Ia26BQxObL5sURpFmg66bfjFPCxjcO_jeP2G-S9wRgo,2289
21
+ natural_pdf/collections/mixins.py,sha256=ufetdzHmd2_WLGBPW4eBQrzZTFpjXyVsVwBquIE47zw,4476
22
+ natural_pdf/collections/pdf_collection.py,sha256=JnsJugE-vxYsW1ZJWmMlVv_jbyG37X-9rZK1RQyKWAY,30020
23
+ natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
24
+ natural_pdf/core/element_manager.py,sha256=knRN6qXxV-6KZCj2GUOyiqRi83DjJzL77TmKGeiD08Y,25144
25
+ natural_pdf/core/highlighting_service.py,sha256=wINdRxq63_CYYA81EwuCRqhNKimn0dNKyoKWuzkirc0,31959
26
+ natural_pdf/core/page.py,sha256=icJLu6jRbkD3iOE8r60XPkQZ8FN3ZcKo5TT5MVGkGl0,105122
27
+ natural_pdf/core/pdf.py,sha256=Vw-L5149wO6RSfvb9sAfPDLqd9M1TdYoPHNEePh65y8,61201
28
+ natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
29
+ natural_pdf/elements/base.py,sha256=7vVCPQyEHifh4LyBuv0kLTqr_gNbbEMc4SoiJmLfEUQ,37585
30
+ natural_pdf/elements/collections.py,sha256=YRaJxNbJrBjgwzwuSoOtEotOKh6RaTi7NRCqKiGl514,92955
31
+ natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
32
+ natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
33
+ natural_pdf/elements/region.py,sha256=LfyB_9DCw5Tzn_G9xsjFz2FfKBOHRqGIND4DQWoA7KM,97324
34
+ natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
35
+ natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
36
+ natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
37
+ natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
38
+ natural_pdf/exporters/paddleocr.py,sha256=BYpdtJI7S8rBkI2dkRESx2epVAZOTfzqU-rjJnUQ5jQ,16249
39
+ natural_pdf/exporters/searchable_pdf.py,sha256=qsaPsnbOOaZHA_aplfZbwQnBoK9KghWm-wzbyRRomeY,16859
40
+ natural_pdf/extraction/manager.py,sha256=mUBbfgLG5Pl31wmajXwyipdEJb_dZ5I-y8GnWw7IzGo,4969
41
+ natural_pdf/extraction/mixin.py,sha256=eKbr70VibpbtfjvCE80lTFuYHzq_BoVtOHjznL_GMRA,11719
42
+ natural_pdf/extraction/result.py,sha256=c1vLguCR6l95cvg-BJJmZvL_MPg2McJaczge55bKZMg,934
43
+ natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
44
+ natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
45
+ natural_pdf/ocr/engine_doctr.py,sha256=519WpvSHgwP6Hv24tci_YHFX7XPlaxOnlREN_YG-Yys,16331
46
+ natural_pdf/ocr/engine_easyocr.py,sha256=9TbxJjmhWFrzM8mcNnZjoRtIDr6gwpuwKm4-Zfub2-8,9281
47
+ natural_pdf/ocr/engine_paddle.py,sha256=2nIrvLBBAiZG1BxVo3eFVJulA6YGoOTXw_RN98p_BUk,6184
48
+ natural_pdf/ocr/engine_surya.py,sha256=iySjG-Dahgh0cLICfbMtOcwUpRFcZjo-5Ed5Zwz-o5Y,4805
49
+ natural_pdf/ocr/ocr_factory.py,sha256=gBFXdFs7E4aCynHz06sQsAhaO3s8yhgoFgN5nyxtg9c,5221
50
+ natural_pdf/ocr/ocr_manager.py,sha256=f0q68ynGYVPkF4D3WnufxmHWD5R1jW5Z_1czTEi9JVU,13931
51
+ natural_pdf/ocr/ocr_options.py,sha256=ZvtnFn1kPkFEoWveQ13uy6B-ofquP0gHEi4tBHrjqCE,6438
52
+ natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
53
+ natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
54
+ natural_pdf/qa/document_qa.py,sha256=Jw4yyq3Vifn57D0ANmOfUlZeG8CJjBkItZBV-8ZAmos,15111
55
+ natural_pdf/search/__init__.py,sha256=gdGlW3kTCw87iXMwcIesbLkUsnv5UKJmF-_1ZMR0pfQ,3339
56
+ natural_pdf/search/haystack_search_service.py,sha256=UHr2UWNBetG3MZ1n_1LnV9oUe5fC-rY9p-V0j00JjQM,30339
57
+ natural_pdf/search/haystack_utils.py,sha256=6Hv5DeLSF4AVDrB_aFJZGB3XpSCLQ45dXLKEd4yG2tU,18978
58
+ natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
59
+ natural_pdf/search/search_service_protocol.py,sha256=Dl-Q-CrutkhZwI69scbW9EWPeYM63qxB60_EA7YqIYo,6699
60
+ natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcfft4OOBDxY,26390
61
+ natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
62
+ natural_pdf/selectors/parser.py,sha256=oI3ezkB6sIyrq_nLJrbaBaBZktXwEp_HG_gKQlVSVcs,24447
63
+ natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
64
+ natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
65
+ natural_pdf/utils/debug.py,sha256=RN7H3E6ph-GtxubCW6psW7TO8o2BxcNLiEzByTVR9fk,995
66
+ natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
67
+ natural_pdf/utils/identifiers.py,sha256=P7n6owcubnF8oAMa_UfYtENmIaJQdH_AMC9Jbs2bWXo,1117
68
+ natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,215
69
+ natural_pdf/utils/packaging.py,sha256=Jshxp6S1zfcqoZmFhdd7WOpL--b6rBSz-Y9mYqELXIY,21581
70
+ natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
71
+ natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9YDmfXWL4,9605
72
+ natural_pdf/utils/tqdm_utils.py,sha256=wV3RXvqog26eWEFEqjt2LkGnLswmO1GXaVGSqgS7tAY,1601
73
+ natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
74
+ natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
75
+ natural_pdf/widgets/viewer.py,sha256=dC_hlPlosc08gsDc3bdAa8chOKtAoH9QFU6mrGOG9vE,39532
76
+ natural_pdf-0.1.9.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
77
+ natural_pdf-0.1.9.dist-info/METADATA,sha256=10GX2Qesem-n8sPem4lls2EEQen4KyJVdcmQf1mt9mI,7400
78
+ natural_pdf-0.1.9.dist-info/WHEEL,sha256=wXxTzcEDnjrTwFYjLPcsW_7_XihufBwmpiBeiXNBGEA,91
79
+ natural_pdf-0.1.9.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
80
+ natural_pdf-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.1)
2
+ Generator: setuptools (80.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,4 +1,2 @@
1
- docs
2
1
  natural_pdf
3
- notebooks
4
2
  pdfs