natural-pdf 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. docs/categorizing-documents/index.md +168 -0
  2. docs/data-extraction/index.md +87 -0
  3. docs/element-selection/index.ipynb +218 -164
  4. docs/element-selection/index.md +20 -0
  5. docs/finetuning/index.md +176 -0
  6. docs/index.md +19 -0
  7. docs/ocr/index.md +63 -16
  8. docs/tutorials/01-loading-and-extraction.ipynb +411 -248
  9. docs/tutorials/02-finding-elements.ipynb +123 -46
  10. docs/tutorials/03-extracting-blocks.ipynb +24 -19
  11. docs/tutorials/04-table-extraction.ipynb +17 -12
  12. docs/tutorials/05-excluding-content.ipynb +37 -32
  13. docs/tutorials/06-document-qa.ipynb +36 -31
  14. docs/tutorials/07-layout-analysis.ipynb +45 -40
  15. docs/tutorials/07-working-with-regions.ipynb +61 -60
  16. docs/tutorials/08-spatial-navigation.ipynb +76 -71
  17. docs/tutorials/09-section-extraction.ipynb +160 -155
  18. docs/tutorials/10-form-field-extraction.ipynb +71 -66
  19. docs/tutorials/11-enhanced-table-processing.ipynb +11 -6
  20. docs/tutorials/12-ocr-integration.ipynb +3420 -312
  21. docs/tutorials/12-ocr-integration.md +68 -106
  22. docs/tutorials/13-semantic-search.ipynb +641 -251
  23. natural_pdf/__init__.py +3 -0
  24. natural_pdf/analyzers/layout/gemini.py +63 -47
  25. natural_pdf/classification/manager.py +343 -0
  26. natural_pdf/classification/mixin.py +149 -0
  27. natural_pdf/classification/results.py +62 -0
  28. natural_pdf/collections/mixins.py +63 -0
  29. natural_pdf/collections/pdf_collection.py +326 -17
  30. natural_pdf/core/element_manager.py +73 -4
  31. natural_pdf/core/page.py +255 -83
  32. natural_pdf/core/pdf.py +385 -367
  33. natural_pdf/elements/base.py +1 -3
  34. natural_pdf/elements/collections.py +279 -49
  35. natural_pdf/elements/region.py +106 -21
  36. natural_pdf/elements/text.py +5 -2
  37. natural_pdf/exporters/__init__.py +4 -0
  38. natural_pdf/exporters/base.py +61 -0
  39. natural_pdf/exporters/paddleocr.py +345 -0
  40. natural_pdf/extraction/manager.py +134 -0
  41. natural_pdf/extraction/mixin.py +246 -0
  42. natural_pdf/extraction/result.py +37 -0
  43. natural_pdf/ocr/__init__.py +16 -8
  44. natural_pdf/ocr/engine.py +46 -30
  45. natural_pdf/ocr/engine_easyocr.py +86 -42
  46. natural_pdf/ocr/engine_paddle.py +39 -28
  47. natural_pdf/ocr/engine_surya.py +32 -16
  48. natural_pdf/ocr/ocr_factory.py +34 -23
  49. natural_pdf/ocr/ocr_manager.py +98 -34
  50. natural_pdf/ocr/ocr_options.py +38 -10
  51. natural_pdf/ocr/utils.py +59 -33
  52. natural_pdf/qa/document_qa.py +0 -4
  53. natural_pdf/selectors/parser.py +363 -238
  54. natural_pdf/templates/finetune/fine_tune_paddleocr.md +420 -0
  55. natural_pdf/utils/debug.py +4 -2
  56. natural_pdf/utils/identifiers.py +9 -5
  57. natural_pdf/utils/locks.py +8 -0
  58. natural_pdf/utils/packaging.py +172 -105
  59. natural_pdf/utils/text_extraction.py +96 -65
  60. natural_pdf/utils/tqdm_utils.py +43 -0
  61. natural_pdf/utils/visualization.py +1 -1
  62. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/METADATA +10 -3
  63. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/RECORD +66 -51
  64. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/WHEEL +1 -1
  65. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/licenses/LICENSE +0 -0
  66. {natural_pdf-0.1.6.dist-info → natural_pdf-0.1.8.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
2
2
  import logging
3
3
  from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
4
4
 
5
- from pdfplumber.utils.geometry import get_bbox_overlap, merge_bboxes, objects_to_bbox
5
+ from pdfplumber.utils.geometry import get_bbox_overlap, merge_bboxes, objects_to_bbox, cluster_objects
6
6
  from pdfplumber.utils.text import TEXTMAP_KWARGS, WORD_EXTRACTOR_KWARGS, chars_to_textmap
7
7
 
8
8
  if TYPE_CHECKING:
@@ -11,6 +11,57 @@ if TYPE_CHECKING:
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
13
 
14
+ def _get_layout_kwargs(
15
+ layout_context_bbox: Optional[Tuple[float, float, float, float]] = None,
16
+ user_kwargs: Optional[Dict[str, Any]] = None,
17
+ ) -> Dict[str, Any]:
18
+ """
19
+ Prepares the keyword arguments for pdfplumber's chars_to_textmap based
20
+ on defaults, context bbox, and allowed user overrides.
21
+ """
22
+ # 1. Start with an empty dict for layout kwargs
23
+ layout_kwargs = {}
24
+
25
+ # Build allowed keys set without trying to copy the constants
26
+ allowed_keys = set(TEXTMAP_KWARGS) | set(WORD_EXTRACTOR_KWARGS)
27
+
28
+ # Add common, well-known default values
29
+ layout_kwargs.update({
30
+ 'x_tolerance': 5,
31
+ 'y_tolerance': 5,
32
+ 'x_density': 7.25,
33
+ 'y_density': 13,
34
+ 'mode': 'box',
35
+ 'min_words_vertical': 1,
36
+ 'min_words_horizontal': 1,
37
+ })
38
+
39
+ # 2. Apply context if provided
40
+ if layout_context_bbox:
41
+ ctx_x0, ctx_top, ctx_x1, ctx_bottom = layout_context_bbox
42
+ layout_kwargs["layout_width"] = ctx_x1 - ctx_x0
43
+ layout_kwargs["layout_height"] = ctx_bottom - ctx_top
44
+ layout_kwargs["x_shift"] = ctx_x0
45
+ layout_kwargs["y_shift"] = ctx_top
46
+ # Add layout_bbox itself
47
+ layout_kwargs["layout_bbox"] = layout_context_bbox
48
+
49
+ # 3. Apply user overrides (only for allowed keys)
50
+ if user_kwargs:
51
+ for key, value in user_kwargs.items():
52
+ if key in allowed_keys:
53
+ layout_kwargs[key] = value
54
+ elif key == 'layout': # Always allow layout flag
55
+ layout_kwargs[key] = value
56
+ else:
57
+ logger.warning(f"Ignoring unsupported layout keyword argument: '{key}'")
58
+
59
+ # 4. Ensure layout flag is present, defaulting to True
60
+ if 'layout' not in layout_kwargs:
61
+ layout_kwargs['layout'] = True
62
+
63
+ return layout_kwargs
64
+
14
65
  def filter_chars_spatially(
15
66
  char_dicts: List[Dict[str, Any]],
16
67
  exclusion_regions: List["Region"],
@@ -116,80 +167,60 @@ def filter_chars_spatially(
116
167
 
117
168
  def generate_text_layout(
118
169
  char_dicts: List[Dict[str, Any]],
119
- layout_context_bbox: Tuple[float, float, float, float],
120
- user_kwargs: Dict[str, Any],
170
+ layout_context_bbox: Optional[Tuple[float, float, float, float]] = None,
171
+ user_kwargs: Optional[Dict[str, Any]] = None,
121
172
  ) -> str:
122
173
  """
123
- Takes a list of filtered character dictionaries and generates
124
- text output using pdfplumber's layout engine.
174
+ Generates a string representation of text from character dictionaries,
175
+ attempting to reconstruct layout using pdfplumber's utilities.
125
176
 
126
177
  Args:
127
- char_dicts: The final list of character dictionaries to include.
128
- layout_context_bbox: The bounding box (x0, top, x1, bottom) to use for
129
- calculating default layout width/height/shifts.
130
- user_kwargs: Dictionary of user-provided keyword arguments.
178
+ char_dicts: List of character dictionary objects.
179
+ layout_context_bbox: Optional bounding box for layout context.
180
+ user_kwargs: User-provided kwargs, potentially overriding defaults.
131
181
 
132
182
  Returns:
133
- The formatted text string.
183
+ String representation of the text.
134
184
  """
135
- if not char_dicts:
136
- logger.debug("generate_text_layout: No characters provided.")
137
- return ""
138
-
139
- # Prepare layout kwargs, prioritizing user input
140
- layout_kwargs = {}
141
- allowed_keys = set(WORD_EXTRACTOR_KWARGS) | set(TEXTMAP_KWARGS)
142
- for key, value in user_kwargs.items():
143
- if key in allowed_keys:
144
- layout_kwargs[key] = value
185
+ # --- Filter out invalid char dicts early ---
186
+ initial_count = len(char_dicts)
187
+ valid_char_dicts = [c for c in char_dicts if isinstance(c.get("text"), str)]
188
+ filtered_count = initial_count - len(valid_char_dicts)
189
+ if filtered_count > 0:
190
+ logger.debug(
191
+ f"generate_text_layout: Filtered out {filtered_count} char dicts with non-string/None text."
192
+ )
145
193
 
146
- # Default to layout=True unless explicitly False
147
- use_layout = layout_kwargs.get("layout", True) # Default to layout if called
148
- layout_kwargs["layout"] = use_layout
194
+ if not valid_char_dicts: # Return empty if no valid chars remain
195
+ logger.debug("generate_text_layout: No valid character dicts found after filtering.")
196
+ return ""
149
197
 
150
- if use_layout:
151
- ctx_x0, ctx_top, ctx_x1, ctx_bottom = layout_context_bbox
152
- ctx_width = ctx_x1 - ctx_x0
153
- ctx_height = ctx_bottom - ctx_top
154
-
155
- # Set layout defaults based on context_bbox if not overridden by user
156
- if "layout_bbox" not in layout_kwargs:
157
- layout_kwargs["layout_bbox"] = layout_context_bbox
158
- # Only set default layout_width if neither width specifier is present
159
- if "layout_width_chars" not in layout_kwargs and "layout_width" not in layout_kwargs:
160
- layout_kwargs["layout_width"] = ctx_width
161
- if "layout_height" not in layout_kwargs:
162
- layout_kwargs["layout_height"] = ctx_height
163
- # Adjust shift based on context's top-left corner
164
- if "x_shift" not in layout_kwargs:
165
- layout_kwargs["x_shift"] = ctx_x0
166
- if "y_shift" not in layout_kwargs:
167
- layout_kwargs["y_shift"] = ctx_top
198
+ # Prepare layout arguments
199
+ layout_kwargs = _get_layout_kwargs(layout_context_bbox, user_kwargs)
200
+ use_layout = layout_kwargs.pop("layout", True) # Extract layout flag, default True
168
201
 
169
- logger.debug(
170
- f"generate_text_layout: Calling chars_to_textmap with {len(char_dicts)} chars and kwargs: {layout_kwargs}"
171
- )
172
- try:
173
- # Sort final list by reading order before passing to textmap
174
- # TODO: Make sorting key dynamic based on layout_kwargs directions?
175
- char_dicts.sort(key=lambda c: (c.get("top", 0), c.get("x0", 0)))
176
- textmap = chars_to_textmap(char_dicts, **layout_kwargs)
177
- result = textmap.as_string
178
- except Exception as e:
179
- logger.error(
180
- f"generate_text_layout: Error calling chars_to_textmap: {e}", exc_info=True
181
- )
182
- logger.warning(
183
- "generate_text_layout: Falling back to simple character join due to layout error."
184
- )
185
- # Ensure chars are sorted before fallback join
186
- fallback_chars = sorted(char_dicts, key=lambda c: (c.get("top", 0), c.get("x0", 0)))
187
- result = "".join(c.get("text", "") for c in fallback_chars)
188
- else:
202
+ if not use_layout:
189
203
  # Simple join if layout=False
190
- logger.debug("generate_text_layout: Using simple join (layout=False).")
191
- # Sort by document order for simple join as well
192
- char_dicts.sort(key=lambda c: (c.get("page_number", 0), c.get("top", 0), c.get("x0", 0)))
193
- result = "".join(c.get("text", "") for c in char_dicts)
204
+ logger.debug("generate_text_layout: Using simple join (layout=False requested).")
205
+ # Sort before joining if layout is off
206
+ valid_char_dicts.sort(key=lambda c: (c.get("top", 0), c.get("x0", 0)))
207
+ result = "".join(c.get("text", "") for c in valid_char_dicts) # Use valid chars
208
+ return result
209
+
210
+ try:
211
+ # Sort chars primarily by top, then x0 before layout analysis
212
+ # This helps pdfplumber group lines correctly
213
+ valid_char_dicts.sort(key=lambda c: (c.get("top", 0), c.get("x0", 0)))
214
+ textmap = chars_to_textmap(valid_char_dicts, **layout_kwargs)
215
+ result = textmap.as_string
216
+ except Exception as e:
217
+ # Fallback to simple join on error
218
+ logger.error(f"generate_text_layout: Error calling chars_to_textmap: {e}", exc_info=False)
219
+ logger.warning(
220
+ "generate_text_layout: Falling back to simple character join due to layout error."
221
+ )
222
+ # Fallback already has sorted characters if layout was attempted
223
+ # Need to use the valid_char_dicts here too
224
+ result = "".join(c.get("text", "") for c in valid_char_dicts)
194
225
 
195
226
  return result
@@ -0,0 +1,43 @@
1
+ import sys
2
+ import os
3
+
4
+ # Default to standard tqdm
5
+ try:
6
+ from tqdm.std import tqdm as selected_tqdm
7
+ except ImportError:
8
+ # Basic fallback if even std is missing (though unlikely)
9
+ def selected_tqdm(*args, **kwargs):
10
+ iterable = args[0] if args else None
11
+ if iterable:
12
+ return iterable
13
+ return None # Simple passthrough if no iterable
14
+
15
+ # Try to detect notebook environment
16
+ try:
17
+ # Check 1: Are we running in an IPython kernel?
18
+ from IPython import get_ipython
19
+ ipython = get_ipython()
20
+ if ipython and 'IPKernelApp' in ipython.config:
21
+ # Check 2: Is it likely a notebook UI (Jupyter Notebook/Lab, VSCode, etc.)?
22
+ # This checks for common indicators. Might not be foolproof.
23
+ if 'VSCODE_PID' in os.environ or ('ipykernel' in sys.modules and 'spyder' not in sys.modules):
24
+ # Check 3: Can we import notebook version?
25
+ try:
26
+ from tqdm.notebook import tqdm as notebook_tqdm
27
+ selected_tqdm = notebook_tqdm # Use notebook version
28
+ except ImportError:
29
+ pass # Stick with std if notebook version missing
30
+ except ImportError:
31
+ pass # Stick with std if IPython not available
32
+
33
+ def get_tqdm():
34
+ """Returns the tqdm class best suited for the detected environment."""
35
+ return selected_tqdm
36
+
37
+ # Example usage (for testing):
38
+ if __name__ == '__main__':
39
+ import time
40
+ tqdm_instance = get_tqdm()
41
+ print(f"Using tqdm class: {tqdm_instance}")
42
+ for i in tqdm_instance(range(10), desc="Testing tqdm"):
43
+ time.sleep(0.1)
@@ -192,7 +192,7 @@ def merge_images_with_legend(
192
192
  if not legend:
193
193
  return image # Return original image if legend is None or empty
194
194
 
195
- bg_color = (255, 255, 255, 255) # Always use white for the merged background
195
+ bg_color = (255, 255, 255, 255) # Always use white for the merged background
196
196
 
197
197
  if position == "right":
198
198
  # Create a new image with extra width for the legend
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -23,7 +23,6 @@ Provides-Extra: haystack
23
23
  Requires-Dist: haystack-ai; extra == "haystack"
24
24
  Requires-Dist: chroma-haystack; extra == "haystack"
25
25
  Requires-Dist: sentence-transformers; extra == "haystack"
26
- Requires-Dist: protobuf<4; extra == "haystack"
27
26
  Requires-Dist: natural-pdf[core-ml]; extra == "haystack"
28
27
  Provides-Extra: easyocr
29
28
  Requires-Dist: easyocr; extra == "easyocr"
@@ -45,6 +44,10 @@ Requires-Dist: natural-pdf[core-ml]; extra == "docling"
45
44
  Provides-Extra: llm
46
45
  Requires-Dist: openai>=1.0; extra == "llm"
47
46
  Requires-Dist: pydantic; extra == "llm"
47
+ Provides-Extra: classification
48
+ Requires-Dist: sentence-transformers; extra == "classification"
49
+ Requires-Dist: timm; extra == "classification"
50
+ Requires-Dist: natural-pdf[core-ml]; extra == "classification"
48
51
  Provides-Extra: test
49
52
  Requires-Dist: pytest; extra == "test"
50
53
  Provides-Extra: dev
@@ -71,15 +74,19 @@ Requires-Dist: natural-pdf[qa]; extra == "all"
71
74
  Requires-Dist: natural-pdf[ocr-export]; extra == "all"
72
75
  Requires-Dist: natural-pdf[docling]; extra == "all"
73
76
  Requires-Dist: natural-pdf[llm]; extra == "all"
77
+ Requires-Dist: natural-pdf[classification]; extra == "all"
74
78
  Requires-Dist: natural-pdf[test]; extra == "all"
75
79
  Provides-Extra: core-ml
76
80
  Requires-Dist: torch; extra == "core-ml"
77
81
  Requires-Dist: torchvision; extra == "core-ml"
78
- Requires-Dist: transformers; extra == "core-ml"
82
+ Requires-Dist: transformers[sentencepiece]; extra == "core-ml"
79
83
  Requires-Dist: huggingface_hub; extra == "core-ml"
80
84
  Provides-Extra: ocr-export
81
85
  Requires-Dist: ocrmypdf; extra == "ocr-export"
82
86
  Requires-Dist: pikepdf; extra == "ocr-export"
87
+ Provides-Extra: export-extras
88
+ Requires-Dist: jupytext; extra == "export-extras"
89
+ Requires-Dist: nbformat; extra == "export-extras"
83
90
  Dynamic: license-file
84
91
 
85
92
  # Natural PDF
@@ -1,4 +1,4 @@
1
- docs/index.md,sha256=P1kXZc8aefnxH0bBjvBgj1o3puRiezjUiBLqS4bcUhM,4889
1
+ docs/index.md,sha256=FG4MYQs-gUR16NQ4XF0AVoQeLuykLeY8XxNwW3h-qUM,5572
2
2
  docs/api/index.md,sha256=4bn8nYklWJuNDrnY-Kt7sf7IejeAEDhcnqYmjH9GJTA,22405
3
3
  docs/assets/favicon.png,sha256=nxca8jM2Y4GxZKzkmagUHO1GpUREK-GRA5LEFue9OOU,284
4
4
  docs/assets/favicon.svg,sha256=nxca8jM2Y4GxZKzkmagUHO1GpUREK-GRA5LEFue9OOU,284
@@ -8,16 +8,19 @@ docs/assets/social-preview.png,sha256=AvyzzM8dC0j5SPFF63bvQrxU4GE1f9j-GUNUv0oA9t
8
8
  docs/assets/social-preview.svg,sha256=AvyzzM8dC0j5SPFF63bvQrxU4GE1f9j-GUNUv0oA9ts,1085
9
9
  docs/assets/javascripts/custom.js,sha256=0NVHGprwiLPFYdYunJcHjOphzk_EhBSNuOUz5Uzdv_k,594
10
10
  docs/assets/stylesheets/custom.css,sha256=PbTp3k77gzUBUQQ01pDXzpNwo4wUv3aJD-SMBQvQItY,1156
11
+ docs/categorizing-documents/index.md,sha256=tgKfv3DidZysrFhaOEM-FiIVDAzNPPnK02sKaE5pE2I,8196
12
+ docs/data-extraction/index.md,sha256=LwQ2MJVI5u5ELI51Iq0WUdDo5sl_s18GWG_cBABI8fQ,3430
11
13
  docs/document-qa/index.ipynb,sha256=MXJoFhi8TUKK6ZnRFiUBglLGpMbzwdb7LJYfzw8Gp48,528713
12
14
  docs/document-qa/index.md,sha256=mwuO4tothg0OzBXewnj73QEJu46Udq7f1pQBYrKOHwM,2131
13
- docs/element-selection/index.ipynb,sha256=-7PwKw1RbPlZ4stzN1Rd1GJ8mwjOD4ySsLcpqVX7chc,1193628
14
- docs/element-selection/index.md,sha256=_1P8vI64Y0aSVwUzdRJD4ayb80BJWBLED9TvVpveFx8,6979
15
+ docs/element-selection/index.ipynb,sha256=WuKd3bTTOnzBDfbuzkxmJxO6EzM9RAkFXoF0U3-8qRA,1223398
16
+ docs/element-selection/index.md,sha256=ZUkOD6VVK11K6WQ86FPnTeeco27PrFWtkObKw8j6Fok,7867
17
+ docs/finetuning/index.md,sha256=Ur3zqSaR0X8PvBCSyI7cFiDv5qZ6Jtv4omBKXCKAzEk,9200
15
18
  docs/installation/index.md,sha256=nd4RZrQFR8_vv7Xm3xAzp7z-CQQr9ffAcGa7yuEYn2U,1594
16
19
  docs/interactive-widget/index.ipynb,sha256=zY1rz5N34OUW-OtgcbI6iiOjlIJqXjVcx9OoNWMjuyU,293111
17
20
  docs/interactive-widget/index.md,sha256=tZbq0uYI7Zwo9mLbhXpqeBriuAjazkIyEJeP-jasJ-Q,259
18
21
  docs/layout-analysis/index.ipynb,sha256=dkS_-cu-KGir5G2LGRcxBThKnW0dfA5nPPnwpoYGFtU,1869093
19
22
  docs/layout-analysis/index.md,sha256=ZnH5yd7B_eOLgGxW_4rNlzQs4Tn3Xx1cK3jX43CSpSM,5390
20
- docs/ocr/index.md,sha256=uuzTqcAgUmMN7jZVq8VkVcbRDHn8Yg2nJVvHJ-bDK-Y,8177
23
+ docs/ocr/index.md,sha256=BR8a3_X6zng5yAo8O8isOBhb2Gm9hM9FIasc58aYF78,11137
21
24
  docs/pdf-navigation/index.ipynb,sha256=h6yew0HePXK1_c5FmETqzjBQceUBT0MU-vnXx_y91mo,8018
22
25
  docs/pdf-navigation/index.md,sha256=P3b3tsmOcmRtnfRxpsMeTgwm7vApnH_4le_QIwJd51M,2391
23
26
  docs/regions/index.ipynb,sha256=5A-N5A4v4lcXNptOAeI4i7i9Gx66To-Yus8B816dHBk,1303347
@@ -28,38 +31,38 @@ docs/text-analysis/index.ipynb,sha256=iaup8pcQXGp0ZK3IWi-HHssQLdIzWYGYfvZK5i8yjj
28
31
  docs/text-analysis/index.md,sha256=02pfZemOgV37izV7H-XzKmHu7AedDKLidQ-sKhYaMVw,3527
29
32
  docs/text-extraction/index.ipynb,sha256=809y9ZamXT3bc3GhwwFyoDnlyEpO-kUZ3tIsZZWyrj8,2537087
30
33
  docs/text-extraction/index.md,sha256=b1KfQpvIEelc8cPbFETUnK92az7iB4b7-LqK2DRH8vw,6985
31
- docs/tutorials/01-loading-and-extraction.ipynb,sha256=-9hFAVQtHmuXsR9Ge3A80wKr-t9wxouAAlW4_Iotdwo,544610
34
+ docs/tutorials/01-loading-and-extraction.ipynb,sha256=2vGLM1_2_Xcpn32HvMLXj_Ro8w4HPofSZNpxZ1qPtL8,520140
32
35
  docs/tutorials/01-loading-and-extraction.md,sha256=g40J8GhKz-ikM2URj5MqIatKKj4l5kTFozHeVjxDJQA,2191
33
- docs/tutorials/02-finding-elements.ipynb,sha256=k1CSz47_atA9D6DXfQzVS64t5-L-KjssU2VuFvdy7oU,524374
36
+ docs/tutorials/02-finding-elements.ipynb,sha256=yVW3B578mKXkFUWJQnBaDB0SlnNodROjemMbdx-LWBw,524506
34
37
  docs/tutorials/02-finding-elements.md,sha256=qOkjcWUzem05of54aKzKvy-MMzRX_S4CyZisVV-73QM,4162
35
- docs/tutorials/03-extracting-blocks.ipynb,sha256=1UjdP0j3kPCE3aU8p1jBCBqflG-xRLli2Ltx80DhOVk,260729
38
+ docs/tutorials/03-extracting-blocks.ipynb,sha256=qifBv5bsKcZIQVQAHtl84GqD6Wy-IZiUMkSXURCu3ug,263329
36
39
  docs/tutorials/03-extracting-blocks.md,sha256=_kqvhk6rSL7cGp2MSwTJk8LYlJGbK_r_umnCSBdR8XU,1665
37
- docs/tutorials/04-table-extraction.ipynb,sha256=u92Wppw1qHG__Mx3ZKtETm4AWuGF8X-Ln3kvmF8zCSo,3973
40
+ docs/tutorials/04-table-extraction.ipynb,sha256=Jj2OzN32I5z1_gfMVgdr2GGyEgbWTgI7harwMWfHxYc,4089
38
41
  docs/tutorials/04-table-extraction.md,sha256=4q4v17VX8K-ZBtWYy0nbWPccyqB_ybd5Vl_IROmxz6Q,2130
39
- docs/tutorials/05-excluding-content.ipynb,sha256=oSg8ll_nuWOfQHGLp0fNKVeyYyn_L8a-F7HJADjjdq8,336857
42
+ docs/tutorials/05-excluding-content.ipynb,sha256=EaZwfDJK3BUghY1iwQ4qR8Z9nXf9e8QUeHxvJmZ3xsw,336933
40
43
  docs/tutorials/05-excluding-content.md,sha256=U52SPlc5knqxiyhRokmxrj06T54r2ENyTfP7BMGykhY,3907
41
- docs/tutorials/06-document-qa.ipynb,sha256=Facyqns8jw2bTvsOSbNnsLskFH8kg1JTz4kmJ16dpcE,10303
44
+ docs/tutorials/06-document-qa.ipynb,sha256=sGesxP26CMSD2GD-47dXq7EnqK3tlEDzM-uu7sZVR2E,10421
42
45
  docs/tutorials/06-document-qa.md,sha256=PzPPgw0Rkkfe6sfz3XyKD9S9JbQ40qf4bDzCBvwH1P0,3026
43
- docs/tutorials/07-layout-analysis.ipynb,sha256=tdNnMro1V66YPx0h96HZnujSm-zDpy7o78euQix4lyU,559517
46
+ docs/tutorials/07-layout-analysis.ipynb,sha256=DgyocqPRt9Rxsz-Sjgi153MvvnoDF3Vpsyhq27N72sE,571321
44
47
  docs/tutorials/07-layout-analysis.md,sha256=NAYVzJTecDnXjo_isbPCSUBSn3c-xM1tELct1Zn5GmI,2533
45
- docs/tutorials/07-working-with-regions.ipynb,sha256=s4BFKKbKUemmURCpg6j91rNI8eFFOJUgxY4QN4alK4I,69584
48
+ docs/tutorials/07-working-with-regions.ipynb,sha256=JMUnjQ_tCBqs4dWIyZ2jNHQCnJkwAzTJuxQVRGBqLqI,67945
46
49
  docs/tutorials/07-working-with-regions.md,sha256=oanbTFSQ-topAVd9kjfkaPiMjHcx6Y8cqyxVbmxLhgs,4365
47
- docs/tutorials/08-spatial-navigation.ipynb,sha256=jfwF6OHLvrMvaaknp-9AfUvr-pPXjPljUyGnFKF9wsw,194523
50
+ docs/tutorials/08-spatial-navigation.ipynb,sha256=Q0N-az8ZiaMmS42HXMnpDYp97Z_9YPXfM-azC9Sf_f8,186624
48
51
  docs/tutorials/08-spatial-navigation.md,sha256=IMbOYBjayXKE7pHfBjApTxOoKRD8WYj7opf8fsJCtzA,4855
49
- docs/tutorials/09-section-extraction.ipynb,sha256=Aqcy08oXTJ1pkJCmVVumndje-4WXnbkl_QfJPhps7f8,1100736
52
+ docs/tutorials/09-section-extraction.ipynb,sha256=CPBXw28Y7WjWE3HY5SJlUnGlOFQQQ0ZUB65c_uVissA,1101081
50
53
  docs/tutorials/09-section-extraction.md,sha256=Jy_be8ftAl_VPBWl5nEv7_5sKSZPx22DLUcBVHMD3Nc,7832
51
- docs/tutorials/10-form-field-extraction.ipynb,sha256=yyopvBoS5vkKKtUQ6rZ4Kyo5E0Olp2WYnmunhfzSQkQ,281491
54
+ docs/tutorials/10-form-field-extraction.ipynb,sha256=S0S5cdnrioweeKVjdRQnZptUEG-b0VvgrROkOygjAzk,268148
52
55
  docs/tutorials/10-form-field-extraction.md,sha256=t9tPlW36vJEhDrKIsHGg_f3P_MK62DT4-ZK1thKFs4Y,5494
53
- docs/tutorials/11-enhanced-table-processing.ipynb,sha256=BWpVUhtjaAX7r4OOdiy5gQgrSqREaoB0L5TuHqoHEn8,1278
56
+ docs/tutorials/11-enhanced-table-processing.ipynb,sha256=2i8gQRwkLDH14Yie56-3K5YIhdaR83XbL7m-8pQ5cJU,1394
54
57
  docs/tutorials/11-enhanced-table-processing.md,sha256=2HK-r1UwU7FLn7zWr_pMG7iLk-i0L4U4-t6ubOEeduc,282
55
- docs/tutorials/12-ocr-integration.ipynb,sha256=xurkoPwgk2p6mhmPdCehy9ccuYHrAhBCb1zGnjRbZ7Y,26724
56
- docs/tutorials/12-ocr-integration.md,sha256=wU90sfnm1R6BoMFq-orbGpl8OUVcm-wEBTlK0bLgJC4,4572
57
- docs/tutorials/13-semantic-search.ipynb,sha256=5h806AIal3EwXPVuXJESbXwdUImCx7fo0mo5-f3Dj44,42817
58
+ docs/tutorials/12-ocr-integration.ipynb,sha256=DB1pWJG1vW4aNVdQ2g5w42a71TFThmzObaVQs8h63U0,194084
59
+ docs/tutorials/12-ocr-integration.md,sha256=-IW4wqLb10eOIWC00NHTGXwtD6jDv7Tp7d-UCOk9SuE,5057
60
+ docs/tutorials/13-semantic-search.ipynb,sha256=BwFepMsOuHrWTFqczvxikPgTh5o97sYX4uleylnOBmc,54126
58
61
  docs/tutorials/13-semantic-search.md,sha256=nsNjv0ipYUC3YPSqT5d6dga9ZjObEc04Mc8c0-gsRnU,2914
59
62
  docs/visual-debugging/index.ipynb,sha256=MJ92u3Q9sfRCyDAQM4KWmCrs4QhKwIagbn6ytPF83L4,2175800
60
63
  docs/visual-debugging/index.md,sha256=ueGD2kNFhEAgIHt7qxCfrLRLjHcR7NTD3AU9okBhX9k,4176
61
64
  docs/visual-debugging/region.png,sha256=ULAJs3ZTxMjpD9F4w1DKaZXmhxga3KRq3NrUsXgw28s,67835
62
- natural_pdf/__init__.py,sha256=IjeAfnDU9fpkVc9YvU2DltNtBtIsZm8_DUyXGuyHGB8,2669
65
+ natural_pdf/__init__.py,sha256=aCnIBTYZlUCL1j78sScPX8kXF88JnuQSHsErboTcjnM,2727
63
66
  natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
64
67
  natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
65
68
  natural_pdf/analyzers/text_structure.py,sha256=9h8hKRz0JWnr13xQr3b4FFr_-hDIjue07WvG7LmT8nc,12827
@@ -67,7 +70,7 @@ natural_pdf/analyzers/utils.py,sha256=Lgub1kYSTOnNxeLO1klStHLwH-GIuT4vpdqyVRF-Mc
67
70
  natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
68
71
  natural_pdf/analyzers/layout/base.py,sha256=9dCR758mAuz7ExlHJ-gwnPnETaM4GZV3W1IRei_t13s,6815
69
72
  natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
70
- natural_pdf/analyzers/layout/gemini.py,sha256=Dslt6rOar9S-QSahyjjXN7Z0KEp7CoVStAA4tvB06X8,10894
73
+ natural_pdf/analyzers/layout/gemini.py,sha256=CzJPWyyEghuCNpu2CMb6OA6FtBGdGhXspHjsjy6I4JE,11195
71
74
  natural_pdf/analyzers/layout/layout_analyzer.py,sha256=6aed1qz5jpndOiakXCBRZAcnyG_waeXi3WPuP5fRvh4,14046
72
75
  natural_pdf/analyzers/layout/layout_manager.py,sha256=Vh8EKiszKqjELofxQ1eiVLKVjibyjBsZpLFzTf0_21E,11179
73
76
  natural_pdf/analyzers/layout/layout_options.py,sha256=s7xr4brE3OutE6aYNAi2PniRy1p2w8a342C2xGpvX2s,3777
@@ -75,32 +78,41 @@ natural_pdf/analyzers/layout/paddle.py,sha256=gTI9ZqNd5-t4H5IByGfL32WgcE6JrdchW6
75
78
  natural_pdf/analyzers/layout/surya.py,sha256=vhji6ynHPMyQLHuYRPQcplNi7m_lG4P4NYtWv6MzcME,13556
76
79
  natural_pdf/analyzers/layout/tatr.py,sha256=-GJhMy4d0yx6egkO9-ULAIdQkkQRyAKExoIta-b256U,12971
77
80
  natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
78
- natural_pdf/collections/pdf_collection.py,sha256=Qfauo7fskmw3MSzG0C_OjIGLskyFSQxWQkbHrPDYpno,13240
81
+ natural_pdf/classification/manager.py,sha256=pLcEDe1a5QARJCMimE5Ul_HKZD4jX-eREUCeUuniA0U,16445
82
+ natural_pdf/classification/mixin.py,sha256=aySe0bEjkaI9qYDmSkQe536w0Xrxcg4j6k3JGPvj-cY,6737
83
+ natural_pdf/classification/results.py,sha256=Hn-3xDSThR8x7XpoTlQLWpX6JE1VHVe2QpOeWNY2Ycw,2949
84
+ natural_pdf/collections/mixins.py,sha256=BXk4o_PRrczSXjR7vorIEe4WyEKyms4_qYnY8ZAZd-A,2737
85
+ natural_pdf/collections/pdf_collection.py,sha256=F_4Z-nrL9wFQ-mt4T4cJ2ERVUnkh2kyQdmOV8ASBgoM,27281
79
86
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
80
- natural_pdf/core/element_manager.py,sha256=rPTkppvU5nQ6Q1_ZtTbcY-KpcRBZV_uBKuNwllHBks0,22100
87
+ natural_pdf/core/element_manager.py,sha256=KZ9yNtpFwuImDWmFUXgISAoWQdSib93E4t3ILUZzIic,24805
81
88
  natural_pdf/core/highlighting_service.py,sha256=CTVd7y-fpIreFSe70cTpMu1Pwl6HKMtTHp0bh2U7VXk,32609
82
- natural_pdf/core/page.py,sha256=Fw01hvK7ekj3iGpj9YbbsVygCmOtTC3qLNvzGBHS4iQ,84784
83
- natural_pdf/core/pdf.py,sha256=dq2bcxbrbY0zS8t3NKkvqi63Oj2nNzaCHUyHz81OoBI,46383
89
+ natural_pdf/core/page.py,sha256=4iykmXdVwmSQOpGukTxfJYU-5XEgSafNbKsnIedVaGA,94051
90
+ natural_pdf/core/pdf.py,sha256=yPAaOv5vNKZlC9oVk5sKsFxb4LdoRygz_Qkp2EaDtOY,43074
84
91
  natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
85
- natural_pdf/elements/base.py,sha256=hgXpcftlvyUOMGZ8EHEu0WhJz2weqcLrCOFgMEhfm-w,35171
86
- natural_pdf/elements/collections.py,sha256=Fy5l1Rso-LMunZ6D6Y0n90Uk89h8dzJ2Js7r-8r8NSY,68885
92
+ natural_pdf/elements/base.py,sha256=UtoSD-c_s0yiLpWZrIIJjeJ9MgGz_4R0UHYcsFWH6bc,35157
93
+ natural_pdf/elements/collections.py,sha256=CCQVgglxWLfhuy4FZvVHXdmgiZxU27Ay7Myt8ttQYWg,79467
87
94
  natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
88
95
  natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
89
- natural_pdf/elements/region.py,sha256=-mBZnWoYMB7cLm5RZhHWCnJkJjsfcgH-f2ESktl-lNk,69656
90
- natural_pdf/elements/text.py,sha256=8PNKSLUgXUhEu9IFfbNbSSpuu0Slm11T6UH8jn4O6hQ,11078
91
- natural_pdf/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
+ natural_pdf/elements/region.py,sha256=f7ArCPizkosIei9ePixHYqedK3K6LBVJotwKZ-y33a0,74058
97
+ natural_pdf/elements/text.py,sha256=ZpPluwZtAVfOkoeM4Fm2PDsN87BBZduURZaFWns03RM,11158
98
+ natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
99
+ natural_pdf/exporters/base.py,sha256=s-NpHoH81x80GQxs0oqjdhPGrzbUa8npjnK8apKOsHQ,2115
100
+ natural_pdf/exporters/paddleocr.py,sha256=vyVetJ6RgEY46qS5Yl5mKl4cSJadwOxLWGGsdiDjico,16248
92
101
  natural_pdf/exporters/searchable_pdf.py,sha256=qsaPsnbOOaZHA_aplfZbwQnBoK9KghWm-wzbyRRomeY,16859
93
- natural_pdf/ocr/__init__.py,sha256=8EvoA2poYbtkS8Jmbkgn4hFEwOHbN5fIXdJc9tT4NzI,2477
94
- natural_pdf/ocr/engine.py,sha256=oTABzt20pEvKadTv1U7zOIv_jSTwv5dty2Q_6vf6HQs,8767
95
- natural_pdf/ocr/engine_easyocr.py,sha256=zki3-fd-u01E9VPdaBD0Rkel18Nk9xYBg477MXY0C8w,8487
96
- natural_pdf/ocr/engine_paddle.py,sha256=8OYbcYrjIaYop7h7W4R7bXWHKOCYhJQpqT2cVccNb5E,6209
97
- natural_pdf/ocr/engine_surya.py,sha256=PtzvDOqxNzhAEeh0rIhTwPXDAbTHMR2IRs0tXb-4pag,4693
98
- natural_pdf/ocr/ocr_factory.py,sha256=3fxIpgGi6NzxhfYpl5Kblufyx_-RRoT7dlRGktYjB_Y,4425
99
- natural_pdf/ocr/ocr_manager.py,sha256=sMQrpvAaA4PbjhZOf7G_KGW3KBhXhx1l26ig_Xqf_0g,9190
100
- natural_pdf/ocr/ocr_options.py,sha256=3njghlnnkcxOL4Td7l6mt-mTEFmL8Z-ryf_BX-EM3i4,3289
101
- natural_pdf/ocr/utils.py,sha256=AIBkGQExetjl2wyuQLpWJSy0HSxaOBmuCo2QsKmY7Rc,3404
102
+ natural_pdf/extraction/manager.py,sha256=YH5dyUorMItGxuaZ-DhuJD5Sh_Ozjj0fa-WBMcQw1E0,4903
103
+ natural_pdf/extraction/mixin.py,sha256=6CWYyutGcKCxFVYun8yXC4H1IZWLMXaeYZ-cWJRx5FE,11430
104
+ natural_pdf/extraction/result.py,sha256=ihY1g_C2hsMACYqU7bcvAKRijuh-FHVtpnn0uoP--pk,1047
105
+ natural_pdf/ocr/__init__.py,sha256=jKaDbo13CdCDcas1WiBmg5gjBvVeG-Z9uaeYxyzvaNY,2464
106
+ natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
107
+ natural_pdf/ocr/engine_easyocr.py,sha256=9TbxJjmhWFrzM8mcNnZjoRtIDr6gwpuwKm4-Zfub2-8,9281
108
+ natural_pdf/ocr/engine_paddle.py,sha256=2nIrvLBBAiZG1BxVo3eFVJulA6YGoOTXw_RN98p_BUk,6184
109
+ natural_pdf/ocr/engine_surya.py,sha256=iySjG-Dahgh0cLICfbMtOcwUpRFcZjo-5Ed5Zwz-o5Y,4805
110
+ natural_pdf/ocr/ocr_factory.py,sha256=IFccj0BB75YGV4hjcy4ECtGQX_JQzdptpvDFfeGxxgI,4391
111
+ natural_pdf/ocr/ocr_manager.py,sha256=ivk4Aqr5gsDJWiCxP1-FLkhuvfJiQtilwbPtgIPm--4,13320
112
+ natural_pdf/ocr/ocr_options.py,sha256=BcPVwJGYE3vMug7wsVh_ARUJlm_4emz9ynOAwYgwHBk,4257
113
+ natural_pdf/ocr/utils.py,sha256=4b_A47hfynfV00iR8I9OWmXCzDzRvSdEkQhZLcSV4kQ,4394
102
114
  natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
103
- natural_pdf/qa/document_qa.py,sha256=W4E4vS_Eox_IBsYpVb0ifQbJb0FP-PYEIG93CU3rUkE,15246
115
+ natural_pdf/qa/document_qa.py,sha256=Jw4yyq3Vifn57D0ANmOfUlZeG8CJjBkItZBV-8ZAmos,15111
104
116
  natural_pdf/search/__init__.py,sha256=EB_HRwlktJn5WGPVtSaRbOQNjLAZTxujeYf_eN-zd2U,4191
105
117
  natural_pdf/search/haystack_search_service.py,sha256=6RjTFWbTo3gaO-90IF6PEuo_9WRwOdj232eWn3OT0BQ,29270
106
118
  natural_pdf/search/haystack_utils.py,sha256=UI4eu3SVieGR_QnBtLhP8Fjtt2AJgeLgxrpa_dBmD6k,19289
@@ -108,24 +120,27 @@ natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzP
108
120
  natural_pdf/search/search_service_protocol.py,sha256=ybNcF_NoLZuIx0rb4XB1dsDl3o_LAaWR1fVVKld2TxI,6818
109
121
  natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcfft4OOBDxY,26390
110
122
  natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
111
- natural_pdf/selectors/parser.py,sha256=59_GSsTApM6MFvtqhrrmbKaBfODPbGXMluvvQJcrqhE,15754
123
+ natural_pdf/selectors/parser.py,sha256=AKXGv4MaZDiaWT_jSfn_vU-qVlECB8b-IxnyocXtaaE,22671
112
124
  natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
125
+ natural_pdf/templates/finetune/fine_tune_paddleocr.md,sha256=H6Wmu3Nvi2qKK-rPwr8KUZfILzXz8VmWyCWYOTe6QCI,14764
113
126
  natural_pdf/templates/spa/index.html,sha256=6hLTp07OeV5Q4jUMp5Sgl-dwfBs3oPzBxqphG4kEs24,787
114
127
  natural_pdf/templates/spa/words.txt,sha256=vkGtl5Y7-Nq-3Vhx1daRWWF1Jp1UCVaw-ZZaiFwrurk,2493885
115
128
  natural_pdf/templates/spa/css/style.css,sha256=Qdl0U3L5HMyhBDNzyRPklfb3OxW6rMxCfQbzO8i8IW4,7643
116
129
  natural_pdf/templates/spa/js/app.js,sha256=Efb7NmcTN9RLdLwKpDcU6CG5Ix0laHtzRHmfUlDMJXw,19679
117
130
  natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
118
- natural_pdf/utils/debug.py,sha256=56dk0OcDUwjRbYU2g1k0_G_3hfHkAF9Z6vnMVt3myMU,992
131
+ natural_pdf/utils/debug.py,sha256=lk_6qzxan8NagjEtJEZpZ2MS30SO8ce6iznBxmA0xgk,995
119
132
  natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
120
- natural_pdf/utils/identifiers.py,sha256=IvXj2q-NW1cmivWAedDOZBFPTRbVl0_J8BP4phREt9U,1092
121
- natural_pdf/utils/packaging.py,sha256=mNPEqCtc0CVWjKg1RcSldYTgspUWQOLAR_ZEllyKxs4,20175
133
+ natural_pdf/utils/identifiers.py,sha256=n61viCQiMlf5-E_jsPLe-FkPBdKkMKv-gfs5tGqlKiw,1117
134
+ natural_pdf/utils/locks.py,sha256=E_Fb6GnRNq-tF5aE7jnllkpidsNr8LXPhSaqgr56Ks4,215
135
+ natural_pdf/utils/packaging.py,sha256=HSgpubpHICU75L4ZAZPU8iOjium055XWnklV9_YqoCA,21579
122
136
  natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
123
- natural_pdf/utils/text_extraction.py,sha256=VlbkXg14GlvwYTjRJWa8FVUigETY3Hq0v8NlIRnzYkM,8619
124
- natural_pdf/utils/visualization.py,sha256=ir5PgpptRuVuVeRT9IcdTsNeEpdOYD_69rByjHQ7JhI,8592
137
+ natural_pdf/utils/text_extraction.py,sha256=qZfOuO57XeKg7p-Q7yzTBMTrpAvDRslYXjDSjiJLStI,9545
138
+ natural_pdf/utils/tqdm_utils.py,sha256=bKWvsoAOl0lPOPLJC2hkTtkdxBf5f9aVtcA3DmUE19M,1570
139
+ natural_pdf/utils/visualization.py,sha256=5GbhxtvZW-77ONVnICupg-s2D-OaxLZNqkKlOrQESK4,8593
125
140
  natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
126
141
  natural_pdf/widgets/viewer.py,sha256=Aiw6kuBc0WkhcZrPNKyLNzzWbmtmU6rvOmHV0IuXCBk,40862
127
142
  natural_pdf/widgets/frontend/viewer.js,sha256=w8ywfz_IOAAv2nP_qaf2VBUkF1KhjT3zorhJxM1-CfU,4371
128
- natural_pdf-0.1.6.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
143
+ natural_pdf-0.1.8.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
129
144
  notebooks/Examples.ipynb,sha256=l4YMtMEx_DWBzWIjl9CmBkWTo0g_nK8l_XWOyzYooQM,4275170
130
145
  pdfs/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
131
146
  pdfs/01-practice.pdf,sha256=dxWyJIa2cm7bALE3BWDJ2dg3inyFlo1n8ntVyy0hkTo,7906
@@ -135,7 +150,7 @@ pdfs/2014 Statistics.pdf,sha256=B-30OQVjqj_3718-G9cGUefNddnz-MosPdHAzfGfkcc,9559
135
150
  pdfs/2019 Statistics.pdf,sha256=reuSJxvAlx9_P-pW7IPqzox0jFCxSPbK1i1-WFu-uGA,511439
136
151
  pdfs/Atlanta_Public_Schools_GA_sample.pdf,sha256=PLBh_uWJQH0MnBaSm5ng5Ima63_m6Mi11CjdravB_S8,137689
137
152
  pdfs/needs-ocr.pdf,sha256=vusKiLxSOlELUTetfZfaotNU54RtMj9PCzGfLc2cuNs,139305
138
- natural_pdf-0.1.6.dist-info/METADATA,sha256=Yd92-6FRKc6KdEuTpCOWyxBcewnhAoYTAhtbq6qIB0Q,6668
139
- natural_pdf-0.1.6.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
140
- natural_pdf-0.1.6.dist-info/top_level.txt,sha256=7nDKUnpkN7B8cBI7DEpW5JM8S7OcOgHw3jXH-1iCX2o,32
141
- natural_pdf-0.1.6.dist-info/RECORD,,
153
+ natural_pdf-0.1.8.dist-info/METADATA,sha256=Qz_ePmFWt4poceUJnVcldvhJoIRWuo2lEIEoVp-mnwE,7030
154
+ natural_pdf-0.1.8.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
155
+ natural_pdf-0.1.8.dist-info/top_level.txt,sha256=7nDKUnpkN7B8cBI7DEpW5JM8S7OcOgHw3jXH-1iCX2o,32
156
+ natural_pdf-0.1.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5