natural-pdf 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. natural_pdf/__init__.py +7 -2
  2. natural_pdf/analyzers/shape_detection_mixin.py +1092 -0
  3. natural_pdf/analyzers/text_options.py +9 -1
  4. natural_pdf/analyzers/text_structure.py +371 -58
  5. natural_pdf/classification/manager.py +3 -4
  6. natural_pdf/collections/pdf_collection.py +19 -39
  7. natural_pdf/core/element_manager.py +11 -1
  8. natural_pdf/core/highlighting_service.py +146 -75
  9. natural_pdf/core/page.py +287 -188
  10. natural_pdf/core/pdf.py +57 -42
  11. natural_pdf/elements/base.py +51 -0
  12. natural_pdf/elements/collections.py +362 -67
  13. natural_pdf/elements/line.py +5 -0
  14. natural_pdf/elements/region.py +396 -23
  15. natural_pdf/exporters/data/__init__.py +0 -0
  16. natural_pdf/exporters/data/pdf.ttf +0 -0
  17. natural_pdf/exporters/data/sRGB.icc +0 -0
  18. natural_pdf/exporters/hocr.py +40 -61
  19. natural_pdf/exporters/hocr_font.py +7 -13
  20. natural_pdf/exporters/original_pdf.py +10 -13
  21. natural_pdf/exporters/paddleocr.py +51 -11
  22. natural_pdf/exporters/searchable_pdf.py +0 -10
  23. natural_pdf/flows/__init__.py +12 -0
  24. natural_pdf/flows/collections.py +533 -0
  25. natural_pdf/flows/element.py +382 -0
  26. natural_pdf/flows/flow.py +216 -0
  27. natural_pdf/flows/region.py +458 -0
  28. natural_pdf/search/__init__.py +65 -52
  29. natural_pdf/search/lancedb_search_service.py +325 -0
  30. natural_pdf/search/numpy_search_service.py +255 -0
  31. natural_pdf/search/searchable_mixin.py +25 -71
  32. natural_pdf/selectors/parser.py +163 -8
  33. natural_pdf/widgets/viewer.py +22 -31
  34. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/METADATA +55 -49
  35. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/RECORD +38 -30
  36. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/WHEEL +1 -1
  37. natural_pdf/search/haystack_search_service.py +0 -687
  38. natural_pdf/search/haystack_utils.py +0 -474
  39. natural_pdf/utils/tqdm_utils.py +0 -51
  40. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/licenses/LICENSE +0 -0
  41. {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.11
3
+ Version: 0.1.13
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -12,20 +12,17 @@ Requires-Python: >=3.9
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  Requires-Dist: pdfplumber
15
- Requires-Dist: Pillow
15
+ Requires-Dist: pillow
16
16
  Requires-Dist: colour
17
17
  Requires-Dist: numpy
18
18
  Requires-Dist: urllib3
19
19
  Requires-Dist: tqdm
20
20
  Requires-Dist: pydantic
21
- Provides-Extra: interactive
22
- Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
23
- Provides-Extra: haystack
24
- Requires-Dist: haystack-ai; extra == "haystack"
25
- Requires-Dist: lancedb-haystack; extra == "haystack"
26
- Requires-Dist: lancedb; extra == "haystack"
27
- Requires-Dist: sentence-transformers; extra == "haystack"
28
- Requires-Dist: natural-pdf[core-ml]; extra == "haystack"
21
+ Requires-Dist: jenkspy
22
+ Requires-Dist: pikepdf>=9.7.0
23
+ Requires-Dist: scipy
24
+ Provides-Extra: viewer
25
+ Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "viewer"
29
26
  Provides-Extra: easyocr
30
27
  Requires-Dist: easyocr; extra == "easyocr"
31
28
  Requires-Dist: natural-pdf[core-ml]; extra == "easyocr"
@@ -41,19 +38,25 @@ Requires-Dist: natural-pdf[core-ml]; extra == "surya"
41
38
  Provides-Extra: doctr
42
39
  Requires-Dist: python-doctr[torch]; extra == "doctr"
43
40
  Requires-Dist: natural-pdf[core-ml]; extra == "doctr"
44
- Provides-Extra: qa
45
- Requires-Dist: natural-pdf[core-ml]; extra == "qa"
46
41
  Provides-Extra: docling
47
42
  Requires-Dist: docling; extra == "docling"
48
43
  Requires-Dist: natural-pdf[core-ml]; extra == "docling"
49
44
  Provides-Extra: llm
50
45
  Requires-Dist: openai>=1.0; extra == "llm"
51
- Provides-Extra: classification
52
- Requires-Dist: sentence-transformers; extra == "classification"
53
- Requires-Dist: timm; extra == "classification"
54
- Requires-Dist: natural-pdf[core-ml]; extra == "classification"
55
46
  Provides-Extra: test
56
47
  Requires-Dist: pytest; extra == "test"
48
+ Provides-Extra: search
49
+ Requires-Dist: lancedb; extra == "search"
50
+ Requires-Dist: pyarrow; extra == "search"
51
+ Provides-Extra: favorites
52
+ Requires-Dist: natural-pdf[deskew]; extra == "favorites"
53
+ Requires-Dist: natural-pdf[llm]; extra == "favorites"
54
+ Requires-Dist: natural-pdf[surya]; extra == "favorites"
55
+ Requires-Dist: natural-pdf[easyocr]; extra == "favorites"
56
+ Requires-Dist: natural-pdf[layout_yolo]; extra == "favorites"
57
+ Requires-Dist: natural-pdf[ocr-export]; extra == "favorites"
58
+ Requires-Dist: natural-pdf[viewer]; extra == "favorites"
59
+ Requires-Dist: natural-pdf[search]; extra == "favorites"
57
60
  Provides-Extra: dev
58
61
  Requires-Dist: black; extra == "dev"
59
62
  Requires-Dist: isort; extra == "dev"
@@ -67,29 +70,32 @@ Requires-Dist: pipdeptree; extra == "dev"
67
70
  Requires-Dist: nbformat; extra == "dev"
68
71
  Requires-Dist: jupytext; extra == "dev"
69
72
  Requires-Dist: nbclient; extra == "dev"
73
+ Requires-Dist: ipykernel; extra == "dev"
70
74
  Provides-Extra: deskew
71
75
  Requires-Dist: deskew>=1.5; extra == "deskew"
72
76
  Requires-Dist: img2pdf; extra == "deskew"
73
77
  Provides-Extra: all
74
- Requires-Dist: natural-pdf[interactive]; extra == "all"
75
- Requires-Dist: natural-pdf[haystack]; extra == "all"
78
+ Requires-Dist: natural-pdf[viewer]; extra == "all"
76
79
  Requires-Dist: natural-pdf[easyocr]; extra == "all"
77
80
  Requires-Dist: natural-pdf[paddle]; extra == "all"
78
81
  Requires-Dist: natural-pdf[layout_yolo]; extra == "all"
79
82
  Requires-Dist: natural-pdf[surya]; extra == "all"
80
83
  Requires-Dist: natural-pdf[doctr]; extra == "all"
81
- Requires-Dist: natural-pdf[qa]; extra == "all"
82
84
  Requires-Dist: natural-pdf[ocr-export]; extra == "all"
83
85
  Requires-Dist: natural-pdf[docling]; extra == "all"
84
86
  Requires-Dist: natural-pdf[llm]; extra == "all"
85
- Requires-Dist: natural-pdf[classification]; extra == "all"
87
+ Requires-Dist: natural-pdf[core-ml]; extra == "all"
86
88
  Requires-Dist: natural-pdf[deskew]; extra == "all"
87
89
  Requires-Dist: natural-pdf[test]; extra == "all"
90
+ Requires-Dist: natural-pdf[search]; extra == "all"
88
91
  Provides-Extra: core-ml
89
92
  Requires-Dist: torch; extra == "core-ml"
90
93
  Requires-Dist: torchvision; extra == "core-ml"
91
94
  Requires-Dist: transformers[sentencepiece]; extra == "core-ml"
92
95
  Requires-Dist: huggingface_hub; extra == "core-ml"
96
+ Requires-Dist: sentence-transformers; extra == "core-ml"
97
+ Requires-Dist: numpy; extra == "core-ml"
98
+ Requires-Dist: timm; extra == "core-ml"
93
99
  Provides-Extra: ocr-export
94
100
  Requires-Dist: pikepdf; extra == "ocr-export"
95
101
  Provides-Extra: export-extras
@@ -114,26 +120,11 @@ Natural PDF lets you find and extract content from PDFs using simple code that m
114
120
  pip install natural-pdf
115
121
  ```
116
122
 
117
- For optional features like specific OCR engines, layout analysis models, or the interactive Jupyter widget, you can install extras:
123
+ For optional features like specific OCR engines, layout analysis models, or the interactive Jupyter widget, you can install one to two million different extras. If you just want the greatest hits:
118
124
 
119
125
  ```bash
120
- # Example: Install with EasyOCR support
121
- pip install natural-pdf[easyocr]
122
- pip install natural-pdf[surya]
123
- pip install natural-pdf[paddle]
124
-
125
- # Example: Install support for features using Large Language Models (e.g., via OpenAI-compatible APIs)
126
- pip install natural-pdf[llm]
127
- # (May require setting API key environment variables, e.g., GOOGLE_API_KEY for Gemini)
128
-
129
- # Example: Install with interactive viewer support
130
- pip install natural-pdf[interactive]
131
-
132
- # Example: Install with semantic search support (Haystack)
133
- pip install natural-pdf[haystack]
134
-
135
- # Install everything
136
- pip install natural-pdf[all]
126
+ # deskewing, OCR (surya) + layout analysis (yolo), interactive browsing
127
+ pip install natural-pdf[favorites]
137
128
  ```
138
129
 
139
130
  See the [installation guide](https://jsoma.github.io/natural-pdf/installation/) for more details on extras.
@@ -147,25 +138,26 @@ from natural_pdf import PDF
147
138
  pdf = PDF('document.pdf')
148
139
  page = pdf.pages[0]
149
140
 
141
+ # Extract all of the text on the page
142
+ page.extract_text()
143
+
150
144
  # Find elements using CSS-like selectors
151
145
  heading = page.find('text:contains("Summary"):bold')
152
146
 
153
147
  # Extract content below the heading
154
148
  content = heading.below().extract_text()
155
- print("Content below Summary:", content[:100] + "...")
156
149
 
157
- # Exclude headers/footers automatically (example)
158
- # You might define these based on common text or position
159
- page.add_exclusion(page.find('text:contains("CONFIDENTIAL")').above())
160
- page.add_exclusion(page.find_all('line')[-1].below())
150
+ # Examine all the bold text on the page
151
+ page.find_all('text:bold').show()
161
152
 
162
- # Extract clean text from the page
163
- clean_text = page.extract_text()
164
- print("\nClean page text:", clean_text[:200] + "...")
153
+ # Exclude parts of the page from selectors/extractors
154
+ header = page.find('text:contains("CONFIDENTIAL")').above()
155
+ footer = page.find_all('line')[-1].below()
156
+ page.add_exclusion(header)
157
+ page.add_exclusion(footer)
165
158
 
166
- # Highlight the heading and view the page
167
- heading.highlight(color='red')
168
- page.to_image()
159
+ # Extract clean text from the page ignoring exclusions
160
+ clean_text = page.extract_text()
169
161
  ```
170
162
 
171
163
  And as a fun bonus, `page.viewer()` will provide an interactive method to explore the PDF.
@@ -186,3 +178,17 @@ Natural PDF offers a range of features for working with PDFs:
186
178
  ## Learn More
187
179
 
188
180
  Dive deeper into the features and explore advanced usage in the [**Complete Documentation**](https://jsoma.github.io/natural-pdf).
181
+
182
+ ## Best friends
183
+
184
+ Natural PDF sits on top of a *lot* of fantastic tools and models, some of which are:
185
+
186
+ - [pdfplumber](https://github.com/jsvine/pdfplumber)
187
+ - [EasyOCR](https://www.jaided.ai/easyocr/)
188
+ - [PaddleOCR](https://paddlepaddle.github.io/PaddleOCR/latest/en/index.html)
189
+ - [Surya](https://github.com/VikParuchuri/surya)
190
+ - A specific [YOLO](https://github.com/opendatalab/DocLayout-YOLO)
191
+ - [deskew](https://github.com/sbrunner/deskew)
192
+ - [doctr](https://github.com/mindee/doctr)
193
+ - [docling](https://github.com/docling-project/docling)
194
+ - [Hugging Face](https://huggingface.co/models)
@@ -1,7 +1,8 @@
1
- natural_pdf/__init__.py,sha256=HIYdzHD7QBRssIseUX_oDJYvVJs646tNSYhKHqk0HeA,2495
1
+ natural_pdf/__init__.py,sha256=0sCYgb9BAV5OnpD_1AswMuOLuXNmpe3OLJpv_6p3tgw,2449
2
2
  natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
3
- natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
4
- natural_pdf/analyzers/text_structure.py,sha256=Uhxc7aYB1jddkiwRTEPOg_Te2HfOua4z_OtgP1m3org,12794
3
+ natural_pdf/analyzers/shape_detection_mixin.py,sha256=BweC9i8Z4xByUKXyd0Aapk_EMJmjMvSv4x_CAD3_-Zc,61466
4
+ natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
5
+ natural_pdf/analyzers/text_structure.py,sha256=VfKTsTFrK877sC0grsis9jK3rrgp0Mbp13VWEbukTcs,28437
5
6
  natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
6
7
  natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
7
8
  natural_pdf/analyzers/layout/base.py,sha256=bYawhmc_0xqKG-xbxUSiazIU1om-aBox5Jh8qDqv-eM,6451
@@ -15,34 +16,42 @@ natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh
15
16
  natural_pdf/analyzers/layout/surya.py,sha256=4RdnhRxSS3i3Ns5mFhOA9-P0xd7Ms19uZuKvUGQfEBI,9789
16
17
  natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
17
18
  natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
18
- natural_pdf/classification/manager.py,sha256=RxJch8xVu8Me6_T2Kh7ZqUNaAKlXvfyCZD0hRc4Hk6w,17929
19
+ natural_pdf/classification/manager.py,sha256=-rdZzGP_JK4RDDxIEgdY8_gHRNS0cNHhpOSodjxbd84,17853
19
20
  natural_pdf/classification/mixin.py,sha256=hhX9qWPShpOq_-mgoEq0GUWnutBnNMo3YdUlxwyNWMA,6781
20
21
  natural_pdf/classification/results.py,sha256=El1dY7cBQVOB5lP-uj52dWgH6Y7TeQgJOVcZD-OLjes,2778
21
22
  natural_pdf/collections/mixins.py,sha256=sj76Cn6EdBtb5f-bdAV-1qpdixX8tI4BzPccPiYLI1w,5117
22
- natural_pdf/collections/pdf_collection.py,sha256=obHizc2KR4ZiAspodaPOeMgfpoW3aKg_G0goBHlrFJI,32018
23
+ natural_pdf/collections/pdf_collection.py,sha256=nsbrzcsXAD2qVLLXhDYpljAb_WnjMNanHJ6J7UtYzGA,31165
23
24
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
24
- natural_pdf/core/element_manager.py,sha256=knRN6qXxV-6KZCj2GUOyiqRi83DjJzL77TmKGeiD08Y,25144
25
- natural_pdf/core/highlighting_service.py,sha256=wINdRxq63_CYYA81EwuCRqhNKimn0dNKyoKWuzkirc0,31959
26
- natural_pdf/core/page.py,sha256=S7Uj3DVksX7o3Qg7hpNulYuxHmqzSJIJ0yXVytPhFqY,105158
27
- natural_pdf/core/pdf.py,sha256=qpZx5LXZ5Oq1fZ4mzDXBDOIcsApRinMEH0CjVY6jNvM,69273
25
+ natural_pdf/core/element_manager.py,sha256=_UdXu51sLi6STzc8Pj4k8R721G3yJixXDLuRHn3hmr8,25731
26
+ natural_pdf/core/highlighting_service.py,sha256=tjMJpdJj2oaMGpdqiNHPcTJqID4nd-uBZ5v7KtPmoc0,36762
27
+ natural_pdf/core/page.py,sha256=uPSkGB9dXxtp4_uE1ELyPznDQ3CwWsnMCRGhEg2ny0o,111120
28
+ natural_pdf/core/pdf.py,sha256=395aBTg4Le4vABvQWgBhPm669nGJ8JdMToTs1UtQ2Vg,69575
28
29
  natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
29
- natural_pdf/elements/base.py,sha256=7vVCPQyEHifh4LyBuv0kLTqr_gNbbEMc4SoiJmLfEUQ,37585
30
- natural_pdf/elements/collections.py,sha256=HsNt_4x-yqNI_bDGeNEiih3hotAfrbppmp_O7rq9HGs,107141
31
- natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
30
+ natural_pdf/elements/base.py,sha256=d2K_uVRXLHapFFVaBuVqKxUzjGBzRERMAAjEkQNBkj4,39655
31
+ natural_pdf/elements/collections.py,sha256=qd58tD3f-eojz90ICytlqu4Ej0OQoWgsxV4umQDhUvA,120809
32
+ natural_pdf/elements/line.py,sha256=300kSFBDUBIudfeQtH_tzW9gTYRgRKUDPiTABw6J-BE,4782
32
33
  natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
33
- natural_pdf/elements/region.py,sha256=XYWUym7hgkzMMfmXw0hEz_iGJ6Sdyf6DRz6XjgMVwN0,97250
34
+ natural_pdf/elements/region.py,sha256=6A5RIDuVbrHLhBcJn2lSXjVPtx5sERC3YZsz0dEmLaQ,115747
34
35
  natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
35
36
  natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
36
37
  natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
37
38
  natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
38
- natural_pdf/exporters/hocr.py,sha256=wilmVyBgmBNp2ZEdbKijk9ag8E1AGMMl6rBtsAOzp-Y,20201
39
- natural_pdf/exporters/hocr_font.py,sha256=e9QdxeCExxpY_dpzwGxFlT_3TcvNejw9qpkNc1NVa4Y,4612
40
- natural_pdf/exporters/original_pdf.py,sha256=vZeqBsCZh3JRRWwtfHzM78fxvhKkAI4QK3LLkeXidUM,5082
41
- natural_pdf/exporters/paddleocr.py,sha256=BYpdtJI7S8rBkI2dkRESx2epVAZOTfzqU-rjJnUQ5jQ,16249
42
- natural_pdf/exporters/searchable_pdf.py,sha256=-sbjjM4oV2YCiJaVKcUIPXjAs94ouXSyOSlAzv_qM7I,16815
39
+ natural_pdf/exporters/hocr.py,sha256=MOb5sTxe-GlMSOtmqp3p4SY_ZigwOtmd4sj_zMRCIQY,19907
40
+ natural_pdf/exporters/hocr_font.py,sha256=1wsGOMj6zoaRN2rxCwrv4MMLGawpNz984WgXpmWekgw,4574
41
+ natural_pdf/exporters/original_pdf.py,sha256=zsZPg_lUoEerKIzzoEw-qGdM5XBg_LZhFJeVKnCUp4o,5054
42
+ natural_pdf/exporters/paddleocr.py,sha256=srwk_N10wVqtEU5bI8B3XGfXr54gaaJ0Q5zpq4-cSVY,18361
43
+ natural_pdf/exporters/searchable_pdf.py,sha256=G2Tc4tpDXSYIufXJlkA8ppW_3DuzHAaweYKae33pI_c,16290
44
+ natural_pdf/exporters/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
+ natural_pdf/exporters/data/pdf.ttf,sha256=x4RUIJJaI9iO2DCmOVe4r4Wmao2vjZ_JDoQ2c7LvGlk,572
46
+ natural_pdf/exporters/data/sRGB.icc,sha256=KpLUuuRQt22LCqQhk9-XTXX2Jzjs6_dPAcXnWxKpV5Y,6922
43
47
  natural_pdf/extraction/manager.py,sha256=mUBbfgLG5Pl31wmajXwyipdEJb_dZ5I-y8GnWw7IzGo,4969
44
48
  natural_pdf/extraction/mixin.py,sha256=eKbr70VibpbtfjvCE80lTFuYHzq_BoVtOHjznL_GMRA,11719
45
49
  natural_pdf/extraction/result.py,sha256=c1vLguCR6l95cvg-BJJmZvL_MPg2McJaczge55bKZMg,934
50
+ natural_pdf/flows/__init__.py,sha256=82ibI0eNJfVergEsTyom9Nxe_T6pnWQsr4-CISGQlz0,277
51
+ natural_pdf/flows/collections.py,sha256=iOmRqM5K74kqioh7-UAbNgkpXMr9nkZZ5oW4_sQ1Alo,26433
52
+ natural_pdf/flows/element.py,sha256=NmNWvrvihsO8OpUDNqW7rwcDZSGMjmJzAy4d-iaxgDc,20566
53
+ natural_pdf/flows/flow.py,sha256=ft07Ou0uRodF_gTgumVlU9YUquE3LTZz5LEAoQGErEs,10375
54
+ natural_pdf/flows/region.py,sha256=5xAnePZjs292oKrGG5El3pwhpxaHQYLzse35ilswhqI,21298
46
55
  natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
47
56
  natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
48
57
  natural_pdf/ocr/engine_doctr.py,sha256=519WpvSHgwP6Hv24tci_YHFX7XPlaxOnlREN_YG-Yys,16331
@@ -55,14 +64,14 @@ natural_pdf/ocr/ocr_options.py,sha256=ZvtnFn1kPkFEoWveQ13uy6B-ofquP0gHEi4tBHrjqC
55
64
  natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
56
65
  natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
57
66
  natural_pdf/qa/document_qa.py,sha256=Jw4yyq3Vifn57D0ANmOfUlZeG8CJjBkItZBV-8ZAmos,15111
58
- natural_pdf/search/__init__.py,sha256=gdGlW3kTCw87iXMwcIesbLkUsnv5UKJmF-_1ZMR0pfQ,3339
59
- natural_pdf/search/haystack_search_service.py,sha256=UHr2UWNBetG3MZ1n_1LnV9oUe5fC-rY9p-V0j00JjQM,30339
60
- natural_pdf/search/haystack_utils.py,sha256=6Hv5DeLSF4AVDrB_aFJZGB3XpSCLQ45dXLKEd4yG2tU,18978
67
+ natural_pdf/search/__init__.py,sha256=72n_Mj_AhF_RCIoBBhZ6EZKjbILM8omelXZ99fXw7n4,3688
68
+ natural_pdf/search/lancedb_search_service.py,sha256=tW7ONPcWGY1HKle_7OqCXRnMCI-aKL-AqneKz2YbLlM,13706
69
+ natural_pdf/search/numpy_search_service.py,sha256=3_8fx7NV-15jBokOU73mcxrznxPxzVQnOlDHf3dpo28,10117
61
70
  natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
62
71
  natural_pdf/search/search_service_protocol.py,sha256=Dl-Q-CrutkhZwI69scbW9EWPeYM63qxB60_EA7YqIYo,6699
63
- natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcfft4OOBDxY,26390
72
+ natural_pdf/search/searchable_mixin.py,sha256=dZbaHv8Go3TJNqxoPtnp9Dr0Ftxuf_44RpBeIRXkPxc,23534
64
73
  natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
65
- natural_pdf/selectors/parser.py,sha256=oI3ezkB6sIyrq_nLJrbaBaBZktXwEp_HG_gKQlVSVcs,24447
74
+ natural_pdf/selectors/parser.py,sha256=EZsNRjn40qjiQ-KlQOmGJVrkvlVd8wez9v3SGdoFYSo,30226
66
75
  natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
67
76
  natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
68
77
  natural_pdf/utils/debug.py,sha256=RN7H3E6ph-GtxubCW6psW7TO8o2BxcNLiEzByTVR9fk,995
@@ -72,12 +81,11 @@ natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,21
72
81
  natural_pdf/utils/packaging.py,sha256=Jshxp6S1zfcqoZmFhdd7WOpL--b6rBSz-Y9mYqELXIY,21581
73
82
  natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
74
83
  natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9YDmfXWL4,9605
75
- natural_pdf/utils/tqdm_utils.py,sha256=wV3RXvqog26eWEFEqjt2LkGnLswmO1GXaVGSqgS7tAY,1601
76
84
  natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
77
85
  natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
78
- natural_pdf/widgets/viewer.py,sha256=dC_hlPlosc08gsDc3bdAa8chOKtAoH9QFU6mrGOG9vE,39532
79
- natural_pdf-0.1.11.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
80
- natural_pdf-0.1.11.dist-info/METADATA,sha256=HBEH41sOW2opbRoN_yUq8iw3jB2fvdOXEDj0ZGfmw8g,7354
81
- natural_pdf-0.1.11.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
82
- natural_pdf-0.1.11.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
83
- natural_pdf-0.1.11.dist-info/RECORD,,
86
+ natural_pdf/widgets/viewer.py,sha256=ekgXTEfA48GrR-JjpCpgyBCXdf4IubV0pAXDJozcU7A,39196
87
+ natural_pdf-0.1.13.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
88
+ natural_pdf-0.1.13.dist-info/METADATA,sha256=xihj7-PKPM86F2ztQeHOGrkF2OgS20l5GxI5UlheIjw,7674
89
+ natural_pdf-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
90
+ natural_pdf-0.1.13.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
91
+ natural_pdf-0.1.13.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5