natural-pdf 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf/__init__.py +7 -2
- natural_pdf/analyzers/shape_detection_mixin.py +1092 -0
- natural_pdf/analyzers/text_options.py +9 -1
- natural_pdf/analyzers/text_structure.py +371 -58
- natural_pdf/classification/manager.py +3 -4
- natural_pdf/collections/pdf_collection.py +19 -39
- natural_pdf/core/element_manager.py +11 -1
- natural_pdf/core/highlighting_service.py +146 -75
- natural_pdf/core/page.py +287 -188
- natural_pdf/core/pdf.py +57 -42
- natural_pdf/elements/base.py +51 -0
- natural_pdf/elements/collections.py +362 -67
- natural_pdf/elements/line.py +5 -0
- natural_pdf/elements/region.py +396 -23
- natural_pdf/exporters/data/__init__.py +0 -0
- natural_pdf/exporters/data/pdf.ttf +0 -0
- natural_pdf/exporters/data/sRGB.icc +0 -0
- natural_pdf/exporters/hocr.py +40 -61
- natural_pdf/exporters/hocr_font.py +7 -13
- natural_pdf/exporters/original_pdf.py +10 -13
- natural_pdf/exporters/paddleocr.py +51 -11
- natural_pdf/exporters/searchable_pdf.py +0 -10
- natural_pdf/flows/__init__.py +12 -0
- natural_pdf/flows/collections.py +533 -0
- natural_pdf/flows/element.py +382 -0
- natural_pdf/flows/flow.py +216 -0
- natural_pdf/flows/region.py +458 -0
- natural_pdf/search/__init__.py +65 -52
- natural_pdf/search/lancedb_search_service.py +325 -0
- natural_pdf/search/numpy_search_service.py +255 -0
- natural_pdf/search/searchable_mixin.py +25 -71
- natural_pdf/selectors/parser.py +163 -8
- natural_pdf/widgets/viewer.py +22 -31
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/METADATA +55 -49
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/RECORD +38 -30
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/WHEEL +1 -1
- natural_pdf/search/haystack_search_service.py +0 -687
- natural_pdf/search/haystack_utils.py +0 -474
- natural_pdf/utils/tqdm_utils.py +0 -51
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/licenses/LICENSE +0 -0
- {natural_pdf-0.1.11.dist-info → natural_pdf-0.1.13.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.13
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -12,20 +12,17 @@ Requires-Python: >=3.9
|
|
12
12
|
Description-Content-Type: text/markdown
|
13
13
|
License-File: LICENSE
|
14
14
|
Requires-Dist: pdfplumber
|
15
|
-
Requires-Dist:
|
15
|
+
Requires-Dist: pillow
|
16
16
|
Requires-Dist: colour
|
17
17
|
Requires-Dist: numpy
|
18
18
|
Requires-Dist: urllib3
|
19
19
|
Requires-Dist: tqdm
|
20
20
|
Requires-Dist: pydantic
|
21
|
-
|
22
|
-
Requires-Dist:
|
23
|
-
|
24
|
-
|
25
|
-
Requires-Dist:
|
26
|
-
Requires-Dist: lancedb; extra == "haystack"
|
27
|
-
Requires-Dist: sentence-transformers; extra == "haystack"
|
28
|
-
Requires-Dist: natural-pdf[core-ml]; extra == "haystack"
|
21
|
+
Requires-Dist: jenkspy
|
22
|
+
Requires-Dist: pikepdf>=9.7.0
|
23
|
+
Requires-Dist: scipy
|
24
|
+
Provides-Extra: viewer
|
25
|
+
Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "viewer"
|
29
26
|
Provides-Extra: easyocr
|
30
27
|
Requires-Dist: easyocr; extra == "easyocr"
|
31
28
|
Requires-Dist: natural-pdf[core-ml]; extra == "easyocr"
|
@@ -41,19 +38,25 @@ Requires-Dist: natural-pdf[core-ml]; extra == "surya"
|
|
41
38
|
Provides-Extra: doctr
|
42
39
|
Requires-Dist: python-doctr[torch]; extra == "doctr"
|
43
40
|
Requires-Dist: natural-pdf[core-ml]; extra == "doctr"
|
44
|
-
Provides-Extra: qa
|
45
|
-
Requires-Dist: natural-pdf[core-ml]; extra == "qa"
|
46
41
|
Provides-Extra: docling
|
47
42
|
Requires-Dist: docling; extra == "docling"
|
48
43
|
Requires-Dist: natural-pdf[core-ml]; extra == "docling"
|
49
44
|
Provides-Extra: llm
|
50
45
|
Requires-Dist: openai>=1.0; extra == "llm"
|
51
|
-
Provides-Extra: classification
|
52
|
-
Requires-Dist: sentence-transformers; extra == "classification"
|
53
|
-
Requires-Dist: timm; extra == "classification"
|
54
|
-
Requires-Dist: natural-pdf[core-ml]; extra == "classification"
|
55
46
|
Provides-Extra: test
|
56
47
|
Requires-Dist: pytest; extra == "test"
|
48
|
+
Provides-Extra: search
|
49
|
+
Requires-Dist: lancedb; extra == "search"
|
50
|
+
Requires-Dist: pyarrow; extra == "search"
|
51
|
+
Provides-Extra: favorites
|
52
|
+
Requires-Dist: natural-pdf[deskew]; extra == "favorites"
|
53
|
+
Requires-Dist: natural-pdf[llm]; extra == "favorites"
|
54
|
+
Requires-Dist: natural-pdf[surya]; extra == "favorites"
|
55
|
+
Requires-Dist: natural-pdf[easyocr]; extra == "favorites"
|
56
|
+
Requires-Dist: natural-pdf[layout_yolo]; extra == "favorites"
|
57
|
+
Requires-Dist: natural-pdf[ocr-export]; extra == "favorites"
|
58
|
+
Requires-Dist: natural-pdf[viewer]; extra == "favorites"
|
59
|
+
Requires-Dist: natural-pdf[search]; extra == "favorites"
|
57
60
|
Provides-Extra: dev
|
58
61
|
Requires-Dist: black; extra == "dev"
|
59
62
|
Requires-Dist: isort; extra == "dev"
|
@@ -67,29 +70,32 @@ Requires-Dist: pipdeptree; extra == "dev"
|
|
67
70
|
Requires-Dist: nbformat; extra == "dev"
|
68
71
|
Requires-Dist: jupytext; extra == "dev"
|
69
72
|
Requires-Dist: nbclient; extra == "dev"
|
73
|
+
Requires-Dist: ipykernel; extra == "dev"
|
70
74
|
Provides-Extra: deskew
|
71
75
|
Requires-Dist: deskew>=1.5; extra == "deskew"
|
72
76
|
Requires-Dist: img2pdf; extra == "deskew"
|
73
77
|
Provides-Extra: all
|
74
|
-
Requires-Dist: natural-pdf[
|
75
|
-
Requires-Dist: natural-pdf[haystack]; extra == "all"
|
78
|
+
Requires-Dist: natural-pdf[viewer]; extra == "all"
|
76
79
|
Requires-Dist: natural-pdf[easyocr]; extra == "all"
|
77
80
|
Requires-Dist: natural-pdf[paddle]; extra == "all"
|
78
81
|
Requires-Dist: natural-pdf[layout_yolo]; extra == "all"
|
79
82
|
Requires-Dist: natural-pdf[surya]; extra == "all"
|
80
83
|
Requires-Dist: natural-pdf[doctr]; extra == "all"
|
81
|
-
Requires-Dist: natural-pdf[qa]; extra == "all"
|
82
84
|
Requires-Dist: natural-pdf[ocr-export]; extra == "all"
|
83
85
|
Requires-Dist: natural-pdf[docling]; extra == "all"
|
84
86
|
Requires-Dist: natural-pdf[llm]; extra == "all"
|
85
|
-
Requires-Dist: natural-pdf[
|
87
|
+
Requires-Dist: natural-pdf[core-ml]; extra == "all"
|
86
88
|
Requires-Dist: natural-pdf[deskew]; extra == "all"
|
87
89
|
Requires-Dist: natural-pdf[test]; extra == "all"
|
90
|
+
Requires-Dist: natural-pdf[search]; extra == "all"
|
88
91
|
Provides-Extra: core-ml
|
89
92
|
Requires-Dist: torch; extra == "core-ml"
|
90
93
|
Requires-Dist: torchvision; extra == "core-ml"
|
91
94
|
Requires-Dist: transformers[sentencepiece]; extra == "core-ml"
|
92
95
|
Requires-Dist: huggingface_hub; extra == "core-ml"
|
96
|
+
Requires-Dist: sentence-transformers; extra == "core-ml"
|
97
|
+
Requires-Dist: numpy; extra == "core-ml"
|
98
|
+
Requires-Dist: timm; extra == "core-ml"
|
93
99
|
Provides-Extra: ocr-export
|
94
100
|
Requires-Dist: pikepdf; extra == "ocr-export"
|
95
101
|
Provides-Extra: export-extras
|
@@ -114,26 +120,11 @@ Natural PDF lets you find and extract content from PDFs using simple code that m
|
|
114
120
|
pip install natural-pdf
|
115
121
|
```
|
116
122
|
|
117
|
-
For optional features like specific OCR engines, layout analysis models, or the interactive Jupyter widget, you can install extras:
|
123
|
+
For optional features like specific OCR engines, layout analysis models, or the interactive Jupyter widget, you can install one to two million different extras. If you just want the greatest hits:
|
118
124
|
|
119
125
|
```bash
|
120
|
-
#
|
121
|
-
pip install natural-pdf[
|
122
|
-
pip install natural-pdf[surya]
|
123
|
-
pip install natural-pdf[paddle]
|
124
|
-
|
125
|
-
# Example: Install support for features using Large Language Models (e.g., via OpenAI-compatible APIs)
|
126
|
-
pip install natural-pdf[llm]
|
127
|
-
# (May require setting API key environment variables, e.g., GOOGLE_API_KEY for Gemini)
|
128
|
-
|
129
|
-
# Example: Install with interactive viewer support
|
130
|
-
pip install natural-pdf[interactive]
|
131
|
-
|
132
|
-
# Example: Install with semantic search support (Haystack)
|
133
|
-
pip install natural-pdf[haystack]
|
134
|
-
|
135
|
-
# Install everything
|
136
|
-
pip install natural-pdf[all]
|
126
|
+
# deskewing, OCR (surya) + layout analysis (yolo), interactive browsing
|
127
|
+
pip install natural-pdf[favorites]
|
137
128
|
```
|
138
129
|
|
139
130
|
See the [installation guide](https://jsoma.github.io/natural-pdf/installation/) for more details on extras.
|
@@ -147,25 +138,26 @@ from natural_pdf import PDF
|
|
147
138
|
pdf = PDF('document.pdf')
|
148
139
|
page = pdf.pages[0]
|
149
140
|
|
141
|
+
# Extract all of the text on the page
|
142
|
+
page.extract_text()
|
143
|
+
|
150
144
|
# Find elements using CSS-like selectors
|
151
145
|
heading = page.find('text:contains("Summary"):bold')
|
152
146
|
|
153
147
|
# Extract content below the heading
|
154
148
|
content = heading.below().extract_text()
|
155
|
-
print("Content below Summary:", content[:100] + "...")
|
156
149
|
|
157
|
-
#
|
158
|
-
|
159
|
-
page.add_exclusion(page.find('text:contains("CONFIDENTIAL")').above())
|
160
|
-
page.add_exclusion(page.find_all('line')[-1].below())
|
150
|
+
# Examine all the bold text on the page
|
151
|
+
page.find_all('text:bold').show()
|
161
152
|
|
162
|
-
#
|
163
|
-
|
164
|
-
|
153
|
+
# Exclude parts of the page from selectors/extractors
|
154
|
+
header = page.find('text:contains("CONFIDENTIAL")').above()
|
155
|
+
footer = page.find_all('line')[-1].below()
|
156
|
+
page.add_exclusion(header)
|
157
|
+
page.add_exclusion(footer)
|
165
158
|
|
166
|
-
#
|
167
|
-
|
168
|
-
page.to_image()
|
159
|
+
# Extract clean text from the page ignoring exclusions
|
160
|
+
clean_text = page.extract_text()
|
169
161
|
```
|
170
162
|
|
171
163
|
And as a fun bonus, `page.viewer()` will provide an interactive method to explore the PDF.
|
@@ -186,3 +178,17 @@ Natural PDF offers a range of features for working with PDFs:
|
|
186
178
|
## Learn More
|
187
179
|
|
188
180
|
Dive deeper into the features and explore advanced usage in the [**Complete Documentation**](https://jsoma.github.io/natural-pdf).
|
181
|
+
|
182
|
+
## Best friends
|
183
|
+
|
184
|
+
Natural PDF sits on top of a *lot* of fantastic tools and models, some of which are:
|
185
|
+
|
186
|
+
- [pdfplumber](https://github.com/jsvine/pdfplumber)
|
187
|
+
- [EasyOCR](https://www.jaided.ai/easyocr/)
|
188
|
+
- [PaddleOCR](https://paddlepaddle.github.io/PaddleOCR/latest/en/index.html)
|
189
|
+
- [Surya](https://github.com/VikParuchuri/surya)
|
190
|
+
- A specific [YOLO](https://github.com/opendatalab/DocLayout-YOLO)
|
191
|
+
- [deskew](https://github.com/sbrunner/deskew)
|
192
|
+
- [doctr](https://github.com/mindee/doctr)
|
193
|
+
- [docling](https://github.com/docling-project/docling)
|
194
|
+
- [Hugging Face](https://huggingface.co/models)
|
@@ -1,7 +1,8 @@
|
|
1
|
-
natural_pdf/__init__.py,sha256=
|
1
|
+
natural_pdf/__init__.py,sha256=0sCYgb9BAV5OnpD_1AswMuOLuXNmpe3OLJpv_6p3tgw,2449
|
2
2
|
natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
|
3
|
-
natural_pdf/analyzers/
|
4
|
-
natural_pdf/analyzers/
|
3
|
+
natural_pdf/analyzers/shape_detection_mixin.py,sha256=BweC9i8Z4xByUKXyd0Aapk_EMJmjMvSv4x_CAD3_-Zc,61466
|
4
|
+
natural_pdf/analyzers/text_options.py,sha256=qEkDaYWla0rIM_gszEOsu52q7C_dAfV81P2HLJZM2sw,3333
|
5
|
+
natural_pdf/analyzers/text_structure.py,sha256=VfKTsTFrK877sC0grsis9jK3rrgp0Mbp13VWEbukTcs,28437
|
5
6
|
natural_pdf/analyzers/utils.py,sha256=PYbzJzSAHZ7JsMes84WIrSbA0zkjJGs0CLvIeINsf_k,2100
|
6
7
|
natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
|
7
8
|
natural_pdf/analyzers/layout/base.py,sha256=bYawhmc_0xqKG-xbxUSiazIU1om-aBox5Jh8qDqv-eM,6451
|
@@ -15,34 +16,42 @@ natural_pdf/analyzers/layout/pdfplumber_table_finder.py,sha256=Tk0Q7wv7nGYPo69lh
|
|
15
16
|
natural_pdf/analyzers/layout/surya.py,sha256=4RdnhRxSS3i3Ns5mFhOA9-P0xd7Ms19uZuKvUGQfEBI,9789
|
16
17
|
natural_pdf/analyzers/layout/tatr.py,sha256=cVr0ZyhY2mNLAKZ4DGMm-b7XNJpILKh8x8ZpyDeUhLk,15032
|
17
18
|
natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
|
18
|
-
natural_pdf/classification/manager.py,sha256
|
19
|
+
natural_pdf/classification/manager.py,sha256=-rdZzGP_JK4RDDxIEgdY8_gHRNS0cNHhpOSodjxbd84,17853
|
19
20
|
natural_pdf/classification/mixin.py,sha256=hhX9qWPShpOq_-mgoEq0GUWnutBnNMo3YdUlxwyNWMA,6781
|
20
21
|
natural_pdf/classification/results.py,sha256=El1dY7cBQVOB5lP-uj52dWgH6Y7TeQgJOVcZD-OLjes,2778
|
21
22
|
natural_pdf/collections/mixins.py,sha256=sj76Cn6EdBtb5f-bdAV-1qpdixX8tI4BzPccPiYLI1w,5117
|
22
|
-
natural_pdf/collections/pdf_collection.py,sha256=
|
23
|
+
natural_pdf/collections/pdf_collection.py,sha256=nsbrzcsXAD2qVLLXhDYpljAb_WnjMNanHJ6J7UtYzGA,31165
|
23
24
|
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
24
|
-
natural_pdf/core/element_manager.py,sha256=
|
25
|
-
natural_pdf/core/highlighting_service.py,sha256=
|
26
|
-
natural_pdf/core/page.py,sha256=
|
27
|
-
natural_pdf/core/pdf.py,sha256=
|
25
|
+
natural_pdf/core/element_manager.py,sha256=_UdXu51sLi6STzc8Pj4k8R721G3yJixXDLuRHn3hmr8,25731
|
26
|
+
natural_pdf/core/highlighting_service.py,sha256=tjMJpdJj2oaMGpdqiNHPcTJqID4nd-uBZ5v7KtPmoc0,36762
|
27
|
+
natural_pdf/core/page.py,sha256=uPSkGB9dXxtp4_uE1ELyPznDQ3CwWsnMCRGhEg2ny0o,111120
|
28
|
+
natural_pdf/core/pdf.py,sha256=395aBTg4Le4vABvQWgBhPm669nGJ8JdMToTs1UtQ2Vg,69575
|
28
29
|
natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
|
29
|
-
natural_pdf/elements/base.py,sha256=
|
30
|
-
natural_pdf/elements/collections.py,sha256=
|
31
|
-
natural_pdf/elements/line.py,sha256=
|
30
|
+
natural_pdf/elements/base.py,sha256=d2K_uVRXLHapFFVaBuVqKxUzjGBzRERMAAjEkQNBkj4,39655
|
31
|
+
natural_pdf/elements/collections.py,sha256=qd58tD3f-eojz90ICytlqu4Ej0OQoWgsxV4umQDhUvA,120809
|
32
|
+
natural_pdf/elements/line.py,sha256=300kSFBDUBIudfeQtH_tzW9gTYRgRKUDPiTABw6J-BE,4782
|
32
33
|
natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
|
33
|
-
natural_pdf/elements/region.py,sha256=
|
34
|
+
natural_pdf/elements/region.py,sha256=6A5RIDuVbrHLhBcJn2lSXjVPtx5sERC3YZsz0dEmLaQ,115747
|
34
35
|
natural_pdf/elements/text.py,sha256=13HvVZGinj2Vm_fFCAnqi7hohtoKvnpCp3VCfkpeAbc,11146
|
35
36
|
natural_pdf/export/mixin.py,sha256=L1q3MIEFWuvie4j4_EmW7GT3NerbZ1as0XMUoqTS7gM,5083
|
36
37
|
natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
|
37
38
|
natural_pdf/exporters/base.py,sha256=XhR1xlkHOh7suOuX7mWbsj1h2o1pZNet-OAS5YCJyeI,2115
|
38
|
-
natural_pdf/exporters/hocr.py,sha256=
|
39
|
-
natural_pdf/exporters/hocr_font.py,sha256=
|
40
|
-
natural_pdf/exporters/original_pdf.py,sha256=
|
41
|
-
natural_pdf/exporters/paddleocr.py,sha256=
|
42
|
-
natural_pdf/exporters/searchable_pdf.py,sha256
|
39
|
+
natural_pdf/exporters/hocr.py,sha256=MOb5sTxe-GlMSOtmqp3p4SY_ZigwOtmd4sj_zMRCIQY,19907
|
40
|
+
natural_pdf/exporters/hocr_font.py,sha256=1wsGOMj6zoaRN2rxCwrv4MMLGawpNz984WgXpmWekgw,4574
|
41
|
+
natural_pdf/exporters/original_pdf.py,sha256=zsZPg_lUoEerKIzzoEw-qGdM5XBg_LZhFJeVKnCUp4o,5054
|
42
|
+
natural_pdf/exporters/paddleocr.py,sha256=srwk_N10wVqtEU5bI8B3XGfXr54gaaJ0Q5zpq4-cSVY,18361
|
43
|
+
natural_pdf/exporters/searchable_pdf.py,sha256=G2Tc4tpDXSYIufXJlkA8ppW_3DuzHAaweYKae33pI_c,16290
|
44
|
+
natural_pdf/exporters/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
|
+
natural_pdf/exporters/data/pdf.ttf,sha256=x4RUIJJaI9iO2DCmOVe4r4Wmao2vjZ_JDoQ2c7LvGlk,572
|
46
|
+
natural_pdf/exporters/data/sRGB.icc,sha256=KpLUuuRQt22LCqQhk9-XTXX2Jzjs6_dPAcXnWxKpV5Y,6922
|
43
47
|
natural_pdf/extraction/manager.py,sha256=mUBbfgLG5Pl31wmajXwyipdEJb_dZ5I-y8GnWw7IzGo,4969
|
44
48
|
natural_pdf/extraction/mixin.py,sha256=eKbr70VibpbtfjvCE80lTFuYHzq_BoVtOHjznL_GMRA,11719
|
45
49
|
natural_pdf/extraction/result.py,sha256=c1vLguCR6l95cvg-BJJmZvL_MPg2McJaczge55bKZMg,934
|
50
|
+
natural_pdf/flows/__init__.py,sha256=82ibI0eNJfVergEsTyom9Nxe_T6pnWQsr4-CISGQlz0,277
|
51
|
+
natural_pdf/flows/collections.py,sha256=iOmRqM5K74kqioh7-UAbNgkpXMr9nkZZ5oW4_sQ1Alo,26433
|
52
|
+
natural_pdf/flows/element.py,sha256=NmNWvrvihsO8OpUDNqW7rwcDZSGMjmJzAy4d-iaxgDc,20566
|
53
|
+
natural_pdf/flows/flow.py,sha256=ft07Ou0uRodF_gTgumVlU9YUquE3LTZz5LEAoQGErEs,10375
|
54
|
+
natural_pdf/flows/region.py,sha256=5xAnePZjs292oKrGG5El3pwhpxaHQYLzse35ilswhqI,21298
|
46
55
|
natural_pdf/ocr/__init__.py,sha256=VY8hhvDPf7Gh2lB-d2QRmghLLyTy6ydxlgo1cS4dOSk,2482
|
47
56
|
natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
|
48
57
|
natural_pdf/ocr/engine_doctr.py,sha256=519WpvSHgwP6Hv24tci_YHFX7XPlaxOnlREN_YG-Yys,16331
|
@@ -55,14 +64,14 @@ natural_pdf/ocr/ocr_options.py,sha256=ZvtnFn1kPkFEoWveQ13uy6B-ofquP0gHEi4tBHrjqC
|
|
55
64
|
natural_pdf/ocr/utils.py,sha256=OxuHwDbHWj6setvnC0QYwMHrAjxGkhmLzWHpMqqGupA,4397
|
56
65
|
natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
|
57
66
|
natural_pdf/qa/document_qa.py,sha256=Jw4yyq3Vifn57D0ANmOfUlZeG8CJjBkItZBV-8ZAmos,15111
|
58
|
-
natural_pdf/search/__init__.py,sha256=
|
59
|
-
natural_pdf/search/
|
60
|
-
natural_pdf/search/
|
67
|
+
natural_pdf/search/__init__.py,sha256=72n_Mj_AhF_RCIoBBhZ6EZKjbILM8omelXZ99fXw7n4,3688
|
68
|
+
natural_pdf/search/lancedb_search_service.py,sha256=tW7ONPcWGY1HKle_7OqCXRnMCI-aKL-AqneKz2YbLlM,13706
|
69
|
+
natural_pdf/search/numpy_search_service.py,sha256=3_8fx7NV-15jBokOU73mcxrznxPxzVQnOlDHf3dpo28,10117
|
61
70
|
natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
|
62
71
|
natural_pdf/search/search_service_protocol.py,sha256=Dl-Q-CrutkhZwI69scbW9EWPeYM63qxB60_EA7YqIYo,6699
|
63
|
-
natural_pdf/search/searchable_mixin.py,sha256=
|
72
|
+
natural_pdf/search/searchable_mixin.py,sha256=dZbaHv8Go3TJNqxoPtnp9Dr0Ftxuf_44RpBeIRXkPxc,23534
|
64
73
|
natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
|
65
|
-
natural_pdf/selectors/parser.py,sha256=
|
74
|
+
natural_pdf/selectors/parser.py,sha256=EZsNRjn40qjiQ-KlQOmGJVrkvlVd8wez9v3SGdoFYSo,30226
|
66
75
|
natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
|
67
76
|
natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
|
68
77
|
natural_pdf/utils/debug.py,sha256=RN7H3E6ph-GtxubCW6psW7TO8o2BxcNLiEzByTVR9fk,995
|
@@ -72,12 +81,11 @@ natural_pdf/utils/locks.py,sha256=7HJqV0VsNcOfISnbw8goCKWP5ck11uSJo6T_x9XIPKI,21
|
|
72
81
|
natural_pdf/utils/packaging.py,sha256=Jshxp6S1zfcqoZmFhdd7WOpL--b6rBSz-Y9mYqELXIY,21581
|
73
82
|
natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
|
74
83
|
natural_pdf/utils/text_extraction.py,sha256=z6Jhy11pakYCsEpkvh8ldw6DkUFsYF1hCL9YDmfXWL4,9605
|
75
|
-
natural_pdf/utils/tqdm_utils.py,sha256=wV3RXvqog26eWEFEqjt2LkGnLswmO1GXaVGSqgS7tAY,1601
|
76
84
|
natural_pdf/utils/visualization.py,sha256=30pRWQdsRJh2pSObh-brKVsFgC1n8tHmSrta_UDnVPw,8989
|
77
85
|
natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
|
78
|
-
natural_pdf/widgets/viewer.py,sha256=
|
79
|
-
natural_pdf-0.1.
|
80
|
-
natural_pdf-0.1.
|
81
|
-
natural_pdf-0.1.
|
82
|
-
natural_pdf-0.1.
|
83
|
-
natural_pdf-0.1.
|
86
|
+
natural_pdf/widgets/viewer.py,sha256=ekgXTEfA48GrR-JjpCpgyBCXdf4IubV0pAXDJozcU7A,39196
|
87
|
+
natural_pdf-0.1.13.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
88
|
+
natural_pdf-0.1.13.dist-info/METADATA,sha256=xihj7-PKPM86F2ztQeHOGrkF2OgS20l5GxI5UlheIjw,7674
|
89
|
+
natural_pdf-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
90
|
+
natural_pdf-0.1.13.dist-info/top_level.txt,sha256=Cyw1zmNDlUZfb5moU-WUWGprrwH7ln_8LDGdmMHF1xI,17
|
91
|
+
natural_pdf-0.1.13.dist-info/RECORD,,
|