natural-pdf 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/api/index.md +386 -0
- docs/assets/favicon.png +3 -0
- docs/assets/favicon.svg +3 -0
- docs/assets/javascripts/custom.js +17 -0
- docs/assets/logo.svg +3 -0
- docs/assets/sample-screen.png +0 -0
- docs/assets/social-preview.png +17 -0
- docs/assets/social-preview.svg +17 -0
- docs/assets/stylesheets/custom.css +65 -0
- docs/document-qa/index.ipynb +435 -0
- docs/document-qa/index.md +79 -0
- docs/element-selection/index.ipynb +915 -0
- docs/element-selection/index.md +229 -0
- docs/index.md +170 -0
- docs/installation/index.md +69 -0
- docs/interactive-widget/index.ipynb +962 -0
- docs/interactive-widget/index.md +12 -0
- docs/layout-analysis/index.ipynb +818 -0
- docs/layout-analysis/index.md +185 -0
- docs/ocr/index.md +222 -0
- docs/pdf-navigation/index.ipynb +314 -0
- docs/pdf-navigation/index.md +97 -0
- docs/regions/index.ipynb +816 -0
- docs/regions/index.md +294 -0
- docs/tables/index.ipynb +658 -0
- docs/tables/index.md +144 -0
- docs/text-analysis/index.ipynb +370 -0
- docs/text-analysis/index.md +105 -0
- docs/text-extraction/index.ipynb +1478 -0
- docs/text-extraction/index.md +292 -0
- docs/tutorials/01-loading-and-extraction.ipynb +1696 -0
- docs/tutorials/01-loading-and-extraction.md +95 -0
- docs/tutorials/02-finding-elements.ipynb +340 -0
- docs/tutorials/02-finding-elements.md +149 -0
- docs/tutorials/03-extracting-blocks.ipynb +147 -0
- docs/tutorials/03-extracting-blocks.md +48 -0
- docs/tutorials/04-table-extraction.ipynb +114 -0
- docs/tutorials/04-table-extraction.md +50 -0
- docs/tutorials/05-excluding-content.ipynb +270 -0
- docs/tutorials/05-excluding-content.md +109 -0
- docs/tutorials/06-document-qa.ipynb +332 -0
- docs/tutorials/06-document-qa.md +91 -0
- docs/tutorials/07-layout-analysis.ipynb +260 -0
- docs/tutorials/07-layout-analysis.md +66 -0
- docs/tutorials/07-working-with-regions.ipynb +409 -0
- docs/tutorials/07-working-with-regions.md +151 -0
- docs/tutorials/08-spatial-navigation.ipynb +508 -0
- docs/tutorials/08-spatial-navigation.md +190 -0
- docs/tutorials/09-section-extraction.ipynb +2434 -0
- docs/tutorials/09-section-extraction.md +256 -0
- docs/tutorials/10-form-field-extraction.ipynb +484 -0
- docs/tutorials/10-form-field-extraction.md +201 -0
- docs/tutorials/11-enhanced-table-processing.ipynb +54 -0
- docs/tutorials/11-enhanced-table-processing.md +9 -0
- docs/tutorials/12-ocr-integration.ipynb +586 -0
- docs/tutorials/12-ocr-integration.md +188 -0
- docs/tutorials/13-semantic-search.ipynb +1888 -0
- docs/tutorials/13-semantic-search.md +77 -0
- docs/visual-debugging/index.ipynb +2970 -0
- docs/visual-debugging/index.md +157 -0
- docs/visual-debugging/region.png +0 -0
- natural_pdf/__init__.py +39 -20
- natural_pdf/analyzers/__init__.py +2 -1
- natural_pdf/analyzers/layout/base.py +32 -24
- natural_pdf/analyzers/layout/docling.py +131 -72
- natural_pdf/analyzers/layout/layout_analyzer.py +156 -113
- natural_pdf/analyzers/layout/layout_manager.py +98 -58
- natural_pdf/analyzers/layout/layout_options.py +32 -17
- natural_pdf/analyzers/layout/paddle.py +152 -95
- natural_pdf/analyzers/layout/surya.py +164 -92
- natural_pdf/analyzers/layout/tatr.py +149 -84
- natural_pdf/analyzers/layout/yolo.py +84 -44
- natural_pdf/analyzers/text_options.py +22 -15
- natural_pdf/analyzers/text_structure.py +131 -85
- natural_pdf/analyzers/utils.py +30 -23
- natural_pdf/collections/pdf_collection.py +126 -98
- natural_pdf/core/__init__.py +1 -1
- natural_pdf/core/element_manager.py +416 -337
- natural_pdf/core/highlighting_service.py +268 -196
- natural_pdf/core/page.py +910 -516
- natural_pdf/core/pdf.py +387 -289
- natural_pdf/elements/__init__.py +1 -1
- natural_pdf/elements/base.py +302 -214
- natural_pdf/elements/collections.py +714 -514
- natural_pdf/elements/line.py +39 -36
- natural_pdf/elements/rect.py +32 -30
- natural_pdf/elements/region.py +854 -883
- natural_pdf/elements/text.py +122 -99
- natural_pdf/exporters/__init__.py +0 -1
- natural_pdf/exporters/searchable_pdf.py +261 -102
- natural_pdf/ocr/__init__.py +23 -14
- natural_pdf/ocr/engine.py +17 -8
- natural_pdf/ocr/engine_easyocr.py +63 -47
- natural_pdf/ocr/engine_paddle.py +97 -68
- natural_pdf/ocr/engine_surya.py +54 -44
- natural_pdf/ocr/ocr_manager.py +88 -62
- natural_pdf/ocr/ocr_options.py +16 -10
- natural_pdf/qa/__init__.py +1 -1
- natural_pdf/qa/document_qa.py +119 -111
- natural_pdf/search/__init__.py +37 -31
- natural_pdf/search/haystack_search_service.py +312 -189
- natural_pdf/search/haystack_utils.py +186 -122
- natural_pdf/search/search_options.py +25 -14
- natural_pdf/search/search_service_protocol.py +12 -6
- natural_pdf/search/searchable_mixin.py +261 -176
- natural_pdf/selectors/__init__.py +2 -1
- natural_pdf/selectors/parser.py +159 -316
- natural_pdf/templates/__init__.py +1 -1
- natural_pdf/utils/highlighting.py +8 -2
- natural_pdf/utils/reading_order.py +65 -63
- natural_pdf/utils/text_extraction.py +195 -0
- natural_pdf/utils/visualization.py +70 -61
- natural_pdf/widgets/__init__.py +2 -3
- natural_pdf/widgets/viewer.py +749 -718
- {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/METADATA +29 -15
- natural_pdf-0.1.5.dist-info/RECORD +134 -0
- natural_pdf-0.1.5.dist-info/top_level.txt +5 -0
- notebooks/Examples.ipynb +1293 -0
- pdfs/.gitkeep +0 -0
- pdfs/01-practice.pdf +543 -0
- pdfs/0500000US42001.pdf +0 -0
- pdfs/0500000US42007.pdf +0 -0
- pdfs/2014 Statistics.pdf +0 -0
- pdfs/2019 Statistics.pdf +0 -0
- pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- pdfs/needs-ocr.pdf +0 -0
- tests/test_loading.py +50 -0
- tests/test_optional_deps.py +298 -0
- natural_pdf-0.1.3.dist-info/RECORD +0 -61
- natural_pdf-0.1.3.dist-info/top_level.txt +0 -1
- {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/WHEEL +0 -0
- {natural_pdf-0.1.3.dist-info → natural_pdf-0.1.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.5
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -8,26 +8,27 @@ Project-URL: Homepage, https://github.com/jsoma/natural-pdf
|
|
8
8
|
Project-URL: Repository, https://github.com/jsoma/natural-pdf
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
10
10
|
Classifier: Operating System :: OS Independent
|
11
|
-
Requires-Python: >=3.
|
11
|
+
Requires-Python: >=3.9
|
12
12
|
Description-Content-Type: text/markdown
|
13
13
|
License-File: LICENSE
|
14
|
-
Requires-Dist: pdfplumber
|
15
|
-
Requires-Dist: Pillow
|
16
|
-
Requires-Dist: colour
|
17
|
-
Requires-Dist: numpy
|
18
|
-
Requires-Dist: urllib3
|
19
|
-
Requires-Dist: torch
|
20
|
-
Requires-Dist: torchvision
|
21
|
-
Requires-Dist: transformers
|
22
|
-
Requires-Dist: huggingface_hub
|
23
|
-
Requires-Dist: ocrmypdf
|
24
|
-
Requires-Dist: pikepdf
|
14
|
+
Requires-Dist: pdfplumber
|
15
|
+
Requires-Dist: Pillow
|
16
|
+
Requires-Dist: colour
|
17
|
+
Requires-Dist: numpy
|
18
|
+
Requires-Dist: urllib3
|
19
|
+
Requires-Dist: torch
|
20
|
+
Requires-Dist: torchvision
|
21
|
+
Requires-Dist: transformers
|
22
|
+
Requires-Dist: huggingface_hub
|
23
|
+
Requires-Dist: ocrmypdf
|
24
|
+
Requires-Dist: pikepdf
|
25
25
|
Provides-Extra: interactive
|
26
26
|
Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
|
27
27
|
Provides-Extra: haystack
|
28
|
-
Requires-Dist: haystack-ai
|
28
|
+
Requires-Dist: haystack-ai; extra == "haystack"
|
29
29
|
Requires-Dist: chroma-haystack; extra == "haystack"
|
30
30
|
Requires-Dist: sentence-transformers; extra == "haystack"
|
31
|
+
Requires-Dist: protobuf<4; extra == "haystack"
|
31
32
|
Provides-Extra: easyocr
|
32
33
|
Requires-Dist: easyocr; extra == "easyocr"
|
33
34
|
Provides-Extra: paddle
|
@@ -38,6 +39,17 @@ Requires-Dist: doclayout_yolo; extra == "layout-yolo"
|
|
38
39
|
Provides-Extra: surya
|
39
40
|
Requires-Dist: surya-ocr; extra == "surya"
|
40
41
|
Provides-Extra: qa
|
42
|
+
Provides-Extra: test
|
43
|
+
Requires-Dist: pytest; extra == "test"
|
44
|
+
Provides-Extra: dev
|
45
|
+
Requires-Dist: black; extra == "dev"
|
46
|
+
Requires-Dist: isort; extra == "dev"
|
47
|
+
Requires-Dist: mypy; extra == "dev"
|
48
|
+
Requires-Dist: pytest; extra == "dev"
|
49
|
+
Requires-Dist: nox; extra == "dev"
|
50
|
+
Requires-Dist: nox-uv; extra == "dev"
|
51
|
+
Requires-Dist: build; extra == "dev"
|
52
|
+
Requires-Dist: uv; extra == "dev"
|
41
53
|
Provides-Extra: all
|
42
54
|
Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "all"
|
43
55
|
Requires-Dist: easyocr; extra == "all"
|
@@ -45,9 +57,11 @@ Requires-Dist: paddlepaddle; extra == "all"
|
|
45
57
|
Requires-Dist: paddleocr; extra == "all"
|
46
58
|
Requires-Dist: doclayout_yolo; extra == "all"
|
47
59
|
Requires-Dist: surya-ocr; extra == "all"
|
48
|
-
Requires-Dist: haystack-ai
|
60
|
+
Requires-Dist: haystack-ai; extra == "all"
|
49
61
|
Requires-Dist: chroma-haystack; extra == "all"
|
50
62
|
Requires-Dist: sentence-transformers; extra == "all"
|
63
|
+
Requires-Dist: protobuf<4; extra == "all"
|
64
|
+
Requires-Dist: pytest; extra == "all"
|
51
65
|
Dynamic: license-file
|
52
66
|
|
53
67
|
# Natural PDF
|
@@ -0,0 +1,134 @@
|
|
1
|
+
docs/index.md,sha256=P1kXZc8aefnxH0bBjvBgj1o3puRiezjUiBLqS4bcUhM,4889
|
2
|
+
docs/api/index.md,sha256=4bn8nYklWJuNDrnY-Kt7sf7IejeAEDhcnqYmjH9GJTA,22405
|
3
|
+
docs/assets/favicon.png,sha256=nxca8jM2Y4GxZKzkmagUHO1GpUREK-GRA5LEFue9OOU,284
|
4
|
+
docs/assets/favicon.svg,sha256=nxca8jM2Y4GxZKzkmagUHO1GpUREK-GRA5LEFue9OOU,284
|
5
|
+
docs/assets/logo.svg,sha256=DdSmjHISSLt20mhf7YDXztigE6w_Reja_ATUYNAPL0M,286
|
6
|
+
docs/assets/sample-screen.png,sha256=qfsrThVoPOksn1ACVbLGe_gdPQmY1EsespXPs1Ocm2Y,182330
|
7
|
+
docs/assets/social-preview.png,sha256=AvyzzM8dC0j5SPFF63bvQrxU4GE1f9j-GUNUv0oA9ts,1085
|
8
|
+
docs/assets/social-preview.svg,sha256=AvyzzM8dC0j5SPFF63bvQrxU4GE1f9j-GUNUv0oA9ts,1085
|
9
|
+
docs/assets/javascripts/custom.js,sha256=0NVHGprwiLPFYdYunJcHjOphzk_EhBSNuOUz5Uzdv_k,594
|
10
|
+
docs/assets/stylesheets/custom.css,sha256=PbTp3k77gzUBUQQ01pDXzpNwo4wUv3aJD-SMBQvQItY,1156
|
11
|
+
docs/document-qa/index.ipynb,sha256=MXJoFhi8TUKK6ZnRFiUBglLGpMbzwdb7LJYfzw8Gp48,528713
|
12
|
+
docs/document-qa/index.md,sha256=mwuO4tothg0OzBXewnj73QEJu46Udq7f1pQBYrKOHwM,2131
|
13
|
+
docs/element-selection/index.ipynb,sha256=-7PwKw1RbPlZ4stzN1Rd1GJ8mwjOD4ySsLcpqVX7chc,1193628
|
14
|
+
docs/element-selection/index.md,sha256=_1P8vI64Y0aSVwUzdRJD4ayb80BJWBLED9TvVpveFx8,6979
|
15
|
+
docs/installation/index.md,sha256=nd4RZrQFR8_vv7Xm3xAzp7z-CQQr9ffAcGa7yuEYn2U,1594
|
16
|
+
docs/interactive-widget/index.ipynb,sha256=zY1rz5N34OUW-OtgcbI6iiOjlIJqXjVcx9OoNWMjuyU,293111
|
17
|
+
docs/interactive-widget/index.md,sha256=tZbq0uYI7Zwo9mLbhXpqeBriuAjazkIyEJeP-jasJ-Q,259
|
18
|
+
docs/layout-analysis/index.ipynb,sha256=dkS_-cu-KGir5G2LGRcxBThKnW0dfA5nPPnwpoYGFtU,1869093
|
19
|
+
docs/layout-analysis/index.md,sha256=ZnH5yd7B_eOLgGxW_4rNlzQs4Tn3Xx1cK3jX43CSpSM,5390
|
20
|
+
docs/ocr/index.md,sha256=e5E9wqY6ehX7sJtrrie6gr6HtQb2neiYEoDKu4Qcjs4,8931
|
21
|
+
docs/pdf-navigation/index.ipynb,sha256=h6yew0HePXK1_c5FmETqzjBQceUBT0MU-vnXx_y91mo,8018
|
22
|
+
docs/pdf-navigation/index.md,sha256=P3b3tsmOcmRtnfRxpsMeTgwm7vApnH_4le_QIwJd51M,2391
|
23
|
+
docs/regions/index.ipynb,sha256=5A-N5A4v4lcXNptOAeI4i7i9Gx66To-Yus8B816dHBk,1303347
|
24
|
+
docs/regions/index.md,sha256=e4aS_vV2FUFHPc5-Up60Ip8PYBIwT9qkjZcNwaS3JbY,8197
|
25
|
+
docs/tables/index.ipynb,sha256=61I9GwJlOM02Mx5aUtzJpRMh0OkpVlqMuEsO1J2s4go,763784
|
26
|
+
docs/tables/index.md,sha256=MVQpkhcWiFJwhMjfPouRVV0nZIUG-PNwFdspc-E8Xow,4428
|
27
|
+
docs/text-analysis/index.ipynb,sha256=iaup8pcQXGp0ZK3IWi-HHssQLdIzWYGYfvZK5i8yjjg,538024
|
28
|
+
docs/text-analysis/index.md,sha256=02pfZemOgV37izV7H-XzKmHu7AedDKLidQ-sKhYaMVw,3527
|
29
|
+
docs/text-extraction/index.ipynb,sha256=809y9ZamXT3bc3GhwwFyoDnlyEpO-kUZ3tIsZZWyrj8,2537087
|
30
|
+
docs/text-extraction/index.md,sha256=b1KfQpvIEelc8cPbFETUnK92az7iB4b7-LqK2DRH8vw,6985
|
31
|
+
docs/tutorials/01-loading-and-extraction.ipynb,sha256=tB1TLios1FaieMUE4RuY_H6fVYpmDwFiMMxW6sillbs,541071
|
32
|
+
docs/tutorials/01-loading-and-extraction.md,sha256=g40J8GhKz-ikM2URj5MqIatKKj4l5kTFozHeVjxDJQA,2191
|
33
|
+
docs/tutorials/02-finding-elements.ipynb,sha256=oEdkN20PXGM1oH9p0QnSsq8yjQJJ9SU9eQhKO_g4RVQ,524374
|
34
|
+
docs/tutorials/02-finding-elements.md,sha256=qOkjcWUzem05of54aKzKvy-MMzRX_S4CyZisVV-73QM,4162
|
35
|
+
docs/tutorials/03-extracting-blocks.ipynb,sha256=2e7fc9t_46x0DM5RLI9aUUfLeVzfFZzzzKphBA8G5lY,260729
|
36
|
+
docs/tutorials/03-extracting-blocks.md,sha256=_kqvhk6rSL7cGp2MSwTJk8LYlJGbK_r_umnCSBdR8XU,1665
|
37
|
+
docs/tutorials/04-table-extraction.ipynb,sha256=o0LdALyko01oHJbMuuqZkVnoF2pfFnMNgtg1IgpSnRI,3973
|
38
|
+
docs/tutorials/04-table-extraction.md,sha256=4q4v17VX8K-ZBtWYy0nbWPccyqB_ybd5Vl_IROmxz6Q,2130
|
39
|
+
docs/tutorials/05-excluding-content.ipynb,sha256=6ZLFm3L_Odr4NJD2iW6mL81y5e3xymQvEVyv-VY5O6U,336197
|
40
|
+
docs/tutorials/05-excluding-content.md,sha256=U52SPlc5knqxiyhRokmxrj06T54r2ENyTfP7BMGykhY,3907
|
41
|
+
docs/tutorials/06-document-qa.ipynb,sha256=BdFQNRqg6U4trACM8lmYSs9RYHJlxcd6DafXdw5mWR8,10303
|
42
|
+
docs/tutorials/06-document-qa.md,sha256=PzPPgw0Rkkfe6sfz3XyKD9S9JbQ40qf4bDzCBvwH1P0,3026
|
43
|
+
docs/tutorials/07-layout-analysis.ipynb,sha256=A5HMljUq7AaDSg_-vFywIQCyjKW2tjMmSPyPdaKFAE4,554523
|
44
|
+
docs/tutorials/07-layout-analysis.md,sha256=NAYVzJTecDnXjo_isbPCSUBSn3c-xM1tELct1Zn5GmI,2533
|
45
|
+
docs/tutorials/07-working-with-regions.ipynb,sha256=cRkr9VRho7J-dx9aIINO253Uz8io3PhD2mjNrASxql4,69510
|
46
|
+
docs/tutorials/07-working-with-regions.md,sha256=Hi18sZhiHV1NDYE-EQ82OPMwrz-j1Krjw_ipT9cTkSI,4379
|
47
|
+
docs/tutorials/08-spatial-navigation.ipynb,sha256=7HAAaK80R82Fy09heZ9WKwijY50DS89qGt_Xf2lB0Vo,193515
|
48
|
+
docs/tutorials/08-spatial-navigation.md,sha256=IMbOYBjayXKE7pHfBjApTxOoKRD8WYj7opf8fsJCtzA,4855
|
49
|
+
docs/tutorials/09-section-extraction.ipynb,sha256=JqkcPDXaifJSYJjbBB3LxB8XCMhbrWs-y5GcuOIvoNA,1100632
|
50
|
+
docs/tutorials/09-section-extraction.md,sha256=Jy_be8ftAl_VPBWl5nEv7_5sKSZPx22DLUcBVHMD3Nc,7832
|
51
|
+
docs/tutorials/10-form-field-extraction.ipynb,sha256=azOE7nDz-rYm-AqXF1NvO41CthR9DTwA_rbXHtobDZ4,280125
|
52
|
+
docs/tutorials/10-form-field-extraction.md,sha256=t9tPlW36vJEhDrKIsHGg_f3P_MK62DT4-ZK1thKFs4Y,5494
|
53
|
+
docs/tutorials/11-enhanced-table-processing.ipynb,sha256=GWH3xn2LTQztOTvkqjbqsUc6IbmuA0hJVFEPZ_O7Jew,1278
|
54
|
+
docs/tutorials/11-enhanced-table-processing.md,sha256=2HK-r1UwU7FLn7zWr_pMG7iLk-i0L4U4-t6ubOEeduc,282
|
55
|
+
docs/tutorials/12-ocr-integration.ipynb,sha256=HeahYziw6aEIzMdTCN8F3XPPBmyVmZ0NU11ZT9JiMy0,23897
|
56
|
+
docs/tutorials/12-ocr-integration.md,sha256=8FYgRciCkAPFF-tW1rkl5CrMGmvCR6oVWT1-f_tJ5as,4831
|
57
|
+
docs/tutorials/13-semantic-search.ipynb,sha256=LhqelW0jxcAW1hpvBrEcCeM6gb5AKD10PJ439ywlHrw,73920
|
58
|
+
docs/tutorials/13-semantic-search.md,sha256=nsNjv0ipYUC3YPSqT5d6dga9ZjObEc04Mc8c0-gsRnU,2914
|
59
|
+
docs/visual-debugging/index.ipynb,sha256=MJ92u3Q9sfRCyDAQM4KWmCrs4QhKwIagbn6ytPF83L4,2175800
|
60
|
+
docs/visual-debugging/index.md,sha256=ueGD2kNFhEAgIHt7qxCfrLRLjHcR7NTD3AU9okBhX9k,4176
|
61
|
+
docs/visual-debugging/region.png,sha256=ULAJs3ZTxMjpD9F4w1DKaZXmhxga3KRq3NrUsXgw28s,67835
|
62
|
+
natural_pdf/__init__.py,sha256=A3Bc-K2F_LtG08IjkJGngZraLsAT2FSm35Yic7i4Tuk,2913
|
63
|
+
natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
|
64
|
+
natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
|
65
|
+
natural_pdf/analyzers/text_structure.py,sha256=9h8hKRz0JWnr13xQr3b4FFr_-hDIjue07WvG7LmT8nc,12827
|
66
|
+
natural_pdf/analyzers/utils.py,sha256=Lgub1kYSTOnNxeLO1klStHLwH-GIuT4vpdqyVRF-Mcg,2084
|
67
|
+
natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
|
68
|
+
natural_pdf/analyzers/layout/base.py,sha256=9dCR758mAuz7ExlHJ-gwnPnETaM4GZV3W1IRei_t13s,6815
|
69
|
+
natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
|
70
|
+
natural_pdf/analyzers/layout/layout_analyzer.py,sha256=6aed1qz5jpndOiakXCBRZAcnyG_waeXi3WPuP5fRvh4,14046
|
71
|
+
natural_pdf/analyzers/layout/layout_manager.py,sha256=kVBPQ8Ex33SYzzm1fhQOtP4qmHOc92dn4BEfff66Qx4,10053
|
72
|
+
natural_pdf/analyzers/layout/layout_options.py,sha256=1u8RVdiRwaq5hhGUpVLIdYXCH6TqEq0UxCPdm6JrdTI,3369
|
73
|
+
natural_pdf/analyzers/layout/paddle.py,sha256=gTI9ZqNd5-t4H5IByGfL32WgcE6JrdchW6jRiGI6ulM,13375
|
74
|
+
natural_pdf/analyzers/layout/surya.py,sha256=vhji6ynHPMyQLHuYRPQcplNi7m_lG4P4NYtWv6MzcME,13556
|
75
|
+
natural_pdf/analyzers/layout/tatr.py,sha256=-GJhMy4d0yx6egkO9-ULAIdQkkQRyAKExoIta-b256U,12971
|
76
|
+
natural_pdf/analyzers/layout/yolo.py,sha256=gy_1DY4sG7jU5rQ7Rb6FUGYI9FFMaozAWiWuxRH5yNw,8294
|
77
|
+
natural_pdf/collections/pdf_collection.py,sha256=E9GVEgGjTBGpNkuSO_f4GMrSB7Tmi60wnkD7pgvBVOM,12175
|
78
|
+
natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
|
79
|
+
natural_pdf/core/element_manager.py,sha256=R2vY7nYbqrtL1FLRbrJvpUyCpf97zUui-2CaEV2CFQc,21858
|
80
|
+
natural_pdf/core/highlighting_service.py,sha256=CTVd7y-fpIreFSe70cTpMu1Pwl6HKMtTHp0bh2U7VXk,32609
|
81
|
+
natural_pdf/core/page.py,sha256=CQy3zgHT6VBmo7n6cZ5RITSUURIEPzPsWqWQAUGFOZc,78302
|
82
|
+
natural_pdf/core/pdf.py,sha256=yaShN4vHJ1BXxWAj4lRk7udTcl6F9ddicBFCV6kd--w,41146
|
83
|
+
natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
|
84
|
+
natural_pdf/elements/base.py,sha256=vUga2Nm8DWoRfKMWVTt5N8UMh1q-YzUNbTfDouHzS2U,35698
|
85
|
+
natural_pdf/elements/collections.py,sha256=2kwOF_-5TePvLbZLVyeEZRt4Im3KlmX8j46giVdxcUE,66000
|
86
|
+
natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
|
87
|
+
natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
|
88
|
+
natural_pdf/elements/region.py,sha256=GOHnq4j4GL-UUQyLdnCLPb0YhBq_YrHn6anecX03t30,67714
|
89
|
+
natural_pdf/elements/text.py,sha256=Q4hKlXyGhz7njnr_-sON1p8Uqqc8qZBLAqu0VUkT-OE,10958
|
90
|
+
natural_pdf/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
91
|
+
natural_pdf/exporters/searchable_pdf.py,sha256=qsaPsnbOOaZHA_aplfZbwQnBoK9KghWm-wzbyRRomeY,16859
|
92
|
+
natural_pdf/ocr/__init__.py,sha256=8ytKCg4VmiX1LkehnCbBNnL-zLc95CmsLJKDfGFtklE,1916
|
93
|
+
natural_pdf/ocr/engine.py,sha256=D8B8QHF_8E68JaklyHoNUA3hGn5ld7TGKHD7Ho6mJMg,4316
|
94
|
+
natural_pdf/ocr/engine_easyocr.py,sha256=aE9alo9rKxZddQAt3liWvlGssH8b2jRtmLwpndttlRM,8617
|
95
|
+
natural_pdf/ocr/engine_paddle.py,sha256=4ELH9P9-FOTYj_CbUOKiW2gf5U9v2Tscx0fX0nW8j84,9248
|
96
|
+
natural_pdf/ocr/engine_surya.py,sha256=pzv4CKneXlRvCXsdImAEou81MOTwMYCNhNIVG6Fg3rU,7922
|
97
|
+
natural_pdf/ocr/ocr_manager.py,sha256=S2ndzKdB-nmK9glbSmn7srotIgKweCByeGIX6SOoEY4,10465
|
98
|
+
natural_pdf/ocr/ocr_options.py,sha256=JZXRxjsQuKf9GJMt56YikcOqsTQ7SvXOv2XZ7z1qnB8,3794
|
99
|
+
natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
|
100
|
+
natural_pdf/qa/document_qa.py,sha256=W4E4vS_Eox_IBsYpVb0ifQbJb0FP-PYEIG93CU3rUkE,15246
|
101
|
+
natural_pdf/search/__init__.py,sha256=EB_HRwlktJn5WGPVtSaRbOQNjLAZTxujeYf_eN-zd2U,4191
|
102
|
+
natural_pdf/search/haystack_search_service.py,sha256=6RjTFWbTo3gaO-90IF6PEuo_9WRwOdj232eWn3OT0BQ,29270
|
103
|
+
natural_pdf/search/haystack_utils.py,sha256=UI4eu3SVieGR_QnBtLhP8Fjtt2AJgeLgxrpa_dBmD6k,19289
|
104
|
+
natural_pdf/search/search_options.py,sha256=sq_e8_jSROicD94b_xtDtLnjEr_Zsy4icjzPkK0a8QA,3566
|
105
|
+
natural_pdf/search/search_service_protocol.py,sha256=ybNcF_NoLZuIx0rb4XB1dsDl3o_LAaWR1fVVKld2TxI,6818
|
106
|
+
natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcfft4OOBDxY,26390
|
107
|
+
natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
|
108
|
+
natural_pdf/selectors/parser.py,sha256=59_GSsTApM6MFvtqhrrmbKaBfODPbGXMluvvQJcrqhE,15754
|
109
|
+
natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
|
110
|
+
natural_pdf/templates/ocr_debug.html,sha256=Zy9StzBeHFQU8ity6cjFSZLe3TY0QOabUux4c5WQUzs,19171
|
111
|
+
natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
|
112
|
+
natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
|
113
|
+
natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
|
114
|
+
natural_pdf/utils/text_extraction.py,sha256=VlbkXg14GlvwYTjRJWa8FVUigETY3Hq0v8NlIRnzYkM,8619
|
115
|
+
natural_pdf/utils/visualization.py,sha256=ir5PgpptRuVuVeRT9IcdTsNeEpdOYD_69rByjHQ7JhI,8592
|
116
|
+
natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
|
117
|
+
natural_pdf/widgets/viewer.py,sha256=Aiw6kuBc0WkhcZrPNKyLNzzWbmtmU6rvOmHV0IuXCBk,40862
|
118
|
+
natural_pdf/widgets/frontend/viewer.js,sha256=w8ywfz_IOAAv2nP_qaf2VBUkF1KhjT3zorhJxM1-CfU,4371
|
119
|
+
natural_pdf-0.1.5.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
|
120
|
+
notebooks/Examples.ipynb,sha256=l4YMtMEx_DWBzWIjl9CmBkWTo0g_nK8l_XWOyzYooQM,4275170
|
121
|
+
pdfs/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
122
|
+
pdfs/01-practice.pdf,sha256=dxWyJIa2cm7bALE3BWDJ2dg3inyFlo1n8ntVyy0hkTo,7906
|
123
|
+
pdfs/0500000US42001.pdf,sha256=VHn5gxlysgD6oJUCndeWXe_RwOrOraO1uIRt_fu8YNY,315883
|
124
|
+
pdfs/0500000US42007.pdf,sha256=pTfu_IVKvHpv9WUyN3QSiGevAOpPZmnr4QL1z-rYQ4E,1168633
|
125
|
+
pdfs/2014 Statistics.pdf,sha256=B-30OQVjqj_3718-G9cGUefNddnz-MosPdHAzfGfkcc,9559
|
126
|
+
pdfs/2019 Statistics.pdf,sha256=reuSJxvAlx9_P-pW7IPqzox0jFCxSPbK1i1-WFu-uGA,511439
|
127
|
+
pdfs/Atlanta_Public_Schools_GA_sample.pdf,sha256=PLBh_uWJQH0MnBaSm5ng5Ima63_m6Mi11CjdravB_S8,137689
|
128
|
+
pdfs/needs-ocr.pdf,sha256=vusKiLxSOlELUTetfZfaotNU54RtMj9PCzGfLc2cuNs,139305
|
129
|
+
tests/test_loading.py,sha256=AHjnIKqEAdtQa28kEAhFQTJ0Nnu49AmxnPM8YE8_EP0,1770
|
130
|
+
tests/test_optional_deps.py,sha256=e9H3ylLsB4cnyC3TVMgUbBMzmSbq6MlH8jn_pqh4Hus,12111
|
131
|
+
natural_pdf-0.1.5.dist-info/METADATA,sha256=7lBaQX1e1ibQibz2ZRYt0DsDy84k3sk6dGJqT9lQvWg,5466
|
132
|
+
natural_pdf-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
133
|
+
natural_pdf-0.1.5.dist-info/top_level.txt,sha256=N44f8aOLMpI6MzrNHsCD8MzElkir_H1nPUGZ4QToWqI,38
|
134
|
+
natural_pdf-0.1.5.dist-info/RECORD,,
|