natural-pdf 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. docs/finetuning/index.md +176 -0
  2. docs/ocr/index.md +34 -47
  3. docs/tutorials/01-loading-and-extraction.ipynb +34 -1536
  4. docs/tutorials/02-finding-elements.ipynb +42 -42
  5. docs/tutorials/03-extracting-blocks.ipynb +17 -17
  6. docs/tutorials/04-table-extraction.ipynb +12 -12
  7. docs/tutorials/05-excluding-content.ipynb +30 -30
  8. docs/tutorials/06-document-qa.ipynb +28 -28
  9. docs/tutorials/07-layout-analysis.ipynb +63 -35
  10. docs/tutorials/07-working-with-regions.ipynb +55 -51
  11. docs/tutorials/07-working-with-regions.md +2 -2
  12. docs/tutorials/08-spatial-navigation.ipynb +60 -60
  13. docs/tutorials/09-section-extraction.ipynb +113 -113
  14. docs/tutorials/10-form-field-extraction.ipynb +78 -50
  15. docs/tutorials/11-enhanced-table-processing.ipynb +6 -6
  16. docs/tutorials/12-ocr-integration.ipynb +149 -131
  17. docs/tutorials/12-ocr-integration.md +0 -13
  18. docs/tutorials/13-semantic-search.ipynb +313 -873
  19. natural_pdf/__init__.py +21 -22
  20. natural_pdf/analyzers/layout/gemini.py +280 -0
  21. natural_pdf/analyzers/layout/layout_manager.py +28 -1
  22. natural_pdf/analyzers/layout/layout_options.py +11 -0
  23. natural_pdf/analyzers/layout/yolo.py +6 -2
  24. natural_pdf/collections/pdf_collection.py +24 -0
  25. natural_pdf/core/element_manager.py +18 -13
  26. natural_pdf/core/page.py +174 -36
  27. natural_pdf/core/pdf.py +156 -42
  28. natural_pdf/elements/base.py +9 -17
  29. natural_pdf/elements/collections.py +99 -38
  30. natural_pdf/elements/region.py +77 -37
  31. natural_pdf/elements/text.py +5 -0
  32. natural_pdf/exporters/__init__.py +4 -0
  33. natural_pdf/exporters/base.py +61 -0
  34. natural_pdf/exporters/paddleocr.py +345 -0
  35. natural_pdf/ocr/__init__.py +57 -36
  36. natural_pdf/ocr/engine.py +160 -49
  37. natural_pdf/ocr/engine_easyocr.py +178 -157
  38. natural_pdf/ocr/engine_paddle.py +114 -189
  39. natural_pdf/ocr/engine_surya.py +87 -144
  40. natural_pdf/ocr/ocr_factory.py +125 -0
  41. natural_pdf/ocr/ocr_manager.py +65 -89
  42. natural_pdf/ocr/ocr_options.py +8 -13
  43. natural_pdf/ocr/utils.py +113 -0
  44. natural_pdf/templates/finetune/fine_tune_paddleocr.md +415 -0
  45. natural_pdf/templates/spa/css/style.css +334 -0
  46. natural_pdf/templates/spa/index.html +31 -0
  47. natural_pdf/templates/spa/js/app.js +472 -0
  48. natural_pdf/templates/spa/words.txt +235976 -0
  49. natural_pdf/utils/debug.py +34 -0
  50. natural_pdf/utils/identifiers.py +33 -0
  51. natural_pdf/utils/packaging.py +485 -0
  52. natural_pdf/utils/text_extraction.py +44 -64
  53. natural_pdf/utils/visualization.py +1 -1
  54. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.7.dist-info}/METADATA +44 -20
  55. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.7.dist-info}/RECORD +58 -47
  56. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.7.dist-info}/WHEEL +1 -1
  57. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.7.dist-info}/top_level.txt +0 -1
  58. natural_pdf/templates/ocr_debug.html +0 -517
  59. tests/test_loading.py +0 -50
  60. tests/test_optional_deps.py +0 -298
  61. {natural_pdf-0.1.5.dist-info → natural_pdf-0.1.7.dist-info}/licenses/LICENSE +0 -0
@@ -192,7 +192,7 @@ def merge_images_with_legend(
192
192
  if not legend:
193
193
  return image # Return original image if legend is None or empty
194
194
 
195
- bg_color = (255, 255, 255, 255) # Always use white for the merged background
195
+ bg_color = (255, 255, 255, 255) # Always use white for the merged background
196
196
 
197
197
  if position == "right":
198
198
  # Create a new image with extra width for the legend
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -16,29 +16,34 @@ Requires-Dist: Pillow
16
16
  Requires-Dist: colour
17
17
  Requires-Dist: numpy
18
18
  Requires-Dist: urllib3
19
- Requires-Dist: torch
20
- Requires-Dist: torchvision
21
- Requires-Dist: transformers
22
- Requires-Dist: huggingface_hub
23
- Requires-Dist: ocrmypdf
24
- Requires-Dist: pikepdf
19
+ Requires-Dist: tqdm
25
20
  Provides-Extra: interactive
26
21
  Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
27
22
  Provides-Extra: haystack
28
23
  Requires-Dist: haystack-ai; extra == "haystack"
29
24
  Requires-Dist: chroma-haystack; extra == "haystack"
30
25
  Requires-Dist: sentence-transformers; extra == "haystack"
31
- Requires-Dist: protobuf<4; extra == "haystack"
26
+ Requires-Dist: natural-pdf[core-ml]; extra == "haystack"
32
27
  Provides-Extra: easyocr
33
28
  Requires-Dist: easyocr; extra == "easyocr"
29
+ Requires-Dist: natural-pdf[core-ml]; extra == "easyocr"
34
30
  Provides-Extra: paddle
35
31
  Requires-Dist: paddlepaddle; extra == "paddle"
36
32
  Requires-Dist: paddleocr; extra == "paddle"
37
33
  Provides-Extra: layout-yolo
38
34
  Requires-Dist: doclayout_yolo; extra == "layout-yolo"
35
+ Requires-Dist: natural-pdf[core-ml]; extra == "layout-yolo"
39
36
  Provides-Extra: surya
40
37
  Requires-Dist: surya-ocr; extra == "surya"
38
+ Requires-Dist: natural-pdf[core-ml]; extra == "surya"
41
39
  Provides-Extra: qa
40
+ Requires-Dist: natural-pdf[core-ml]; extra == "qa"
41
+ Provides-Extra: docling
42
+ Requires-Dist: docling; extra == "docling"
43
+ Requires-Dist: natural-pdf[core-ml]; extra == "docling"
44
+ Provides-Extra: llm
45
+ Requires-Dist: openai>=1.0; extra == "llm"
46
+ Requires-Dist: pydantic; extra == "llm"
42
47
  Provides-Extra: test
43
48
  Requires-Dist: pytest; extra == "test"
44
49
  Provides-Extra: dev
@@ -50,18 +55,33 @@ Requires-Dist: nox; extra == "dev"
50
55
  Requires-Dist: nox-uv; extra == "dev"
51
56
  Requires-Dist: build; extra == "dev"
52
57
  Requires-Dist: uv; extra == "dev"
58
+ Requires-Dist: pipdeptree; extra == "dev"
59
+ Requires-Dist: nbformat; extra == "dev"
60
+ Requires-Dist: jupytext; extra == "dev"
61
+ Requires-Dist: nbclient; extra == "dev"
53
62
  Provides-Extra: all
54
- Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "all"
55
- Requires-Dist: easyocr; extra == "all"
56
- Requires-Dist: paddlepaddle; extra == "all"
57
- Requires-Dist: paddleocr; extra == "all"
58
- Requires-Dist: doclayout_yolo; extra == "all"
59
- Requires-Dist: surya-ocr; extra == "all"
60
- Requires-Dist: haystack-ai; extra == "all"
61
- Requires-Dist: chroma-haystack; extra == "all"
62
- Requires-Dist: sentence-transformers; extra == "all"
63
- Requires-Dist: protobuf<4; extra == "all"
64
- Requires-Dist: pytest; extra == "all"
63
+ Requires-Dist: natural-pdf[interactive]; extra == "all"
64
+ Requires-Dist: natural-pdf[haystack]; extra == "all"
65
+ Requires-Dist: natural-pdf[easyocr]; extra == "all"
66
+ Requires-Dist: natural-pdf[paddle]; extra == "all"
67
+ Requires-Dist: natural-pdf[layout_yolo]; extra == "all"
68
+ Requires-Dist: natural-pdf[surya]; extra == "all"
69
+ Requires-Dist: natural-pdf[qa]; extra == "all"
70
+ Requires-Dist: natural-pdf[ocr-export]; extra == "all"
71
+ Requires-Dist: natural-pdf[docling]; extra == "all"
72
+ Requires-Dist: natural-pdf[llm]; extra == "all"
73
+ Requires-Dist: natural-pdf[test]; extra == "all"
74
+ Provides-Extra: core-ml
75
+ Requires-Dist: torch; extra == "core-ml"
76
+ Requires-Dist: torchvision; extra == "core-ml"
77
+ Requires-Dist: transformers[sentencepiece]; extra == "core-ml"
78
+ Requires-Dist: huggingface_hub; extra == "core-ml"
79
+ Provides-Extra: ocr-export
80
+ Requires-Dist: ocrmypdf; extra == "ocr-export"
81
+ Requires-Dist: pikepdf; extra == "ocr-export"
82
+ Provides-Extra: export-extras
83
+ Requires-Dist: jupytext; extra == "export-extras"
84
+ Requires-Dist: nbformat; extra == "export-extras"
65
85
  Dynamic: license-file
66
86
 
67
87
  # Natural PDF
@@ -89,6 +109,10 @@ pip install natural-pdf[easyocr]
89
109
  pip install natural-pdf[surya]
90
110
  pip install natural-pdf[paddle]
91
111
 
112
+ # Example: Install support for features using Large Language Models (e.g., via OpenAI-compatible APIs)
113
+ pip install natural-pdf[llm]
114
+ # (May require setting API key environment variables, e.g., GOOGLE_API_KEY for Gemini)
115
+
92
116
  # Example: Install with interactive viewer support
93
117
  pip install natural-pdf[interactive]
94
118
 
@@ -141,7 +165,7 @@ Natural PDF offers a range of features for working with PDFs:
141
165
  * **Spatial Navigation:** Select content relative to other elements (`heading.below()`, `element.select_until(...)`).
142
166
  * **Text & Table Extraction:** Get clean text or structured table data, automatically handling exclusions.
143
167
  * **OCR Integration:** Extract text from scanned documents using engines like EasyOCR, PaddleOCR, or Surya.
144
- * **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using AI models.
168
+ * **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using various engines (e.g., YOLO, Paddle, LLM via API).
145
169
  * **Document QA:** Ask natural language questions about your document's content.
146
170
  * **Semantic Search:** Index PDFs and find relevant pages or documents based on semantic meaning using Haystack.
147
171
  * **Visual Debugging:** Highlight elements and use an interactive viewer or save images to understand your selections.
@@ -12,12 +12,13 @@ docs/document-qa/index.ipynb,sha256=MXJoFhi8TUKK6ZnRFiUBglLGpMbzwdb7LJYfzw8Gp48,
12
12
  docs/document-qa/index.md,sha256=mwuO4tothg0OzBXewnj73QEJu46Udq7f1pQBYrKOHwM,2131
13
13
  docs/element-selection/index.ipynb,sha256=-7PwKw1RbPlZ4stzN1Rd1GJ8mwjOD4ySsLcpqVX7chc,1193628
14
14
  docs/element-selection/index.md,sha256=_1P8vI64Y0aSVwUzdRJD4ayb80BJWBLED9TvVpveFx8,6979
15
+ docs/finetuning/index.md,sha256=Ur3zqSaR0X8PvBCSyI7cFiDv5qZ6Jtv4omBKXCKAzEk,9200
15
16
  docs/installation/index.md,sha256=nd4RZrQFR8_vv7Xm3xAzp7z-CQQr9ffAcGa7yuEYn2U,1594
16
17
  docs/interactive-widget/index.ipynb,sha256=zY1rz5N34OUW-OtgcbI6iiOjlIJqXjVcx9OoNWMjuyU,293111
17
18
  docs/interactive-widget/index.md,sha256=tZbq0uYI7Zwo9mLbhXpqeBriuAjazkIyEJeP-jasJ-Q,259
18
19
  docs/layout-analysis/index.ipynb,sha256=dkS_-cu-KGir5G2LGRcxBThKnW0dfA5nPPnwpoYGFtU,1869093
19
20
  docs/layout-analysis/index.md,sha256=ZnH5yd7B_eOLgGxW_4rNlzQs4Tn3Xx1cK3jX43CSpSM,5390
20
- docs/ocr/index.md,sha256=e5E9wqY6ehX7sJtrrie6gr6HtQb2neiYEoDKu4Qcjs4,8931
21
+ docs/ocr/index.md,sha256=uuzTqcAgUmMN7jZVq8VkVcbRDHn8Yg2nJVvHJ-bDK-Y,8177
21
22
  docs/pdf-navigation/index.ipynb,sha256=h6yew0HePXK1_c5FmETqzjBQceUBT0MU-vnXx_y91mo,8018
22
23
  docs/pdf-navigation/index.md,sha256=P3b3tsmOcmRtnfRxpsMeTgwm7vApnH_4le_QIwJd51M,2391
23
24
  docs/regions/index.ipynb,sha256=5A-N5A4v4lcXNptOAeI4i7i9Gx66To-Yus8B816dHBk,1303347
@@ -28,38 +29,38 @@ docs/text-analysis/index.ipynb,sha256=iaup8pcQXGp0ZK3IWi-HHssQLdIzWYGYfvZK5i8yjj
28
29
  docs/text-analysis/index.md,sha256=02pfZemOgV37izV7H-XzKmHu7AedDKLidQ-sKhYaMVw,3527
29
30
  docs/text-extraction/index.ipynb,sha256=809y9ZamXT3bc3GhwwFyoDnlyEpO-kUZ3tIsZZWyrj8,2537087
30
31
  docs/text-extraction/index.md,sha256=b1KfQpvIEelc8cPbFETUnK92az7iB4b7-LqK2DRH8vw,6985
31
- docs/tutorials/01-loading-and-extraction.ipynb,sha256=tB1TLios1FaieMUE4RuY_H6fVYpmDwFiMMxW6sillbs,541071
32
+ docs/tutorials/01-loading-and-extraction.ipynb,sha256=SCW26hxW9PhOspiR-2X5CD6L1EiJRfXouO-OF_Nc718,4548
32
33
  docs/tutorials/01-loading-and-extraction.md,sha256=g40J8GhKz-ikM2URj5MqIatKKj4l5kTFozHeVjxDJQA,2191
33
- docs/tutorials/02-finding-elements.ipynb,sha256=oEdkN20PXGM1oH9p0QnSsq8yjQJJ9SU9eQhKO_g4RVQ,524374
34
+ docs/tutorials/02-finding-elements.ipynb,sha256=k1CSz47_atA9D6DXfQzVS64t5-L-KjssU2VuFvdy7oU,524374
34
35
  docs/tutorials/02-finding-elements.md,sha256=qOkjcWUzem05of54aKzKvy-MMzRX_S4CyZisVV-73QM,4162
35
- docs/tutorials/03-extracting-blocks.ipynb,sha256=2e7fc9t_46x0DM5RLI9aUUfLeVzfFZzzzKphBA8G5lY,260729
36
+ docs/tutorials/03-extracting-blocks.ipynb,sha256=1UjdP0j3kPCE3aU8p1jBCBqflG-xRLli2Ltx80DhOVk,260729
36
37
  docs/tutorials/03-extracting-blocks.md,sha256=_kqvhk6rSL7cGp2MSwTJk8LYlJGbK_r_umnCSBdR8XU,1665
37
- docs/tutorials/04-table-extraction.ipynb,sha256=o0LdALyko01oHJbMuuqZkVnoF2pfFnMNgtg1IgpSnRI,3973
38
+ docs/tutorials/04-table-extraction.ipynb,sha256=u92Wppw1qHG__Mx3ZKtETm4AWuGF8X-Ln3kvmF8zCSo,3973
38
39
  docs/tutorials/04-table-extraction.md,sha256=4q4v17VX8K-ZBtWYy0nbWPccyqB_ybd5Vl_IROmxz6Q,2130
39
- docs/tutorials/05-excluding-content.ipynb,sha256=6ZLFm3L_Odr4NJD2iW6mL81y5e3xymQvEVyv-VY5O6U,336197
40
+ docs/tutorials/05-excluding-content.ipynb,sha256=oSg8ll_nuWOfQHGLp0fNKVeyYyn_L8a-F7HJADjjdq8,336857
40
41
  docs/tutorials/05-excluding-content.md,sha256=U52SPlc5knqxiyhRokmxrj06T54r2ENyTfP7BMGykhY,3907
41
- docs/tutorials/06-document-qa.ipynb,sha256=BdFQNRqg6U4trACM8lmYSs9RYHJlxcd6DafXdw5mWR8,10303
42
+ docs/tutorials/06-document-qa.ipynb,sha256=Facyqns8jw2bTvsOSbNnsLskFH8kg1JTz4kmJ16dpcE,10303
42
43
  docs/tutorials/06-document-qa.md,sha256=PzPPgw0Rkkfe6sfz3XyKD9S9JbQ40qf4bDzCBvwH1P0,3026
43
- docs/tutorials/07-layout-analysis.ipynb,sha256=A5HMljUq7AaDSg_-vFywIQCyjKW2tjMmSPyPdaKFAE4,554523
44
+ docs/tutorials/07-layout-analysis.ipynb,sha256=tdNnMro1V66YPx0h96HZnujSm-zDpy7o78euQix4lyU,559517
44
45
  docs/tutorials/07-layout-analysis.md,sha256=NAYVzJTecDnXjo_isbPCSUBSn3c-xM1tELct1Zn5GmI,2533
45
- docs/tutorials/07-working-with-regions.ipynb,sha256=cRkr9VRho7J-dx9aIINO253Uz8io3PhD2mjNrASxql4,69510
46
- docs/tutorials/07-working-with-regions.md,sha256=Hi18sZhiHV1NDYE-EQ82OPMwrz-j1Krjw_ipT9cTkSI,4379
47
- docs/tutorials/08-spatial-navigation.ipynb,sha256=7HAAaK80R82Fy09heZ9WKwijY50DS89qGt_Xf2lB0Vo,193515
46
+ docs/tutorials/07-working-with-regions.ipynb,sha256=s4BFKKbKUemmURCpg6j91rNI8eFFOJUgxY4QN4alK4I,69584
47
+ docs/tutorials/07-working-with-regions.md,sha256=oanbTFSQ-topAVd9kjfkaPiMjHcx6Y8cqyxVbmxLhgs,4365
48
+ docs/tutorials/08-spatial-navigation.ipynb,sha256=jfwF6OHLvrMvaaknp-9AfUvr-pPXjPljUyGnFKF9wsw,194523
48
49
  docs/tutorials/08-spatial-navigation.md,sha256=IMbOYBjayXKE7pHfBjApTxOoKRD8WYj7opf8fsJCtzA,4855
49
- docs/tutorials/09-section-extraction.ipynb,sha256=JqkcPDXaifJSYJjbBB3LxB8XCMhbrWs-y5GcuOIvoNA,1100632
50
+ docs/tutorials/09-section-extraction.ipynb,sha256=Aqcy08oXTJ1pkJCmVVumndje-4WXnbkl_QfJPhps7f8,1100736
50
51
  docs/tutorials/09-section-extraction.md,sha256=Jy_be8ftAl_VPBWl5nEv7_5sKSZPx22DLUcBVHMD3Nc,7832
51
- docs/tutorials/10-form-field-extraction.ipynb,sha256=azOE7nDz-rYm-AqXF1NvO41CthR9DTwA_rbXHtobDZ4,280125
52
+ docs/tutorials/10-form-field-extraction.ipynb,sha256=yyopvBoS5vkKKtUQ6rZ4Kyo5E0Olp2WYnmunhfzSQkQ,281491
52
53
  docs/tutorials/10-form-field-extraction.md,sha256=t9tPlW36vJEhDrKIsHGg_f3P_MK62DT4-ZK1thKFs4Y,5494
53
- docs/tutorials/11-enhanced-table-processing.ipynb,sha256=GWH3xn2LTQztOTvkqjbqsUc6IbmuA0hJVFEPZ_O7Jew,1278
54
+ docs/tutorials/11-enhanced-table-processing.ipynb,sha256=BWpVUhtjaAX7r4OOdiy5gQgrSqREaoB0L5TuHqoHEn8,1278
54
55
  docs/tutorials/11-enhanced-table-processing.md,sha256=2HK-r1UwU7FLn7zWr_pMG7iLk-i0L4U4-t6ubOEeduc,282
55
- docs/tutorials/12-ocr-integration.ipynb,sha256=HeahYziw6aEIzMdTCN8F3XPPBmyVmZ0NU11ZT9JiMy0,23897
56
- docs/tutorials/12-ocr-integration.md,sha256=8FYgRciCkAPFF-tW1rkl5CrMGmvCR6oVWT1-f_tJ5as,4831
57
- docs/tutorials/13-semantic-search.ipynb,sha256=LhqelW0jxcAW1hpvBrEcCeM6gb5AKD10PJ439ywlHrw,73920
56
+ docs/tutorials/12-ocr-integration.ipynb,sha256=xurkoPwgk2p6mhmPdCehy9ccuYHrAhBCb1zGnjRbZ7Y,26724
57
+ docs/tutorials/12-ocr-integration.md,sha256=wU90sfnm1R6BoMFq-orbGpl8OUVcm-wEBTlK0bLgJC4,4572
58
+ docs/tutorials/13-semantic-search.ipynb,sha256=5h806AIal3EwXPVuXJESbXwdUImCx7fo0mo5-f3Dj44,42817
58
59
  docs/tutorials/13-semantic-search.md,sha256=nsNjv0ipYUC3YPSqT5d6dga9ZjObEc04Mc8c0-gsRnU,2914
59
60
  docs/visual-debugging/index.ipynb,sha256=MJ92u3Q9sfRCyDAQM4KWmCrs4QhKwIagbn6ytPF83L4,2175800
60
61
  docs/visual-debugging/index.md,sha256=ueGD2kNFhEAgIHt7qxCfrLRLjHcR7NTD3AU9okBhX9k,4176
61
62
  docs/visual-debugging/region.png,sha256=ULAJs3ZTxMjpD9F4w1DKaZXmhxga3KRq3NrUsXgw28s,67835
62
- natural_pdf/__init__.py,sha256=A3Bc-K2F_LtG08IjkJGngZraLsAT2FSm35Yic7i4Tuk,2913
63
+ natural_pdf/__init__.py,sha256=UdS-I3d7MzSvpxL-QMQUSUO5IGhh8c5of34BIs49TaU,2670
63
64
  natural_pdf/analyzers/__init__.py,sha256=dIXjsMqoxKmd9OOnSBzn12wvdIz7D7YNQRAnXslpJSM,142
64
65
  natural_pdf/analyzers/text_options.py,sha256=nE2E1pp4psDPpxmtarvNtEQsgozPkyFRjv0TVP2HTyU,2865
65
66
  natural_pdf/analyzers/text_structure.py,sha256=9h8hKRz0JWnr13xQr3b4FFr_-hDIjue07WvG7LmT8nc,12827
@@ -67,35 +68,40 @@ natural_pdf/analyzers/utils.py,sha256=Lgub1kYSTOnNxeLO1klStHLwH-GIuT4vpdqyVRF-Mc
67
68
  natural_pdf/analyzers/layout/__init__.py,sha256=oq1uJ5UkGGMbBKGirV1aRKK3hxAUyjTLywYkPCQH1f0,33
68
69
  natural_pdf/analyzers/layout/base.py,sha256=9dCR758mAuz7ExlHJ-gwnPnETaM4GZV3W1IRei_t13s,6815
69
70
  natural_pdf/analyzers/layout/docling.py,sha256=4BJYyNVR6VegZGxyisvNIBBRvVk6YKPyDVs7ZdVfzEU,12676
71
+ natural_pdf/analyzers/layout/gemini.py,sha256=CzJPWyyEghuCNpu2CMb6OA6FtBGdGhXspHjsjy6I4JE,11195
70
72
  natural_pdf/analyzers/layout/layout_analyzer.py,sha256=6aed1qz5jpndOiakXCBRZAcnyG_waeXi3WPuP5fRvh4,14046
71
- natural_pdf/analyzers/layout/layout_manager.py,sha256=kVBPQ8Ex33SYzzm1fhQOtP4qmHOc92dn4BEfff66Qx4,10053
72
- natural_pdf/analyzers/layout/layout_options.py,sha256=1u8RVdiRwaq5hhGUpVLIdYXCH6TqEq0UxCPdm6JrdTI,3369
73
+ natural_pdf/analyzers/layout/layout_manager.py,sha256=Vh8EKiszKqjELofxQ1eiVLKVjibyjBsZpLFzTf0_21E,11179
74
+ natural_pdf/analyzers/layout/layout_options.py,sha256=s7xr4brE3OutE6aYNAi2PniRy1p2w8a342C2xGpvX2s,3777
73
75
  natural_pdf/analyzers/layout/paddle.py,sha256=gTI9ZqNd5-t4H5IByGfL32WgcE6JrdchW6jRiGI6ulM,13375
74
76
  natural_pdf/analyzers/layout/surya.py,sha256=vhji6ynHPMyQLHuYRPQcplNi7m_lG4P4NYtWv6MzcME,13556
75
77
  natural_pdf/analyzers/layout/tatr.py,sha256=-GJhMy4d0yx6egkO9-ULAIdQkkQRyAKExoIta-b256U,12971
76
- natural_pdf/analyzers/layout/yolo.py,sha256=gy_1DY4sG7jU5rQ7Rb6FUGYI9FFMaozAWiWuxRH5yNw,8294
77
- natural_pdf/collections/pdf_collection.py,sha256=E9GVEgGjTBGpNkuSO_f4GMrSB7Tmi60wnkD7pgvBVOM,12175
78
+ natural_pdf/analyzers/layout/yolo.py,sha256=ANo2U4EZgeN2eYKM1bZIuysiuJLgwl4JeQchrRxOKwA,8388
79
+ natural_pdf/collections/pdf_collection.py,sha256=afE0tNIfwA7IRCc8g0EGgiBgJz3TuJbEzZ5meDNAnQw,13272
78
80
  natural_pdf/core/__init__.py,sha256=QC8H4M3KbXwMFiQORZ0pdPlzx1Ix6oKKQSS7Ib2KEaA,38
79
- natural_pdf/core/element_manager.py,sha256=R2vY7nYbqrtL1FLRbrJvpUyCpf97zUui-2CaEV2CFQc,21858
81
+ natural_pdf/core/element_manager.py,sha256=RjLCzeHDRJCoCx1W_6jGg8KsiCTuXz7Uc2BoSY4M7mE,22144
80
82
  natural_pdf/core/highlighting_service.py,sha256=CTVd7y-fpIreFSe70cTpMu1Pwl6HKMtTHp0bh2U7VXk,32609
81
- natural_pdf/core/page.py,sha256=CQy3zgHT6VBmo7n6cZ5RITSUURIEPzPsWqWQAUGFOZc,78302
82
- natural_pdf/core/pdf.py,sha256=yaShN4vHJ1BXxWAj4lRk7udTcl6F9ddicBFCV6kd--w,41146
83
+ natural_pdf/core/page.py,sha256=emS6jJdb-J7xnK8Uo8Hs1n0plbIAGA_YH6kmp36wVgM,84955
84
+ natural_pdf/core/pdf.py,sha256=hOR1i3bJjfJCBCI2m4pBNAMEYpmbtG905QbFe-l8gZU,46525
83
85
  natural_pdf/elements/__init__.py,sha256=S8XeiNWJ1WcgnyYKdYV1yxQlAxCCO3FfITT8MQwNbyk,41
84
- natural_pdf/elements/base.py,sha256=vUga2Nm8DWoRfKMWVTt5N8UMh1q-YzUNbTfDouHzS2U,35698
85
- natural_pdf/elements/collections.py,sha256=2kwOF_-5TePvLbZLVyeEZRt4Im3KlmX8j46giVdxcUE,66000
86
+ natural_pdf/elements/base.py,sha256=UtoSD-c_s0yiLpWZrIIJjeJ9MgGz_4R0UHYcsFWH6bc,35157
87
+ natural_pdf/elements/collections.py,sha256=w0JqLwn57Je00Aq4Ay8SeYmxPjPJvUOtkLbgfGM2-nM,68882
86
88
  natural_pdf/elements/line.py,sha256=7cow3xMUKhAj7zoQz7OaB1eIH2_a8B__LB7iGJ4Mb0o,4612
87
89
  natural_pdf/elements/rect.py,sha256=kiVa3e377ZnqIOXc89d9ZSY4EcmDxtccdtUw-HOQzpw,3796
88
- natural_pdf/elements/region.py,sha256=GOHnq4j4GL-UUQyLdnCLPb0YhBq_YrHn6anecX03t30,67714
89
- natural_pdf/elements/text.py,sha256=Q4hKlXyGhz7njnr_-sON1p8Uqqc8qZBLAqu0VUkT-OE,10958
90
- natural_pdf/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
+ natural_pdf/elements/region.py,sha256=9E21LYQWB98coi_73Kpf9mQ60p9ElzGOJzxdtgOUfh4,69662
91
+ natural_pdf/elements/text.py,sha256=8PNKSLUgXUhEu9IFfbNbSSpuu0Slm11T6UH8jn4O6hQ,11078
92
+ natural_pdf/exporters/__init__.py,sha256=7MnvRLLQdwtg-ULu-8uK8C84GsKiJamyhRw_GgWhw7k,151
93
+ natural_pdf/exporters/base.py,sha256=s-NpHoH81x80GQxs0oqjdhPGrzbUa8npjnK8apKOsHQ,2115
94
+ natural_pdf/exporters/paddleocr.py,sha256=1G2bS2-CcuAtS78JZYRczO3r5k8fdO9jrExH0Kr9r7M,16249
91
95
  natural_pdf/exporters/searchable_pdf.py,sha256=qsaPsnbOOaZHA_aplfZbwQnBoK9KghWm-wzbyRRomeY,16859
92
- natural_pdf/ocr/__init__.py,sha256=8ytKCg4VmiX1LkehnCbBNnL-zLc95CmsLJKDfGFtklE,1916
93
- natural_pdf/ocr/engine.py,sha256=D8B8QHF_8E68JaklyHoNUA3hGn5ld7TGKHD7Ho6mJMg,4316
94
- natural_pdf/ocr/engine_easyocr.py,sha256=aE9alo9rKxZddQAt3liWvlGssH8b2jRtmLwpndttlRM,8617
95
- natural_pdf/ocr/engine_paddle.py,sha256=4ELH9P9-FOTYj_CbUOKiW2gf5U9v2Tscx0fX0nW8j84,9248
96
- natural_pdf/ocr/engine_surya.py,sha256=pzv4CKneXlRvCXsdImAEou81MOTwMYCNhNIVG6Fg3rU,7922
97
- natural_pdf/ocr/ocr_manager.py,sha256=S2ndzKdB-nmK9glbSmn7srotIgKweCByeGIX6SOoEY4,10465
98
- natural_pdf/ocr/ocr_options.py,sha256=JZXRxjsQuKf9GJMt56YikcOqsTQ7SvXOv2XZ7z1qnB8,3794
96
+ natural_pdf/ocr/__init__.py,sha256=jKaDbo13CdCDcas1WiBmg5gjBvVeG-Z9uaeYxyzvaNY,2464
97
+ natural_pdf/ocr/engine.py,sha256=ZBC1tZNM5EDbGDJJmZI9mNHr4nCMLEZvUFhiJq8GdF4,8741
98
+ natural_pdf/ocr/engine_easyocr.py,sha256=rnDXLNa-keymonR3qbLEbbxA6bqk4QUAVCHKUDixqKg,9045
99
+ natural_pdf/ocr/engine_paddle.py,sha256=2nIrvLBBAiZG1BxVo3eFVJulA6YGoOTXw_RN98p_BUk,6184
100
+ natural_pdf/ocr/engine_surya.py,sha256=iySjG-Dahgh0cLICfbMtOcwUpRFcZjo-5Ed5Zwz-o5Y,4805
101
+ natural_pdf/ocr/ocr_factory.py,sha256=IFccj0BB75YGV4hjcy4ECtGQX_JQzdptpvDFfeGxxgI,4391
102
+ natural_pdf/ocr/ocr_manager.py,sha256=PqF1z1ET8emSw19r7jtEkC9_LZJXY7C5zK5cFklo57I,9238
103
+ natural_pdf/ocr/ocr_options.py,sha256=MIH7cOe8esuiGcVe4AtArSeQdaIpUu9RaUZbuwwvKQw,3294
104
+ natural_pdf/ocr/utils.py,sha256=kdO4sCBqCb5qB-9iPqdPN8_5t1jWwijpT-ci5UHnz6A,3867
99
105
  natural_pdf/qa/__init__.py,sha256=Pjo62JTnUNEjGNsC437mvsS5KQ5m7X_BibGvavR9AW0,108
100
106
  natural_pdf/qa/document_qa.py,sha256=W4E4vS_Eox_IBsYpVb0ifQbJb0FP-PYEIG93CU3rUkE,15246
101
107
  natural_pdf/search/__init__.py,sha256=EB_HRwlktJn5WGPVtSaRbOQNjLAZTxujeYf_eN-zd2U,4191
@@ -107,16 +113,23 @@ natural_pdf/search/searchable_mixin.py,sha256=M2a6FaFVM0vcfh7FgjDH6BLhS-7ggeVpcf
107
113
  natural_pdf/selectors/__init__.py,sha256=oZGeqSv53EqmIZOhcnawuaGGlRg1h79vArXuZCWKm4A,123
108
114
  natural_pdf/selectors/parser.py,sha256=59_GSsTApM6MFvtqhrrmbKaBfODPbGXMluvvQJcrqhE,15754
109
115
  natural_pdf/templates/__init__.py,sha256=jYBxzfi73vew0f6yhIh1MlRxw4F_TVN2hKQR0YXOFe0,20
110
- natural_pdf/templates/ocr_debug.html,sha256=Zy9StzBeHFQU8ity6cjFSZLe3TY0QOabUux4c5WQUzs,19171
116
+ natural_pdf/templates/finetune/fine_tune_paddleocr.md,sha256=AGt6kQWSTJZ8F28iN1D4p_Q6f1bvFML9gyUk6QcSHDc,14517
117
+ natural_pdf/templates/spa/index.html,sha256=6hLTp07OeV5Q4jUMp5Sgl-dwfBs3oPzBxqphG4kEs24,787
118
+ natural_pdf/templates/spa/words.txt,sha256=vkGtl5Y7-Nq-3Vhx1daRWWF1Jp1UCVaw-ZZaiFwrurk,2493885
119
+ natural_pdf/templates/spa/css/style.css,sha256=Qdl0U3L5HMyhBDNzyRPklfb3OxW6rMxCfQbzO8i8IW4,7643
120
+ natural_pdf/templates/spa/js/app.js,sha256=Efb7NmcTN9RLdLwKpDcU6CG5Ix0laHtzRHmfUlDMJXw,19679
111
121
  natural_pdf/utils/__init__.py,sha256=s3M8FggaK1P3EBYn6R_-HgSDjNc9C73gyKe1hihtNWg,43
122
+ natural_pdf/utils/debug.py,sha256=lk_6qzxan8NagjEtJEZpZ2MS30SO8ce6iznBxmA0xgk,995
112
123
  natural_pdf/utils/highlighting.py,sha256=EIY6ihVGtUTS_DjWyxpnr_UXpcR4btC1KhSGQ9VUfKg,698
124
+ natural_pdf/utils/identifiers.py,sha256=n61viCQiMlf5-E_jsPLe-FkPBdKkMKv-gfs5tGqlKiw,1117
125
+ natural_pdf/utils/packaging.py,sha256=HSgpubpHICU75L4ZAZPU8iOjium055XWnklV9_YqoCA,21579
113
126
  natural_pdf/utils/reading_order.py,sha256=s3DsYq_3g_1YA07qhd4BGEjeIRTeyGtnwc_hNtSzwBY,7290
114
- natural_pdf/utils/text_extraction.py,sha256=VlbkXg14GlvwYTjRJWa8FVUigETY3Hq0v8NlIRnzYkM,8619
115
- natural_pdf/utils/visualization.py,sha256=ir5PgpptRuVuVeRT9IcdTsNeEpdOYD_69rByjHQ7JhI,8592
127
+ natural_pdf/utils/text_extraction.py,sha256=ujhqU2C9y2YwzGDBfT9oiGPUvSz6mVqq72ttd3Ksskg,7712
128
+ natural_pdf/utils/visualization.py,sha256=5GbhxtvZW-77ONVnICupg-s2D-OaxLZNqkKlOrQESK4,8593
116
129
  natural_pdf/widgets/__init__.py,sha256=O2fSDo604wDAP6UwUkmBq3eT91RSqHwBpAOQXq92S8s,214
117
130
  natural_pdf/widgets/viewer.py,sha256=Aiw6kuBc0WkhcZrPNKyLNzzWbmtmU6rvOmHV0IuXCBk,40862
118
131
  natural_pdf/widgets/frontend/viewer.js,sha256=w8ywfz_IOAAv2nP_qaf2VBUkF1KhjT3zorhJxM1-CfU,4371
119
- natural_pdf-0.1.5.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
132
+ natural_pdf-0.1.7.dist-info/licenses/LICENSE,sha256=9zfwINwJlarbDmdh6iJV4QUG54QSJlSAUcnC1YiC_Ns,1074
120
133
  notebooks/Examples.ipynb,sha256=l4YMtMEx_DWBzWIjl9CmBkWTo0g_nK8l_XWOyzYooQM,4275170
121
134
  pdfs/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
122
135
  pdfs/01-practice.pdf,sha256=dxWyJIa2cm7bALE3BWDJ2dg3inyFlo1n8ntVyy0hkTo,7906
@@ -126,9 +139,7 @@ pdfs/2014 Statistics.pdf,sha256=B-30OQVjqj_3718-G9cGUefNddnz-MosPdHAzfGfkcc,9559
126
139
  pdfs/2019 Statistics.pdf,sha256=reuSJxvAlx9_P-pW7IPqzox0jFCxSPbK1i1-WFu-uGA,511439
127
140
  pdfs/Atlanta_Public_Schools_GA_sample.pdf,sha256=PLBh_uWJQH0MnBaSm5ng5Ima63_m6Mi11CjdravB_S8,137689
128
141
  pdfs/needs-ocr.pdf,sha256=vusKiLxSOlELUTetfZfaotNU54RtMj9PCzGfLc2cuNs,139305
129
- tests/test_loading.py,sha256=AHjnIKqEAdtQa28kEAhFQTJ0Nnu49AmxnPM8YE8_EP0,1770
130
- tests/test_optional_deps.py,sha256=e9H3ylLsB4cnyC3TVMgUbBMzmSbq6MlH8jn_pqh4Hus,12111
131
- natural_pdf-0.1.5.dist-info/METADATA,sha256=7lBaQX1e1ibQibz2ZRYt0DsDy84k3sk6dGJqT9lQvWg,5466
132
- natural_pdf-0.1.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
133
- natural_pdf-0.1.5.dist-info/top_level.txt,sha256=N44f8aOLMpI6MzrNHsCD8MzElkir_H1nPUGZ4QToWqI,38
134
- natural_pdf-0.1.5.dist-info/RECORD,,
142
+ natural_pdf-0.1.7.dist-info/METADATA,sha256=BMzSroqVMlbJrti_56ilNFZkSEH2-hJc8vUVrjk3OZU,6766
143
+ natural_pdf-0.1.7.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
144
+ natural_pdf-0.1.7.dist-info/top_level.txt,sha256=7nDKUnpkN7B8cBI7DEpW5JM8S7OcOgHw3jXH-1iCX2o,32
145
+ natural_pdf-0.1.7.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (79.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -2,4 +2,3 @@ docs
2
2
  natural_pdf
3
3
  notebooks
4
4
  pdfs
5
- tests