natural-pdf 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/.gitignore +5 -1
  2. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/PKG-INFO +14 -1
  3. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/README.md +4 -0
  4. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/regions/index.ipynb +124 -158
  5. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/regions/index.md +2 -3
  6. natural_pdf-0.1.3/docs/tutorials/01-loading-and-extraction.ipynb +1658 -0
  7. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/02-finding-elements.ipynb +43 -47
  8. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/03-extracting-blocks.ipynb +18 -22
  9. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/04-table-extraction.ipynb +13 -17
  10. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/05-excluding-content.ipynb +66 -39
  11. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/06-document-qa.ipynb +29 -33
  12. natural_pdf-0.1.3/docs/tutorials/07-layout-analysis.ipynb +260 -0
  13. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/07-working-with-regions.ipynb +49 -53
  14. natural_pdf-0.1.3/docs/tutorials/08-spatial-navigation.ipynb +508 -0
  15. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/09-section-extraction.ipynb +98 -102
  16. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/10-form-field-extraction.ipynb +51 -55
  17. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/11-enhanced-table-processing.ipynb +7 -11
  18. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/12-ocr-integration.ipynb +173 -65
  19. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/12-ocr-integration.md +32 -0
  20. natural_pdf-0.1.3/docs/tutorials/13-semantic-search.ipynb +1908 -0
  21. natural_pdf-0.1.3/docs/tutorials/13-semantic-search.md +77 -0
  22. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/mkdocs.yml +2 -0
  23. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/__init__.py +33 -1
  24. natural_pdf-0.1.3/natural_pdf/collections/pdf_collection.py +259 -0
  25. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/page.py +97 -69
  26. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/pdf.py +382 -171
  27. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/region.py +3 -1
  28. natural_pdf-0.1.3/natural_pdf/exporters/__init__.py +1 -0
  29. natural_pdf-0.1.3/natural_pdf/exporters/searchable_pdf.py +252 -0
  30. natural_pdf-0.1.3/natural_pdf/search/__init__.py +94 -0
  31. natural_pdf-0.1.3/natural_pdf/search/haystack_search_service.py +520 -0
  32. natural_pdf-0.1.3/natural_pdf/search/haystack_utils.py +386 -0
  33. natural_pdf-0.1.3/natural_pdf/search/search_options.py +72 -0
  34. natural_pdf-0.1.3/natural_pdf/search/search_service_protocol.py +189 -0
  35. natural_pdf-0.1.3/natural_pdf/search/searchable_mixin.py +464 -0
  36. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/PKG-INFO +14 -1
  37. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/SOURCES.txt +11 -0
  38. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/requires.txt +10 -0
  39. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pyproject.toml +12 -2
  40. natural_pdf-0.1.2/docs/tutorials/01-loading-and-extraction.ipynb +0 -1137
  41. natural_pdf-0.1.2/docs/tutorials/07-layout-analysis.ipynb +0 -264
  42. natural_pdf-0.1.2/docs/tutorials/08-spatial-navigation.ipynb +0 -512
  43. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/.github/workflows/docs.yml +0 -0
  44. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/CLAUDE.md +0 -0
  45. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/LICENSE +0 -0
  46. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/MANIFEST.in +0 -0
  47. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/check_run_md.sh +0 -0
  48. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/api/index.md +0 -0
  49. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/favicon.png +0 -0
  50. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/favicon.svg +0 -0
  51. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/javascripts/custom.js +0 -0
  52. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/logo.svg +0 -0
  53. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/sample-screen.png +0 -0
  54. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/social-preview.png +0 -0
  55. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/social-preview.svg +0 -0
  56. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/stylesheets/custom.css +0 -0
  57. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/document-qa/index.ipynb +0 -0
  58. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/document-qa/index.md +0 -0
  59. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/element-selection/index.ipynb +0 -0
  60. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/element-selection/index.md +0 -0
  61. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/index.md +0 -0
  62. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/installation/index.md +0 -0
  63. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/interactive-widget/index.ipynb +0 -0
  64. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/interactive-widget/index.md +0 -0
  65. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/layout-analysis/index.ipynb +0 -0
  66. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/layout-analysis/index.md +0 -0
  67. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/ocr/index.md +0 -0
  68. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/pdf-navigation/index.ipynb +0 -0
  69. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/pdf-navigation/index.md +0 -0
  70. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tables/index.ipynb +0 -0
  71. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tables/index.md +0 -0
  72. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-analysis/index.ipynb +0 -0
  73. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-analysis/index.md +0 -0
  74. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-extraction/index.ipynb +0 -0
  75. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-extraction/index.md +0 -0
  76. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/01-loading-and-extraction.md +0 -0
  77. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/02-finding-elements.md +0 -0
  78. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/03-extracting-blocks.md +0 -0
  79. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/04-table-extraction.md +0 -0
  80. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/05-excluding-content.md +0 -0
  81. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/06-document-qa.md +0 -0
  82. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/07-layout-analysis.md +0 -0
  83. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/07-working-with-regions.md +0 -0
  84. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/08-spatial-navigation.md +0 -0
  85. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/09-section-extraction.md +0 -0
  86. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/10-form-field-extraction.md +0 -0
  87. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  88. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/visual-debugging/index.ipynb +0 -0
  89. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/visual-debugging/index.md +0 -0
  90. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/visual-debugging/region.png +0 -0
  91. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/execute_notebooks.py +0 -0
  92. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/__init__.py +0 -0
  93. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/__init__.py +0 -0
  94. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/base.py +0 -0
  95. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/docling.py +0 -0
  96. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  97. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  98. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  99. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/paddle.py +0 -0
  100. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/surya.py +0 -0
  101. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/tatr.py +0 -0
  102. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/yolo.py +0 -0
  103. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/text_options.py +0 -0
  104. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/text_structure.py +0 -0
  105. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/utils.py +0 -0
  106. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/__init__.py +0 -0
  107. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/element_manager.py +0 -0
  108. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/highlighting_service.py +0 -0
  109. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/__init__.py +0 -0
  110. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/base.py +0 -0
  111. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/collections.py +0 -0
  112. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/line.py +0 -0
  113. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/rect.py +0 -0
  114. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/text.py +0 -0
  115. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/__init__.py +0 -0
  116. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine.py +0 -0
  117. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine_easyocr.py +0 -0
  118. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine_paddle.py +0 -0
  119. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine_surya.py +0 -0
  120. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/ocr_manager.py +0 -0
  121. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/ocr_options.py +0 -0
  122. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/qa/__init__.py +0 -0
  123. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/qa/document_qa.py +0 -0
  124. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/selectors/__init__.py +0 -0
  125. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/selectors/parser.py +0 -0
  126. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/templates/__init__.py +0 -0
  127. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/templates/ocr_debug.html +0 -0
  128. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/__init__.py +0 -0
  129. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/highlighting.py +0 -0
  130. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/reading_order.py +0 -0
  131. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/visualization.py +0 -0
  132. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/widgets/__init__.py +0 -0
  133. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/widgets/frontend/viewer.js +0 -0
  134. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/widgets/viewer.py +0 -0
  135. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/dependency_links.txt +0 -0
  136. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/top_level.txt +0 -0
  137. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/notebooks/Examples.ipynb +0 -0
  138. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/all_detected_regions.png +0 -0
  139. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/all_elements.png +0 -0
  140. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/basic_highlighting.png +0 -0
  141. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/chainable_layout.png +0 -0
  142. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/chained_analysis.png +0 -0
  143. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/color_names.png +0 -0
  144. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/color_names_with_boxes.png +0 -0
  145. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/conf_display_highlight_all.png +0 -0
  146. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/conf_display_highlight_layout.png +0 -0
  147. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/conf_display_layout_only.png +0 -0
  148. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/confidence_color_coded.png +0 -0
  149. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/debug_page_image.png +0 -0
  150. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/detected_table.png +0 -0
  151. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/dimension_analysis.txt +0 -0
  152. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/direct_ocr_debug.png +0 -0
  153. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/easyocr_debug_input.png +0 -0
  154. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/easyocr_results.png +0 -0
  155. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/easyocr_test_input.png +0 -0
  156. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/exclusion_optimization_regions.png +0 -0
  157. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/explicit_confidence_display.png +0 -0
  158. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/footer_overlap_test.png +0 -0
  159. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all.png +0 -0
  160. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_styles.png +0 -0
  161. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_with_all_layouts.png +0 -0
  162. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_with_attrs.png +0 -0
  163. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_with_yolo.png +0 -0
  164. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_by_confidence.png +0 -0
  165. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_1.png +0 -0
  166. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_2.png +0 -0
  167. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_3.png +0 -0
  168. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_4.png +0 -0
  169. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_layout_method.png +0 -0
  170. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_multiple.png +0 -0
  171. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_no_attrs.png +0 -0
  172. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_region.png +0 -0
  173. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_single.png +0 -0
  174. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_specific_types.png +0 -0
  175. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_specific_types_with_boxes.png +0 -0
  176. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_specific_types_with_tables.png +0 -0
  177. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test.png +0 -0
  178. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_colors.png +0 -0
  179. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual.png +0 -0
  180. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_annotated.png +0 -0
  181. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_with_structure.png +0 -0
  182. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_with_structure_yolo.png +0 -0
  183. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_with_tables.png +0 -0
  184. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_with_attrs.png +0 -0
  185. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_conf_default.png +0 -0
  186. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_conf_high.png +0 -0
  187. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_detection.png +0 -0
  188. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test.png +0 -0
  189. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test2.png +0 -0
  190. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test3.png +0 -0
  191. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test4.png +0 -0
  192. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/model_comparison.png +0 -0
  193. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/multiple_attributes_display.png +0 -0
  194. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_confidence_visualization.png +0 -0
  195. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_debug.png +0 -0
  196. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_debug_page.html +0 -0
  197. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_highlight_all_test.png +0 -0
  198. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_highlight_test.png +0 -0
  199. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_highlighted.png +0 -0
  200. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_simplified.png +0 -0
  201. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_threshold_comparison.png +0 -0
  202. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_visualization_clean.png +0 -0
  203. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_visualization_highlights.png +0 -0
  204. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_visualization_text.png +0 -0
  205. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_detection.png +0 -0
  206. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_polygons.png +0 -0
  207. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_sources.png +0 -0
  208. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_with_text.png +0 -0
  209. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_without_text.png +0 -0
  210. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddleocr_highlights.png +0 -0
  211. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddleocr_results.png +0 -0
  212. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddleocr_test_input.png +0 -0
  213. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/page_1_for_ocr.png +0 -0
  214. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/page_4_for_ocr.png +0 -0
  215. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_exclusion_test.png +0 -0
  216. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_management_test.png +0 -0
  217. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_cropped.png +0 -0
  218. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_debug.png +0 -0
  219. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_full_page.png +0 -0
  220. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_highlighted.png +0 -0
  221. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/spatial_navigation.png +0 -0
  222. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/standard_highlight_all.png +0 -0
  223. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_no_ocr.csv +0 -0
  224. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_structure.png +0 -0
  225. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_structure_detail.png +0 -0
  226. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_with_ocr.csv +0 -0
  227. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_cells_test.png +0 -0
  228. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_ocr_table_test.png +0 -0
  229. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_regions.png +0 -0
  230. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_regions.txt +0 -0
  231. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/text_styles.png +0 -0
  232. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/titles_only.png +0 -0
  233. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_1200px.png +0 -0
  234. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_800px.png +0 -0
  235. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_default.png +0 -0
  236. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_with_scale.png +0 -0
  237. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/yolo_regions.png +0 -0
  238. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/yolo_regions.txt +0 -0
  239. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/.gitkeep +0 -0
  240. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/01-practice.pdf +0 -0
  241. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/0500000US42001.pdf +0 -0
  242. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/0500000US42007.pdf +0 -0
  243. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/2014 Statistics.pdf +0 -0
  244. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/2019 Statistics.pdf +0 -0
  245. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  246. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/needs-ocr.pdf +0 -0
  247. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/publish.sh +0 -0
  248. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/run_all_tutorials.sh +0 -0
  249. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/sample-screen.png +0 -0
  250. {natural_pdf-0.1.2 → natural_pdf-0.1.3}/setup.cfg +0 -0
@@ -5,7 +5,11 @@ docs/tutorials/pdfs
5
5
  install.sh
6
6
  notebooks/Examples.md
7
7
  transcript.md
8
-
8
+ natural_pdf_index
9
+ results
10
+ docs/tutorials/needs-ocr-searchable.pdf
11
+ sample.py
12
+ sample2.py
9
13
 
10
14
  # Created by https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
11
15
  # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,visualstudiocode,jupyternotebooks
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -20,8 +20,14 @@ Requires-Dist: torch>=2.0.0
20
20
  Requires-Dist: torchvision>=0.15.0
21
21
  Requires-Dist: transformers>=4.30.0
22
22
  Requires-Dist: huggingface_hub>=0.19.0
23
+ Requires-Dist: ocrmypdf>=16.0.0
24
+ Requires-Dist: pikepdf>=10.0.0
23
25
  Provides-Extra: interactive
24
26
  Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
27
+ Provides-Extra: haystack
28
+ Requires-Dist: haystack-ai>=2.0.0b5; extra == "haystack"
29
+ Requires-Dist: chroma-haystack; extra == "haystack"
30
+ Requires-Dist: sentence-transformers; extra == "haystack"
25
31
  Provides-Extra: easyocr
26
32
  Requires-Dist: easyocr; extra == "easyocr"
27
33
  Provides-Extra: paddle
@@ -39,6 +45,9 @@ Requires-Dist: paddlepaddle; extra == "all"
39
45
  Requires-Dist: paddleocr; extra == "all"
40
46
  Requires-Dist: doclayout_yolo; extra == "all"
41
47
  Requires-Dist: surya-ocr; extra == "all"
48
+ Requires-Dist: haystack-ai>=2.0.0b5; extra == "all"
49
+ Requires-Dist: chroma-haystack; extra == "all"
50
+ Requires-Dist: sentence-transformers; extra == "all"
42
51
  Dynamic: license-file
43
52
 
44
53
  # Natural PDF
@@ -69,6 +78,9 @@ pip install natural-pdf[paddle]
69
78
  # Example: Install with interactive viewer support
70
79
  pip install natural-pdf[interactive]
71
80
 
81
+ # Example: Install with semantic search support (Haystack)
82
+ pip install natural-pdf[haystack]
83
+
72
84
  # Install everything
73
85
  pip install natural-pdf[all]
74
86
  ```
@@ -117,6 +129,7 @@ Natural PDF offers a range of features for working with PDFs:
117
129
  * **OCR Integration:** Extract text from scanned documents using engines like EasyOCR, PaddleOCR, or Surya.
118
130
  * **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using AI models.
119
131
  * **Document QA:** Ask natural language questions about your document's content.
132
+ * **Semantic Search:** Index PDFs and find relevant pages or documents based on semantic meaning using Haystack.
120
133
  * **Visual Debugging:** Highlight elements and use an interactive viewer or save images to understand your selections.
121
134
 
122
135
  ## Learn More
@@ -26,6 +26,9 @@ pip install natural-pdf[paddle]
26
26
  # Example: Install with interactive viewer support
27
27
  pip install natural-pdf[interactive]
28
28
 
29
+ # Example: Install with semantic search support (Haystack)
30
+ pip install natural-pdf[haystack]
31
+
29
32
  # Install everything
30
33
  pip install natural-pdf[all]
31
34
  ```
@@ -74,6 +77,7 @@ Natural PDF offers a range of features for working with PDFs:
74
77
  * **OCR Integration:** Extract text from scanned documents using engines like EasyOCR, PaddleOCR, or Surya.
75
78
  * **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using AI models.
76
79
  * **Document QA:** Ask natural language questions about your document's content.
80
+ * **Semantic Search:** Index PDFs and find relevant pages or documents based on semantic meaning using Haystack.
77
81
  * **Visual Debugging:** Highlight elements and use an interactive viewer or save images to understand your selections.
78
82
 
79
83
  ## Learn More