natural-pdf 0.2.17__tar.gz → 0.2.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (378)
  1. {natural_pdf-0.2.17/natural_pdf.egg-info → natural_pdf-0.2.18}/PKG-INFO +1 -1
  2. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/page.py +42 -9
  3. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/spatial.py +42 -39
  4. {natural_pdf-0.2.17 → natural_pdf-0.2.18/natural_pdf.egg-info}/PKG-INFO +1 -1
  5. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf.egg-info/SOURCES.txt +2 -0
  6. natural_pdf-0.2.18/tests/demo_multipage.py +56 -0
  7. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_aggregate_selectors.py +2 -2
  8. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_element_exclusions.py +18 -0
  9. natural_pdf-0.2.18/tests/test_exclusion_recursion_fix.py +46 -0
  10. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_expand_enhanced.py +3 -3
  11. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_pdf_exclusions_in_find_methods.py +13 -5
  12. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.cursor/rules/analysis_framework.mdc +0 -0
  13. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.cursor/rules/coding-style.mdc +0 -0
  14. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  15. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.cursor/rules/minimal-comments.mdc +0 -0
  16. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  17. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  18. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.github/workflows/ci.yml +0 -0
  19. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.github/workflows/docs.yml +0 -0
  20. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.github/workflows/nightly-tutorials.yml +0 -0
  21. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.gitignore +0 -0
  22. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/.pre-commit-config.yaml +0 -0
  23. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/01-execute_notebooks.py +0 -0
  24. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/02-run_all_tutorials.sh +0 -0
  25. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/CLAUDE.md +0 -0
  26. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/LICENSE +0 -0
  27. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/MANIFEST.in +0 -0
  28. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/README.md +0 -0
  29. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/audit_packaging.py +0 -0
  30. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/check_run_md.sh +0 -0
  31. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/api/index.md +0 -0
  32. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/favicon.png +0 -0
  33. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/favicon.svg +0 -0
  34. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/javascripts/custom.js +0 -0
  35. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/logo.svg +0 -0
  36. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/sample-screen.png +0 -0
  37. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/social-preview.png +0 -0
  38. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/social-preview.svg +0 -0
  39. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/assets/stylesheets/custom.css +0 -0
  40. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/categorizing-documents/index.md +0 -0
  41. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/data-extraction/index.md +0 -0
  42. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/describe/index.md +0 -0
  43. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/document-qa/index.md +0 -0
  44. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/element-selection/index.md +0 -0
  45. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/extracting-clean-text/index.md +0 -0
  46. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/finetuning/index.md +0 -0
  47. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/fix-messy-tables/index.md +0 -0
  48. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/fix-messy-tables/table_1.csv +0 -0
  49. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/fix-messy-tables/table_2.csv +0 -0
  50. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/fix-messy-tables/table_3.csv +0 -0
  51. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/guide_adjustment_stream.md +0 -0
  52. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/guides_boundary_columns.md +0 -0
  53. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/index.md +0 -0
  54. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/installation/index.md +0 -0
  55. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/interactive-widget/index.md +0 -0
  56. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/layout-analysis/index.md +0 -0
  57. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/loops-and-groups/index.md +0 -0
  58. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/ocr/index.md +0 -0
  59. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/pdf-navigation/index.md +0 -0
  60. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
  61. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/process-forms-and-invoices/index.md +0 -0
  62. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/quick-reference/index.md +0 -0
  63. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/reflowing-pages/index.md +0 -0
  64. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/regions/index.md +0 -0
  65. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tables/index.md +0 -0
  66. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/text-analysis/index.md +0 -0
  67. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/01-loading-and-extraction.md +0 -0
  68. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/02-finding-elements.md +0 -0
  69. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/03-extracting-blocks.md +0 -0
  70. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/04-table-extraction.md +0 -0
  71. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/05-excluding-content.md +0 -0
  72. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/06-document-qa.md +0 -0
  73. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/07-layout-analysis.md +0 -0
  74. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/07-working-with-regions.md +0 -0
  75. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/08-spatial-navigation.md +0 -0
  76. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/09-section-extraction.md +0 -0
  77. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/10-form-field-extraction.md +0 -0
  78. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  79. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/12-ocr-integration.md +0 -0
  80. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/13-semantic-search.md +0 -0
  81. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/tutorials/14-categorizing-documents.md +0 -0
  82. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/visual-debugging/index.md +0 -0
  83. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/docs/visual-debugging/region.png +0 -0
  84. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/mkdocs.yml +0 -0
  85. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/__init__.py +0 -0
  86. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/__init__.py +0 -0
  87. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/guides.py +0 -0
  88. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/__init__.py +0 -0
  89. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/base.py +0 -0
  90. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/docling.py +0 -0
  91. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/gemini.py +0 -0
  92. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  93. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  94. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  95. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/paddle.py +0 -0
  96. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  97. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/surya.py +0 -0
  98. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  99. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/tatr.py +0 -0
  100. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/layout/yolo.py +0 -0
  101. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
  102. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/text_options.py +0 -0
  103. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/text_structure.py +0 -0
  104. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/analyzers/utils.py +0 -0
  105. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/classification/manager.py +0 -0
  106. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/classification/mixin.py +0 -0
  107. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/classification/results.py +0 -0
  108. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/cli.py +0 -0
  109. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/collections/mixins.py +0 -0
  110. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/__init__.py +0 -0
  111. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/element_manager.py +0 -0
  112. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/highlighting_service.py +0 -0
  113. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/page_collection.py +0 -0
  114. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/page_groupby.py +0 -0
  115. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/pdf.py +0 -0
  116. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/pdf_collection.py +0 -0
  117. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/core/render_spec.py +0 -0
  118. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/describe/__init__.py +0 -0
  119. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/describe/base.py +0 -0
  120. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/describe/elements.py +0 -0
  121. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/describe/mixin.py +0 -0
  122. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/describe/summary.py +0 -0
  123. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/__init__.py +0 -0
  124. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/base.py +0 -0
  125. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/element_collection.py +0 -0
  126. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/image.py +0 -0
  127. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/line.py +0 -0
  128. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/rect.py +0 -0
  129. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/region.py +0 -0
  130. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/elements/text.py +0 -0
  131. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/export/mixin.py +0 -0
  132. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/__init__.py +0 -0
  133. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/base.py +0 -0
  134. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/data/__init__.py +0 -0
  135. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/data/pdf.ttf +0 -0
  136. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/data/sRGB.icc +0 -0
  137. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/hocr.py +0 -0
  138. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/hocr_font.py +0 -0
  139. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/original_pdf.py +0 -0
  140. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/paddleocr.py +0 -0
  141. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/exporters/searchable_pdf.py +0 -0
  142. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/extraction/manager.py +0 -0
  143. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/extraction/mixin.py +0 -0
  144. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/extraction/result.py +0 -0
  145. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/flows/__init__.py +0 -0
  146. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/flows/collections.py +0 -0
  147. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/flows/element.py +0 -0
  148. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/flows/flow.py +0 -0
  149. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/flows/region.py +0 -0
  150. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/__init__.py +0 -0
  151. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/engine.py +0 -0
  152. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/engine_doctr.py +0 -0
  153. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/engine_easyocr.py +0 -0
  154. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/engine_paddle.py +0 -0
  155. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/engine_surya.py +0 -0
  156. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/ocr_factory.py +0 -0
  157. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/ocr_manager.py +0 -0
  158. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/ocr_options.py +0 -0
  159. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/ocr/utils.py +0 -0
  160. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/qa/__init__.py +0 -0
  161. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/qa/document_qa.py +0 -0
  162. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/qa/qa_result.py +0 -0
  163. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/search/__init__.py +0 -0
  164. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/search/lancedb_search_service.py +0 -0
  165. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/search/numpy_search_service.py +0 -0
  166. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/search/search_options.py +0 -0
  167. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/search/search_service_protocol.py +0 -0
  168. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/search/searchable_mixin.py +0 -0
  169. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/selectors/__init__.py +0 -0
  170. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/selectors/parser.py +0 -0
  171. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/tables/__init__.py +0 -0
  172. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/tables/result.py +0 -0
  173. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/templates/__init__.py +0 -0
  174. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  175. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/templates/spa/css/style.css +0 -0
  176. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/templates/spa/index.html +0 -0
  177. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/templates/spa/js/app.js +0 -0
  178. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/templates/spa/words.txt +0 -0
  179. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/text_mixin.py +0 -0
  180. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/__init__.py +0 -0
  181. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/bidi_mirror.py +0 -0
  182. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/color_utils.py +0 -0
  183. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/debug.py +0 -0
  184. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/highlighting.py +0 -0
  185. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/identifiers.py +0 -0
  186. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/layout.py +0 -0
  187. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/locks.py +0 -0
  188. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/packaging.py +0 -0
  189. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/pdfminer_patches.py +0 -0
  190. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/reading_order.py +0 -0
  191. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/sections.py +0 -0
  192. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/text_extraction.py +0 -0
  193. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/utils/visualization.py +0 -0
  194. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/vision/__init__.py +0 -0
  195. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/vision/mixin.py +0 -0
  196. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/vision/results.py +0 -0
  197. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/vision/similarity.py +0 -0
  198. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/vision/template_matching.py +0 -0
  199. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/widgets/__init__.py +0 -0
  200. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf/widgets/viewer.py +0 -0
  201. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf.egg-info/dependency_links.txt +0 -0
  202. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf.egg-info/entry_points.txt +0 -0
  203. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf.egg-info/requires.txt +0 -0
  204. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/natural_pdf.egg-info/top_level.txt +0 -0
  205. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/noxfile.py +0 -0
  206. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/memory_comparison.py +0 -0
  207. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/pdf_analyzer.py +0 -0
  208. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/performance_analysis.py +0 -0
  209. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/performance_results/image_heavy_snapshots.csv +0 -0
  210. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/performance_results/image_heavy_snapshots.json +0 -0
  211. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/performance_results/text_heavy_snapshots.csv +0 -0
  212. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/performance_results/text_heavy_snapshots.json +0 -0
  213. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/test_cleanup_methods.py +0 -0
  214. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/optimization/test_memory_fix.py +0 -0
  215. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/publish.sh +0 -0
  216. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/pyproject.toml +0 -0
  217. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/sample-screen.png +0 -0
  218. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/setup.cfg +0 -0
  219. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/fix_page_exclusions.py +0 -0
  220. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_draw_guides.py +0 -0
  221. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_draw_guides_interactive.py +0 -0
  222. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_exclusion_with_debug.py +0 -0
  223. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_find_exclusions_fix.py +0 -0
  224. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_find_exclusions_fix_no_recursion.py +0 -0
  225. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_fix_real_pdf.py +0 -0
  226. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_fix_working.py +0 -0
  227. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_fixed_pdf_exclusions.py +0 -0
  228. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_guide_draw_notebook.py +0 -0
  229. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_horizontal_top_bottom.py +0 -0
  230. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_inline_js.py +0 -0
  231. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_marker_order.py +0 -0
  232. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_original_exclusions_now_work.py +0 -0
  233. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_pdf_exclusions_with_guides.py +0 -0
  234. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_region_exclusions_detailed.py +0 -0
  235. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_stripes_real_pdf.py +0 -0
  236. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_vertical_stripes.py +0 -0
  237. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_widget_functionality.py +0 -0
  238. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/temp/test_widget_simple.py +0 -0
  239. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/conftest.py +0 -0
  240. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/exporters/test_paddleocr_exporter.py +0 -0
  241. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_annotate.py +0 -0
  242. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_arabic_performance.py +0 -0
  243. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_arabic_real_world.py +0 -0
  244. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_auto_multipage_option.py +0 -0
  245. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_color_conversion.py +0 -0
  246. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_color_hex_display.py +0 -0
  247. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_core/test_containment_geometry.py +0 -0
  248. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_core/test_elements.py +0 -0
  249. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_core/test_loading.py +0 -0
  250. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_core/test_spatial.py +0 -0
  251. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_core/test_text_extraction.py +0 -0
  252. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_core/test_text_layer.py +0 -0
  253. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_crop_enhancements.py +0 -0
  254. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_crop_region_highlights.py +0 -0
  255. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_directional_defaults.py +0 -0
  256. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_dissolve.py +0 -0
  257. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_dissolve_cross_page_bug.py +0 -0
  258. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_dissolve_debug_issue.py +0 -0
  259. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_dissolve_real_world_issue.py +0 -0
  260. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_dissolve_single_elements.py +0 -0
  261. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_dissolve_vertical_offset_issue.py +0 -0
  262. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_document_qa.py +0 -0
  263. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_element_addition.py +0 -0
  264. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_element_collection_guides.py +0 -0
  265. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_element_collection_show_cols.py +0 -0
  266. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_element_collection_slicing.py +0 -0
  267. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_element_show_crop_highlights.py +0 -0
  268. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_empty_pseudo_class.py +0 -0
  269. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_exclude_multi_page.py +0 -0
  270. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_exclude_real_pdf.py +0 -0
  271. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_exclusions.py +0 -0
  272. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_expand.py +0 -0
  273. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_extraction_error.py +0 -0
  274. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_extraction_mixin_fix.py +0 -0
  275. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_extraction_text_and_vision.py +0 -0
  276. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_extraction_working.py +0 -0
  277. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_find_similar.py +0 -0
  278. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_first_last_selectors.py +0 -0
  279. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_fix_get_sections_zero_height.py +0 -0
  280. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_flow_region_directional.py +0 -0
  281. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_get_sections_fix_comprehensive.py +0 -0
  282. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_get_sections_zero_height.py +0 -0
  283. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_groupby.py +0 -0
  284. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guide_adjustment_stream.py +0 -0
  285. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides.py +0 -0
  286. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_apply_exclusions.py +0 -0
  287. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_apply_exclusions_simple.py +0 -0
  288. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_boundaries.py +0 -0
  289. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_extract_table.py +0 -0
  290. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_extract_table_collections.py +0 -0
  291. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_extract_table_exclusions.py +0 -0
  292. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_extract_table_real.py +0 -0
  293. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_from_headers.py +0 -0
  294. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_from_stripes.py +0 -0
  295. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_integration.py +0 -0
  296. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_marker_sorting.py +0 -0
  297. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_guides_partial.py +0 -0
  298. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_highlight_color_falsy.py +0 -0
  299. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_highlight_detection.py +0 -0
  300. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_highlight_detection_comprehensive.py +0 -0
  301. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_highlight_offset.py +0 -0
  302. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_highlight_protocol.py +0 -0
  303. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_highlight_protocol_simple.py +0 -0
  304. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_highlight_regions.py +0 -0
  305. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_horizontal_guides_alignment.py +0 -0
  306. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_comprehensive.py +0 -0
  307. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_final.py +0 -0
  308. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_final_verification.py +0 -0
  309. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_fix.py +0 -0
  310. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_mock.py +0 -0
  311. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_simple.py +0 -0
  312. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_types_pdf.py +0 -0
  313. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_verification.py +0 -0
  314. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_include_boundaries_with_real_text.py +0 -0
  315. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_loading_original.py +0 -0
  316. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_match_results_sorting.py +0 -0
  317. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_merge_connected.py +0 -0
  318. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_merge_connected_real_world.py +0 -0
  319. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_merge_method.py +0 -0
  320. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_merged_flowregion_specs.py +0 -0
  321. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_mixed_collection_rendering.py +0 -0
  322. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_multi_page_table_discovery.py +0 -0
  323. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_multipage_directional.py +0 -0
  324. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_negative_bounds_pdf.py +0 -0
  325. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_optional_deps.py +0 -0
  326. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_page_exclusion_lists.py +0 -0
  327. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_pdf_add_exclusion_elementcollection.py +0 -0
  328. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_pdfminer_bug_status.py +0 -0
  329. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_pdfminer_color_bug.py +0 -0
  330. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_pdfminer_color_stack_bug.py +0 -0
  331. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_phash_masking.py +0 -0
  332. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_region_find_similar.py +0 -0
  333. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_region_show_crop_highlights.py +0 -0
  334. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_region_viewer.py +0 -0
  335. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_sections_end_only.py +0 -0
  336. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_sections_with_start_and_end.py +0 -0
  337. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_show_column_layout.py +0 -0
  338. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_show_edge_cases.py +0 -0
  339. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_show_exclusions.py +0 -0
  340. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_show_exclusions_feature.py +0 -0
  341. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_show_limit.py +0 -0
  342. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_skip_repeating_headers_multipage.py +0 -0
  343. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_slice_cache_reuse.py +0 -0
  344. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_slice_exclusion_fix.py +0 -0
  345. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_slice_exclusion_issue.py +0 -0
  346. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_slice_exclusion_mock.py +0 -0
  347. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_sliced_collection_exclusions.py +0 -0
  348. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_smart_exclusion.py +0 -0
  349. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_spatial_offset.py +0 -0
  350. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_strikethrough_detection.py +0 -0
  351. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_table_result_header_mismatch.py +0 -0
  352. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_table_result_keep_blank.py +0 -0
  353. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_template_matching.py +0 -0
  354. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_template_white_masking.py +0 -0
  355. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_tiny_text_tables.py +0 -0
  356. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_tiny_text_tables_table.py +0 -0
  357. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_tutorials.py +0 -0
  358. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_underline_detection.py +0 -0
  359. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tests/test_update_text.py +0 -0
  360. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/todo/bad_pdf_analysis.md +0 -0
  361. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/todo/evaluation.md +0 -0
  362. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md +0 -0
  363. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md +0 -0
  364. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md +0 -0
  365. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/README.md +0 -0
  366. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/__init__.py +0 -0
  367. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/analyser.py +0 -0
  368. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/collate_summaries.py +0 -0
  369. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/compile_attempts_markdown.py +0 -0
  370. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/eval_suite.py +0 -0
  371. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/evaluate_quality.py +0 -0
  372. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/export_enrichment_csv.py +0 -0
  373. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/extraction_decision_tree.md +0 -0
  374. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/llm_enrich.py +0 -0
  375. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/llm_enrich_with_retry.py +0 -0
  376. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/reporter.py +0 -0
  377. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/tools/bad_pdf_eval/utils.py +0 -0
  378. {natural_pdf-0.2.17 → natural_pdf-0.2.18}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.17
3
+ Version: 0.2.18
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -1,5 +1,6 @@
1
1
  import base64
2
2
  import concurrent.futures # Added import
3
+ import contextlib
3
4
  import hashlib
4
5
  import io
5
6
  import json
@@ -275,6 +276,9 @@ class Page(
275
276
  self._load_elements()
276
277
  self._to_image_cache: Dict[tuple, Optional["Image.Image"]] = {}
277
278
 
279
+ # Flag to prevent infinite recursion when computing exclusions
280
+ self._computing_exclusions = False
281
+
278
282
  def _get_render_specs(
279
283
  self,
280
284
  mode: Literal["show", "render"] = "show",
@@ -412,6 +416,35 @@ class Page(
412
416
  self._exclusions = []
413
417
  return self
414
418
 
419
+ @contextlib.contextmanager
420
+ def without_exclusions(self):
421
+ """
422
+ Context manager that temporarily disables exclusion processing.
423
+
424
+ This prevents infinite recursion when exclusion callables themselves
425
+ use find() operations. While in this context, all find operations
426
+ will skip exclusion filtering.
427
+
428
+ Example:
429
+ ```python
430
+ # This exclusion would normally cause infinite recursion:
431
+ page.add_exclusion(lambda p: p.find("text:contains('Header')").expand())
432
+
433
+ # But internally, it's safe because we use:
434
+ with page.without_exclusions():
435
+ region = exclusion_callable(page)
436
+ ```
437
+
438
+ Yields:
439
+ The page object with exclusions temporarily disabled.
440
+ """
441
+ old_value = self._computing_exclusions
442
+ self._computing_exclusions = True
443
+ try:
444
+ yield self
445
+ finally:
446
+ self._computing_exclusions = old_value
447
+
415
448
  def add_exclusion(
416
449
  self,
417
450
  exclusion_func_or_region: Union[
@@ -759,15 +792,10 @@ class Page(
759
792
  if debug:
760
793
  print(f" - Evaluating callable '{exclusion_label}'...")
761
794
 
762
- # Temporarily clear exclusions (consider if really needed)
763
- temp_original_exclusions = self._exclusions
764
- self._exclusions = []
765
-
766
- # Call the function - Expects it to return a Region or None
767
- region_result = exclusion_item(self)
768
-
769
- # Restore exclusions
770
- self._exclusions = temp_original_exclusions
795
+ # Use context manager to prevent infinite recursion
796
+ with self.without_exclusions():
797
+ # Call the function - Expects it to return a Region or None
798
+ region_result = exclusion_item(self)
771
799
 
772
800
  if isinstance(region_result, Region):
773
801
  # Assign the label to the returned region
@@ -947,6 +975,11 @@ class Page(
947
975
  Returns:
948
976
  A new list containing only the elements not excluded.
949
977
  """
978
+ # Skip exclusion filtering if we're currently computing exclusions
979
+ # This prevents infinite recursion when exclusion callables use find operations
980
+ if self._computing_exclusions:
981
+ return elements
982
+
950
983
  # Check both page-level and PDF-level exclusions
951
984
  has_page_exclusions = bool(self._exclusions)
952
985
  has_pdf_exclusions = (
@@ -10,11 +10,11 @@ with include_boundaries='none'.
10
10
 
11
11
  Example:
12
12
  from natural_pdf.utils.spatial import is_element_in_region
13
-
13
+
14
14
  # Check if element is in region using center-based logic (default)
15
15
  if is_element_in_region(element, region):
16
16
  print("Element is in region")
17
-
17
+
18
18
  # Use different strategies
19
19
  if is_element_in_region(element, region, strategy="intersects"):
20
20
  print("Element overlaps with region")
@@ -35,16 +35,16 @@ InclusionStrategy = Literal["center", "intersects", "contains"]
35
35
 
36
36
  def is_element_in_region(
37
37
  element: "Element",
38
- region: "Region",
38
+ region: "Region",
39
39
  strategy: InclusionStrategy = "center",
40
- check_page: bool = True
40
+ check_page: bool = True,
41
41
  ) -> bool:
42
42
  """
43
43
  Unified function to check if an element is inside a region.
44
-
44
+
45
45
  This centralizes the logic used across Region, Page, and Flow to ensure
46
46
  consistent behavior throughout the library.
47
-
47
+
48
48
  Args:
49
49
  element: The element to check
50
50
  region: The region to check against
@@ -53,7 +53,7 @@ def is_element_in_region(
53
53
  - "intersects": Element belongs if any part overlaps
54
54
  - "contains": Element belongs only if fully contained
55
55
  check_page: Whether to verify element and region are on the same page
56
-
56
+
57
57
  Returns:
58
58
  bool: True if element is in region according to the strategy
59
59
  """
@@ -61,18 +61,18 @@ def is_element_in_region(
61
61
  if not hasattr(element, "bbox") or not element.bbox:
62
62
  logger.debug(f"Element lacks bbox attributes: {element}")
63
63
  return False
64
-
64
+
65
65
  if not hasattr(region, "bbox") or not region.bbox:
66
66
  logger.debug(f"Region lacks bbox attributes: {region}")
67
67
  return False
68
-
68
+
69
69
  # Check page membership if requested
70
70
  if check_page:
71
71
  if not hasattr(element, "page") or not hasattr(region, "page"):
72
72
  return False
73
73
  if element.page != region.page:
74
74
  return False
75
-
75
+
76
76
  # Apply the appropriate strategy
77
77
  if strategy == "center":
78
78
  # Use existing region method if available
@@ -82,37 +82,43 @@ def is_element_in_region(
82
82
  # Fallback calculation
83
83
  elem_center_x = (element.x0 + element.x1) / 2
84
84
  elem_center_y = (element.top + element.bottom) / 2
85
-
85
+
86
86
  # Use region's is_point_inside if available
87
87
  if hasattr(region, "is_point_inside"):
88
88
  return region.is_point_inside(elem_center_x, elem_center_y)
89
89
  else:
90
90
  # Simple bounds check
91
- return (region.x0 <= elem_center_x <= region.x1 and
92
- region.top <= elem_center_y <= region.bottom)
93
-
91
+ return (
92
+ region.x0 <= elem_center_x <= region.x1
93
+ and region.top <= elem_center_y <= region.bottom
94
+ )
95
+
94
96
  elif strategy == "intersects":
95
97
  # Use existing region method if available
96
98
  if hasattr(region, "intersects"):
97
99
  return region.intersects(element)
98
100
  else:
99
101
  # Simple bbox overlap check
100
- return not (element.x1 < region.x0 or
101
- element.x0 > region.x1 or
102
- element.bottom < region.top or
103
- element.top > region.bottom)
104
-
102
+ return not (
103
+ element.x1 < region.x0
104
+ or element.x0 > region.x1
105
+ or element.bottom < region.top
106
+ or element.top > region.bottom
107
+ )
108
+
105
109
  elif strategy == "contains":
106
110
  # Use existing region method if available
107
111
  if hasattr(region, "contains"):
108
112
  return region.contains(element)
109
113
  else:
110
114
  # Simple full containment check
111
- return (region.x0 <= element.x0 and
112
- element.x1 <= region.x1 and
113
- region.top <= element.top and
114
- element.bottom <= region.bottom)
115
-
115
+ return (
116
+ region.x0 <= element.x0
117
+ and element.x1 <= region.x1
118
+ and region.top <= element.top
119
+ and element.bottom <= region.bottom
120
+ )
121
+
116
122
  else:
117
123
  raise ValueError(f"Unknown inclusion strategy: {strategy}")
118
124
 
@@ -120,10 +126,10 @@ def is_element_in_region(
120
126
  def get_inclusion_strategy() -> InclusionStrategy:
121
127
  """
122
128
  Get the current global inclusion strategy.
123
-
129
+
124
130
  This could be made configurable via environment variable or settings.
125
131
  For now, returns the default strategy.
126
-
132
+
127
133
  Returns:
128
134
  The current inclusion strategy (default: "center")
129
135
  """
@@ -132,38 +138,35 @@ def get_inclusion_strategy() -> InclusionStrategy:
132
138
  return "center"
133
139
 
134
140
 
135
- def calculate_element_overlap_percentage(
136
- element: "Element",
137
- region: "Region"
138
- ) -> float:
141
+ def calculate_element_overlap_percentage(element: "Element", region: "Region") -> float:
139
142
  """
140
143
  Calculate what percentage of an element overlaps with a region.
141
-
144
+
142
145
  Args:
143
146
  element: The element to check
144
147
  region: The region to check against
145
-
148
+
146
149
  Returns:
147
150
  float: Percentage of element area that overlaps with region (0.0 to 1.0)
148
151
  """
149
152
  if not hasattr(element, "bbox") or not hasattr(region, "bbox"):
150
153
  return 0.0
151
-
154
+
152
155
  # Calculate intersection bounds
153
156
  intersect_x0 = max(element.x0, region.x0)
154
- intersect_y0 = max(element.top, region.top)
157
+ intersect_y0 = max(element.top, region.top)
155
158
  intersect_x1 = min(element.x1, region.x1)
156
159
  intersect_y1 = min(element.bottom, region.bottom)
157
-
160
+
158
161
  # Check if there's an intersection
159
162
  if intersect_x1 <= intersect_x0 or intersect_y1 <= intersect_y0:
160
163
  return 0.0
161
-
164
+
162
165
  # Calculate areas
163
166
  element_area = (element.x1 - element.x0) * (element.bottom - element.top)
164
167
  if element_area == 0:
165
168
  return 0.0
166
-
169
+
167
170
  intersect_area = (intersect_x1 - intersect_x0) * (intersect_y1 - intersect_y0)
168
-
169
- return intersect_area / element_area
171
+
172
+ return intersect_area / element_area
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.17
3
+ Version: 0.2.18
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -230,6 +230,7 @@ temp/test_vertical_stripes.py
230
230
  temp/test_widget_functionality.py
231
231
  temp/test_widget_simple.py
232
232
  tests/conftest.py
233
+ tests/demo_multipage.py
233
234
  tests/test_aggregate_selectors.py
234
235
  tests/test_annotate.py
235
236
  tests/test_arabic_performance.py
@@ -256,6 +257,7 @@ tests/test_element_show_crop_highlights.py
256
257
  tests/test_empty_pseudo_class.py
257
258
  tests/test_exclude_multi_page.py
258
259
  tests/test_exclude_real_pdf.py
260
+ tests/test_exclusion_recursion_fix.py
259
261
  tests/test_exclusions.py
260
262
  tests/test_expand.py
261
263
  tests/test_expand_enhanced.py
@@ -0,0 +1,56 @@
1
+ """Demo script showing multipage directional navigation."""
2
+
3
+ import natural_pdf as npdf
4
+ from natural_pdf import PDF
5
+
6
+
7
+ def main():
8
+ """Demonstrate multipage directional navigation."""
9
+ pdf = PDF("pdfs/sections.pdf")
10
+
11
+ print("=== Multipage Directional Navigation Demo ===\n")
12
+
13
+ # Find Section 1 on page 1
14
+ section1 = pdf.pages[0].find("text:contains(Section 1)")
15
+ print(f"Found Section 1 on page {section1.page.number}")
16
+
17
+ # Without multipage - stops at page boundary
18
+ print("\n1. Without multipage=True:")
19
+ result = section1.below(until="text:contains(Section 6)")
20
+ print(f" Result type: {type(result).__name__}")
21
+ print(f" Result on page: {result.page.number}")
22
+ print(f" Text excerpt: {result.extract_text()[:50]}...")
23
+
24
+ # With multipage=True - crosses page boundary
25
+ print("\n2. With multipage=True:")
26
+ result = section1.below(until="text:contains(Section 6)", multipage=True)
27
+ print(f" Result type: {type(result).__name__}")
28
+ if hasattr(result, "constituent_regions"):
29
+ print(f" Spans {len(result.constituent_regions)} pages")
30
+ text = result.extract_text()
31
+ print(f" Contains 'Section 6': {'Section 6' in text}")
32
+
33
+ # Using global option
34
+ print("\n3. Using global auto_multipage option:")
35
+ original = npdf.options.layout.auto_multipage
36
+ npdf.set_option("layout.auto_multipage", True)
37
+
38
+ result = section1.below(until="text:contains(Section 6)") # No multipage param needed!
39
+ print(f" Result type: {type(result).__name__}")
40
+ text = result.extract_text()
41
+ print(f" Contains 'Section 6': {'Section 6' in text}")
42
+
43
+ # Restore original setting
44
+ npdf.options.layout.auto_multipage = original
45
+
46
+ # Example of above() with multipage
47
+ print("\n4. Using above() with multipage:")
48
+ section6 = pdf.pages[1].find("text:contains(Section 6)")
49
+ result = section6.above(multipage=True)
50
+ print(f" Result type: {type(result).__name__}")
51
+ if hasattr(result, "constituent_regions"):
52
+ print(f" Spans {len(result.constituent_regions)} pages")
53
+
54
+
55
+ if __name__ == "__main__":
56
+ main()
@@ -2,7 +2,7 @@
2
2
 
3
3
  import pytest
4
4
 
5
- from natural_pdf import NaturalPDF
5
+ from natural_pdf import PDF
6
6
  from natural_pdf.elements.element_collection import ElementCollection
7
7
 
8
8
 
@@ -13,7 +13,7 @@ class TestAggregateSelectors:
13
13
  def sample_pdf(self):
14
14
  """Create a sample PDF for testing."""
15
15
  # This assumes we have a test PDF with various text sizes and positions
16
- pdf = NaturalPDF("tests/fixtures/sample.pdf")
16
+ pdf = PDF("tests/fixtures/sample.pdf")
17
17
  return pdf
18
18
 
19
19
  def test_min_max_coordinates(self, sample_pdf):
@@ -17,6 +17,15 @@ def test_callable_exclusion_returning_element():
17
17
  mock_page._exclusions = []
18
18
  mock_page._parent = None
19
19
 
20
+ # Add context manager support to mock
21
+ from contextlib import contextmanager
22
+
23
+ @contextmanager
24
+ def mock_without_exclusions():
25
+ yield mock_page
26
+
27
+ mock_page.without_exclusions = mock_without_exclusions
28
+
20
29
  # Create mock element with expand() method
21
30
  mock_element = Mock()
22
31
  mock_element.bbox = (100, 200, 300, 400)
@@ -114,6 +123,15 @@ def test_pdf_level_element_exclusions():
114
123
  mock_page._exclusions = []
115
124
  mock_page._parent = mock_pdf
116
125
 
126
+ # Add context manager support to mock
127
+ from contextlib import contextmanager
128
+
129
+ @contextmanager
130
+ def mock_without_exclusions():
131
+ yield mock_page
132
+
133
+ mock_page.without_exclusions = mock_without_exclusions
134
+
117
135
  # Call _get_exclusion_regions
118
136
  from natural_pdf.core.page import Page
119
137
 
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env python3
2
+ """Test the exclusion recursion fix."""
3
+
4
+ import os
5
+ import sys
6
+
7
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+
9
+ import natural_pdf as npdf
10
+
11
+
12
+ def test_exclusion_with_find():
13
+ """Test that exclusions using find() don't cause infinite recursion."""
14
+ # Create a simple test PDF
15
+ pdf = npdf.PDF("pdfs/sections.pdf")
16
+ page = pdf.pages[0]
17
+
18
+ # This should NOT cause infinite recursion anymore
19
+ # Use safer lambdas that handle None returns
20
+ page.add_exclusion(
21
+ lambda p: (
22
+ p.find("text:contains('Section')").above()
23
+ if p.find("text:contains('Section')")
24
+ else None
25
+ )
26
+ )
27
+ page.add_exclusion(lambda p: p.find("text").expand() if p.find("text") else None)
28
+
29
+ # Try to extract text - this should work without recursion
30
+ text = page.extract_text()
31
+ print(f"Successfully extracted {len(text)} characters")
32
+
33
+ # Try finding elements - this should also work
34
+ elements = page.find_all("text")
35
+ print(f"Found {len(elements)} text elements after applying exclusions")
36
+
37
+ # Test with ElementCollection return
38
+ page.add_exclusion(lambda p: p.find_all("text:contains('Header')"))
39
+ text2 = page.extract_text()
40
+ print(f"Successfully extracted {len(text2)} characters with ElementCollection exclusion")
41
+
42
+ print("✅ All tests passed - no infinite recursion!")
43
+
44
+
45
+ if __name__ == "__main__":
46
+ test_exclusion_with_find()
@@ -76,7 +76,7 @@ def test_expand_with_selectors():
76
76
  # Test expanding right until "Repeat?" (excluding)
77
77
  expanded = statute.expand(right='text:contains("Repeat?")')
78
78
  assert expanded.x0 == statute.x0
79
- assert expanded.x1 == repeat.x0 # Should stop at the left edge of "Repeat?"
79
+ assert expanded.x1 == repeat.x0 - 0.01 # Should stop just before "Repeat?" with default offset
80
80
  assert expanded.top == statute.top
81
81
  assert expanded.bottom == statute.bottom
82
82
 
@@ -99,7 +99,7 @@ def test_expand_with_selectors_not_found():
99
99
  # Test with selector that won't match anything
100
100
  expanded = element.expand(right='text:contains("NonExistentText")')
101
101
  assert expanded.x0 == element.x0
102
- assert expanded.x1 == element.x1 # Should remain unchanged
102
+ assert expanded.x1 == page.width # Should expand to page edge when selector not found
103
103
  assert expanded.top == element.top
104
104
  assert expanded.bottom == element.bottom
105
105
 
@@ -127,7 +127,7 @@ def test_expand_mixed_parameters():
127
127
  # The right edge should be at "Repeat?" if found
128
128
  repeat = page.find('text:contains("Repeat?")')
129
129
  if repeat and repeat.x0 > element.x1:
130
- assert expanded.x1 == repeat.x0
130
+ assert expanded.x1 == repeat.x0 - 0.01 # With default offset
131
131
 
132
132
 
133
133
  def test_expand_with_factors():
@@ -59,10 +59,12 @@ def test_find_with_pdf_exclusions():
59
59
 
60
60
  # Create a real page instance but with our mocked attributes
61
61
  page = Page.__new__(Page)
62
- page.index = 0
62
+ page._index = 0 # Set the internal _index attribute
63
63
  page._exclusions = []
64
64
  page._parent = mock_pdf
65
65
  page._page_obj = Mock()
66
+ page._computing_exclusions = False # Add the new flag
67
+ page._computing_exclusions = False # Add the new flag
66
68
 
67
69
  # Mock the internal methods we need
68
70
  page._get_exclusion_regions = Mock(return_value=[mock_region])
@@ -113,10 +115,11 @@ def test_find_all_with_pdf_exclusions():
113
115
 
114
116
  # Create a real page instance
115
117
  page = Page.__new__(Page)
116
- page.index = 0
118
+ page._index = 0 # Set the internal _index attribute
117
119
  page._exclusions = []
118
120
  page._parent = mock_pdf
119
121
  page._page_obj = Mock()
122
+ page._computing_exclusions = False # Add the new flag
120
123
 
121
124
  # Mock the methods
122
125
  mock_collection = Mock(spec=ElementCollection)
@@ -142,17 +145,22 @@ def test_get_elements_with_pdf_exclusions():
142
145
 
143
146
  # Create a real page instance
144
147
  page = Page.__new__(Page)
145
- page.index = 0
148
+ page._index = 0 # Set the internal _index attribute
146
149
  page._exclusions = [] # Empty page exclusions
147
150
  page._parent = mock_pdf
148
151
  page._page_obj = Mock()
152
+ page._computing_exclusions = False # Add the new flag
149
153
 
150
154
  # Mock elements
151
155
  all_elements = [Mock(), Mock(), Mock()]
152
156
  filtered_elements = [all_elements[0], all_elements[2]] # Exclude middle one
153
157
 
154
- # Mock the methods
155
- page._get_all_elements = Mock(return_value=all_elements)
158
+ # Mock the element manager
159
+ mock_element_mgr = Mock()
160
+ mock_element_mgr.get_all_elements = Mock(return_value=all_elements)
161
+ page._element_mgr = mock_element_mgr
162
+
163
+ # Mock the filter method
156
164
  page._filter_elements_by_exclusions = Mock(return_value=filtered_elements)
157
165
 
158
166
  # Test get_elements() with apply_exclusions=True
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes