natural-pdf 0.2.19__tar.gz → 0.2.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (420)
  1. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/PKG-INFO +1 -1
  2. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/guides.py +26 -2
  3. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf.egg-info/PKG-INFO +1 -1
  4. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf.egg-info/SOURCES.txt +1 -0
  5. natural_pdf-0.2.20/tests/test_guides_from_headers_strings.py +76 -0
  6. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.cursor/rules/analysis_framework.mdc +0 -0
  7. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.cursor/rules/coding-style.mdc +0 -0
  8. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  9. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.cursor/rules/minimal-comments.mdc +0 -0
  10. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  11. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  12. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.github/workflows/ci.yml +0 -0
  13. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.github/workflows/docs.yml +0 -0
  14. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.github/workflows/nightly-tutorials.yml +0 -0
  15. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.gitignore +0 -0
  16. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/.pre-commit-config.yaml +0 -0
  17. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/01-execute_notebooks.py +0 -0
  18. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/02-run_all_tutorials.sh +0 -0
  19. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/CHECKBOX_DETECTION.md +0 -0
  20. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/CLAUDE.md +0 -0
  21. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/LICENSE +0 -0
  22. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/MANIFEST.in +0 -0
  23. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/README.md +0 -0
  24. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/audit_packaging.py +0 -0
  25. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/check_run_md.sh +0 -0
  26. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/api/index.md +0 -0
  27. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/favicon.png +0 -0
  28. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/favicon.svg +0 -0
  29. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/javascripts/custom.js +0 -0
  30. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/logo.svg +0 -0
  31. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/sample-screen.png +0 -0
  32. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/social-preview.png +0 -0
  33. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/social-preview.svg +0 -0
  34. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/assets/stylesheets/custom.css +0 -0
  35. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/categorizing-documents/index.md +0 -0
  36. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/data-extraction/index.md +0 -0
  37. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/describe/index.md +0 -0
  38. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/document-qa/index.md +0 -0
  39. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/element-selection/index.md +0 -0
  40. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/extracting-clean-text/index.md +0 -0
  41. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/finetuning/index.md +0 -0
  42. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/fix-messy-tables/index.md +0 -0
  43. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/fix-messy-tables/table_1.csv +0 -0
  44. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/fix-messy-tables/table_2.csv +0 -0
  45. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/fix-messy-tables/table_3.csv +0 -0
  46. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/guide_adjustment_stream.md +0 -0
  47. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/guides_boundary_columns.md +0 -0
  48. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/index.md +0 -0
  49. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/installation/index.md +0 -0
  50. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/interactive-widget/index.md +0 -0
  51. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/layout-analysis/index.md +0 -0
  52. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/loops-and-groups/index.md +0 -0
  53. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/ocr/index.md +0 -0
  54. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/pdf-navigation/index.md +0 -0
  55. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
  56. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/process-forms-and-invoices/index.md +0 -0
  57. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/quick-reference/index.md +0 -0
  58. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/reflowing-pages/index.md +0 -0
  59. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/regions/index.md +0 -0
  60. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tables/index.md +0 -0
  61. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/text-analysis/index.md +0 -0
  62. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/01-loading-and-extraction.md +0 -0
  63. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/02-finding-elements.md +0 -0
  64. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/03-extracting-blocks.md +0 -0
  65. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/04-table-extraction.md +0 -0
  66. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/05-excluding-content.md +0 -0
  67. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/06-document-qa.md +0 -0
  68. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/07-layout-analysis.md +0 -0
  69. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/07-working-with-regions.md +0 -0
  70. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/08-spatial-navigation.md +0 -0
  71. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/09-section-extraction.md +0 -0
  72. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/10-form-field-extraction.md +0 -0
  73. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  74. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/12-ocr-integration.md +0 -0
  75. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/13-semantic-search.md +0 -0
  76. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/tutorials/14-categorizing-documents.md +0 -0
  77. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/visual-debugging/index.md +0 -0
  78. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/docs/visual-debugging/region.png +0 -0
  79. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/example_checkbox_usage.py +0 -0
  80. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/mkdocs.yml +0 -0
  81. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/__init__.py +0 -0
  82. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/__init__.py +0 -0
  83. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/checkbox/__init__.py +0 -0
  84. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/checkbox/base.py +0 -0
  85. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/checkbox/checkbox_analyzer.py +0 -0
  86. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/checkbox/checkbox_manager.py +0 -0
  87. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/checkbox/checkbox_options.py +0 -0
  88. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/checkbox/mixin.py +0 -0
  89. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/checkbox/rtdetr.py +0 -0
  90. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/__init__.py +0 -0
  91. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/base.py +0 -0
  92. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/docling.py +0 -0
  93. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/gemini.py +0 -0
  94. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  95. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  96. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  97. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/paddle.py +0 -0
  98. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  99. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/surya.py +0 -0
  100. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  101. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/tatr.py +0 -0
  102. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/layout/yolo.py +0 -0
  103. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
  104. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/text_options.py +0 -0
  105. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/text_structure.py +0 -0
  106. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/analyzers/utils.py +0 -0
  107. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/classification/manager.py +0 -0
  108. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/classification/mixin.py +0 -0
  109. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/classification/results.py +0 -0
  110. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/cli.py +0 -0
  111. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/collections/mixins.py +0 -0
  112. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/__init__.py +0 -0
  113. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/element_manager.py +0 -0
  114. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/highlighting_service.py +0 -0
  115. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/page.py +0 -0
  116. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/page_collection.py +0 -0
  117. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/page_groupby.py +0 -0
  118. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/pdf.py +0 -0
  119. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/pdf_collection.py +0 -0
  120. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/core/render_spec.py +0 -0
  121. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/describe/__init__.py +0 -0
  122. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/describe/base.py +0 -0
  123. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/describe/elements.py +0 -0
  124. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/describe/mixin.py +0 -0
  125. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/describe/summary.py +0 -0
  126. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/__init__.py +0 -0
  127. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/base.py +0 -0
  128. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/element_collection.py +0 -0
  129. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/image.py +0 -0
  130. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/line.py +0 -0
  131. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/rect.py +0 -0
  132. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/region.py +0 -0
  133. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/elements/text.py +0 -0
  134. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/export/mixin.py +0 -0
  135. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/__init__.py +0 -0
  136. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/base.py +0 -0
  137. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/data/__init__.py +0 -0
  138. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/data/pdf.ttf +0 -0
  139. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/data/sRGB.icc +0 -0
  140. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/hocr.py +0 -0
  141. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/hocr_font.py +0 -0
  142. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/original_pdf.py +0 -0
  143. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/paddleocr.py +0 -0
  144. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/exporters/searchable_pdf.py +0 -0
  145. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/extraction/manager.py +0 -0
  146. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/extraction/mixin.py +0 -0
  147. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/extraction/result.py +0 -0
  148. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/flows/__init__.py +0 -0
  149. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/flows/collections.py +0 -0
  150. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/flows/element.py +0 -0
  151. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/flows/flow.py +0 -0
  152. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/flows/region.py +0 -0
  153. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/judge.py +0 -0
  154. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/__init__.py +0 -0
  155. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/engine.py +0 -0
  156. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/engine_doctr.py +0 -0
  157. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/engine_easyocr.py +0 -0
  158. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/engine_paddle.py +0 -0
  159. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/engine_surya.py +0 -0
  160. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/ocr_factory.py +0 -0
  161. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/ocr_manager.py +0 -0
  162. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/ocr_options.py +0 -0
  163. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/ocr/utils.py +0 -0
  164. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/qa/__init__.py +0 -0
  165. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/qa/document_qa.py +0 -0
  166. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/qa/qa_result.py +0 -0
  167. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/search/__init__.py +0 -0
  168. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/search/lancedb_search_service.py +0 -0
  169. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/search/numpy_search_service.py +0 -0
  170. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/search/search_options.py +0 -0
  171. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/search/search_service_protocol.py +0 -0
  172. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/search/searchable_mixin.py +0 -0
  173. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/selectors/__init__.py +0 -0
  174. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/selectors/parser.py +0 -0
  175. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/tables/__init__.py +0 -0
  176. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/tables/result.py +0 -0
  177. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/templates/__init__.py +0 -0
  178. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  179. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/templates/spa/css/style.css +0 -0
  180. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/templates/spa/index.html +0 -0
  181. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/templates/spa/js/app.js +0 -0
  182. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/templates/spa/words.txt +0 -0
  183. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/text_mixin.py +0 -0
  184. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/__init__.py +0 -0
  185. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/bidi_mirror.py +0 -0
  186. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/color_utils.py +0 -0
  187. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/debug.py +0 -0
  188. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/highlighting.py +0 -0
  189. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/identifiers.py +0 -0
  190. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/layout.py +0 -0
  191. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/locks.py +0 -0
  192. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/packaging.py +0 -0
  193. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/pdfminer_patches.py +0 -0
  194. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/reading_order.py +0 -0
  195. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/sections.py +0 -0
  196. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/spatial.py +0 -0
  197. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/text_extraction.py +0 -0
  198. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/utils/visualization.py +0 -0
  199. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/vision/__init__.py +0 -0
  200. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/vision/mixin.py +0 -0
  201. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/vision/results.py +0 -0
  202. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/vision/similarity.py +0 -0
  203. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/vision/template_matching.py +0 -0
  204. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/widgets/__init__.py +0 -0
  205. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf/widgets/viewer.py +0 -0
  206. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf.egg-info/dependency_links.txt +0 -0
  207. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf.egg-info/entry_points.txt +0 -0
  208. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf.egg-info/requires.txt +0 -0
  209. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/natural_pdf.egg-info/top_level.txt +0 -0
  210. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/noxfile.py +0 -0
  211. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/memory_comparison.py +0 -0
  212. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/pdf_analyzer.py +0 -0
  213. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/performance_analysis.py +0 -0
  214. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/performance_results/image_heavy_snapshots.csv +0 -0
  215. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/performance_results/image_heavy_snapshots.json +0 -0
  216. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/performance_results/text_heavy_snapshots.csv +0 -0
  217. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/performance_results/text_heavy_snapshots.json +0 -0
  218. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/test_cleanup_methods.py +0 -0
  219. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/optimization/test_memory_fix.py +0 -0
  220. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/publish.sh +0 -0
  221. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/pyproject.toml +0 -0
  222. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/sample-screen.png +0 -0
  223. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/setup.cfg +0 -0
  224. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/check_model.py +0 -0
  225. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/check_pdf_content.py +0 -0
  226. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/checkbox_checks.py +0 -0
  227. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/checkbox_simple.py +0 -0
  228. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/checkbox_ux_ideas.py +0 -0
  229. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/context_manager_prototype.py +0 -0
  230. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/convert_to_hf.py +0 -0
  231. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/demo_text_closest.py +0 -0
  232. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/fix_page_exclusions.py +0 -0
  233. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/inspect_model.py +0 -0
  234. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/rtdetr_dinov2_test.py +0 -0
  235. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_closest_debug.py +0 -0
  236. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_closest_debug2.py +0 -0
  237. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_context_exploration.py +0 -0
  238. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_draw_guides.py +0 -0
  239. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_draw_guides_interactive.py +0 -0
  240. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_durham.py +0 -0
  241. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_empty_string.py +0 -0
  242. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_exclusion_with_debug.py +0 -0
  243. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_find_exclusions_fix.py +0 -0
  244. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_find_exclusions_fix_no_recursion.py +0 -0
  245. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_fix_real_pdf.py +0 -0
  246. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_fix_working.py +0 -0
  247. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_fixed_pdf_exclusions.py +0 -0
  248. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_guide_draw_notebook.py +0 -0
  249. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_horizontal_top_bottom.py +0 -0
  250. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_inline_js.py +0 -0
  251. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_marker_order.py +0 -0
  252. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_original_exclusions_now_work.py +0 -0
  253. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_pdf_exclusions_with_guides.py +0 -0
  254. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_region_exclusions_detailed.py +0 -0
  255. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_similarity.py +0 -0
  256. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_stripes_real_pdf.py +0 -0
  257. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_vertical_stripes.py +0 -0
  258. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_widget_functionality.py +0 -0
  259. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/temp/test_widget_simple.py +0 -0
  260. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/conftest.py +0 -0
  261. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/demo_multipage.py +0 -0
  262. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/exporters/test_paddleocr_exporter.py +0 -0
  263. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_aggregate_selectors.py +0 -0
  264. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_annotate.py +0 -0
  265. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_arabic_performance.py +0 -0
  266. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_arabic_real_world.py +0 -0
  267. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_auto_multipage_option.py +0 -0
  268. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_closest_substring_sorting.py +0 -0
  269. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_closest_until.py +0 -0
  270. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_closest_until_comparison.py +0 -0
  271. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_closest_until_debug.py +0 -0
  272. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_closest_until_fix.py +0 -0
  273. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_closest_until_ordering.py +0 -0
  274. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_color_conversion.py +0 -0
  275. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_color_hex_display.py +0 -0
  276. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_core/test_containment_geometry.py +0 -0
  277. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_core/test_elements.py +0 -0
  278. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_core/test_loading.py +0 -0
  279. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_core/test_spatial.py +0 -0
  280. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_core/test_text_extraction.py +0 -0
  281. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_core/test_text_layer.py +0 -0
  282. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_crop_enhancements.py +0 -0
  283. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_crop_region_highlights.py +0 -0
  284. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_directional_defaults.py +0 -0
  285. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_dissolve.py +0 -0
  286. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_dissolve_cross_page_bug.py +0 -0
  287. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_dissolve_debug_issue.py +0 -0
  288. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_dissolve_real_world_issue.py +0 -0
  289. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_dissolve_single_elements.py +0 -0
  290. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_dissolve_vertical_offset_issue.py +0 -0
  291. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_document_qa.py +0 -0
  292. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_element_addition.py +0 -0
  293. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_element_collection_guides.py +0 -0
  294. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_element_collection_show_cols.py +0 -0
  295. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_element_collection_slicing.py +0 -0
  296. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_element_exclusions.py +0 -0
  297. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_element_show_crop_highlights.py +0 -0
  298. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_empty_pseudo_class.py +0 -0
  299. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_exclude_multi_page.py +0 -0
  300. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_exclude_real_pdf.py +0 -0
  301. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_exclusion_recursion_fix.py +0 -0
  302. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_exclusions.py +0 -0
  303. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_expand.py +0 -0
  304. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_expand_enhanced.py +0 -0
  305. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_extract_text_words.py +0 -0
  306. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_extraction_error.py +0 -0
  307. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_extraction_mixin_fix.py +0 -0
  308. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_extraction_text_and_vision.py +0 -0
  309. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_extraction_working.py +0 -0
  310. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_find_similar.py +0 -0
  311. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_first_last_selectors.py +0 -0
  312. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_fix_get_sections_zero_height.py +0 -0
  313. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_flow_region_directional.py +0 -0
  314. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_from_parameter.py +0 -0
  315. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_from_parameter_example.py +0 -0
  316. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_from_self_exclusion.py +0 -0
  317. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_from_simple.py +0 -0
  318. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_get_sections_fix_comprehensive.py +0 -0
  319. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_get_sections_zero_height.py +0 -0
  320. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_groupby.py +0 -0
  321. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guide_adjustment_stream.py +0 -0
  322. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides.py +0 -0
  323. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_apply_exclusions.py +0 -0
  324. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_apply_exclusions_simple.py +0 -0
  325. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_boundaries.py +0 -0
  326. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_extract_table.py +0 -0
  327. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_extract_table_collections.py +0 -0
  328. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_extract_table_exclusions.py +0 -0
  329. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_extract_table_real.py +0 -0
  330. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_from_headers.py +0 -0
  331. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_from_stripes.py +0 -0
  332. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_integration.py +0 -0
  333. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_marker_sorting.py +0 -0
  334. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_guides_partial.py +0 -0
  335. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_highlight_color_falsy.py +0 -0
  336. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_highlight_detection.py +0 -0
  337. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_highlight_detection_comprehensive.py +0 -0
  338. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_highlight_offset.py +0 -0
  339. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_highlight_protocol.py +0 -0
  340. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_highlight_protocol_simple.py +0 -0
  341. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_highlight_regions.py +0 -0
  342. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_horizontal_guides_alignment.py +0 -0
  343. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_comprehensive.py +0 -0
  344. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_final.py +0 -0
  345. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_final_verification.py +0 -0
  346. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_fix.py +0 -0
  347. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_mock.py +0 -0
  348. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_simple.py +0 -0
  349. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_types_pdf.py +0 -0
  350. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_verification.py +0 -0
  351. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_include_boundaries_with_real_text.py +0 -0
  352. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_loading_original.py +0 -0
  353. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_match_results_sorting.py +0 -0
  354. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_merge_connected.py +0 -0
  355. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_merge_connected_real_world.py +0 -0
  356. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_merge_method.py +0 -0
  357. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_merged_flowregion_specs.py +0 -0
  358. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_mixed_collection_rendering.py +0 -0
  359. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_multi_page_table_discovery.py +0 -0
  360. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_multipage_directional.py +0 -0
  361. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_negative_bounds_pdf.py +0 -0
  362. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_optional_deps.py +0 -0
  363. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_page_exclusion_lists.py +0 -0
  364. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_pdf_add_exclusion_elementcollection.py +0 -0
  365. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_pdf_exclusions_in_find_methods.py +0 -0
  366. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_pdfminer_bug_status.py +0 -0
  367. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_pdfminer_color_bug.py +0 -0
  368. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_pdfminer_color_stack_bug.py +0 -0
  369. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_phash_masking.py +0 -0
  370. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_region_find_similar.py +0 -0
  371. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_region_show_crop_highlights.py +0 -0
  372. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_region_viewer.py +0 -0
  373. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_sections_end_only.py +0 -0
  374. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_sections_with_start_and_end.py +0 -0
  375. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_show_column_layout.py +0 -0
  376. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_show_edge_cases.py +0 -0
  377. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_show_exclusions.py +0 -0
  378. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_show_exclusions_feature.py +0 -0
  379. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_show_limit.py +0 -0
  380. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_skip_repeating_headers_multipage.py +0 -0
  381. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_slice_cache_reuse.py +0 -0
  382. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_slice_exclusion_fix.py +0 -0
  383. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_slice_exclusion_issue.py +0 -0
  384. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_slice_exclusion_mock.py +0 -0
  385. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_sliced_collection_exclusions.py +0 -0
  386. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_smart_exclusion.py +0 -0
  387. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_spatial_offset.py +0 -0
  388. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_strikethrough_detection.py +0 -0
  389. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_table_result_header_mismatch.py +0 -0
  390. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_table_result_keep_blank.py +0 -0
  391. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_template_matching.py +0 -0
  392. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_template_white_masking.py +0 -0
  393. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_text_closest_selector.py +0 -0
  394. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_tiny_text_tables.py +0 -0
  395. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_tiny_text_tables_table.py +0 -0
  396. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_tutorials.py +0 -0
  397. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_underline_detection.py +0 -0
  398. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_update_text.py +0 -0
  399. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_within_constraint.py +0 -0
  400. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_words_vs_find_all_text.py +0 -0
  401. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tests/test_words_vs_find_all_text_summary.md +0 -0
  402. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/todo/bad_pdf_analysis.md +0 -0
  403. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/todo/evaluation.md +0 -0
  404. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md +0 -0
  405. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md +0 -0
  406. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md +0 -0
  407. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/README.md +0 -0
  408. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/__init__.py +0 -0
  409. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/analyser.py +0 -0
  410. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/collate_summaries.py +0 -0
  411. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/compile_attempts_markdown.py +0 -0
  412. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/eval_suite.py +0 -0
  413. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/evaluate_quality.py +0 -0
  414. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/export_enrichment_csv.py +0 -0
  415. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/extraction_decision_tree.md +0 -0
  416. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/llm_enrich.py +0 -0
  417. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/llm_enrich_with_retry.py +0 -0
  418. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/reporter.py +0 -0
  419. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/tools/bad_pdf_eval/utils.py +0 -0
  420. {natural_pdf-0.2.19 → natural_pdf-0.2.20}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.19
3
+ Version: 0.2.20
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -943,7 +943,7 @@ class GuidesList(UserList):
943
943
 
944
944
  def from_headers(
945
945
  self,
946
- headers: Union["ElementCollection", List["Element"]],
946
+ headers: Union["ElementCollection", List["Element"], List[str]],
947
947
  obj: Optional[Union["Page", "Region"]] = None,
948
948
  method: Literal["min_crossings", "seam_carving"] = "min_crossings",
949
949
  min_width: Optional[float] = None,
@@ -960,7 +960,10 @@ class GuidesList(UserList):
960
960
  between headers that minimize text crossings, regardless of text alignment.
961
961
 
962
962
  Args:
963
- headers: Column header elements (ElementCollection or list of Elements)
963
+ headers: Column header elements. Can be:
964
+ - ElementCollection: collection of header elements
965
+ - List[Element]: list of header elements
966
+ - List[str]: list of header text to search for
964
967
  obj: Page/Region to analyze (uses parent's context if None)
965
968
  method: Detection method:
966
969
  - 'min_crossings': Fast vector-based minimum intersection count
@@ -980,6 +983,9 @@ class GuidesList(UserList):
980
983
  headers = page.find_all('text[size=16]')
981
984
  guides.vertical.from_headers(headers)
982
985
 
986
+ # From header text strings
987
+ guides.vertical.from_headers(["Statute", "Description", "Level", "Repeat"])
988
+
983
989
  # With width constraints
984
990
  guides.vertical.from_headers(headers, min_width=50, max_width=200)
985
991
 
@@ -997,6 +1003,24 @@ class GuidesList(UserList):
997
1003
  # Convert headers to list if ElementCollection
998
1004
  if hasattr(headers, "elements"):
999
1005
  header_elements = list(headers.elements)
1006
+ # Check if headers is a list of strings
1007
+ elif isinstance(headers, list) and headers and isinstance(headers[0], str):
1008
+ # Find elements for each header text with exact matching
1009
+ header_elements = []
1010
+ for header_text in headers:
1011
+ # Find all text elements and filter for exact match
1012
+ all_text = target_obj.find_all("text")
1013
+ exact_matches = [elem for elem in all_text if elem.extract_text() == header_text]
1014
+
1015
+ if exact_matches:
1016
+ # Use the first exact match
1017
+ header_elements.append(exact_matches[0])
1018
+ else:
1019
+ logger.warning(f"Could not find header text: {header_text}")
1020
+
1021
+ if not header_elements:
1022
+ logger.warning("No header elements found from provided text strings")
1023
+ return self._parent
1000
1024
  else:
1001
1025
  header_elements = list(headers)
1002
1026
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.19
3
+ Version: 0.2.20
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -319,6 +319,7 @@ tests/test_guides_extract_table_collections.py
319
319
  tests/test_guides_extract_table_exclusions.py
320
320
  tests/test_guides_extract_table_real.py
321
321
  tests/test_guides_from_headers.py
322
+ tests/test_guides_from_headers_strings.py
322
323
  tests/test_guides_from_stripes.py
323
324
  tests/test_guides_integration.py
324
325
  tests/test_guides_marker_sorting.py
@@ -0,0 +1,76 @@
1
+ """Test from_headers() with string headers."""
2
+
3
+ import pytest
4
+
5
+ from natural_pdf import PDF
6
+ from natural_pdf.analyzers.guides import Guides
7
+
8
+
9
+ def test_from_headers_with_strings(practice_pdf):
10
+ """Test from_headers with list of strings."""
11
+ page = practice_pdf[0]
12
+
13
+ # Get some text to use as test headers
14
+ all_text = page.find_all("text")[:5]
15
+ if len(all_text) >= 3:
16
+ # Extract the actual text content
17
+ header_texts = [elem.extract_text() for elem in all_text[:3]]
18
+
19
+ # Create guides using string headers
20
+ guides = Guides(page)
21
+ guides.vertical.from_headers(header_texts)
22
+
23
+ # Should work similar to using elements directly
24
+ guides_from_elements = Guides(page)
25
+ guides_from_elements.vertical.from_headers(all_text[:3])
26
+
27
+ # Both approaches should produce similar results
28
+ # (may not be identical due to search differences)
29
+ assert len(guides.vertical) >= 0
30
+ assert len(guides_from_elements.vertical) >= 0
31
+
32
+
33
+ def test_from_headers_with_missing_strings(practice_pdf):
34
+ """Test from_headers when some strings are not found."""
35
+ page = practice_pdf[0]
36
+
37
+ # Use mix of real and fake headers
38
+ all_text = page.find_all("text")[:2]
39
+ if len(all_text) >= 2:
40
+ real_texts = [elem.extract_text() for elem in all_text]
41
+ fake_texts = ["NONEXISTENT_HEADER_1", "NONEXISTENT_HEADER_2"]
42
+ mixed_texts = real_texts + fake_texts
43
+
44
+ # Should still work with found headers
45
+ guides = Guides(page)
46
+ result = guides.vertical.from_headers(mixed_texts)
47
+
48
+ # Should return the parent for chaining
49
+ assert result is guides
50
+
51
+
52
+ def test_from_headers_all_strings_missing(practice_pdf):
53
+ """Test from_headers when no strings are found."""
54
+ page = practice_pdf[0]
55
+
56
+ # All fake headers
57
+ fake_headers = ["FAKE_HEADER_1", "FAKE_HEADER_2", "FAKE_HEADER_3"]
58
+
59
+ guides = Guides(page)
60
+ result = guides.vertical.from_headers(fake_headers)
61
+
62
+ # Should return parent but no guides created
63
+ assert result is guides
64
+ assert len(guides.vertical) == 0
65
+
66
+
67
+ def test_from_headers_empty_string_list(practice_pdf):
68
+ """Test from_headers with empty string list."""
69
+ page = practice_pdf[0]
70
+
71
+ guides = Guides(page)
72
+ result = guides.vertical.from_headers([])
73
+
74
+ # Should handle gracefully
75
+ assert result is guides
76
+ assert len(guides.vertical) == 0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes