natural-pdf 0.2.11__tar.gz → 0.2.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355)
  1. {natural_pdf-0.2.11/natural_pdf.egg-info → natural_pdf-0.2.13}/PKG-INFO +1 -1
  2. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/guides.py +196 -43
  3. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/highlighting_service.py +40 -10
  4. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/page.py +56 -8
  5. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/base.py +15 -1
  6. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/region.py +37 -5
  7. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/vision/__init__.py +1 -2
  8. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/vision/mixin.py +67 -27
  9. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/vision/results.py +49 -5
  10. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/vision/similarity.py +195 -23
  11. natural_pdf-0.2.13/natural_pdf/vision/template_matching.py +209 -0
  12. {natural_pdf-0.2.11 → natural_pdf-0.2.13/natural_pdf.egg-info}/PKG-INFO +1 -1
  13. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf.egg-info/SOURCES.txt +35 -0
  14. natural_pdf-0.2.13/temp/fix_page_exclusions.py +42 -0
  15. natural_pdf-0.2.13/temp/test_draw_guides.py +25 -0
  16. natural_pdf-0.2.13/temp/test_draw_guides_interactive.py +30 -0
  17. natural_pdf-0.2.13/temp/test_exclusion_with_debug.py +30 -0
  18. natural_pdf-0.2.13/temp/test_find_exclusions_fix.py +53 -0
  19. natural_pdf-0.2.13/temp/test_find_exclusions_fix_no_recursion.py +97 -0
  20. natural_pdf-0.2.13/temp/test_fix_real_pdf.py +48 -0
  21. natural_pdf-0.2.13/temp/test_fix_working.py +55 -0
  22. natural_pdf-0.2.13/temp/test_fixed_pdf_exclusions.py +67 -0
  23. natural_pdf-0.2.13/temp/test_guide_draw_notebook.py +47 -0
  24. natural_pdf-0.2.13/temp/test_horizontal_top_bottom.py +53 -0
  25. natural_pdf-0.2.13/temp/test_inline_js.py +22 -0
  26. natural_pdf-0.2.13/temp/test_marker_order.py +45 -0
  27. natural_pdf-0.2.13/temp/test_original_exclusions_now_work.py +56 -0
  28. natural_pdf-0.2.13/temp/test_pdf_exclusions_with_guides.py +84 -0
  29. natural_pdf-0.2.13/temp/test_region_exclusions_detailed.py +25 -0
  30. natural_pdf-0.2.13/temp/test_stripes_real_pdf.py +62 -0
  31. natural_pdf-0.2.13/temp/test_vertical_stripes.py +55 -0
  32. natural_pdf-0.2.13/temp/test_widget_functionality.py +68 -0
  33. natural_pdf-0.2.13/temp/test_widget_simple.py +41 -0
  34. natural_pdf-0.2.13/tests/test_element_collection_guides.py +140 -0
  35. natural_pdf-0.2.13/tests/test_element_exclusions.py +130 -0
  36. natural_pdf-0.2.13/tests/test_guides_from_stripes.py +224 -0
  37. natural_pdf-0.2.13/tests/test_guides_marker_sorting.py +177 -0
  38. natural_pdf-0.2.13/tests/test_highlight_offset.py +102 -0
  39. natural_pdf-0.2.13/tests/test_horizontal_guides_alignment.py +203 -0
  40. natural_pdf-0.2.13/tests/test_match_results_sorting.py +192 -0
  41. natural_pdf-0.2.13/tests/test_negative_bounds_pdf.py +53 -0
  42. natural_pdf-0.2.13/tests/test_pdf_exclusions_in_find_methods.py +169 -0
  43. natural_pdf-0.2.13/tests/test_phash_masking.py +175 -0
  44. natural_pdf-0.2.13/tests/test_region_find_similar.py +145 -0
  45. natural_pdf-0.2.13/tests/test_spatial_offset.py +89 -0
  46. natural_pdf-0.2.13/tests/test_template_matching.py +226 -0
  47. natural_pdf-0.2.13/tests/test_template_white_masking.py +178 -0
  48. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.cursor/rules/analysis_framework.mdc +0 -0
  49. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.cursor/rules/coding-style.mdc +0 -0
  50. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  51. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.cursor/rules/minimal-comments.mdc +0 -0
  52. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  53. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  54. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.github/workflows/ci.yml +0 -0
  55. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.github/workflows/docs.yml +0 -0
  56. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.github/workflows/nightly-tutorials.yml +0 -0
  57. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.gitignore +0 -0
  58. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/.pre-commit-config.yaml +0 -0
  59. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/01-execute_notebooks.py +0 -0
  60. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/02-run_all_tutorials.sh +0 -0
  61. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/CLAUDE.md +0 -0
  62. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/LICENSE +0 -0
  63. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/MANIFEST.in +0 -0
  64. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/README.md +0 -0
  65. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/audit_packaging.py +0 -0
  66. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/check_run_md.sh +0 -0
  67. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/api/index.md +0 -0
  68. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/favicon.png +0 -0
  69. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/favicon.svg +0 -0
  70. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/javascripts/custom.js +0 -0
  71. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/logo.svg +0 -0
  72. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/sample-screen.png +0 -0
  73. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/social-preview.png +0 -0
  74. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/social-preview.svg +0 -0
  75. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/assets/stylesheets/custom.css +0 -0
  76. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/categorizing-documents/index.md +0 -0
  77. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/data-extraction/index.md +0 -0
  78. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/describe/index.md +0 -0
  79. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/document-qa/index.md +0 -0
  80. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/element-selection/index.md +0 -0
  81. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/extracting-clean-text/index.md +0 -0
  82. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/finetuning/index.md +0 -0
  83. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/fix-messy-tables/index.md +0 -0
  84. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/fix-messy-tables/table_1.csv +0 -0
  85. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/fix-messy-tables/table_2.csv +0 -0
  86. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/fix-messy-tables/table_3.csv +0 -0
  87. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/index.md +0 -0
  88. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/installation/index.md +0 -0
  89. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/interactive-widget/index.md +0 -0
  90. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/layout-analysis/index.md +0 -0
  91. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/loops-and-groups/index.md +0 -0
  92. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/ocr/index.md +0 -0
  93. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/pdf-navigation/index.md +0 -0
  94. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
  95. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/process-forms-and-invoices/index.md +0 -0
  96. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/quick-reference/index.md +0 -0
  97. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/reflowing-pages/index.md +0 -0
  98. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/regions/index.md +0 -0
  99. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tables/index.md +0 -0
  100. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/text-analysis/index.md +0 -0
  101. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/01-loading-and-extraction.md +0 -0
  102. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/02-finding-elements.md +0 -0
  103. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/03-extracting-blocks.md +0 -0
  104. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/04-table-extraction.md +0 -0
  105. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/05-excluding-content.md +0 -0
  106. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/06-document-qa.md +0 -0
  107. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/07-layout-analysis.md +0 -0
  108. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/07-working-with-regions.md +0 -0
  109. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/08-spatial-navigation.md +0 -0
  110. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/09-section-extraction.md +0 -0
  111. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/10-form-field-extraction.md +0 -0
  112. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  113. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/12-ocr-integration.md +0 -0
  114. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/13-semantic-search.md +0 -0
  115. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/tutorials/14-categorizing-documents.md +0 -0
  116. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/visual-debugging/index.md +0 -0
  117. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/docs/visual-debugging/region.png +0 -0
  118. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/mkdocs.yml +0 -0
  119. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/__init__.py +0 -0
  120. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/__init__.py +0 -0
  121. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/__init__.py +0 -0
  122. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/base.py +0 -0
  123. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/docling.py +0 -0
  124. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/gemini.py +0 -0
  125. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  126. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  127. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  128. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/paddle.py +0 -0
  129. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  130. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/surya.py +0 -0
  131. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  132. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/tatr.py +0 -0
  133. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/layout/yolo.py +0 -0
  134. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
  135. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/text_options.py +0 -0
  136. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/text_structure.py +0 -0
  137. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/analyzers/utils.py +0 -0
  138. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/classification/manager.py +0 -0
  139. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/classification/mixin.py +0 -0
  140. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/classification/results.py +0 -0
  141. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/cli.py +0 -0
  142. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/collections/mixins.py +0 -0
  143. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/__init__.py +0 -0
  144. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/element_manager.py +0 -0
  145. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/page_collection.py +0 -0
  146. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/page_groupby.py +0 -0
  147. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/pdf.py +0 -0
  148. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/pdf_collection.py +0 -0
  149. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/core/render_spec.py +0 -0
  150. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/describe/__init__.py +0 -0
  151. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/describe/base.py +0 -0
  152. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/describe/elements.py +0 -0
  153. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/describe/mixin.py +0 -0
  154. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/describe/summary.py +0 -0
  155. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/__init__.py +0 -0
  156. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/element_collection.py +0 -0
  157. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/image.py +0 -0
  158. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/line.py +0 -0
  159. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/rect.py +0 -0
  160. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/elements/text.py +0 -0
  161. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/export/mixin.py +0 -0
  162. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/__init__.py +0 -0
  163. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/base.py +0 -0
  164. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/data/__init__.py +0 -0
  165. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/data/pdf.ttf +0 -0
  166. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/data/sRGB.icc +0 -0
  167. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/hocr.py +0 -0
  168. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/hocr_font.py +0 -0
  169. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/original_pdf.py +0 -0
  170. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/paddleocr.py +0 -0
  171. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/exporters/searchable_pdf.py +0 -0
  172. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/extraction/manager.py +0 -0
  173. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/extraction/mixin.py +0 -0
  174. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/extraction/result.py +0 -0
  175. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/flows/__init__.py +0 -0
  176. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/flows/collections.py +0 -0
  177. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/flows/element.py +0 -0
  178. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/flows/flow.py +0 -0
  179. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/flows/region.py +0 -0
  180. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/__init__.py +0 -0
  181. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/engine.py +0 -0
  182. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/engine_doctr.py +0 -0
  183. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/engine_easyocr.py +0 -0
  184. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/engine_paddle.py +0 -0
  185. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/engine_surya.py +0 -0
  186. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/ocr_factory.py +0 -0
  187. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/ocr_manager.py +0 -0
  188. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/ocr_options.py +0 -0
  189. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/ocr/utils.py +0 -0
  190. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/qa/__init__.py +0 -0
  191. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/qa/document_qa.py +0 -0
  192. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/qa/qa_result.py +0 -0
  193. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/search/__init__.py +0 -0
  194. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/search/lancedb_search_service.py +0 -0
  195. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/search/numpy_search_service.py +0 -0
  196. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/search/search_options.py +0 -0
  197. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/search/search_service_protocol.py +0 -0
  198. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/search/searchable_mixin.py +0 -0
  199. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/selectors/__init__.py +0 -0
  200. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/selectors/parser.py +0 -0
  201. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/tables/__init__.py +0 -0
  202. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/tables/result.py +0 -0
  203. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/templates/__init__.py +0 -0
  204. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  205. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/templates/spa/css/style.css +0 -0
  206. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/templates/spa/index.html +0 -0
  207. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/templates/spa/js/app.js +0 -0
  208. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/templates/spa/words.txt +0 -0
  209. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/text_mixin.py +0 -0
  210. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/__init__.py +0 -0
  211. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/bidi_mirror.py +0 -0
  212. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/color_utils.py +0 -0
  213. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/debug.py +0 -0
  214. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/highlighting.py +0 -0
  215. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/identifiers.py +0 -0
  216. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/layout.py +0 -0
  217. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/locks.py +0 -0
  218. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/packaging.py +0 -0
  219. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/reading_order.py +0 -0
  220. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/text_extraction.py +0 -0
  221. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/utils/visualization.py +0 -0
  222. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/widgets/__init__.py +0 -0
  223. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf/widgets/viewer.py +0 -0
  224. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf.egg-info/dependency_links.txt +0 -0
  225. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf.egg-info/entry_points.txt +0 -0
  226. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf.egg-info/requires.txt +0 -0
  227. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/natural_pdf.egg-info/top_level.txt +0 -0
  228. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/noxfile.py +0 -0
  229. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/memory_comparison.py +0 -0
  230. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/pdf_analyzer.py +0 -0
  231. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/performance_analysis.py +0 -0
  232. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/performance_results/image_heavy_snapshots.csv +0 -0
  233. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/performance_results/image_heavy_snapshots.json +0 -0
  234. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/performance_results/text_heavy_snapshots.csv +0 -0
  235. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/performance_results/text_heavy_snapshots.json +0 -0
  236. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/test_cleanup_methods.py +0 -0
  237. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/optimization/test_memory_fix.py +0 -0
  238. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/publish.sh +0 -0
  239. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/pyproject.toml +0 -0
  240. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/sample-screen.png +0 -0
  241. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/setup.cfg +0 -0
  242. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/conftest.py +0 -0
  243. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/exporters/test_paddleocr_exporter.py +0 -0
  244. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_annotate.py +0 -0
  245. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_arabic_performance.py +0 -0
  246. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_arabic_real_world.py +0 -0
  247. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_color_conversion.py +0 -0
  248. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_color_hex_display.py +0 -0
  249. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_core/test_containment_geometry.py +0 -0
  250. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_core/test_elements.py +0 -0
  251. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_core/test_loading.py +0 -0
  252. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_core/test_spatial.py +0 -0
  253. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_core/test_text_extraction.py +0 -0
  254. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_core/test_text_layer.py +0 -0
  255. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_crop_enhancements.py +0 -0
  256. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_crop_region_highlights.py +0 -0
  257. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_directional_defaults.py +0 -0
  258. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_dissolve.py +0 -0
  259. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_dissolve_cross_page_bug.py +0 -0
  260. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_dissolve_debug_issue.py +0 -0
  261. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_dissolve_real_world_issue.py +0 -0
  262. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_dissolve_single_elements.py +0 -0
  263. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_dissolve_vertical_offset_issue.py +0 -0
  264. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_document_qa.py +0 -0
  265. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_element_addition.py +0 -0
  266. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_element_collection_show_cols.py +0 -0
  267. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_element_collection_slicing.py +0 -0
  268. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_element_show_crop_highlights.py +0 -0
  269. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_empty_pseudo_class.py +0 -0
  270. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_exclusions.py +0 -0
  271. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_expand.py +0 -0
  272. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_extraction_error.py +0 -0
  273. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_extraction_mixin_fix.py +0 -0
  274. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_extraction_text_and_vision.py +0 -0
  275. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_extraction_working.py +0 -0
  276. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_find_similar.py +0 -0
  277. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_first_last_selectors.py +0 -0
  278. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_fix_get_sections_zero_height.py +0 -0
  279. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_flow_region_directional.py +0 -0
  280. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_get_sections_fix_comprehensive.py +0 -0
  281. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_get_sections_zero_height.py +0 -0
  282. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_groupby.py +0 -0
  283. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides.py +0 -0
  284. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides_apply_exclusions.py +0 -0
  285. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides_apply_exclusions_simple.py +0 -0
  286. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides_extract_table.py +0 -0
  287. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides_extract_table_collections.py +0 -0
  288. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides_extract_table_exclusions.py +0 -0
  289. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides_extract_table_real.py +0 -0
  290. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_guides_integration.py +0 -0
  291. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_highlight_detection.py +0 -0
  292. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_highlight_detection_comprehensive.py +0 -0
  293. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_highlight_protocol.py +0 -0
  294. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_highlight_protocol_simple.py +0 -0
  295. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_highlight_regions.py +0 -0
  296. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_comprehensive.py +0 -0
  297. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_debug.py +0 -0
  298. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_final.py +0 -0
  299. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_final_verification.py +0 -0
  300. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_fix.py +0 -0
  301. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_mock.py +0 -0
  302. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_simple.py +0 -0
  303. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_types_pdf.py +0 -0
  304. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_verification.py +0 -0
  305. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_include_boundaries_with_real_text.py +0 -0
  306. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_loading_original.py +0 -0
  307. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_merge_connected.py +0 -0
  308. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_merge_connected_real_world.py +0 -0
  309. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_merge_method.py +0 -0
  310. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_multi_page_table_discovery.py +0 -0
  311. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_optional_deps.py +0 -0
  312. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_page_exclusion_lists.py +0 -0
  313. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_pdf_add_exclusion_elementcollection.py +0 -0
  314. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_region_show_crop_highlights.py +0 -0
  315. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_region_viewer.py +0 -0
  316. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_sections_end_only.py +0 -0
  317. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_sections_with_start_and_end.py +0 -0
  318. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_show_column_layout.py +0 -0
  319. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_show_edge_cases.py +0 -0
  320. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_show_exclusions.py +0 -0
  321. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_show_exclusions_feature.py +0 -0
  322. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_show_limit.py +0 -0
  323. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_skip_repeating_headers_multipage.py +0 -0
  324. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_slice_cache_reuse.py +0 -0
  325. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_slice_exclusion_fix.py +0 -0
  326. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_slice_exclusion_issue.py +0 -0
  327. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_slice_exclusion_mock.py +0 -0
  328. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_sliced_collection_exclusions.py +0 -0
  329. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_strikethrough_detection.py +0 -0
  330. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_table_result_header_mismatch.py +0 -0
  331. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_table_result_keep_blank.py +0 -0
  332. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_tiny_text_tables.py +0 -0
  333. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_tiny_text_tables_table.py +0 -0
  334. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_tutorials.py +0 -0
  335. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_underline_detection.py +0 -0
  336. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tests/test_update_text.py +0 -0
  337. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/todo/bad_pdf_analysis.md +0 -0
  338. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/todo/evaluation.md +0 -0
  339. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md +0 -0
  340. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md +0 -0
  341. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md +0 -0
  342. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/README.md +0 -0
  343. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/__init__.py +0 -0
  344. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/analyser.py +0 -0
  345. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/collate_summaries.py +0 -0
  346. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/compile_attempts_markdown.py +0 -0
  347. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/eval_suite.py +0 -0
  348. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/evaluate_quality.py +0 -0
  349. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/export_enrichment_csv.py +0 -0
  350. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/extraction_decision_tree.md +0 -0
  351. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/llm_enrich.py +0 -0
  352. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/llm_enrich_with_retry.py +0 -0
  353. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/reporter.py +0 -0
  354. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/tools/bad_pdf_eval/utils.py +0 -0
  355. {natural_pdf-0.2.11 → natural_pdf-0.2.13}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.11
3
+ Version: 0.2.13
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -185,7 +185,9 @@ class GuidesList(UserList):
185
185
  self,
186
186
  markers: Union[str, List[str], "ElementCollection", Callable, None],
187
187
  obj: Optional[Union["Page", "Region", "FlowRegion"]] = None,
188
- align: Literal["left", "right", "center", "between"] = "left",
188
+ align: Union[
189
+ Literal["left", "right", "center", "between"], Literal["top", "bottom"]
190
+ ] = "left",
189
191
  outer: bool = True,
190
192
  tolerance: float = 5,
191
193
  *,
@@ -203,7 +205,10 @@ class GuidesList(UserList):
203
205
  - Callable: function that takes a page and returns markers
204
206
  - None: no markers
205
207
  obj: Page/Region/FlowRegion to search (uses parent's context if None)
206
- align: How to align guides relative to found elements
208
+ align: How to align guides relative to found elements:
209
+ - For vertical guides: 'left', 'right', 'center', 'between'
210
+ - For horizontal guides: 'top', 'bottom', 'center', 'between'
211
+ - Note: 'left'/'right' also work for horizontal (mapped to top/bottom)
207
212
  outer: Whether to add outer boundary guides
208
213
  tolerance: Tolerance for snapping to element edges
209
214
  apply_exclusions: Whether to apply exclusion zones when searching for text
@@ -224,19 +229,25 @@ class GuidesList(UserList):
224
229
  self._callable = None
225
230
  actual_markers = markers
226
231
 
232
+ # Normalize alignment for horizontal guides
233
+ if self._axis == "horizontal":
234
+ if align == "top":
235
+ align = "left"
236
+ elif align == "bottom":
237
+ align = "right"
238
+
227
239
  # Check if parent is in flow mode
228
240
  if self._parent.is_flow_region:
229
241
  # Create guides across all constituent regions
230
242
  all_guides = []
231
243
  for region in self._parent.context.constituent_regions:
232
- # Normalize markers for this region
233
- marker_texts = _normalize_markers(actual_markers, region)
244
+ # Pass markers directly - from_content will handle them properly
234
245
 
235
246
  # Create guides for this region
236
247
  region_guides = Guides.from_content(
237
248
  obj=region,
238
249
  axis=self._axis,
239
- markers=marker_texts,
250
+ markers=actual_markers, # Pass original markers, not normalized text
240
251
  align=align,
241
252
  outer=outer,
242
253
  tolerance=tolerance,
@@ -312,14 +323,14 @@ class GuidesList(UserList):
312
323
  return self._parent
313
324
 
314
325
  # Original single-region logic
315
- # Normalize markers to list of text strings
316
- marker_texts = _normalize_markers(actual_markers, target_obj)
326
+ # Pass markers directly to from_content which will handle them properly
327
+ # (no need to normalize here since from_content now handles ElementCollection)
317
328
 
318
329
  # Create guides for this axis
319
330
  new_guides = Guides.from_content(
320
331
  obj=target_obj,
321
332
  axis=self._axis,
322
- markers=marker_texts,
333
+ markers=actual_markers, # Pass original markers, not normalized text
323
334
  align=align,
324
335
  outer=outer,
325
336
  tolerance=tolerance,
@@ -930,6 +941,82 @@ class GuidesList(UserList):
930
941
  self.data.clear()
931
942
  return self._parent
932
943
 
944
+ def from_stripes(
945
+ self,
946
+ stripes=None,
947
+ color=None, # Explicitly specify stripe color
948
+ ) -> "Guides":
949
+ """Create guides from striped table rows or columns.
950
+
951
+ Creates guides at both edges of stripe elements (e.g., colored table rows).
952
+ Perfect for zebra-striped tables where you need guides at every row boundary.
953
+
954
+ Args:
955
+ stripes: Elements representing stripes. If None, auto-detects.
956
+ color: Specific color to look for (e.g., '#00ffff'). If None, finds most common.
957
+
958
+ Examples:
959
+ # Auto-detect zebra stripes
960
+ guides.horizontal.from_stripes()
961
+
962
+ # Specific color
963
+ guides.horizontal.from_stripes(color='#00ffff')
964
+
965
+ # Manual selection
966
+ stripes = page.find_all('rect[fill=#00ffff]')
967
+ guides.horizontal.from_stripes(stripes)
968
+
969
+ # Vertical stripes
970
+ guides.vertical.from_stripes(color='#e0e0e0')
971
+
972
+ Returns:
973
+ Parent Guides object for chaining
974
+ """
975
+ from collections import defaultdict
976
+
977
+ target_obj = self._parent.context
978
+ if target_obj is None:
979
+ raise ValueError("No context available for stripe detection")
980
+
981
+ if stripes is None:
982
+ if color:
983
+ # User specified color
984
+ stripes = target_obj.find_all(f"rect[fill={color}]")
985
+ else:
986
+ # Auto-detect most common non-white fill
987
+ all_rects = target_obj.find_all("rect[fill]")
988
+
989
+ # Group by fill color
990
+ fill_counts = defaultdict(list)
991
+ for rect in all_rects:
992
+ if rect.fill and rect.fill not in ["#ffffff", "white", "none", "transparent"]:
993
+ fill_counts[rect.fill].append(rect)
994
+
995
+ if not fill_counts:
996
+ return self._parent # No stripes found
997
+
998
+ # Find most common fill color
999
+ stripes = max(fill_counts.values(), key=len)
1000
+
1001
+ if not stripes:
1002
+ return self._parent
1003
+
1004
+ # Get both edges of each stripe
1005
+ edges = []
1006
+ if self._axis == "horizontal":
1007
+ for stripe in stripes:
1008
+ edges.extend([stripe.top, stripe.bottom])
1009
+ else:
1010
+ for stripe in stripes:
1011
+ edges.extend([stripe.x0, stripe.x1])
1012
+
1013
+ # Remove duplicates and sort
1014
+ edges = sorted(set(edges))
1015
+
1016
+ # Add guides
1017
+ self.extend(edges)
1018
+ return self._parent
1019
+
933
1020
  def __add__(self, other):
934
1021
  """Handle addition of GuidesList objects by returning combined data."""
935
1022
  if isinstance(other, GuidesList):
@@ -1459,7 +1546,9 @@ class Guides:
1459
1546
  obj: Union["Page", "Region", "FlowRegion"],
1460
1547
  axis: Literal["vertical", "horizontal"] = "vertical",
1461
1548
  markers: Union[str, List[str], "ElementCollection", None] = None,
1462
- align: Literal["left", "right", "center", "between"] = "left",
1549
+ align: Union[
1550
+ Literal["left", "right", "center", "between"], Literal["top", "bottom"]
1551
+ ] = "left",
1463
1552
  outer: bool = True,
1464
1553
  tolerance: float = 5,
1465
1554
  apply_exclusions: bool = True,
@@ -1475,7 +1564,9 @@ class Guides:
1475
1564
  - List[str]: list of selectors or literal text strings
1476
1565
  - ElementCollection: collection of elements to extract text from
1477
1566
  - None: no markers
1478
- align: Where to place guides relative to found text
1567
+ align: Where to place guides relative to found text:
1568
+ - For vertical guides: 'left', 'right', 'center', 'between'
1569
+ - For horizontal guides: 'top', 'bottom', 'center', 'between'
1479
1570
  outer: Whether to add guides at the boundaries
1480
1571
  tolerance: Maximum distance to search for text
1481
1572
  apply_exclusions: Whether to apply exclusion zones when searching for text
@@ -1483,6 +1574,13 @@ class Guides:
1483
1574
  Returns:
1484
1575
  New Guides object aligned to text content
1485
1576
  """
1577
+ # Normalize alignment for horizontal guides
1578
+ if axis == "horizontal":
1579
+ if align == "top":
1580
+ align = "left"
1581
+ elif align == "bottom":
1582
+ align = "right"
1583
+
1486
1584
  # Handle FlowRegion
1487
1585
  if hasattr(obj, "constituent_regions"):
1488
1586
  guides = cls(context=obj)
@@ -1530,39 +1628,51 @@ class Guides:
1530
1628
  elif hasattr(obj, "width"):
1531
1629
  bounds = (0, 0, obj.width, obj.height)
1532
1630
 
1533
- # Normalize markers to list of text strings
1534
- marker_texts = _normalize_markers(markers, obj)
1631
+ # Handle different marker types
1632
+ elements_to_process = []
1535
1633
 
1536
- # Find each marker and determine guide position
1537
- for marker in marker_texts:
1538
- if hasattr(obj, "find"):
1539
- element = obj.find(f'text:contains("{marker}")', apply_exclusions=apply_exclusions)
1540
- if element:
1541
- if axis == "vertical":
1542
- if align == "left":
1543
- guides_coords.append(element.x0)
1544
- elif align == "right":
1545
- guides_coords.append(element.x1)
1546
- elif align == "center":
1547
- guides_coords.append((element.x0 + element.x1) / 2)
1548
- elif align == "between":
1549
- # For between, collect left edges for processing later
1550
- guides_coords.append(element.x0)
1551
- else: # horizontal
1552
- if align == "left": # top for horizontal
1553
- guides_coords.append(element.top)
1554
- elif align == "right": # bottom for horizontal
1555
- guides_coords.append(element.bottom)
1556
- elif align == "center":
1557
- guides_coords.append((element.top + element.bottom) / 2)
1558
- elif align == "between":
1559
- # For between, collect top edges for processing later
1560
- guides_coords.append(element.top)
1634
+ # Check if markers is an ElementCollection or has elements attribute
1635
+ if hasattr(markers, "elements") or hasattr(markers, "_elements"):
1636
+ # It's an ElementCollection - use elements directly
1637
+ elements_to_process = getattr(markers, "elements", getattr(markers, "_elements", []))
1638
+ elif hasattr(markers, "__iter__") and not isinstance(markers, str):
1639
+ # Check if it's an iterable of elements (not strings)
1640
+ try:
1641
+ markers_list = list(markers)
1642
+ if markers_list and hasattr(markers_list[0], "x0"):
1643
+ # It's a list of elements
1644
+ elements_to_process = markers_list
1645
+ except:
1646
+ pass
1561
1647
 
1562
- # Handle 'between' alignment - find midpoints between adjacent markers
1563
- if align == "between" and len(guides_coords) >= 2:
1564
- # We need to get the right and left edges of each marker
1565
- marker_bounds = []
1648
+ if elements_to_process:
1649
+ # Process elements directly without text search
1650
+ for element in elements_to_process:
1651
+ if axis == "vertical":
1652
+ if align == "left":
1653
+ guides_coords.append(element.x0)
1654
+ elif align == "right":
1655
+ guides_coords.append(element.x1)
1656
+ elif align == "center":
1657
+ guides_coords.append((element.x0 + element.x1) / 2)
1658
+ elif align == "between":
1659
+ # For between, collect left edges for processing later
1660
+ guides_coords.append(element.x0)
1661
+ else: # horizontal
1662
+ if align == "left": # top for horizontal
1663
+ guides_coords.append(element.top)
1664
+ elif align == "right": # bottom for horizontal
1665
+ guides_coords.append(element.bottom)
1666
+ elif align == "center":
1667
+ guides_coords.append((element.top + element.bottom) / 2)
1668
+ elif align == "between":
1669
+ # For between, collect top edges for processing later
1670
+ guides_coords.append(element.top)
1671
+ else:
1672
+ # Fall back to text-based search
1673
+ marker_texts = _normalize_markers(markers, obj)
1674
+
1675
+ # Find each marker and determine guide position
1566
1676
  for marker in marker_texts:
1567
1677
  if hasattr(obj, "find"):
1568
1678
  element = obj.find(
@@ -1570,9 +1680,52 @@ class Guides:
1570
1680
  )
1571
1681
  if element:
1572
1682
  if axis == "vertical":
1573
- marker_bounds.append((element.x0, element.x1))
1683
+ if align == "left":
1684
+ guides_coords.append(element.x0)
1685
+ elif align == "right":
1686
+ guides_coords.append(element.x1)
1687
+ elif align == "center":
1688
+ guides_coords.append((element.x0 + element.x1) / 2)
1689
+ elif align == "between":
1690
+ # For between, collect left edges for processing later
1691
+ guides_coords.append(element.x0)
1574
1692
  else: # horizontal
1575
- marker_bounds.append((element.top, element.bottom))
1693
+ if align == "left": # top for horizontal
1694
+ guides_coords.append(element.top)
1695
+ elif align == "right": # bottom for horizontal
1696
+ guides_coords.append(element.bottom)
1697
+ elif align == "center":
1698
+ guides_coords.append((element.top + element.bottom) / 2)
1699
+ elif align == "between":
1700
+ # For between, collect top edges for processing later
1701
+ guides_coords.append(element.top)
1702
+
1703
+ # Handle 'between' alignment - find midpoints between adjacent markers
1704
+ if align == "between" and len(guides_coords) >= 2:
1705
+ # We need to get the right and left edges of each marker
1706
+ marker_bounds = []
1707
+
1708
+ if elements_to_process:
1709
+ # Use elements directly
1710
+ for element in elements_to_process:
1711
+ if axis == "vertical":
1712
+ marker_bounds.append((element.x0, element.x1))
1713
+ else: # horizontal
1714
+ marker_bounds.append((element.top, element.bottom))
1715
+ else:
1716
+ # Fall back to text search
1717
+ if "marker_texts" not in locals():
1718
+ marker_texts = _normalize_markers(markers, obj)
1719
+ for marker in marker_texts:
1720
+ if hasattr(obj, "find"):
1721
+ element = obj.find(
1722
+ f'text:contains("{marker}")', apply_exclusions=apply_exclusions
1723
+ )
1724
+ if element:
1725
+ if axis == "vertical":
1726
+ marker_bounds.append((element.x0, element.x1))
1727
+ else: # horizontal
1728
+ marker_bounds.append((element.top, element.bottom))
1576
1729
 
1577
1730
  # Sort markers by their left edge (or top edge for horizontal)
1578
1731
  marker_bounds.sort(key=lambda x: x[0])
@@ -92,6 +92,16 @@ class HighlightRenderer:
92
92
 
93
93
  def _draw_highlights(self):
94
94
  """Draws all highlight shapes, borders, vertices, and attributes."""
95
+ # Get the pdfplumber page offset for coordinate translation
96
+ page_offset_x = 0
97
+ page_offset_y = 0
98
+
99
+ if hasattr(self.page, "_page") and hasattr(self.page._page, "bbox"):
100
+ # PDFPlumber page bbox might have negative offsets
101
+ page_offset_x = -self.page._page.bbox[0]
102
+ page_offset_y = -self.page._page.bbox[1]
103
+ logger.debug(f"Applying highlight offset: x={page_offset_x}, y={page_offset_y}")
104
+
95
105
  for highlight in self.highlights:
96
106
  # Create a transparent overlay for this single highlight
97
107
  overlay = Image.new("RGBA", self.base_image.size, (0, 0, 0, 0))
@@ -101,7 +111,11 @@ class HighlightRenderer:
101
111
 
102
112
  if highlight.is_polygon:
103
113
  scaled_polygon = [
104
- (p[0] * self.scale_factor, p[1] * self.scale_factor) for p in highlight.polygon
114
+ (
115
+ (p[0] + page_offset_x) * self.scale_factor,
116
+ (p[1] + page_offset_y) * self.scale_factor,
117
+ )
118
+ for p in highlight.polygon
105
119
  ]
106
120
  # Draw polygon fill and border
107
121
  draw.polygon(
@@ -117,11 +131,16 @@ class HighlightRenderer:
117
131
  else: # Rectangle
118
132
  x0, top, x1, bottom = highlight.bbox
119
133
  x0_s, top_s, x1_s, bottom_s = (
120
- x0 * self.scale_factor,
121
- top * self.scale_factor,
122
- x1 * self.scale_factor,
123
- bottom * self.scale_factor,
134
+ (x0 + page_offset_x) * self.scale_factor,
135
+ (top + page_offset_y) * self.scale_factor,
136
+ (x1 + page_offset_x) * self.scale_factor,
137
+ (bottom + page_offset_y) * self.scale_factor,
124
138
  )
139
+ logger.debug(f"Original bbox: ({x0}, {top}, {x1}, {bottom})")
140
+ logger.debug(
141
+ f"Offset bbox: ({x0 + page_offset_x}, {top + page_offset_y}, {x1 + page_offset_x}, {bottom + page_offset_y})"
142
+ )
143
+ logger.debug(f"Scaled bbox: ({x0_s}, {top_s}, {x1_s}, {bottom_s})")
125
144
  scaled_bbox = [x0_s, top_s, x1_s, bottom_s]
126
145
  # Draw rectangle fill and border
127
146
  draw.rectangle(
@@ -1482,11 +1501,22 @@ class HighlightingService:
1482
1501
  offset_x = crop_offset[0] * scale_factor
1483
1502
  offset_y = crop_offset[1] * scale_factor
1484
1503
 
1504
+ # Add pdfplumber page offset for coordinate translation
1505
+ page_offset_x = 0
1506
+ page_offset_y = 0
1507
+ if hasattr(page, "_page") and hasattr(page._page, "bbox"):
1508
+ # PDFPlumber page bbox might have negative offsets
1509
+ page_offset_x = -page._page.bbox[0]
1510
+ page_offset_y = -page._page.bbox[1]
1511
+
1485
1512
  # Draw the highlight
1486
1513
  if polygon:
1487
1514
  # Scale polygon points and apply offset
1488
1515
  scaled_polygon = [
1489
- (p[0] * scale_factor - offset_x, p[1] * scale_factor - offset_y)
1516
+ (
1517
+ (p[0] + page_offset_x) * scale_factor - offset_x,
1518
+ (p[1] + page_offset_y) * scale_factor - offset_y,
1519
+ )
1490
1520
  for p in polygon
1491
1521
  ]
1492
1522
  draw.polygon(
@@ -1496,10 +1526,10 @@ class HighlightingService:
1496
1526
  # Scale bbox and apply offset
1497
1527
  x0, y0, x1, y1 = bbox
1498
1528
  scaled_bbox = [
1499
- x0 * scale_factor - offset_x,
1500
- y0 * scale_factor - offset_y,
1501
- x1 * scale_factor - offset_x,
1502
- y1 * scale_factor - offset_y,
1529
+ (x0 + page_offset_x) * scale_factor - offset_x,
1530
+ (y0 + page_offset_y) * scale_factor - offset_y,
1531
+ (x1 + page_offset_x) * scale_factor - offset_x,
1532
+ (y1 + page_offset_y) * scale_factor - offset_y,
1503
1533
  ]
1504
1534
  draw.rectangle(
1505
1535
  scaled_bbox, fill=color, outline=(color[0], color[1], color[2], BORDER_ALPHA)
@@ -815,11 +815,38 @@ class Page(
815
815
  if debug:
816
816
  print(f" ✗ Empty iterable returned from callable '{label}'")
817
817
  elif region_result:
818
- logger.warning(
819
- f"Callable exclusion '{exclusion_label}' returned non-Region object: {type(region_result)}. Skipping."
820
- )
821
- if debug:
822
- print(f" Callable returned non-Region/None: {type(region_result)}")
818
+ # Check if it's a single Element that can be converted to a Region
819
+ from natural_pdf.elements.base import Element
820
+
821
+ if isinstance(region_result, Element) or (
822
+ hasattr(region_result, "bbox") and hasattr(region_result, "expand")
823
+ ):
824
+ try:
825
+ # Convert Element to Region using expand()
826
+ expanded_region = region_result.expand()
827
+ if isinstance(expanded_region, Region):
828
+ expanded_region.label = label
829
+ regions.append(expanded_region)
830
+ if debug:
831
+ print(
832
+ f" ✓ Converted Element to Region from callable '{label}': {expanded_region}"
833
+ )
834
+ else:
835
+ if debug:
836
+ print(
837
+ f" ✗ Element.expand() did not return a Region: {type(expanded_region)}"
838
+ )
839
+ except Exception as e:
840
+ if debug:
841
+ print(f" ✗ Failed to convert Element to Region: {e}")
842
+ else:
843
+ logger.warning(
844
+ f"Callable exclusion '{exclusion_label}' returned non-Region object: {type(region_result)}. Skipping."
845
+ )
846
+ if debug:
847
+ print(
848
+ f" ✗ Callable returned non-Region/None: {type(region_result)}"
849
+ )
823
850
  else:
824
851
  if debug:
825
852
  print(
@@ -839,6 +866,27 @@ class Page(
839
866
  if debug:
840
867
  print(f" - Added direct region '{label}': {exclusion_item}")
841
868
 
869
+ # Process direct Element objects - convert to Region
870
+ elif hasattr(exclusion_item, "bbox") and hasattr(exclusion_item, "expand"):
871
+ try:
872
+ # Convert Element to Region using expand()
873
+ expanded_region = exclusion_item.expand()
874
+ if isinstance(expanded_region, Region):
875
+ expanded_region.label = label
876
+ regions.append(expanded_region)
877
+ if debug:
878
+ print(
879
+ f" - Converted direct Element to Region '{label}': {expanded_region}"
880
+ )
881
+ else:
882
+ if debug:
883
+ print(
884
+ f" - Element.expand() did not return a Region: {type(expanded_region)}"
885
+ )
886
+ except Exception as e:
887
+ if debug:
888
+ print(f" - Failed to convert Element to Region: {e}")
889
+
842
890
  # Process string selectors (from PDF-level exclusions)
843
891
  elif isinstance(exclusion_item, str):
844
892
  selector_str = exclusion_item
@@ -1081,7 +1129,7 @@ class Page(
1081
1129
  ) # _apply_selector doesn't filter
1082
1130
 
1083
1131
  # Filter the results based on exclusions if requested
1084
- if apply_exclusions and self._exclusions and results_collection:
1132
+ if apply_exclusions and results_collection:
1085
1133
  filtered_elements = self._filter_elements_by_exclusions(results_collection.elements)
1086
1134
  # Return the first element from the filtered list
1087
1135
  return filtered_elements[0] if filtered_elements else None
@@ -1176,7 +1224,7 @@ class Page(
1176
1224
  ) # _apply_selector doesn't filter
1177
1225
 
1178
1226
  # Filter the results based on exclusions if requested
1179
- if apply_exclusions and self._exclusions and results_collection:
1227
+ if apply_exclusions and results_collection:
1180
1228
  filtered_elements = self._filter_elements_by_exclusions(results_collection.elements)
1181
1229
  return ElementCollection(filtered_elements)
1182
1230
  else:
@@ -1548,7 +1596,7 @@ class Page(
1548
1596
  all_elements = self._element_mgr.get_all_elements()
1549
1597
 
1550
1598
  # Apply exclusions if requested
1551
- if apply_exclusions and self._exclusions:
1599
+ if apply_exclusions:
1552
1600
  return self._filter_elements_by_exclusions(
1553
1601
  all_elements, debug_exclusions=debug_exclusions
1554
1602
  )
@@ -106,6 +106,7 @@ class DirectionalMixin:
106
106
  include_source: bool = False,
107
107
  until: Optional[str] = None,
108
108
  include_endpoint: bool = True,
109
+ offset: float = 0.1,
109
110
  **kwargs,
110
111
  ) -> "Region":
111
112
  """
@@ -118,6 +119,7 @@ class DirectionalMixin:
118
119
  include_source: Whether to include this element/region's area in the result
119
120
  until: Optional selector string to specify a boundary element
120
121
  include_endpoint: Whether to include the boundary element found by 'until'
122
+ offset: Pixel offset when excluding source/endpoint (default: 0.1)
121
123
  **kwargs: Additional parameters for the 'until' selector search
122
124
 
123
125
  Returns:
@@ -127,7 +129,7 @@ class DirectionalMixin:
127
129
 
128
130
  is_horizontal = direction in ("left", "right")
129
131
  is_positive = direction in ("right", "below") # right/below are positive directions
130
- pixel_offset = 1 # Offset for excluding elements/endpoints
132
+ pixel_offset = offset # Use provided offset for excluding elements/endpoints
131
133
 
132
134
  # 1. Determine initial boundaries based on direction and include_source
133
135
  if is_horizontal:
@@ -260,6 +262,7 @@ class DirectionalMixin:
260
262
  include_source: bool = False,
261
263
  until: Optional[str] = None,
262
264
  include_endpoint: bool = True,
265
+ offset: float = 0.1,
263
266
  **kwargs,
264
267
  ) -> "Region":
265
268
  """
@@ -271,6 +274,7 @@ class DirectionalMixin:
271
274
  include_source: Whether to include this element/region in the result (default: False)
272
275
  until: Optional selector string to specify an upper boundary element
273
276
  include_endpoint: Whether to include the boundary element in the region (default: True)
277
+ offset: Pixel offset when excluding source/endpoint (default: 0.1)
274
278
  **kwargs: Additional parameters
275
279
 
276
280
  Returns:
@@ -295,6 +299,7 @@ class DirectionalMixin:
295
299
  include_source=include_source,
296
300
  until=until,
297
301
  include_endpoint=include_endpoint,
302
+ offset=offset,
298
303
  **kwargs,
299
304
  )
300
305
 
@@ -305,6 +310,7 @@ class DirectionalMixin:
305
310
  include_source: bool = False,
306
311
  until: Optional[str] = None,
307
312
  include_endpoint: bool = True,
313
+ offset: float = 0.1,
308
314
  **kwargs,
309
315
  ) -> "Region":
310
316
  """
@@ -316,6 +322,7 @@ class DirectionalMixin:
316
322
  include_source: Whether to include this element/region in the result (default: False)
317
323
  until: Optional selector string to specify a lower boundary element
318
324
  include_endpoint: Whether to include the boundary element in the region (default: True)
325
+ offset: Pixel offset when excluding source/endpoint (default: 0.1)
319
326
  **kwargs: Additional parameters
320
327
 
321
328
  Returns:
@@ -340,6 +347,7 @@ class DirectionalMixin:
340
347
  include_source=include_source,
341
348
  until=until,
342
349
  include_endpoint=include_endpoint,
350
+ offset=offset,
343
351
  **kwargs,
344
352
  )
345
353
 
@@ -350,6 +358,7 @@ class DirectionalMixin:
350
358
  include_source: bool = False,
351
359
  until: Optional[str] = None,
352
360
  include_endpoint: bool = True,
361
+ offset: float = 0.1,
353
362
  **kwargs,
354
363
  ) -> "Region":
355
364
  """
@@ -361,6 +370,7 @@ class DirectionalMixin:
361
370
  include_source: Whether to include this element/region in the result (default: False)
362
371
  until: Optional selector string to specify a left boundary element
363
372
  include_endpoint: Whether to include the boundary element in the region (default: True)
373
+ offset: Pixel offset when excluding source/endpoint (default: 0.1)
364
374
  **kwargs: Additional parameters
365
375
 
366
376
  Returns:
@@ -385,6 +395,7 @@ class DirectionalMixin:
385
395
  include_source=include_source,
386
396
  until=until,
387
397
  include_endpoint=include_endpoint,
398
+ offset=offset,
388
399
  **kwargs,
389
400
  )
390
401
 
@@ -395,6 +406,7 @@ class DirectionalMixin:
395
406
  include_source: bool = False,
396
407
  until: Optional[str] = None,
397
408
  include_endpoint: bool = True,
409
+ offset: float = 0.1,
398
410
  **kwargs,
399
411
  ) -> "Region":
400
412
  """
@@ -406,6 +418,7 @@ class DirectionalMixin:
406
418
  include_source: Whether to include this element/region in the result (default: False)
407
419
  until: Optional selector string to specify a right boundary element
408
420
  include_endpoint: Whether to include the boundary element in the region (default: True)
421
+ offset: Pixel offset when excluding source/endpoint (default: 0.1)
409
422
  **kwargs: Additional parameters
410
423
 
411
424
  Returns:
@@ -430,6 +443,7 @@ class DirectionalMixin:
430
443
  include_source=include_source,
431
444
  until=until,
432
445
  include_endpoint=include_endpoint,
446
+ offset=offset,
433
447
  **kwargs,
434
448
  )
435
449