natural-pdf 0.2.9__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (320) hide show
  1. {natural_pdf-0.2.9/natural_pdf.egg-info → natural_pdf-0.2.10}/PKG-INFO +1 -1
  2. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/guides.py +499 -3
  3. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/cli.py +1 -1
  4. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/region.py +61 -0
  5. {natural_pdf-0.2.9 → natural_pdf-0.2.10/natural_pdf.egg-info}/PKG-INFO +1 -1
  6. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides.py +71 -0
  7. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.cursor/rules/analysis_framework.mdc +0 -0
  8. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.cursor/rules/coding-style.mdc +0 -0
  9. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  10. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.cursor/rules/minimal-comments.mdc +0 -0
  11. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  12. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  13. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.github/workflows/ci.yml +0 -0
  14. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.github/workflows/docs.yml +0 -0
  15. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.github/workflows/nightly-tutorials.yml +0 -0
  16. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.gitignore +0 -0
  17. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/.pre-commit-config.yaml +0 -0
  18. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/01-execute_notebooks.py +0 -0
  19. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/02-run_all_tutorials.sh +0 -0
  20. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/CLAUDE.md +0 -0
  21. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/LICENSE +0 -0
  22. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/MANIFEST.in +0 -0
  23. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/README.md +0 -0
  24. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/audit_packaging.py +0 -0
  25. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/check_run_md.sh +0 -0
  26. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/api/index.md +0 -0
  27. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/favicon.png +0 -0
  28. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/favicon.svg +0 -0
  29. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/javascripts/custom.js +0 -0
  30. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/logo.svg +0 -0
  31. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/sample-screen.png +0 -0
  32. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/social-preview.png +0 -0
  33. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/social-preview.svg +0 -0
  34. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/assets/stylesheets/custom.css +0 -0
  35. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/categorizing-documents/index.md +0 -0
  36. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/data-extraction/index.md +0 -0
  37. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/describe/index.md +0 -0
  38. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/document-qa/index.md +0 -0
  39. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/element-selection/index.md +0 -0
  40. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/extracting-clean-text/index.md +0 -0
  41. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/finetuning/index.md +0 -0
  42. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/fix-messy-tables/index.md +0 -0
  43. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/fix-messy-tables/table_1.csv +0 -0
  44. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/fix-messy-tables/table_2.csv +0 -0
  45. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/fix-messy-tables/table_3.csv +0 -0
  46. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/index.md +0 -0
  47. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/installation/index.md +0 -0
  48. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/interactive-widget/index.md +0 -0
  49. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/layout-analysis/index.md +0 -0
  50. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/loops-and-groups/index.md +0 -0
  51. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/ocr/index.md +0 -0
  52. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/pdf-navigation/index.md +0 -0
  53. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
  54. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/process-forms-and-invoices/index.md +0 -0
  55. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/quick-reference/index.md +0 -0
  56. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/reflowing-pages/index.md +0 -0
  57. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/regions/index.md +0 -0
  58. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tables/index.md +0 -0
  59. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/text-analysis/index.md +0 -0
  60. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/01-loading-and-extraction.md +0 -0
  61. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/02-finding-elements.md +0 -0
  62. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/03-extracting-blocks.md +0 -0
  63. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/04-table-extraction.md +0 -0
  64. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/05-excluding-content.md +0 -0
  65. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/06-document-qa.md +0 -0
  66. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/07-layout-analysis.md +0 -0
  67. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/07-working-with-regions.md +0 -0
  68. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/08-spatial-navigation.md +0 -0
  69. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/09-section-extraction.md +0 -0
  70. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/10-form-field-extraction.md +0 -0
  71. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  72. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/12-ocr-integration.md +0 -0
  73. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/13-semantic-search.md +0 -0
  74. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/tutorials/14-categorizing-documents.md +0 -0
  75. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/visual-debugging/index.md +0 -0
  76. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/docs/visual-debugging/region.png +0 -0
  77. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/mkdocs.yml +0 -0
  78. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/__init__.py +0 -0
  79. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/__init__.py +0 -0
  80. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/__init__.py +0 -0
  81. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/base.py +0 -0
  82. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/docling.py +0 -0
  83. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/gemini.py +0 -0
  84. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  85. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  86. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  87. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/paddle.py +0 -0
  88. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  89. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/surya.py +0 -0
  90. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  91. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/tatr.py +0 -0
  92. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/layout/yolo.py +0 -0
  93. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
  94. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/text_options.py +0 -0
  95. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/text_structure.py +0 -0
  96. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/analyzers/utils.py +0 -0
  97. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/classification/manager.py +0 -0
  98. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/classification/mixin.py +0 -0
  99. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/classification/results.py +0 -0
  100. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/collections/mixins.py +0 -0
  101. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/__init__.py +0 -0
  102. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/element_manager.py +0 -0
  103. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/highlighting_service.py +0 -0
  104. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/page.py +0 -0
  105. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/page_collection.py +0 -0
  106. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/page_groupby.py +0 -0
  107. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/pdf.py +0 -0
  108. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/pdf_collection.py +0 -0
  109. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/core/render_spec.py +0 -0
  110. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/describe/__init__.py +0 -0
  111. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/describe/base.py +0 -0
  112. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/describe/elements.py +0 -0
  113. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/describe/mixin.py +0 -0
  114. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/describe/summary.py +0 -0
  115. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/__init__.py +0 -0
  116. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/base.py +0 -0
  117. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/element_collection.py +0 -0
  118. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/image.py +0 -0
  119. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/line.py +0 -0
  120. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/rect.py +0 -0
  121. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/elements/text.py +0 -0
  122. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/export/mixin.py +0 -0
  123. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/__init__.py +0 -0
  124. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/base.py +0 -0
  125. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/data/__init__.py +0 -0
  126. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/data/pdf.ttf +0 -0
  127. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/data/sRGB.icc +0 -0
  128. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/hocr.py +0 -0
  129. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/hocr_font.py +0 -0
  130. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/original_pdf.py +0 -0
  131. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/paddleocr.py +0 -0
  132. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/exporters/searchable_pdf.py +0 -0
  133. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/extraction/manager.py +0 -0
  134. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/extraction/mixin.py +0 -0
  135. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/extraction/result.py +0 -0
  136. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/flows/__init__.py +0 -0
  137. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/flows/collections.py +0 -0
  138. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/flows/element.py +0 -0
  139. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/flows/flow.py +0 -0
  140. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/flows/region.py +0 -0
  141. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/__init__.py +0 -0
  142. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/engine.py +0 -0
  143. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/engine_doctr.py +0 -0
  144. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/engine_easyocr.py +0 -0
  145. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/engine_paddle.py +0 -0
  146. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/engine_surya.py +0 -0
  147. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/ocr_factory.py +0 -0
  148. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/ocr_manager.py +0 -0
  149. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/ocr_options.py +0 -0
  150. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/ocr/utils.py +0 -0
  151. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/qa/__init__.py +0 -0
  152. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/qa/document_qa.py +0 -0
  153. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/qa/qa_result.py +0 -0
  154. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/search/__init__.py +0 -0
  155. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/search/lancedb_search_service.py +0 -0
  156. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/search/numpy_search_service.py +0 -0
  157. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/search/search_options.py +0 -0
  158. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/search/search_service_protocol.py +0 -0
  159. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/search/searchable_mixin.py +0 -0
  160. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/selectors/__init__.py +0 -0
  161. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/selectors/parser.py +0 -0
  162. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/tables/__init__.py +0 -0
  163. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/tables/result.py +0 -0
  164. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/templates/__init__.py +0 -0
  165. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  166. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/templates/spa/css/style.css +0 -0
  167. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/templates/spa/index.html +0 -0
  168. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/templates/spa/js/app.js +0 -0
  169. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/templates/spa/words.txt +0 -0
  170. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/text_mixin.py +0 -0
  171. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/__init__.py +0 -0
  172. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/bidi_mirror.py +0 -0
  173. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/color_utils.py +0 -0
  174. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/debug.py +0 -0
  175. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/highlighting.py +0 -0
  176. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/identifiers.py +0 -0
  177. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/layout.py +0 -0
  178. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/locks.py +0 -0
  179. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/packaging.py +0 -0
  180. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/reading_order.py +0 -0
  181. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/text_extraction.py +0 -0
  182. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/utils/visualization.py +0 -0
  183. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/vision/__init__.py +0 -0
  184. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/vision/mixin.py +0 -0
  185. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/vision/results.py +0 -0
  186. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/vision/similarity.py +0 -0
  187. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/widgets/__init__.py +0 -0
  188. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf/widgets/viewer.py +0 -0
  189. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf.egg-info/SOURCES.txt +0 -0
  190. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf.egg-info/dependency_links.txt +0 -0
  191. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf.egg-info/entry_points.txt +0 -0
  192. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf.egg-info/requires.txt +0 -0
  193. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/natural_pdf.egg-info/top_level.txt +0 -0
  194. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/noxfile.py +0 -0
  195. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/memory_comparison.py +0 -0
  196. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/pdf_analyzer.py +0 -0
  197. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/performance_analysis.py +0 -0
  198. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/performance_results/image_heavy_snapshots.csv +0 -0
  199. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/performance_results/image_heavy_snapshots.json +0 -0
  200. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/performance_results/text_heavy_snapshots.csv +0 -0
  201. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/performance_results/text_heavy_snapshots.json +0 -0
  202. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/test_cleanup_methods.py +0 -0
  203. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/optimization/test_memory_fix.py +0 -0
  204. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/publish.sh +0 -0
  205. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/pyproject.toml +0 -0
  206. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/sample-screen.png +0 -0
  207. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/setup.cfg +0 -0
  208. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/conftest.py +0 -0
  209. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/exporters/test_paddleocr_exporter.py +0 -0
  210. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_annotate.py +0 -0
  211. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_arabic_performance.py +0 -0
  212. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_arabic_real_world.py +0 -0
  213. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_color_conversion.py +0 -0
  214. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_color_hex_display.py +0 -0
  215. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_core/test_containment_geometry.py +0 -0
  216. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_core/test_elements.py +0 -0
  217. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_core/test_loading.py +0 -0
  218. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_core/test_spatial.py +0 -0
  219. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_core/test_text_extraction.py +0 -0
  220. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_core/test_text_layer.py +0 -0
  221. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_crop_enhancements.py +0 -0
  222. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_crop_region_highlights.py +0 -0
  223. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_directional_defaults.py +0 -0
  224. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_dissolve.py +0 -0
  225. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_dissolve_cross_page_bug.py +0 -0
  226. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_dissolve_debug_issue.py +0 -0
  227. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_dissolve_real_world_issue.py +0 -0
  228. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_dissolve_single_elements.py +0 -0
  229. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_dissolve_vertical_offset_issue.py +0 -0
  230. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_document_qa.py +0 -0
  231. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_element_addition.py +0 -0
  232. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_element_collection_show_cols.py +0 -0
  233. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_element_collection_slicing.py +0 -0
  234. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_element_show_crop_highlights.py +0 -0
  235. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_empty_pseudo_class.py +0 -0
  236. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_exclusions.py +0 -0
  237. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_expand.py +0 -0
  238. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_extraction_error.py +0 -0
  239. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_extraction_mixin_fix.py +0 -0
  240. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_extraction_text_and_vision.py +0 -0
  241. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_extraction_working.py +0 -0
  242. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_find_similar.py +0 -0
  243. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_first_last_selectors.py +0 -0
  244. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_fix_get_sections_zero_height.py +0 -0
  245. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_flow_region_directional.py +0 -0
  246. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_get_sections_fix_comprehensive.py +0 -0
  247. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_get_sections_zero_height.py +0 -0
  248. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_groupby.py +0 -0
  249. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides_apply_exclusions.py +0 -0
  250. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides_apply_exclusions_simple.py +0 -0
  251. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides_extract_table.py +0 -0
  252. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides_extract_table_collections.py +0 -0
  253. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides_extract_table_exclusions.py +0 -0
  254. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides_extract_table_real.py +0 -0
  255. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_guides_integration.py +0 -0
  256. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_highlight_detection.py +0 -0
  257. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_highlight_detection_comprehensive.py +0 -0
  258. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_highlight_protocol.py +0 -0
  259. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_highlight_protocol_simple.py +0 -0
  260. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_highlight_regions.py +0 -0
  261. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_comprehensive.py +0 -0
  262. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_debug.py +0 -0
  263. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_final.py +0 -0
  264. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_final_verification.py +0 -0
  265. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_fix.py +0 -0
  266. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_mock.py +0 -0
  267. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_simple.py +0 -0
  268. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_types_pdf.py +0 -0
  269. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_verification.py +0 -0
  270. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_include_boundaries_with_real_text.py +0 -0
  271. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_loading_original.py +0 -0
  272. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_merge_connected.py +0 -0
  273. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_merge_connected_real_world.py +0 -0
  274. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_merge_method.py +0 -0
  275. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_multi_page_table_discovery.py +0 -0
  276. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_optional_deps.py +0 -0
  277. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_page_exclusion_lists.py +0 -0
  278. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_pdf_add_exclusion_elementcollection.py +0 -0
  279. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_region_show_crop_highlights.py +0 -0
  280. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_region_viewer.py +0 -0
  281. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_sections_end_only.py +0 -0
  282. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_sections_with_start_and_end.py +0 -0
  283. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_show_column_layout.py +0 -0
  284. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_show_edge_cases.py +0 -0
  285. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_show_exclusions.py +0 -0
  286. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_show_exclusions_feature.py +0 -0
  287. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_show_limit.py +0 -0
  288. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_skip_repeating_headers_multipage.py +0 -0
  289. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_slice_cache_reuse.py +0 -0
  290. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_slice_exclusion_fix.py +0 -0
  291. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_slice_exclusion_issue.py +0 -0
  292. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_slice_exclusion_mock.py +0 -0
  293. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_sliced_collection_exclusions.py +0 -0
  294. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_strikethrough_detection.py +0 -0
  295. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_table_result_header_mismatch.py +0 -0
  296. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_table_result_keep_blank.py +0 -0
  297. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_tiny_text_tables.py +0 -0
  298. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_tiny_text_tables_table.py +0 -0
  299. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_tutorials.py +0 -0
  300. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_underline_detection.py +0 -0
  301. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tests/test_update_text.py +0 -0
  302. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/todo/bad_pdf_analysis.md +0 -0
  303. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/todo/evaluation.md +0 -0
  304. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md +0 -0
  305. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md +0 -0
  306. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md +0 -0
  307. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/README.md +0 -0
  308. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/__init__.py +0 -0
  309. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/analyser.py +0 -0
  310. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/collate_summaries.py +0 -0
  311. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/compile_attempts_markdown.py +0 -0
  312. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/eval_suite.py +0 -0
  313. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/evaluate_quality.py +0 -0
  314. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/export_enrichment_csv.py +0 -0
  315. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/extraction_decision_tree.md +0 -0
  316. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/llm_enrich.py +0 -0
  317. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/llm_enrich_with_retry.py +0 -0
  318. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/reporter.py +0 -0
  319. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/tools/bad_pdf_eval/utils.py +0 -0
  320. {natural_pdf-0.2.9 → natural_pdf-0.2.10}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.9
3
+ Version: 0.2.10
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -128,19 +128,59 @@ class GuidesList(UserList):
128
128
  """A list of guide coordinates that also provides methods for creating guides."""
129
129
 
130
130
  def __init__(self, parent_guides: "Guides", axis: Literal["vertical", "horizontal"], data=None):
131
- super().__init__(data or [])
131
+ # Always sort the initial data
132
+ super().__init__(sorted(data) if data else [])
132
133
  self._parent = parent_guides
133
134
  self._axis = axis
134
135
 
135
136
  def __getitem__(self, i):
136
- """Override to handle slicing properly."""
137
+ """Override to handle slicing and negative indexing properly."""
137
138
  if isinstance(i, slice):
138
139
  # Return a new GuidesList with the sliced data
139
140
  return self.__class__(self._parent, self._axis, self.data[i])
140
141
  else:
141
- # For single index, return the value directly
142
+ # For single index, handle negative indices properly
143
+ if i < 0:
144
+ # Convert negative index to positive
145
+ i = len(self.data) + i
142
146
  return self.data[i]
143
147
 
148
+ def __setitem__(self, i, item):
149
+ """Override to maintain sorted order."""
150
+ self.data[i] = item
151
+ self.data.sort()
152
+
153
+ def append(self, item):
154
+ """Override to maintain sorted order."""
155
+ self.data.append(item)
156
+ self.data.sort()
157
+
158
+ def extend(self, other):
159
+ """Override to maintain sorted order."""
160
+ self.data.extend(other)
161
+ self.data.sort()
162
+
163
+ def insert(self, i, item):
164
+ """Override to maintain sorted order."""
165
+ self.data.append(item) # Just append and sort
166
+ self.data.sort()
167
+
168
+ def __iadd__(self, other):
169
+ """Override to maintain sorted order."""
170
+ self.data.extend(other)
171
+ self.data.sort()
172
+ return self
173
+
174
+ @property
175
+ def data(self):
176
+ """Get the data list."""
177
+ return self._data
178
+
179
+ @data.setter
180
+ def data(self, value):
181
+ """Set the data list, always keeping it sorted."""
182
+ self._data = sorted(value) if value else []
183
+
144
184
  def from_content(
145
185
  self,
146
186
  markers: Union[str, List[str], "ElementCollection", Callable, None],
@@ -1842,6 +1882,370 @@ class Guides:
1842
1882
  self.horizontal.pop(index)
1843
1883
  return self
1844
1884
 
1885
+ # -------------------------------------------------------------------------
1886
+ # Region extraction properties
1887
+ # -------------------------------------------------------------------------
1888
+
1889
@property
def columns(self):
    """Column accessor supporting index lookup, e.g. ``guides.columns[0]``."""
    accessor = _ColumnAccessor(self)
    return accessor
1893
+
1894
@property
def rows(self):
    """Row accessor supporting index lookup, e.g. ``guides.rows[0]``."""
    accessor = _RowAccessor(self)
    return accessor
1898
+
1899
@property
def cells(self):
    """Cell accessor supporting ``guides.cells[row][col]`` and ``guides.cells[row, col]``."""
    accessor = _CellAccessor(self)
    return accessor
1903
+
1904
+ # -------------------------------------------------------------------------
1905
+ # Region extraction methods (alternative API)
1906
+ # -------------------------------------------------------------------------
1907
+
1908
def column(self, index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Build the region spanning one column of the guide grid.

    The column lies between vertical guides ``index`` and ``index + 1``;
    its top and bottom edges are taken from the context bounds.

    Args:
        index: Column index (0-based)
        obj: Page or Region to create the column on (uses self.context if None)

    Returns:
        Region covering the requested column

    Raises:
        ValueError: If no target is available or the bounds cannot be determined
        IndexError: If column index is out of range
        TypeError: If the target offers neither ``region`` nor ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    in_range = bool(self.vertical) and 0 <= index < len(self.vertical) - 1
    if not in_range:
        raise IndexError(
            f"Column index {index} out of range (have {len(self.vertical)-1} columns)"
        )

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    _left, top, _right, bottom = context_bounds

    left, right = self.vertical[index], self.vertical[index + 1]

    # Guide coordinates are already absolute, so a target that only exposes
    # ``page`` delegates to that page with the same numbers
    if hasattr(target, "region"):
        return target.region(left, top, right, bottom)
    if hasattr(target, "page"):
        return target.page.region(left, top, right, bottom)
    raise TypeError(f"Cannot create region on {type(target)}")
1951
+
1952
def row(self, index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Build the region spanning one row of the guide grid.

    The row lies between horizontal guides ``index`` and ``index + 1``;
    its left and right edges are taken from the context bounds.

    Args:
        index: Row index (0-based)
        obj: Page or Region to create the row on (uses self.context if None)

    Returns:
        Region covering the requested row

    Raises:
        ValueError: If no target is available or the bounds cannot be determined
        IndexError: If row index is out of range
        TypeError: If the target offers neither ``region`` nor ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    in_range = bool(self.horizontal) and 0 <= index < len(self.horizontal) - 1
    if not in_range:
        raise IndexError(f"Row index {index} out of range (have {len(self.horizontal)-1} rows)")

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    left, _top, right, _bottom = context_bounds

    top, bottom = self.horizontal[index], self.horizontal[index + 1]

    # Guide coordinates are already absolute, so a target that only exposes
    # ``page`` delegates to that page with the same numbers
    if hasattr(target, "region"):
        return target.region(left, top, right, bottom)
    if hasattr(target, "page"):
        return target.page.region(left, top, right, bottom)
    raise TypeError(f"Cannot create region on {type(target)}")
1993
+
1994
def cell(self, row: int, col: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Build the region for a single cell of the guide grid.

    The cell is bounded by vertical guides ``col`` and ``col + 1`` and by
    horizontal guides ``row`` and ``row + 1``.

    Args:
        row: Row index (0-based)
        col: Column index (0-based)
        obj: Page or Region to create the cell on (uses self.context if None)

    Returns:
        Region covering the requested cell

    Raises:
        ValueError: If no target is available
        IndexError: If row or column index is out of range
        TypeError: If the target offers neither ``region`` nor ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    # The column index is validated before the row index
    col_in_range = bool(self.vertical) and 0 <= col < len(self.vertical) - 1
    if not col_in_range:
        raise IndexError(
            f"Column index {col} out of range (have {len(self.vertical)-1} columns)"
        )
    row_in_range = bool(self.horizontal) and 0 <= row < len(self.horizontal) - 1
    if not row_in_range:
        raise IndexError(f"Row index {row} out of range (have {len(self.horizontal)-1} rows)")

    left, right = self.vertical[col], self.vertical[col + 1]
    top, bottom = self.horizontal[row], self.horizontal[row + 1]

    # Guide coordinates are already absolute, so a target that only exposes
    # ``page`` delegates to that page with the same numbers
    if hasattr(target, "region"):
        return target.region(left, top, right, bottom)
    if hasattr(target, "page"):
        return target.page.region(left, top, right, bottom)
    raise TypeError(f"Cannot create region on {type(target)}")
2036
+
2037
def left_of(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Build the region stretching from the left context edge to a vertical guide.

    Args:
        guide_index: Vertical guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region to the left of the specified guide

    Raises:
        ValueError: If no target is available or the bounds cannot be determined
        IndexError: If the guide index is out of range
        TypeError: If the target has no ``region`` attribute
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    in_range = bool(self.vertical) and 0 <= guide_index < len(self.vertical)
    if not in_range:
        raise IndexError(f"Guide index {guide_index} out of range")

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    left, top, _right, bottom = context_bounds

    # The chosen guide becomes the right-hand edge
    right = self.vertical[guide_index]

    if not hasattr(target, "region"):
        raise TypeError(f"Cannot create region on {type(target)}")
    return target.region(left, top, right, bottom)
2068
+
2069
def right_of(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Build the region stretching from a vertical guide to the right context edge.

    Args:
        guide_index: Vertical guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region to the right of the specified guide

    Raises:
        ValueError: If no target is available or the bounds cannot be determined
        IndexError: If the guide index is out of range
        TypeError: If the target has no ``region`` attribute
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    in_range = bool(self.vertical) and 0 <= guide_index < len(self.vertical)
    if not in_range:
        raise IndexError(f"Guide index {guide_index} out of range")

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    _left, top, right, bottom = context_bounds

    # The chosen guide becomes the left-hand edge
    left = self.vertical[guide_index]

    if not hasattr(target, "region"):
        raise TypeError(f"Cannot create region on {type(target)}")
    return target.region(left, top, right, bottom)
2100
+
2101
def above(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Build the region stretching from the top context edge to a horizontal guide.

    Args:
        guide_index: Horizontal guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region above the specified guide

    Raises:
        ValueError: If no target is available or the bounds cannot be determined
        IndexError: If the guide index is out of range
        TypeError: If the target has no ``region`` attribute
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    in_range = bool(self.horizontal) and 0 <= guide_index < len(self.horizontal)
    if not in_range:
        raise IndexError(f"Guide index {guide_index} out of range")

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    left, top, right, _bottom = context_bounds

    # The chosen guide becomes the bottom edge
    bottom = self.horizontal[guide_index]

    if not hasattr(target, "region"):
        raise TypeError(f"Cannot create region on {type(target)}")
    return target.region(left, top, right, bottom)
2132
+
2133
def below(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Build the region stretching from a horizontal guide to the bottom context edge.

    Args:
        guide_index: Horizontal guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region below the specified guide

    Raises:
        ValueError: If no target is available or the bounds cannot be determined
        IndexError: If the guide index is out of range
        TypeError: If the target has no ``region`` attribute
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    in_range = bool(self.horizontal) and 0 <= guide_index < len(self.horizontal)
    if not in_range:
        raise IndexError(f"Guide index {guide_index} out of range")

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    left, _top, right, bottom = context_bounds

    # The chosen guide becomes the top edge
    top = self.horizontal[guide_index]

    if not hasattr(target, "region"):
        raise TypeError(f"Cannot create region on {type(target)}")
    return target.region(left, top, right, bottom)
2164
+
2165
def between_vertical(
    self, start_index: int, end_index: int, obj: Optional[Union["Page", "Region"]] = None
) -> "Region":
    """
    Build the region enclosed by two vertical guides.

    The top and bottom edges are taken from the context bounds.

    Args:
        start_index: Starting vertical guide index
        end_index: Ending vertical guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region between the specified guides

    Raises:
        ValueError: If no target is available, there are no vertical guides,
            the start index is not below the end index, or the bounds
            cannot be determined
        IndexError: If either guide index is out of range
        TypeError: If the target has no ``region`` attribute
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    guides = self.vertical
    if not guides:
        raise ValueError("No vertical guides available")
    guide_count = len(guides)
    if not 0 <= start_index < guide_count:
        raise IndexError(f"Start index {start_index} out of range")
    if not 0 <= end_index < guide_count:
        raise IndexError(f"End index {end_index} out of range")
    if end_index <= start_index:
        raise ValueError("Start index must be less than end index")

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    _left, top, _right, bottom = context_bounds

    left, right = guides[start_index], guides[end_index]

    if not hasattr(target, "region"):
        raise TypeError(f"Cannot create region on {type(target)}")
    return target.region(left, top, right, bottom)
2206
+
2207
def between_horizontal(
    self, start_index: int, end_index: int, obj: Optional[Union["Page", "Region"]] = None
) -> "Region":
    """
    Build the region enclosed by two horizontal guides.

    The left and right edges are taken from the context bounds.

    Args:
        start_index: Starting horizontal guide index
        end_index: Ending horizontal guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region between the specified guides

    Raises:
        ValueError: If no target is available, there are no horizontal guides,
            the start index is not below the end index, or the bounds
            cannot be determined
        IndexError: If either guide index is out of range
        TypeError: If the target has no ``region`` attribute
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    guides = self.horizontal
    if not guides:
        raise ValueError("No horizontal guides available")
    guide_count = len(guides)
    if not 0 <= start_index < guide_count:
        raise IndexError(f"Start index {start_index} out of range")
    if not 0 <= end_index < guide_count:
        raise IndexError(f"End index {end_index} out of range")
    if end_index <= start_index:
        raise ValueError("Start index must be less than end index")

    context_bounds = self._get_context_bounds()
    if not context_bounds:
        raise ValueError("Could not determine bounds")
    left, _top, right, _bottom = context_bounds

    top, bottom = guides[start_index], guides[end_index]

    if not hasattr(target, "region"):
        raise TypeError(f"Cannot create region on {type(target)}")
    return target.region(left, top, right, bottom)
2248
+
1845
2249
  # -------------------------------------------------------------------------
1846
2250
  # Operations
1847
2251
  # -------------------------------------------------------------------------
@@ -3825,3 +4229,95 @@ class Guides:
3825
4229
  return "vertical"
3826
4230
  else:
3827
4231
  return "horizontal"
4232
+
4233
+
4234
+ # -------------------------------------------------------------------------
4235
+ # Accessor classes for property-based access
4236
+ # -------------------------------------------------------------------------
4237
+
4238
+
4239
class _ColumnAccessor:
    """Index-style view over a Guides object's columns (``guides.columns[index]``)."""

    def __init__(self, guides: "Guides"):
        self._guides = guides

    def __len__(self):
        """Return the column count: one fewer than the vertical guides, never below zero."""
        guide_count = len(self._guides.vertical)
        return guide_count - 1 if guide_count > 1 else 0

    def __getitem__(self, index: int) -> "Region":
        """Return the column region at ``index``; negative values count from the end."""
        position = len(self) + index if index < 0 else index
        return self._guides.column(position)
4255
+
4256
+
4257
class _RowAccessor:
    """Index-style view over a Guides object's rows (``guides.rows[index]``)."""

    def __init__(self, guides: "Guides"):
        self._guides = guides

    def __len__(self):
        """Return the row count: one fewer than the horizontal guides, never below zero."""
        guide_count = len(self._guides.horizontal)
        return guide_count - 1 if guide_count > 1 else 0

    def __getitem__(self, index: int) -> "Region":
        """Return the row region at ``index``; negative values count from the end."""
        position = len(self) + index if index < 0 else index
        return self._guides.row(position)
4273
+
4274
+
4275
class _CellAccessor:
    """Index-style view over grid cells: ``guides.cells[row, col]`` or ``guides.cells[row][col]``."""

    def __init__(self, guides: "Guides"):
        self._guides = guides

    def __getitem__(self, key) -> Union["Region", "_CellRowAccessor"]:
        """
        Look up a cell, or the first half of a nested lookup.

        Supports:
        - guides.cells[row, col] - a two-item tuple returns the cell region
        - guides.cells[row][col] - an int returns a per-row accessor that
          takes the column index

        Negative indices count from the end.

        Raises:
            TypeError: If ``key`` is neither an int nor a two-item tuple
        """
        if isinstance(key, tuple) and len(key) == 2:
            row, col = key
            if row < 0:
                row += len(self._guides.rows)
            if col < 0:
                col += len(self._guides.columns)
            return self._guides.cell(row, col)

        if isinstance(key, int):
            row = len(self._guides.rows) + key if key < 0 else key
            # The column is supplied by a second [col] on the returned accessor
            return _CellRowAccessor(self._guides, row)

        raise TypeError(
            f"Cell indices must be integers or tuple of two integers, got {type(key)}"
        )
4309
+
4310
+
4311
class _CellRowAccessor:
    """Second step of ``guides.cells[row][col]``: holds the row and takes the column."""

    def __init__(self, guides: "Guides", row: int):
        self._row = row
        self._guides = guides

    def __getitem__(self, col: int) -> "Region":
        """Return the cell at the stored row and ``col``; a negative ``col`` counts from the end."""
        column = len(self._guides.columns) + col if col < 0 else col
        return self._guides.cell(self._row, column)
@@ -16,7 +16,7 @@ INSTALL_RECIPES: Dict[str, list[str]] = {
16
16
  "paddle": ["paddlepaddle>=3.0.0", "paddleocr>=3.0.1", "paddlex>=3.0.2", "pandas>=2.2.0"],
17
17
  "numpy-high": ["numpy>=2.0"],
18
18
  "numpy-low": ["numpy<1.27"],
19
- "surya": ["surya-ocr>=0.13.0"],
19
+ "surya": ["surya-ocr<0.15"],
20
20
  "yolo": ["doclayout_yolo", "huggingface_hub>=0.29.3"],
21
21
  "docling": ["docling"],
22
22
  # light helpers
@@ -1138,6 +1138,67 @@ class Region(
1138
1138
  )
1139
1139
  return clipped_region
1140
1140
 
1141
def region(
    self,
    left: Optional[float] = None,
    top: Optional[float] = None,
    right: Optional[float] = None,
    bottom: Optional[float] = None,
    width: Union[str, float, None] = None,
    height: Optional[float] = None,
    relative: bool = False,
) -> "Region":
    """
    Create a sub-region within this region using the same API as Page.region().

    By default, coordinates are absolute (relative to the page), matching Page.region().
    Set relative=True to use coordinates relative to this region's top-left corner.

    Args:
        left: Left x-coordinate (absolute by default, or relative to region if relative=True)
        top: Top y-coordinate (absolute by default, or relative to region if relative=True)
        right: Right x-coordinate (absolute by default, or relative to region if relative=True)
        bottom: Bottom y-coordinate (absolute by default, or relative to region if relative=True)
        width: Width definition (same as Page.region()); passed through unchanged
        height: Height of the region (same as Page.region()); passed through unchanged
        relative: If True, coordinates are relative to this region's top-left (0,0).
                 If False (default), coordinates are absolute page coordinates.

    Returns:
        Region object for the specified coordinates, clipped to this region's bounds

    Examples:
        # Absolute coordinates (default) - same as page.region()
        sub = region.region(left=100, top=200, width=50, height=30)

        # Relative to region's top-left
        sub = region.region(left=10, top=10, width=50, height=30, relative=True)

        # Mix relative positioning with this region's bounds
        sub = region.region(left=region.x0 + 10, width=50, height=30)
    """
    if relative:
        # Shift only the edges that were supplied; an edge left as None stays
        # None. width/height are sizes rather than positions, so they are
        # not offset.
        if left is not None:
            left = self.x0 + left
        if top is not None:
            top = self.top + top
        if right is not None:
            right = self.x0 + right
        if bottom is not None:
            bottom = self.top + bottom

    # Delegate to the parent page so its region-building logic is reused
    new_region = self.page.region(
        left=left, top=top, right=right, bottom=bottom, width=width, height=height
    )

    # Clip the new region to this region's bounds
    return new_region.clip(self)
1201
+
1141
1202
  def get_elements(
1142
1203
  self, selector: Optional[str] = None, apply_exclusions=True, **kwargs
1143
1204
  ) -> List["Element"]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.9
3
+ Version: 0.2.10
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT