natural-pdf 0.2.5__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.gitignore +1 -0
  2. {natural_pdf-0.2.5/natural_pdf.egg-info → natural_pdf-0.2.8}/PKG-INFO +1 -1
  3. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/guides.py +94 -42
  4. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/page.py +224 -62
  5. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/page_collection.py +261 -50
  6. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/page_groupby.py +20 -2
  7. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/pdf.py +17 -14
  8. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/render_spec.py +20 -5
  9. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/describe/base.py +1 -1
  10. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/describe/elements.py +1 -1
  11. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/base.py +84 -8
  12. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/element_collection.py +757 -20
  13. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/region.py +181 -48
  14. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/flows/flow.py +3 -0
  15. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/selectors/parser.py +2 -2
  16. natural_pdf-0.2.8/natural_pdf/utils/color_utils.py +100 -0
  17. {natural_pdf-0.2.5 → natural_pdf-0.2.8/natural_pdf.egg-info}/PKG-INFO +1 -1
  18. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf.egg-info/SOURCES.txt +32 -1
  19. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf.egg-info/top_level.txt +1 -0
  20. natural_pdf-0.2.8/tests/test_color_hex_display.py +195 -0
  21. natural_pdf-0.2.8/tests/test_crop_enhancements.py +149 -0
  22. natural_pdf-0.2.8/tests/test_crop_region_highlights.py +119 -0
  23. natural_pdf-0.2.8/tests/test_dissolve.py +471 -0
  24. natural_pdf-0.2.8/tests/test_dissolve_cross_page_bug.py +155 -0
  25. natural_pdf-0.2.8/tests/test_dissolve_debug_issue.py +195 -0
  26. natural_pdf-0.2.8/tests/test_dissolve_real_world_issue.py +201 -0
  27. natural_pdf-0.2.8/tests/test_dissolve_single_elements.py +159 -0
  28. natural_pdf-0.2.8/tests/test_dissolve_vertical_offset_issue.py +139 -0
  29. natural_pdf-0.2.8/tests/test_element_addition.py +176 -0
  30. natural_pdf-0.2.8/tests/test_element_collection_show_cols.py +132 -0
  31. natural_pdf-0.2.8/tests/test_empty_pseudo_class.py +215 -0
  32. natural_pdf-0.2.8/tests/test_fix_get_sections_zero_height.py +122 -0
  33. natural_pdf-0.2.8/tests/test_get_sections_fix_comprehensive.py +186 -0
  34. natural_pdf-0.2.8/tests/test_get_sections_zero_height.py +179 -0
  35. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_guides_extract_table.py +1 -0
  36. natural_pdf-0.2.5/tests/test_guides_extract_table_from_pages.py → natural_pdf-0.2.8/tests/test_guides_extract_table_collections.py +80 -57
  37. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_guides_extract_table_exclusions.py +41 -40
  38. natural_pdf-0.2.8/tests/test_highlight_detection.py +40 -0
  39. natural_pdf-0.2.8/tests/test_highlight_detection_comprehensive.py +94 -0
  40. natural_pdf-0.2.8/tests/test_include_boundaries_comprehensive.py +124 -0
  41. natural_pdf-0.2.8/tests/test_include_boundaries_debug.py +67 -0
  42. natural_pdf-0.2.8/tests/test_include_boundaries_final.py +159 -0
  43. natural_pdf-0.2.8/tests/test_include_boundaries_final_verification.py +126 -0
  44. natural_pdf-0.2.8/tests/test_include_boundaries_fix.py +126 -0
  45. natural_pdf-0.2.8/tests/test_include_boundaries_mock.py +199 -0
  46. natural_pdf-0.2.8/tests/test_include_boundaries_simple.py +119 -0
  47. natural_pdf-0.2.8/tests/test_include_boundaries_types_pdf.py +113 -0
  48. natural_pdf-0.2.8/tests/test_include_boundaries_verification.py +134 -0
  49. natural_pdf-0.2.8/tests/test_include_boundaries_with_real_text.py +104 -0
  50. natural_pdf-0.2.8/tests/test_merge_connected.py +302 -0
  51. natural_pdf-0.2.8/tests/test_merge_connected_real_world.py +240 -0
  52. natural_pdf-0.2.8/tests/test_merge_method.py +187 -0
  53. natural_pdf-0.2.8/tests/test_sections_with_start_and_end.py +107 -0
  54. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_slice_cache_reuse.py +70 -52
  55. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_slice_exclusion_fix.py +37 -34
  56. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_slice_exclusion_issue.py +22 -16
  57. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_slice_exclusion_mock.py +56 -56
  58. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_sliced_collection_exclusions.py +50 -42
  59. natural_pdf-0.2.5/tests/test_highlight_detection.py +0 -11
  60. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.cursor/rules/analysis_framework.mdc +0 -0
  61. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.cursor/rules/coding-style.mdc +0 -0
  62. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  63. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.cursor/rules/minimal-comments.mdc +0 -0
  64. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  65. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  66. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.github/workflows/ci.yml +0 -0
  67. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.github/workflows/docs.yml +0 -0
  68. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.github/workflows/nightly-tutorials.yml +0 -0
  69. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/.pre-commit-config.yaml +0 -0
  70. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/01-execute_notebooks.py +0 -0
  71. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/02-run_all_tutorials.sh +0 -0
  72. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/CLAUDE.md +0 -0
  73. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/LICENSE +0 -0
  74. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/MANIFEST.in +0 -0
  75. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/README.md +0 -0
  76. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/audit_packaging.py +0 -0
  77. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/check_run_md.sh +0 -0
  78. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/api/index.md +0 -0
  79. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/favicon.png +0 -0
  80. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/favicon.svg +0 -0
  81. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/javascripts/custom.js +0 -0
  82. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/logo.svg +0 -0
  83. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/sample-screen.png +0 -0
  84. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/social-preview.png +0 -0
  85. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/social-preview.svg +0 -0
  86. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/assets/stylesheets/custom.css +0 -0
  87. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/categorizing-documents/index.md +0 -0
  88. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/data-extraction/index.md +0 -0
  89. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/describe/index.md +0 -0
  90. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/document-qa/index.md +0 -0
  91. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/element-selection/index.md +0 -0
  92. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/extracting-clean-text/index.md +0 -0
  93. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/finetuning/index.md +0 -0
  94. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/fix-messy-tables/index.md +0 -0
  95. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/fix-messy-tables/table_1.csv +0 -0
  96. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/fix-messy-tables/table_2.csv +0 -0
  97. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/fix-messy-tables/table_3.csv +0 -0
  98. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/index.md +0 -0
  99. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/installation/index.md +0 -0
  100. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/interactive-widget/index.md +0 -0
  101. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/layout-analysis/index.md +0 -0
  102. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/loops-and-groups/index.md +0 -0
  103. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/ocr/index.md +0 -0
  104. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/pdf-navigation/index.md +0 -0
  105. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
  106. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/process-forms-and-invoices/index.md +0 -0
  107. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/quick-reference/index.md +0 -0
  108. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/reflowing-pages/index.md +0 -0
  109. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/regions/index.md +0 -0
  110. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tables/index.md +0 -0
  111. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/text-analysis/index.md +0 -0
  112. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/01-loading-and-extraction.md +0 -0
  113. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/02-finding-elements.md +0 -0
  114. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/03-extracting-blocks.md +0 -0
  115. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/04-table-extraction.md +0 -0
  116. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/05-excluding-content.md +0 -0
  117. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/06-document-qa.md +0 -0
  118. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/07-layout-analysis.md +0 -0
  119. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/07-working-with-regions.md +0 -0
  120. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/08-spatial-navigation.md +0 -0
  121. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/09-section-extraction.md +0 -0
  122. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/10-form-field-extraction.md +0 -0
  123. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  124. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/12-ocr-integration.md +0 -0
  125. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/13-semantic-search.md +0 -0
  126. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/tutorials/14-categorizing-documents.md +0 -0
  127. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/visual-debugging/index.md +0 -0
  128. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/docs/visual-debugging/region.png +0 -0
  129. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/mkdocs.yml +0 -0
  130. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/__init__.py +0 -0
  131. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/__init__.py +0 -0
  132. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/__init__.py +0 -0
  133. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/base.py +0 -0
  134. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/docling.py +0 -0
  135. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/gemini.py +0 -0
  136. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  137. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  138. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  139. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/paddle.py +0 -0
  140. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  141. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/surya.py +0 -0
  142. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  143. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/tatr.py +0 -0
  144. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/layout/yolo.py +0 -0
  145. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
  146. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/text_options.py +0 -0
  147. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/text_structure.py +0 -0
  148. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/analyzers/utils.py +0 -0
  149. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/classification/manager.py +0 -0
  150. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/classification/mixin.py +0 -0
  151. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/classification/results.py +0 -0
  152. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/cli.py +0 -0
  153. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/collections/mixins.py +0 -0
  154. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/__init__.py +0 -0
  155. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/element_manager.py +0 -0
  156. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/highlighting_service.py +0 -0
  157. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/core/pdf_collection.py +0 -0
  158. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/describe/__init__.py +0 -0
  159. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/describe/mixin.py +0 -0
  160. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/describe/summary.py +0 -0
  161. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/__init__.py +0 -0
  162. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/image.py +0 -0
  163. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/line.py +0 -0
  164. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/rect.py +0 -0
  165. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/elements/text.py +0 -0
  166. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/export/mixin.py +0 -0
  167. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/__init__.py +0 -0
  168. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/base.py +0 -0
  169. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/data/__init__.py +0 -0
  170. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/data/pdf.ttf +0 -0
  171. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/data/sRGB.icc +0 -0
  172. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/hocr.py +0 -0
  173. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/hocr_font.py +0 -0
  174. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/original_pdf.py +0 -0
  175. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/paddleocr.py +0 -0
  176. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/exporters/searchable_pdf.py +0 -0
  177. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/extraction/manager.py +0 -0
  178. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/extraction/mixin.py +0 -0
  179. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/extraction/result.py +0 -0
  180. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/flows/__init__.py +0 -0
  181. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/flows/collections.py +0 -0
  182. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/flows/element.py +0 -0
  183. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/flows/region.py +0 -0
  184. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/__init__.py +0 -0
  185. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/engine.py +0 -0
  186. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/engine_doctr.py +0 -0
  187. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/engine_easyocr.py +0 -0
  188. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/engine_paddle.py +0 -0
  189. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/engine_surya.py +0 -0
  190. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/ocr_factory.py +0 -0
  191. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/ocr_manager.py +0 -0
  192. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/ocr_options.py +0 -0
  193. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/ocr/utils.py +0 -0
  194. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/qa/__init__.py +0 -0
  195. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/qa/document_qa.py +0 -0
  196. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/qa/qa_result.py +0 -0
  197. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/search/__init__.py +0 -0
  198. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/search/lancedb_search_service.py +0 -0
  199. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/search/numpy_search_service.py +0 -0
  200. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/search/search_options.py +0 -0
  201. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/search/search_service_protocol.py +0 -0
  202. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/search/searchable_mixin.py +0 -0
  203. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/selectors/__init__.py +0 -0
  204. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/tables/__init__.py +0 -0
  205. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/tables/result.py +0 -0
  206. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/templates/__init__.py +0 -0
  207. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  208. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/templates/spa/css/style.css +0 -0
  209. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/templates/spa/index.html +0 -0
  210. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/templates/spa/js/app.js +0 -0
  211. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/templates/spa/words.txt +0 -0
  212. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/text_mixin.py +0 -0
  213. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/__init__.py +0 -0
  214. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/bidi_mirror.py +0 -0
  215. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/debug.py +0 -0
  216. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/highlighting.py +0 -0
  217. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/identifiers.py +0 -0
  218. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/layout.py +0 -0
  219. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/locks.py +0 -0
  220. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/packaging.py +0 -0
  221. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/reading_order.py +0 -0
  222. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/text_extraction.py +0 -0
  223. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/utils/visualization.py +0 -0
  224. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/vision/__init__.py +0 -0
  225. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/vision/mixin.py +0 -0
  226. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/vision/results.py +0 -0
  227. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/vision/similarity.py +0 -0
  228. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/widgets/__init__.py +0 -0
  229. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf/widgets/viewer.py +0 -0
  230. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf.egg-info/dependency_links.txt +0 -0
  231. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf.egg-info/entry_points.txt +0 -0
  232. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/natural_pdf.egg-info/requires.txt +0 -0
  233. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/noxfile.py +0 -0
  234. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/memory_comparison.py +0 -0
  235. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/pdf_analyzer.py +0 -0
  236. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/performance_analysis.py +0 -0
  237. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/performance_results/image_heavy_snapshots.csv +0 -0
  238. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/performance_results/image_heavy_snapshots.json +0 -0
  239. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/performance_results/text_heavy_snapshots.csv +0 -0
  240. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/performance_results/text_heavy_snapshots.json +0 -0
  241. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/test_cleanup_methods.py +0 -0
  242. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/optimization/test_memory_fix.py +0 -0
  243. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/publish.sh +0 -0
  244. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/pyproject.toml +0 -0
  245. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/sample-screen.png +0 -0
  246. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/setup.cfg +0 -0
  247. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/conftest.py +0 -0
  248. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/exporters/test_paddleocr_exporter.py +0 -0
  249. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_annotate.py +0 -0
  250. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_arabic_performance.py +0 -0
  251. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_arabic_real_world.py +0 -0
  252. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_color_conversion.py +0 -0
  253. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_core/test_containment_geometry.py +0 -0
  254. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_core/test_elements.py +0 -0
  255. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_core/test_loading.py +0 -0
  256. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_core/test_spatial.py +0 -0
  257. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_core/test_text_extraction.py +0 -0
  258. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_core/test_text_layer.py +0 -0
  259. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_directional_defaults.py +0 -0
  260. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_document_qa.py +0 -0
  261. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_element_collection_slicing.py +0 -0
  262. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_element_show_crop_highlights.py +0 -0
  263. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_exclusions.py +0 -0
  264. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_expand.py +0 -0
  265. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_extraction_error.py +0 -0
  266. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_extraction_mixin_fix.py +0 -0
  267. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_extraction_text_and_vision.py +0 -0
  268. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_extraction_working.py +0 -0
  269. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_find_similar.py +0 -0
  270. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_first_last_selectors.py +0 -0
  271. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_flow_region_directional.py +0 -0
  272. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_groupby.py +0 -0
  273. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_guides.py +0 -0
  274. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_guides_apply_exclusions.py +0 -0
  275. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_guides_apply_exclusions_simple.py +0 -0
  276. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_guides_extract_table_real.py +0 -0
  277. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_guides_integration.py +0 -0
  278. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_highlight_protocol.py +0 -0
  279. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_highlight_protocol_simple.py +0 -0
  280. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_highlight_regions.py +0 -0
  281. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_loading_original.py +0 -0
  282. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_multi_page_table_discovery.py +0 -0
  283. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_optional_deps.py +0 -0
  284. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_page_exclusion_lists.py +0 -0
  285. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_pdf_add_exclusion_elementcollection.py +0 -0
  286. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_region_show_crop_highlights.py +0 -0
  287. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_region_viewer.py +0 -0
  288. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_sections_end_only.py +0 -0
  289. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_show_column_layout.py +0 -0
  290. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_show_edge_cases.py +0 -0
  291. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_show_exclusions.py +0 -0
  292. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_show_exclusions_feature.py +0 -0
  293. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_show_limit.py +0 -0
  294. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_skip_repeating_headers_multipage.py +0 -0
  295. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_strikethrough_detection.py +0 -0
  296. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_table_result_header_mismatch.py +0 -0
  297. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_table_result_keep_blank.py +0 -0
  298. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_tiny_text_tables.py +0 -0
  299. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_tiny_text_tables_table.py +0 -0
  300. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_tutorials.py +0 -0
  301. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_underline_detection.py +0 -0
  302. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tests/test_update_text.py +0 -0
  303. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/todo/bad_pdf_analysis.md +0 -0
  304. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/todo/evaluation.md +0 -0
  305. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md +0 -0
  306. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md +0 -0
  307. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md +0 -0
  308. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/README.md +0 -0
  309. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/__init__.py +0 -0
  310. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/analyser.py +0 -0
  311. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/collate_summaries.py +0 -0
  312. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/compile_attempts_markdown.py +0 -0
  313. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/eval_suite.py +0 -0
  314. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/evaluate_quality.py +0 -0
  315. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/export_enrichment_csv.py +0 -0
  316. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/extraction_decision_tree.md +0 -0
  317. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/llm_enrich.py +0 -0
  318. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/llm_enrich_with_retry.py +0 -0
  319. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/reporter.py +0 -0
  320. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/tools/bad_pdf_eval/utils.py +0 -0
  321. {natural_pdf-0.2.5 → natural_pdf-0.2.8}/uv.lock +0 -0
@@ -1,3 +1,4 @@
1
+ temp
1
2
  Untitled*.ipynb
2
3
  importtime_output.txt
3
4
  .notebook_cache.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.5
3
+ Version: 0.2.8
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -3462,7 +3462,15 @@ class Guides:
3462
3462
 
3463
3463
  def extract_table(
3464
3464
  self,
3465
- target: Optional[Union["Page", "Region"]] = None,
3465
+ target: Optional[
3466
+ Union[
3467
+ "Page",
3468
+ "Region",
3469
+ "PageCollection",
3470
+ "ElementCollection",
3471
+ List[Union["Page", "Region"]],
3472
+ ]
3473
+ ] = None,
3466
3474
  source: str = "guides_temp",
3467
3475
  cell_padding: float = 0.5,
3468
3476
  include_outer_boundaries: bool = False,
@@ -3477,6 +3485,8 @@ class Guides:
3477
3485
  apply_exclusions: bool = True,
3478
3486
  *,
3479
3487
  multi_page: Literal["auto", True, False] = "auto",
3488
+ header: Union[str, List[str], None] = "first",
3489
+ skip_repeating_headers: Optional[bool] = None,
3480
3490
  ) -> "TableResult":
3481
3491
  """
3482
3492
  Extract table data directly from guides without leaving temporary regions.
@@ -3487,8 +3497,11 @@ class Guides:
3487
3497
  3. Cleans up all temporary regions
3488
3498
  4. Returns the TableResult
3489
3499
 
3500
+ When passed a collection (PageCollection, ElementCollection, or list), this method
3501
+ will extract tables from each element and combine them into a single result.
3502
+
3490
3503
  Args:
3491
- target: Page or Region to create regions on (uses self.context if None)
3504
+ target: Page, Region, or collection of Pages/Regions to extract from (uses self.context if None)
3492
3505
  source: Source label for temporary regions (will be cleaned up)
3493
3506
  cell_padding: Internal padding for cell regions in points
3494
3507
  include_outer_boundaries: Whether to add boundaries at edges if missing
@@ -3502,6 +3515,13 @@ class Guides:
3502
3515
  content_filter: Content filtering function or patterns
3503
3516
  apply_exclusions: Whether to apply exclusion regions during text extraction (default: True)
3504
3517
  multi_page: Controls multi-region table creation for FlowRegions
3518
+ header: How to handle headers when extracting from collections:
3519
+ - "first": Use first row of first element as headers (default)
3520
+ - "all": Expect headers on each element, use from first element
3521
+ - None: No headers, use numeric indices
3522
+ - List[str]: Custom column names
3523
+ skip_repeating_headers: Whether to remove duplicate header rows when extracting from collections.
3524
+ Defaults to True when header is "first" or "all", False otherwise.
3505
3525
 
3506
3526
  Returns:
3507
3527
  TableResult: Extracted table data
@@ -3513,20 +3533,49 @@ class Guides:
3513
3533
  ```python
3514
3534
  from natural_pdf.analyzers import Guides
3515
3535
 
3516
- # Create guides from detected lines
3536
+ # Single page extraction
3517
3537
  guides = Guides.from_lines(page, source_label="detected")
3518
-
3519
- # Extract table directly - no temporary regions left behind
3520
3538
  table_data = guides.extract_table()
3521
-
3522
- # Convert to pandas DataFrame
3523
3539
  df = table_data.to_df()
3540
+
3541
+ # Multiple page extraction
3542
+ guides = Guides(pages[0])
3543
+ guides.vertical.from_content(['Column 1', 'Column 2'])
3544
+ table_result = guides.extract_table(pages, header=['Col1', 'Col2'])
3545
+ df = table_result.to_df()
3546
+
3547
+ # Region collection extraction
3548
+ regions = pdf.find_all('region[type=table]')
3549
+ guides = Guides(regions[0])
3550
+ guides.vertical.from_lines(n=3)
3551
+ table_result = guides.extract_table(regions)
3524
3552
  ```
3525
3553
  """
3526
- target_obj = target or self.context
3527
- if not target_obj:
3554
+ from natural_pdf.core.page_collection import PageCollection
3555
+ from natural_pdf.elements.element_collection import ElementCollection
3556
+
3557
+ target_obj = target if target is not None else self.context
3558
+ if target_obj is None:
3528
3559
  raise ValueError("No target object available. Provide target parameter or context.")
3529
3560
 
3561
+ # Check if target is a collection - if so, delegate to _extract_table_from_collection
3562
+ if isinstance(target_obj, (PageCollection, ElementCollection, list)):
3563
+ # For collections, pass through most parameters as-is
3564
+ return self._extract_table_from_collection(
3565
+ elements=target_obj,
3566
+ header=header,
3567
+ skip_repeating_headers=skip_repeating_headers,
3568
+ method=method,
3569
+ table_settings=table_settings,
3570
+ use_ocr=use_ocr,
3571
+ ocr_config=ocr_config,
3572
+ text_options=text_options,
3573
+ cell_extraction_func=cell_extraction_func,
3574
+ show_progress=show_progress,
3575
+ content_filter=content_filter,
3576
+ apply_exclusions=apply_exclusions,
3577
+ )
3578
+
3530
3579
  # Get the page for cleanup later
3531
3580
  if hasattr(target_obj, "x0") and hasattr(target_obj, "top"): # Region
3532
3581
  page = target_obj._page
@@ -3597,9 +3646,9 @@ class Guides:
3597
3646
  except Exception as cleanup_err:
3598
3647
  logger.warning(f"Failed to clean up temporary regions: {cleanup_err}")
3599
3648
 
3600
- def extract_table_from_pages(
3649
+ def _extract_table_from_collection(
3601
3650
  self,
3602
- pages: Union["PageCollection", List["Page"]],
3651
+ elements: Union["PageCollection", "ElementCollection", List[Union["Page", "Region"]]],
3603
3652
  header: Union[str, List[str], None] = "first",
3604
3653
  skip_repeating_headers: Optional[bool] = None,
3605
3654
  method: Optional[str] = None,
@@ -3613,17 +3662,17 @@ class Guides:
3613
3662
  apply_exclusions: bool = True,
3614
3663
  ) -> "TableResult":
3615
3664
  """
3616
- Extract tables from multiple pages using this guide pattern.
3665
+ Extract tables from multiple pages or regions using this guide pattern.
3617
3666
 
3618
- This method applies the guide to each page, extracts tables, and combines
3667
+ This method applies the guide to each element, extracts tables, and combines
3619
3668
  them into a single TableResult. Dynamic guides (using lambdas) are evaluated
3620
- for each page.
3669
+ for each element.
3621
3670
 
3622
3671
  Args:
3623
- pages: PageCollection or list of Pages to extract from
3672
+ elements: PageCollection, ElementCollection, or list of Pages/Regions to extract from
3624
3673
  header: How to handle headers:
3625
- - "first": Use first row of first page as headers (default)
3626
- - "all": Expect headers on each page, use from first page
3674
+ - "first": Use first row of first element as headers (default)
3675
+ - "all": Expect headers on each element, use from first element
3627
3676
  - None: No headers, use numeric indices
3628
3677
  - List[str]: Custom column names
3629
3678
  skip_repeating_headers: Whether to remove duplicate header rows.
@@ -3634,35 +3683,36 @@ class Guides:
3634
3683
  ocr_config: OCR configuration parameters
3635
3684
  text_options: Dictionary of options for the 'text' method
3636
3685
  cell_extraction_func: Optional callable for custom cell text extraction
3637
- show_progress: Show progress bar for multi-page extraction (default: True)
3686
+ show_progress: Show progress bar for multi-element extraction (default: True)
3638
3687
  content_filter: Content filtering function or patterns
3639
3688
  apply_exclusions: Whether to apply exclusion regions during extraction
3640
3689
 
3641
3690
  Returns:
3642
- TableResult: Combined table data from all pages
3691
+ TableResult: Combined table data from all elements
3643
3692
 
3644
3693
  Example:
3645
3694
  ```python
3646
3695
  # Create guide with static vertical, dynamic horizontal
3647
- guide = Guides(pages[0])
3696
+ guide = Guides(regions[0])
3648
3697
  guide.vertical.from_content(columns, outer="last")
3649
- guide.horizontal.from_content(lambda p: p.find_all('text:starts-with(NF-)'))
3698
+ guide.horizontal.from_content(lambda r: r.find_all('text:starts-with(NF-)'))
3650
3699
 
3651
- # Extract from all pages
3652
- table_result = guide.extract_table_from_pages(pages, header=columns)
3700
+ # Extract from all regions
3701
+ table_result = guide._extract_table_from_collection(regions, header=columns)
3653
3702
  df = table_result.to_df()
3654
3703
  ```
3655
3704
  """
3656
3705
  from natural_pdf.core.page_collection import PageCollection
3706
+ from natural_pdf.elements.element_collection import ElementCollection
3657
3707
  from natural_pdf.tables.result import TableResult
3658
3708
 
3659
- # Convert to list if it's a PageCollection
3660
- if isinstance(pages, PageCollection):
3661
- page_list = list(pages)
3709
+ # Convert to list if it's a collection
3710
+ if isinstance(elements, (PageCollection, ElementCollection)):
3711
+ element_list = list(elements)
3662
3712
  else:
3663
- page_list = pages
3713
+ element_list = elements
3664
3714
 
3665
- if not page_list:
3715
+ if not element_list:
3666
3716
  return TableResult([])
3667
3717
 
3668
3718
  # Determine header handling
@@ -3673,37 +3723,39 @@ class Guides:
3673
3723
  header_row = None
3674
3724
 
3675
3725
  # Configure progress bar
3676
- iterator = page_list
3677
- if show_progress and len(page_list) > 1:
3726
+ iterator = element_list
3727
+ if show_progress and len(element_list) > 1:
3678
3728
  try:
3679
3729
  from tqdm.auto import tqdm
3680
3730
 
3681
- iterator = tqdm(page_list, desc="Extracting tables from pages", unit="page")
3731
+ iterator = tqdm(
3732
+ element_list, desc="Extracting tables from elements", unit="element"
3733
+ )
3682
3734
  except ImportError:
3683
3735
  pass
3684
3736
 
3685
- for i, page in enumerate(iterator):
3686
- # Create a new Guides object for this page
3687
- page_guide = Guides(page)
3737
+ for i, element in enumerate(iterator):
3738
+ # Create a new Guides object for this element
3739
+ element_guide = Guides(element)
3688
3740
 
3689
3741
  # Copy vertical guides (usually static)
3690
3742
  if hasattr(self.vertical, "_callable") and self.vertical._callable is not None:
3691
3743
  # If vertical is dynamic (lambda), evaluate it
3692
- page_guide.vertical.from_content(self.vertical._callable(page))
3744
+ element_guide.vertical.from_content(self.vertical._callable(element))
3693
3745
  else:
3694
3746
  # Copy static vertical positions
3695
- page_guide.vertical.data = self.vertical.data.copy()
3747
+ element_guide.vertical.data = self.vertical.data.copy()
3696
3748
 
3697
3749
  # Handle horizontal guides
3698
3750
  if hasattr(self.horizontal, "_callable") and self.horizontal._callable is not None:
3699
3751
  # If horizontal is dynamic (lambda), evaluate it
3700
- page_guide.horizontal.from_content(self.horizontal._callable(page))
3752
+ element_guide.horizontal.from_content(self.horizontal._callable(element))
3701
3753
  else:
3702
3754
  # Copy static horizontal positions
3703
- page_guide.horizontal.data = self.horizontal.data.copy()
3755
+ element_guide.horizontal.data = self.horizontal.data.copy()
3704
3756
 
3705
- # Extract table from this page
3706
- table_result = page_guide.extract_table(
3757
+ # Extract table from this element
3758
+ table_result = element_guide.extract_table(
3707
3759
  method=method,
3708
3760
  table_settings=table_settings,
3709
3761
  use_ocr=use_ocr,
@@ -3719,7 +3771,7 @@ class Guides:
3719
3771
  rows = list(table_result)
3720
3772
 
3721
3773
  # Handle headers based on strategy
3722
- if i == 0: # First page
3774
+ if i == 0: # First element
3723
3775
  if header == "first" or header == "all":
3724
3776
  # Use first row as header
3725
3777
  if rows:
@@ -3728,7 +3780,7 @@ class Guides:
3728
3780
  elif isinstance(header, list):
3729
3781
  # Custom headers provided
3730
3782
  header_row = header
3731
- else: # Subsequent pages
3783
+ else: # Subsequent elements
3732
3784
  if header == "all" and skip_repeating_headers and rows:
3733
3785
  # Expect and remove header row
3734
3786
  if rows and header_row and rows[0] == header_row: