natural-pdf 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283)
  1. {natural_pdf-0.2.2/natural_pdf.egg-info → natural_pdf-0.2.4}/PKG-INFO +1 -1
  2. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/quick-reference/index.md +15 -1
  3. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/visual-debugging/index.md +63 -1
  4. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/collections/mixins.py +16 -3
  5. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/highlighting_service.py +25 -1
  6. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/page.py +5 -3
  7. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/page_collection.py +14 -14
  8. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/pdf.py +4 -1
  9. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/pdf_collection.py +131 -4
  10. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/render_spec.py +46 -2
  11. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/base.py +66 -28
  12. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/element_collection.py +10 -10
  13. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/region.py +29 -27
  14. natural_pdf-0.2.4/natural_pdf/vision/__init__.py +7 -0
  15. natural_pdf-0.2.4/natural_pdf/vision/mixin.py +209 -0
  16. natural_pdf-0.2.4/natural_pdf/vision/results.py +146 -0
  17. natural_pdf-0.2.4/natural_pdf/vision/similarity.py +321 -0
  18. {natural_pdf-0.2.2 → natural_pdf-0.2.4/natural_pdf.egg-info}/PKG-INFO +1 -1
  19. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf.egg-info/SOURCES.txt +9 -0
  20. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf.egg-info/top_level.txt +0 -1
  21. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_core/test_containment_geometry.py +6 -6
  22. natural_pdf-0.2.4/tests/test_element_show_crop_highlights.py +168 -0
  23. natural_pdf-0.2.4/tests/test_expand.py +150 -0
  24. natural_pdf-0.2.4/tests/test_find_similar.py +147 -0
  25. natural_pdf-0.2.4/tests/test_highlight_regions.py +161 -0
  26. natural_pdf-0.2.4/tests/test_region_show_crop_highlights.py +219 -0
  27. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.cursor/rules/analysis_framework.mdc +0 -0
  28. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.cursor/rules/coding-style.mdc +0 -0
  29. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  30. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.cursor/rules/minimal-comments.mdc +0 -0
  31. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  32. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  33. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.github/workflows/ci.yml +0 -0
  34. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.github/workflows/docs.yml +0 -0
  35. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.github/workflows/nightly-tutorials.yml +0 -0
  36. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.gitignore +0 -0
  37. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/.pre-commit-config.yaml +0 -0
  38. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/01-execute_notebooks.py +0 -0
  39. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/02-run_all_tutorials.sh +0 -0
  40. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/CLAUDE.md +0 -0
  41. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/LICENSE +0 -0
  42. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/MANIFEST.in +0 -0
  43. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/README.md +0 -0
  44. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/audit_packaging.py +0 -0
  45. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/check_run_md.sh +0 -0
  46. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/api/index.md +0 -0
  47. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/favicon.png +0 -0
  48. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/favicon.svg +0 -0
  49. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/javascripts/custom.js +0 -0
  50. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/logo.svg +0 -0
  51. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/sample-screen.png +0 -0
  52. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/social-preview.png +0 -0
  53. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/social-preview.svg +0 -0
  54. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/assets/stylesheets/custom.css +0 -0
  55. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/categorizing-documents/index.md +0 -0
  56. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/data-extraction/index.md +0 -0
  57. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/describe/index.md +0 -0
  58. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/document-qa/index.md +0 -0
  59. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/element-selection/index.md +0 -0
  60. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/extracting-clean-text/index.md +0 -0
  61. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/finetuning/index.md +0 -0
  62. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/fix-messy-tables/index.md +0 -0
  63. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/fix-messy-tables/table_1.csv +0 -0
  64. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/fix-messy-tables/table_2.csv +0 -0
  65. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/fix-messy-tables/table_3.csv +0 -0
  66. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/index.md +0 -0
  67. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/installation/index.md +0 -0
  68. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/interactive-widget/index.md +0 -0
  69. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/layout-analysis/index.md +0 -0
  70. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/loops-and-groups/index.md +0 -0
  71. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/ocr/index.md +0 -0
  72. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/pdf-navigation/index.md +0 -0
  73. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
  74. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/process-forms-and-invoices/index.md +0 -0
  75. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/reflowing-pages/index.md +0 -0
  76. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/regions/index.md +0 -0
  77. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tables/index.md +0 -0
  78. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/text-analysis/index.md +0 -0
  79. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/01-loading-and-extraction.md +0 -0
  80. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/02-finding-elements.md +0 -0
  81. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/03-extracting-blocks.md +0 -0
  82. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/04-table-extraction.md +0 -0
  83. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/05-excluding-content.md +0 -0
  84. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/06-document-qa.md +0 -0
  85. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/07-layout-analysis.md +0 -0
  86. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/07-working-with-regions.md +0 -0
  87. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/08-spatial-navigation.md +0 -0
  88. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/09-section-extraction.md +0 -0
  89. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/10-form-field-extraction.md +0 -0
  90. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  91. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/12-ocr-integration.md +0 -0
  92. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/13-semantic-search.md +0 -0
  93. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/tutorials/14-categorizing-documents.md +0 -0
  94. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/docs/visual-debugging/region.png +0 -0
  95. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/mkdocs.yml +0 -0
  96. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/__init__.py +0 -0
  97. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/__init__.py +0 -0
  98. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/guides.py +0 -0
  99. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/__init__.py +0 -0
  100. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/base.py +0 -0
  101. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/docling.py +0 -0
  102. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/gemini.py +0 -0
  103. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  104. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  105. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  106. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/paddle.py +0 -0
  107. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  108. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/surya.py +0 -0
  109. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  110. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/tatr.py +0 -0
  111. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/layout/yolo.py +0 -0
  112. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
  113. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/text_options.py +0 -0
  114. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/text_structure.py +0 -0
  115. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/analyzers/utils.py +0 -0
  116. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/classification/manager.py +0 -0
  117. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/classification/mixin.py +0 -0
  118. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/classification/results.py +0 -0
  119. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/cli.py +0 -0
  120. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/__init__.py +0 -0
  121. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/element_manager.py +0 -0
  122. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/core/page_groupby.py +0 -0
  123. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/describe/__init__.py +0 -0
  124. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/describe/base.py +0 -0
  125. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/describe/elements.py +0 -0
  126. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/describe/mixin.py +0 -0
  127. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/describe/summary.py +0 -0
  128. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/__init__.py +0 -0
  129. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/image.py +0 -0
  130. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/line.py +0 -0
  131. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/rect.py +0 -0
  132. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/elements/text.py +0 -0
  133. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/export/mixin.py +0 -0
  134. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/__init__.py +0 -0
  135. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/base.py +0 -0
  136. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/data/__init__.py +0 -0
  137. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/data/pdf.ttf +0 -0
  138. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/data/sRGB.icc +0 -0
  139. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/hocr.py +0 -0
  140. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/hocr_font.py +0 -0
  141. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/original_pdf.py +0 -0
  142. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/paddleocr.py +0 -0
  143. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/exporters/searchable_pdf.py +0 -0
  144. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/extraction/manager.py +0 -0
  145. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/extraction/mixin.py +0 -0
  146. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/extraction/result.py +0 -0
  147. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/flows/__init__.py +0 -0
  148. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/flows/collections.py +0 -0
  149. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/flows/element.py +0 -0
  150. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/flows/flow.py +0 -0
  151. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/flows/region.py +0 -0
  152. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/__init__.py +0 -0
  153. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/engine.py +0 -0
  154. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/engine_doctr.py +0 -0
  155. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/engine_easyocr.py +0 -0
  156. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/engine_paddle.py +0 -0
  157. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/engine_surya.py +0 -0
  158. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/ocr_factory.py +0 -0
  159. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/ocr_manager.py +0 -0
  160. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/ocr_options.py +0 -0
  161. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/ocr/utils.py +0 -0
  162. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/qa/__init__.py +0 -0
  163. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/qa/document_qa.py +0 -0
  164. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/qa/qa_result.py +0 -0
  165. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/search/__init__.py +0 -0
  166. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/search/lancedb_search_service.py +0 -0
  167. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/search/numpy_search_service.py +0 -0
  168. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/search/search_options.py +0 -0
  169. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/search/search_service_protocol.py +0 -0
  170. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/search/searchable_mixin.py +0 -0
  171. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/selectors/__init__.py +0 -0
  172. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/selectors/parser.py +0 -0
  173. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/tables/__init__.py +0 -0
  174. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/tables/result.py +0 -0
  175. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/templates/__init__.py +0 -0
  176. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  177. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/templates/spa/css/style.css +0 -0
  178. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/templates/spa/index.html +0 -0
  179. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/templates/spa/js/app.js +0 -0
  180. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/templates/spa/words.txt +0 -0
  181. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/text_mixin.py +0 -0
  182. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/__init__.py +0 -0
  183. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/bidi_mirror.py +0 -0
  184. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/debug.py +0 -0
  185. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/highlighting.py +0 -0
  186. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/identifiers.py +0 -0
  187. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/layout.py +0 -0
  188. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/locks.py +0 -0
  189. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/packaging.py +0 -0
  190. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/reading_order.py +0 -0
  191. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/text_extraction.py +0 -0
  192. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/utils/visualization.py +0 -0
  193. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/widgets/__init__.py +0 -0
  194. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf/widgets/viewer.py +0 -0
  195. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf.egg-info/dependency_links.txt +0 -0
  196. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf.egg-info/entry_points.txt +0 -0
  197. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/natural_pdf.egg-info/requires.txt +0 -0
  198. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/noxfile.py +0 -0
  199. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/memory_comparison.py +0 -0
  200. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/pdf_analyzer.py +0 -0
  201. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/performance_analysis.py +0 -0
  202. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/performance_results/image_heavy_snapshots.csv +0 -0
  203. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/performance_results/image_heavy_snapshots.json +0 -0
  204. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/performance_results/text_heavy_snapshots.csv +0 -0
  205. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/performance_results/text_heavy_snapshots.json +0 -0
  206. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/test_cleanup_methods.py +0 -0
  207. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/optimization/test_memory_fix.py +0 -0
  208. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/publish.sh +0 -0
  209. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/pyproject.toml +0 -0
  210. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/sample-screen.png +0 -0
  211. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/setup.cfg +0 -0
  212. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/test_install.sh +0 -0
  213. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/conftest.py +0 -0
  214. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/exporters/test_paddleocr_exporter.py +0 -0
  215. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_annotate.py +0 -0
  216. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_arabic_performance.py +0 -0
  217. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_arabic_real_world.py +0 -0
  218. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_color_conversion.py +0 -0
  219. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_core/test_elements.py +0 -0
  220. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_core/test_loading.py +0 -0
  221. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_core/test_spatial.py +0 -0
  222. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_core/test_text_extraction.py +0 -0
  223. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_core/test_text_layer.py +0 -0
  224. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_directional_defaults.py +0 -0
  225. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_document_qa.py +0 -0
  226. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_element_collection_slicing.py +0 -0
  227. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_exclusions.py +0 -0
  228. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_extraction_error.py +0 -0
  229. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_extraction_mixin_fix.py +0 -0
  230. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_extraction_text_and_vision.py +0 -0
  231. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_extraction_working.py +0 -0
  232. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_first_last_selectors.py +0 -0
  233. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_flow_region_directional.py +0 -0
  234. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_groupby.py +0 -0
  235. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_guides.py +0 -0
  236. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_guides_apply_exclusions.py +0 -0
  237. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_guides_apply_exclusions_simple.py +0 -0
  238. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_guides_extract_table.py +0 -0
  239. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_guides_extract_table_real.py +0 -0
  240. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_guides_integration.py +0 -0
  241. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_highlight_detection.py +0 -0
  242. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_highlight_protocol.py +0 -0
  243. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_highlight_protocol_simple.py +0 -0
  244. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_loading_original.py +0 -0
  245. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_multi_page_table_discovery.py +0 -0
  246. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_optional_deps.py +0 -0
  247. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_page_exclusion_lists.py +0 -0
  248. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_pdf_add_exclusion_elementcollection.py +0 -0
  249. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_region_viewer.py +0 -0
  250. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_sections_end_only.py +0 -0
  251. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_show_column_layout.py +0 -0
  252. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_show_edge_cases.py +0 -0
  253. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_show_exclusions.py +0 -0
  254. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_show_exclusions_feature.py +0 -0
  255. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_show_limit.py +0 -0
  256. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_skip_repeating_headers_multipage.py +0 -0
  257. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_strikethrough_detection.py +0 -0
  258. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_table_result_header_mismatch.py +0 -0
  259. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_table_result_keep_blank.py +0 -0
  260. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_tiny_text_tables.py +0 -0
  261. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_tiny_text_tables_table.py +0 -0
  262. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_tutorials.py +0 -0
  263. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_underline_detection.py +0 -0
  264. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tests/test_update_text.py +0 -0
  265. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/todo/bad_pdf_analysis.md +0 -0
  266. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/todo/evaluation.md +0 -0
  267. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md +0 -0
  268. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md +0 -0
  269. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md +0 -0
  270. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/README.md +0 -0
  271. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/__init__.py +0 -0
  272. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/analyser.py +0 -0
  273. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/collate_summaries.py +0 -0
  274. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/compile_attempts_markdown.py +0 -0
  275. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/eval_suite.py +0 -0
  276. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/evaluate_quality.py +0 -0
  277. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/export_enrichment_csv.py +0 -0
  278. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/extraction_decision_tree.md +0 -0
  279. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/llm_enrich.py +0 -0
  280. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/llm_enrich_with_retry.py +0 -0
  281. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/reporter.py +0 -0
  282. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/tools/bad_pdf_eval/utils.py +0 -0
  283. {natural_pdf-0.2.2 → natural_pdf-0.2.4}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -156,11 +156,25 @@ elements.show(color="red") # Single collection
156
156
  elements.show(color="blue", label="Headers") # With label
157
157
  elements.show(group_by='type') # Color by type
158
158
 
159
- # Multiple collections together
159
+ # Quick highlighting (one-liner)
160
+ page.highlight(elements1, elements2, elements3) # Multiple elements
161
+ page.highlight( # With custom colors
162
+ (elements1, 'red'),
163
+ (elements2, 'blue'),
164
+ (elements3, 'green')
165
+ )
166
+
167
+ # Multiple collections with context manager
160
168
  with page.highlights() as h:
161
169
  h.add(elements1, color="red", label="Type 1")
162
170
  h.add(elements2, color="blue", label="Type 2")
163
171
  h.show()
172
+
173
+ # Auto-display in Jupyter/Colab
174
+ with page.highlights(show=True) as h:
175
+ h.add(elements1, label="Headers")
176
+ h.add(elements2, label="Content")
177
+ # Displays automatically when exiting context
164
178
  ```
165
179
 
166
180
  ### Viewing
@@ -83,6 +83,47 @@ with page.highlights() as h:
83
83
  h.show()
84
84
  ```
85
85
 
86
+ ### Jupyter/Colab Support
87
+
88
+ In Jupyter notebooks and Google Colab, you can use `show=True` to automatically display the highlights when exiting the context:
89
+
90
+ ```python
91
+ # Automatically displays the image in Jupyter/Colab
92
+ with page.highlights(show=True) as h:
93
+ h.add(summary_elements, label='Summary')
94
+ h.add(date_elements, label='Date')
95
+ h.add(line_elements, label='Lines')
96
+ # No need to call h.show() - displays automatically!
97
+ ```
98
+
99
+ ### Quick Highlighting with `.highlight()`
100
+
101
+ For simple highlighting tasks, use the `.highlight()` convenience method:
102
+
103
+ ```python
104
+ # Highlight multiple elements in one line
105
+ page.highlight(summary_elements, date_elements, line_elements)
106
+
107
+ # With custom colors
108
+ page.highlight(
109
+ (summary_elements, 'red'),
110
+ (date_elements, 'blue'),
111
+ (line_elements, 'green')
112
+ )
113
+
114
+ # With colors and labels
115
+ page.highlight(
116
+ (summary_elements, 'red', 'Summary Text'),
117
+ (date_elements, 'blue', 'Date Fields'),
118
+ (line_elements, 'green', 'Separator Lines')
119
+ )
120
+
121
+ # Pass additional parameters like width or resolution
122
+ page.highlight(summary_elements, date_elements, width=800, labels=True)
123
+ ```
124
+
125
+ This method is particularly useful in Jupyter/Colab environments where the image displays automatically as the cell output.
126
+
86
127
  ## Customizing Multiple Highlights
87
128
 
88
129
  Customize the appearance of multiple highlights using the context manager:
@@ -133,7 +174,7 @@ content = title.below(height=200)
133
174
  content.show()
134
175
  ```
135
176
 
136
- Or look at just the region by itself
177
+ Or look at just the region by itself:
137
178
 
138
179
  ```python
139
180
  # Find a title and create a region below it
@@ -144,6 +185,27 @@ content = title.below(height=200)
144
185
  content.show(crop=True)
145
186
  ```
146
187
 
188
+ ### Highlighting Multiple Regions
189
+
190
+ The `.highlight()` method works with regions too:
191
+
192
+ ```python
193
+ # Create multiple regions
194
+ left = page.region(left=0, right=page.width/3, top=0, bottom=page.height)
195
+ mid = page.region(left=page.width/3, right=page.width/3*2, top=0, bottom=page.height)
196
+ right = page.region(left=page.width/3*2, right=page.width, top=0, bottom=page.height)
197
+
198
+ # Highlight all three regions
199
+ page.highlight(left, mid, right)
200
+
201
+ # Or with custom colors
202
+ page.highlight(
203
+ (left, 'red', 'Left Column'),
204
+ (mid, 'green', 'Middle Column'),
205
+ (right, 'blue', 'Right Column')
206
+ )
207
+ ```
208
+
147
209
  ## Working with Text Styles
148
210
 
149
211
  Visualize text styles to understand the document structure:
@@ -29,9 +29,22 @@ class DirectionalCollectionMixin:
29
29
  """Find regions to the right of all elements in this collection."""
30
30
  return self.apply(lambda element: element.right(**kwargs))
31
31
 
32
- def expand(self, **kwargs) -> "ElementCollection":
33
- """Expand all elements in this collection."""
34
- return self.apply(lambda element: element.expand(**kwargs))
32
+ def expand(self, *args, **kwargs) -> "ElementCollection":
33
+ """Expand all elements in this collection.
34
+
35
+ Args:
36
+ *args: If a single positional argument is provided, expands all elements
37
+ by that amount in all directions.
38
+ **kwargs: Keyword arguments for directional expansion (left, right, top, bottom, etc.)
39
+
40
+ Examples:
41
+ # Expand all elements by 5 pixels in all directions
42
+ collection.expand(5)
43
+
44
+ # Expand with different amounts in each direction
45
+ collection.expand(left=10, right=5, top=3, bottom=7)
46
+ """
47
+ return self.apply(lambda element: element.expand(*args, **kwargs))
35
48
 
36
49
 
37
50
  class ApplyMixin:
@@ -335,6 +335,7 @@ class HighlightContext:
335
335
  self.show_on_exit = show_on_exit
336
336
  self.highlight_groups = []
337
337
  self._color_manager = ColorManager()
338
+ self._exit_image = None # Store image for Jupyter display
338
339
 
339
340
  def add(
340
341
  self,
@@ -421,6 +422,11 @@ class HighlightContext:
421
422
  )
422
423
  return None
423
424
 
425
+ @property
426
+ def image(self) -> Optional[Image.Image]:
427
+ """Get the last generated image (useful after context exit)."""
428
+ return self._exit_image
429
+
424
430
  def __enter__(self) -> "HighlightContext":
425
431
  """Enter the context."""
426
432
  return self
@@ -428,7 +434,25 @@ class HighlightContext:
428
434
  def __exit__(self, exc_type, exc_val, exc_tb):
429
435
  """Exit the context, optionally showing highlights."""
430
436
  if self.show_on_exit and not exc_type:
431
- self.show()
437
+ self._exit_image = self.show()
438
+
439
+ # Check if we're in a Jupyter/IPython environment
440
+ try:
441
+ # Try to get IPython instance
442
+ from IPython import get_ipython
443
+
444
+ ipython = get_ipython()
445
+ if ipython is not None:
446
+ # We're in IPython/Jupyter
447
+ from IPython.display import display
448
+
449
+ if self._exit_image is not None:
450
+ display(self._exit_image)
451
+ except (ImportError, NameError):
452
+ # Not in Jupyter or IPython not available - that's OK
453
+ pass
454
+
455
+ # __exit__ must return False to not suppress exceptions
432
456
  return False
433
457
 
434
458
 
@@ -78,6 +78,7 @@ from natural_pdf.utils.locks import pdf_render_lock # Import the lock
78
78
 
79
79
  # # Import new utils
80
80
  from natural_pdf.utils.text_extraction import filter_chars_spatially, generate_text_layout
81
+ from natural_pdf.vision.mixin import VisualSearchMixin
81
82
  from natural_pdf.widgets.viewer import _IPYWIDGETS_AVAILABLE, InteractiveViewerWidget
82
83
 
83
84
  # --- End Classification Imports --- #
@@ -101,6 +102,7 @@ class Page(
101
102
  ExtractionMixin,
102
103
  ShapeDetectionMixin,
103
104
  DescribeMixin,
105
+ VisualSearchMixin,
104
106
  Visualizable,
105
107
  ):
106
108
  """Enhanced Page wrapper built on top of pdfplumber.Page.
@@ -1976,7 +1978,7 @@ class Page(
1976
1978
  """Get all line elements on this page."""
1977
1979
  return self._element_mgr.lines
1978
1980
 
1979
- def highlight(
1981
+ def add_highlight(
1980
1982
  self,
1981
1983
  bbox: Optional[Tuple[float, float, float, float]] = None,
1982
1984
  color: Optional[Union[Tuple, str]] = None,
@@ -1987,7 +1989,7 @@ class Page(
1987
1989
  existing: str = "append",
1988
1990
  ) -> "Page":
1989
1991
  """
1990
- Highlight a bounding box or the entire page.
1992
+ Add a highlight to a bounding box or the entire page.
1991
1993
  Delegates to the central HighlightingService.
1992
1994
 
1993
1995
  Args:
@@ -2015,7 +2017,7 @@ class Page(
2015
2017
  )
2016
2018
  return self
2017
2019
 
2018
- def highlight_polygon(
2020
+ def add_highlight_polygon(
2019
2021
  self,
2020
2022
  polygon: List[Tuple[float, float]],
2021
2023
  color: Optional[Union[Tuple, str]] = None,
@@ -259,7 +259,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
259
259
  self,
260
260
  *,
261
261
  text: str,
262
- contains: str = "all",
262
+ overlap: str = "full",
263
263
  apply_exclusions: bool = True,
264
264
  regex: bool = False,
265
265
  case: bool = True,
@@ -271,7 +271,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
271
271
  self,
272
272
  selector: str,
273
273
  *,
274
- contains: str = "all",
274
+ overlap: str = "full",
275
275
  apply_exclusions: bool = True,
276
276
  regex: bool = False,
277
277
  case: bool = True,
@@ -283,7 +283,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
283
283
  selector: Optional[str] = None,
284
284
  *,
285
285
  text: Optional[str] = None,
286
- contains: str = "all",
286
+ overlap: str = "full",
287
287
  apply_exclusions: bool = True,
288
288
  regex: bool = False,
289
289
  case: bool = True,
@@ -297,9 +297,9 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
297
297
  Args:
298
298
  selector: CSS-like selector string.
299
299
  text: Text content to search for (equivalent to 'text:contains(...)').
300
- contains: How to determine if elements are inside: 'all' (fully inside),
301
- 'any' (any overlap), or 'center' (center point inside).
302
- (default: "all")
300
+ overlap: How to determine if elements overlap: 'full' (fully inside),
301
+ 'partial' (any overlap), or 'center' (center point inside).
302
+ (default: "full")
303
303
  apply_exclusions: Whether to exclude elements in exclusion regions (default: True).
304
304
  regex: Whether to use regex for text search (`selector` or `text`) (default: False).
305
305
  case: Whether to do case-sensitive text search (`selector` or `text`) (default: True).
@@ -313,7 +313,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
313
313
  element = page.find(
314
314
  selector=selector,
315
315
  text=text,
316
- contains=contains,
316
+ overlap=overlap,
317
317
  apply_exclusions=apply_exclusions,
318
318
  regex=regex,
319
319
  case=case,
@@ -328,7 +328,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
328
328
  self,
329
329
  *,
330
330
  text: str,
331
- contains: str = "all",
331
+ overlap: str = "full",
332
332
  apply_exclusions: bool = True,
333
333
  regex: bool = False,
334
334
  case: bool = True,
@@ -340,7 +340,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
340
340
  self,
341
341
  selector: str,
342
342
  *,
343
- contains: str = "all",
343
+ overlap: str = "full",
344
344
  apply_exclusions: bool = True,
345
345
  regex: bool = False,
346
346
  case: bool = True,
@@ -352,7 +352,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
352
352
  selector: Optional[str] = None,
353
353
  *,
354
354
  text: Optional[str] = None,
355
- contains: str = "all",
355
+ overlap: str = "full",
356
356
  apply_exclusions: bool = True,
357
357
  regex: bool = False,
358
358
  case: bool = True,
@@ -366,9 +366,9 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
366
366
  Args:
367
367
  selector: CSS-like selector string.
368
368
  text: Text content to search for (equivalent to 'text:contains(...)').
369
- contains: How to determine if elements are inside: 'all' (fully inside),
370
- 'any' (any overlap), or 'center' (center point inside).
371
- (default: "all")
369
+ overlap: How to determine if elements overlap: 'full' (fully inside),
370
+ 'partial' (any overlap), or 'center' (center point inside).
371
+ (default: "full")
372
372
  apply_exclusions: Whether to exclude elements in exclusion regions (default: True).
373
373
  regex: Whether to use regex for text search (`selector` or `text`) (default: False).
374
374
  case: Whether to do case-sensitive text search (`selector` or `text`) (default: True).
@@ -383,7 +383,7 @@ class PageCollection(TextMixin, Generic[P], ApplyMixin, ShapeDetectionMixin, Vis
383
383
  elements = page.find_all(
384
384
  selector=selector,
385
385
  text=text,
386
- contains=contains,
386
+ overlap=overlap,
387
387
  apply_exclusions=apply_exclusions,
388
388
  regex=regex,
389
389
  case=case,
@@ -42,6 +42,7 @@ from natural_pdf.ocr import OCRManager, OCROptions
42
42
  from natural_pdf.selectors.parser import parse_selector
43
43
  from natural_pdf.text_mixin import TextMixin
44
44
  from natural_pdf.utils.locks import pdf_render_lock
45
+ from natural_pdf.vision.mixin import VisualSearchMixin
45
46
 
46
47
  if TYPE_CHECKING:
47
48
  from natural_pdf.elements.element_collection import ElementCollection
@@ -252,7 +253,9 @@ class _LazyPageList(Sequence):
252
253
  # --- End Lazy Page List Helper --- #
253
254
 
254
255
 
255
- class PDF(TextMixin, ExtractionMixin, ExportMixin, ClassificationMixin, Visualizable):
256
+ class PDF(
257
+ TextMixin, ExtractionMixin, ExportMixin, ClassificationMixin, VisualSearchMixin, Visualizable
258
+ ):
256
259
  """Enhanced PDF wrapper built on top of pdfplumber.
257
260
 
258
261
  This class provides a fluent interface for working with PDF documents,
@@ -40,6 +40,7 @@ logger = logging.getLogger(__name__)
40
40
  from natural_pdf.core.pdf import PDF
41
41
  from natural_pdf.elements.region import Region
42
42
  from natural_pdf.export.mixin import ExportMixin
43
+ from natural_pdf.vision.mixin import VisualSearchMixin
43
44
 
44
45
  # --- Search Imports ---
45
46
  try:
@@ -69,8 +70,8 @@ from natural_pdf.search.searchable_mixin import SearchableMixin # Import the ne
69
70
 
70
71
 
71
72
  class PDFCollection(
72
- SearchableMixin, ApplyMixin, ExportMixin, ShapeDetectionMixin
73
- ): # Add ExportMixin and ShapeDetectionMixin
73
+ SearchableMixin, ApplyMixin, ExportMixin, ShapeDetectionMixin, VisualSearchMixin
74
+ ):
74
75
  def __init__(
75
76
  self,
76
77
  source: Union[str, Iterable[Union[str, "PDF"]]],
@@ -258,8 +259,6 @@ class PDFCollection(
258
259
  return iter(self._pdfs)
259
260
 
260
261
  def __repr__(self) -> str:
261
- # Removed search status
262
- return f"<PDFCollection(count={len(self._pdfs)})>"
263
262
  return f"<PDFCollection(count={len(self._pdfs)})>"
264
263
 
265
264
  @property
@@ -267,6 +266,134 @@ class PDFCollection(
267
266
  """Returns the list of PDF objects held by the collection."""
268
267
  return self._pdfs
269
268
 
269
def show(self, limit: Optional[int] = 30, per_pdf_limit: Optional[int] = 10, **kwargs):
    """
    Render every PDF in the collection into one labeled, stacked image.

    Each PDF's pages are rendered through its highlighter (grid layout with
    6 columns unless ``layout``/``columns`` are overridden via ``kwargs``),
    topped with a light-gray label strip showing the file name and page
    count. The per-PDF images are then stacked vertically, centered
    horizontally, with a 20 px gap between them.

    Args:
        limit: Maximum total pages to show across all PDFs (default: 30).
            A falsy value disables the overall cap.
        per_pdf_limit: Maximum pages to show per PDF (default: 10). When
            falsy and ``limit`` is set, the total is split evenly per PDF.
        **kwargs: Forwarded to each PDF's ``_get_render_specs()`` and to the
            highlighter's ``unified_render()`` (e.g., columns, exclusions,
            resolution). ``columns`` and ``layout`` given here replace the
            defaults instead of colliding with them.

    Returns:
        The combined PIL image (Jupyter displays it automatically), or None
        when the collection is empty or no PDF could be rendered.
    """
    if not self._pdfs:
        print("Empty collection")
        return None

    # Imported lazily so Pillow is only loaded when something is rendered.
    from PIL import Image, ImageDraw, ImageFont

    # Derive a per-PDF cap from the total when only the total is given.
    if limit and not per_pdf_limit:
        per_pdf_limit = max(1, limit // len(self._pdfs))

    label_height = 40
    label_bg_color = (240, 240, 240)
    label_text_color = (0, 0, 0)
    spacing = 20

    # The font does not depend on the PDF, so load it once up front.
    # ``Exception`` (not a bare except) keeps the best-effort fallback while
    # letting KeyboardInterrupt/SystemExit propagate.
    try:
        font = ImageFont.truetype("Arial", 20)
    except Exception:
        font = ImageFont.load_default()

    all_images = []
    total_pages_shown = 0

    for pdf in self._pdfs:
        if limit and total_pages_shown >= limit:
            break

        # Cap this PDF by both its own limit and what is left of the total.
        pdf_limit = per_pdf_limit
        if limit:
            remaining = limit - total_pages_shown
            pdf_limit = min(per_pdf_limit or remaining, remaining)

        # Reduce the PDF identifier to a bare file name for the label.
        pdf_name = getattr(pdf, "filename", None) or getattr(pdf, "path", "Unknown")
        if isinstance(pdf_name, Path):
            pdf_name = pdf_name.name
        elif "/" in str(pdf_name):
            pdf_name = str(pdf_name).split("/")[-1]

        try:
            render_specs = pdf._get_render_specs(mode="show", max_pages=pdf_limit, **kwargs)
            if not render_specs:
                continue

            # Merge defaults with caller overrides in a dict: passing
            # ``columns=6`` next to ``**kwargs`` raised "multiple values for
            # keyword argument" whenever the caller supplied ``columns``.
            render_kwargs = {
                "layout": "grid" if len(render_specs) > 1 else "single",
                "columns": 6,
                **kwargs,
            }
            pdf_image = pdf._get_highlighter().unified_render(
                specs=render_specs, **render_kwargs
            )

            if pdf_image:
                # New canvas: label strip on top, rendered pages below it.
                width, height = pdf_image.size
                labeled_image = Image.new("RGB", (width, height + label_height), "white")

                draw = ImageDraw.Draw(labeled_image)
                draw.rectangle([0, 0, width, label_height], fill=label_bg_color)

                page_count = len(pdf.pages)
                label_text = f"{pdf_name} ({page_count} pages)"
                draw.text((10, 10), label_text, fill=label_text_color, font=font)

                labeled_image.paste(pdf_image, (0, label_height))
                all_images.append(labeled_image)

                # ``pdf_limit`` is None when neither limit is set; min() with
                # None would raise after the image was already collected.
                if pdf_limit is None:
                    total_pages_shown += page_count
                else:
                    total_pages_shown += min(pdf_limit, page_count)

        except Exception as e:
            # Best effort: one unrenderable PDF must not abort the overview.
            logger.warning(f"Failed to render PDF {pdf_name}: {e}")
            continue

    if not all_images:
        print("No PDFs could be rendered")
        return None

    if len(all_images) == 1:
        return all_images[0]

    # Stack the labeled images vertically with spacing between them.
    total_height = sum(img.height for img in all_images) + spacing * (len(all_images) - 1)
    max_width = max(img.width for img in all_images)
    combined = Image.new("RGB", (max_width, total_height), "white")

    y_offset = 0
    for img in all_images:
        # Center images that are narrower than the widest one.
        combined.paste(img, ((max_width - img.width) // 2, y_offset))
        y_offset += img.height + spacing

    return combined
396
+
270
397
  @overload
271
398
  def find_all(
272
399
  self,
@@ -92,6 +92,50 @@ class Visualizable:
92
92
  _get_render_specs() to gain full image generation capabilities.
93
93
  """
94
94
 
95
def highlight(self, *elements, **kwargs):
    """
    One-call highlighting helper aimed at Jupyter/Colab notebooks.

    Builds a HighlightContext for this object (with show-on-exit disabled),
    registers every positional argument with it, and returns whatever the
    context's show() produces.

    Args:
        *elements: Things to highlight. Each item is either a bare element
            or collection, an ``(element, color)`` pair, or an
            ``(element, color, label)`` triple. Tuples of any other length
            are passed to the context unchanged.
        **kwargs: Additional parameters passed to show()

    Returns:
        PIL Image with highlights

    Example:
        # Simple one-liner highlighting
        page.highlight(left, mid, right)

        # With custom colors
        page.highlight(
            (tables, 'blue'),
            (headers, 'red'),
            (footers, 'green')
        )
    """
    from natural_pdf.core.highlighting_service import HighlightContext

    context = HighlightContext(self, show_on_exit=False)

    for item in elements:
        # Only genuine tuples carry extra styling; size 0 marks "not a tuple".
        size = len(item) if isinstance(item, tuple) else 0
        if size == 2:
            target, color = item
            context.add(target, color=color)
        elif size == 3:
            target, color, label = item
            context.add(target, color=color, label=label)
        else:
            context.add(item)

    # Hand the rendered image straight back to the caller.
    return context.show(**kwargs)
138
+
95
139
  def _get_render_specs(
96
140
  self, mode: Literal["show", "render"] = "show", **kwargs
97
141
  ) -> List[RenderSpec]:
@@ -142,7 +186,7 @@ class Visualizable:
142
186
  color: Optional[Union[str, Tuple[int, int, int]]] = None,
143
187
  labels: bool = True,
144
188
  label_format: Optional[str] = None,
145
- highlights: Optional[List[Dict[str, Any]]] = None,
189
+ highlights: Optional[Union[List[Dict[str, Any]], bool]] = None,
146
190
  legend_position: str = "right",
147
191
  annotate: Optional[Union[str, List[str]]] = None,
148
192
  # Layout options for multi-page/region
@@ -167,7 +211,7 @@ class Visualizable:
167
211
  color: Default highlight color
168
212
  labels: Whether to show labels for highlights
169
213
  label_format: Format string for labels (e.g., "Element {index}")
170
- highlights: Additional highlight groups to show
214
+ highlights: Additional highlight groups to show, or False to disable all highlights
171
215
  legend_position: Position of legend/colorbar ('right', 'left', 'top', 'bottom')
172
216
  annotate: Attribute name(s) to display on highlights (string or list)
173
217
  layout: How to arrange multiple pages/regions (defaults to 'grid' for multi-page, 'single' for single page)