natural-pdf 0.1.16__tar.gz → 0.1.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. {natural_pdf-0.1.16/natural_pdf.egg-info → natural_pdf-0.1.17}/PKG-INFO +1 -1
  2. natural_pdf-0.1.17/docs/describe/index.ipynb +438 -0
  3. natural_pdf-0.1.17/docs/describe/index.md +42 -0
  4. natural_pdf-0.1.17/docs/tutorials/01-loading-and-extraction.ipynb +328 -0
  5. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/02-finding-elements.ipynb +42 -42
  6. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/03-extracting-blocks.ipynb +17 -17
  7. natural_pdf-0.1.17/docs/tutorials/04-table-extraction.ipynb +579 -0
  8. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/05-excluding-content.ipynb +30 -30
  9. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/06-document-qa.ipynb +28 -28
  10. natural_pdf-0.1.17/docs/tutorials/07-layout-analysis.ipynb +630 -0
  11. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/07-working-with-regions.ipynb +58 -58
  12. natural_pdf-0.1.17/docs/tutorials/08-spatial-navigation.ipynb +520 -0
  13. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/09-section-extraction.ipynb +109 -109
  14. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/10-form-field-extraction.ipynb +50 -50
  15. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/11-enhanced-table-processing.ipynb +6 -6
  16. natural_pdf-0.1.17/docs/tutorials/12-ocr-integration.ipynb +4353 -0
  17. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/12-ocr-integration.md +12 -0
  18. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/13-semantic-search.ipynb +160 -160
  19. natural_pdf-0.1.17/docs/tutorials/14-categorizing-documents.ipynb +2142 -0
  20. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/mkdocs.yml +1 -0
  21. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/core/page.py +2 -1
  22. natural_pdf-0.1.17/natural_pdf/describe/__init__.py +21 -0
  23. natural_pdf-0.1.17/natural_pdf/describe/base.py +457 -0
  24. natural_pdf-0.1.17/natural_pdf/describe/elements.py +411 -0
  25. natural_pdf-0.1.17/natural_pdf/describe/mixin.py +84 -0
  26. natural_pdf-0.1.17/natural_pdf/describe/summary.py +186 -0
  27. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/elements/base.py +2 -1
  28. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/elements/collections.py +11 -1
  29. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/elements/region.py +4 -1
  30. {natural_pdf-0.1.16 → natural_pdf-0.1.17/natural_pdf.egg-info}/PKG-INFO +1 -1
  31. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf.egg-info/SOURCES.txt +7 -0
  32. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pyproject.toml +1 -1
  33. natural_pdf-0.1.16/docs/tutorials/01-loading-and-extraction.ipynb +0 -328
  34. natural_pdf-0.1.16/docs/tutorials/04-table-extraction.ipynb +0 -579
  35. natural_pdf-0.1.16/docs/tutorials/07-layout-analysis.ipynb +0 -630
  36. natural_pdf-0.1.16/docs/tutorials/08-spatial-navigation.ipynb +0 -520
  37. natural_pdf-0.1.16/docs/tutorials/12-ocr-integration.ipynb +0 -4129
  38. natural_pdf-0.1.16/docs/tutorials/14-categorizing-documents.ipynb +0 -2142
  39. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.cursor/rules/analysis_framework.mdc +0 -0
  40. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.cursor/rules/coding-style.mdc +0 -0
  41. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  42. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.cursor/rules/minimal-comments.mdc +0 -0
  43. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  44. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  45. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.github/workflows/docs.yml +0 -0
  46. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.gitignore +0 -0
  47. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/.pre-commit-config.yaml +0 -0
  48. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/01-execute_notebooks.py +0 -0
  49. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/02-run_all_tutorials.sh +0 -0
  50. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/CLAUDE.md +0 -0
  51. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/LICENSE +0 -0
  52. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/MANIFEST.in +0 -0
  53. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/README.md +0 -0
  54. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/audit_packaging.py +0 -0
  55. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/check_run_md.sh +0 -0
  56. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/api/index.md +0 -0
  57. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/favicon.png +0 -0
  58. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/favicon.svg +0 -0
  59. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/javascripts/custom.js +0 -0
  60. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/logo.svg +0 -0
  61. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/sample-screen.png +0 -0
  62. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/social-preview.png +0 -0
  63. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/social-preview.svg +0 -0
  64. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/assets/stylesheets/custom.css +0 -0
  65. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/categorizing-documents/index.md +0 -0
  66. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/data-extraction/index.md +0 -0
  67. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/document-qa/index.ipynb +0 -0
  68. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/document-qa/index.md +0 -0
  69. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/element-selection/index.ipynb +0 -0
  70. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/element-selection/index.md +0 -0
  71. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/finetuning/index.md +0 -0
  72. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/index.md +0 -0
  73. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/installation/index.md +0 -0
  74. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/interactive-widget/index.ipynb +0 -0
  75. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/interactive-widget/index.md +0 -0
  76. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/layout-analysis/index.ipynb +0 -0
  77. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/layout-analysis/index.md +0 -0
  78. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/loops-and-groups/index.ipynb +0 -0
  79. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/loops-and-groups/index.md +0 -0
  80. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/ocr/index.md +0 -0
  81. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/pdf-navigation/index.ipynb +0 -0
  82. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/pdf-navigation/index.md +0 -0
  83. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/reflowing-pages/index.ipynb +0 -0
  84. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/reflowing-pages/index.md +0 -0
  85. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/regions/index.ipynb +0 -0
  86. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/regions/index.md +0 -0
  87. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tables/index.ipynb +0 -0
  88. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tables/index.md +0 -0
  89. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/text-analysis/index.ipynb +0 -0
  90. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/text-analysis/index.md +0 -0
  91. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/text-extraction/index.ipynb +0 -0
  92. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/text-extraction/index.md +0 -0
  93. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/01-loading-and-extraction.md +0 -0
  94. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/02-finding-elements.md +0 -0
  95. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/03-extracting-blocks.md +0 -0
  96. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/04-table-extraction.md +0 -0
  97. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/05-excluding-content.md +0 -0
  98. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/06-document-qa.md +0 -0
  99. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/07-layout-analysis.md +0 -0
  100. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/07-working-with-regions.md +0 -0
  101. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/08-spatial-navigation.md +0 -0
  102. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/09-section-extraction.md +0 -0
  103. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/10-form-field-extraction.md +0 -0
  104. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  105. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/13-semantic-search.md +0 -0
  106. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/tutorials/14-categorizing-documents.md +0 -0
  107. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/visual-debugging/index.ipynb +0 -0
  108. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/visual-debugging/index.md +0 -0
  109. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/docs/visual-debugging/region.png +0 -0
  110. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/__init__.py +0 -0
  111. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/__init__.py +0 -0
  112. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/__init__.py +0 -0
  113. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/base.py +0 -0
  114. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/docling.py +0 -0
  115. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/gemini.py +0 -0
  116. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  117. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  118. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  119. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/paddle.py +0 -0
  120. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  121. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/surya.py +0 -0
  122. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  123. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/tatr.py +0 -0
  124. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/layout/yolo.py +0 -0
  125. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
  126. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/text_options.py +0 -0
  127. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/text_structure.py +0 -0
  128. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/analyzers/utils.py +0 -0
  129. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/classification/manager.py +0 -0
  130. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/classification/mixin.py +0 -0
  131. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/classification/results.py +0 -0
  132. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/collections/mixins.py +0 -0
  133. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/collections/pdf_collection.py +0 -0
  134. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/core/__init__.py +0 -0
  135. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/core/element_manager.py +0 -0
  136. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/core/highlighting_service.py +0 -0
  137. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/core/pdf.py +0 -0
  138. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/elements/__init__.py +0 -0
  139. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/elements/line.py +0 -0
  140. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/elements/rect.py +0 -0
  141. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/elements/text.py +0 -0
  142. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/export/mixin.py +0 -0
  143. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/__init__.py +0 -0
  144. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/base.py +0 -0
  145. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/data/__init__.py +0 -0
  146. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/data/pdf.ttf +0 -0
  147. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/data/sRGB.icc +0 -0
  148. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/hocr.py +0 -0
  149. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/hocr_font.py +0 -0
  150. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/original_pdf.py +0 -0
  151. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/paddleocr.py +0 -0
  152. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/exporters/searchable_pdf.py +0 -0
  153. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/extraction/manager.py +0 -0
  154. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/extraction/mixin.py +0 -0
  155. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/extraction/result.py +0 -0
  156. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/flows/__init__.py +0 -0
  157. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/flows/collections.py +0 -0
  158. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/flows/element.py +0 -0
  159. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/flows/flow.py +0 -0
  160. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/flows/region.py +0 -0
  161. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/__init__.py +0 -0
  162. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/engine.py +0 -0
  163. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/engine_doctr.py +0 -0
  164. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/engine_easyocr.py +0 -0
  165. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/engine_paddle.py +0 -0
  166. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/engine_surya.py +0 -0
  167. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/ocr_factory.py +0 -0
  168. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/ocr_manager.py +0 -0
  169. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/ocr_options.py +0 -0
  170. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/ocr/utils.py +0 -0
  171. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/qa/__init__.py +0 -0
  172. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/qa/document_qa.py +0 -0
  173. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/search/__init__.py +0 -0
  174. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/search/lancedb_search_service.py +0 -0
  175. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/search/numpy_search_service.py +0 -0
  176. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/search/search_options.py +0 -0
  177. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/search/search_service_protocol.py +0 -0
  178. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/search/searchable_mixin.py +0 -0
  179. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/selectors/__init__.py +0 -0
  180. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/selectors/parser.py +0 -0
  181. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/templates/__init__.py +0 -0
  182. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  183. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/templates/spa/css/style.css +0 -0
  184. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/templates/spa/index.html +0 -0
  185. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/templates/spa/js/app.js +0 -0
  186. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/templates/spa/words.txt +0 -0
  187. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/__init__.py +0 -0
  188. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/debug.py +0 -0
  189. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/highlighting.py +0 -0
  190. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/identifiers.py +0 -0
  191. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/locks.py +0 -0
  192. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/packaging.py +0 -0
  193. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/reading_order.py +0 -0
  194. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/text_extraction.py +0 -0
  195. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/utils/visualization.py +0 -0
  196. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/widgets/__init__.py +0 -0
  197. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf/widgets/viewer.py +0 -0
  198. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf.egg-info/dependency_links.txt +0 -0
  199. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf.egg-info/requires.txt +0 -0
  200. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/natural_pdf.egg-info/top_level.txt +0 -0
  201. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/noxfile.py +0 -0
  202. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/.gitkeep +0 -0
  203. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/01-practice.pdf +0 -0
  204. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/0500000US42001.pdf +0 -0
  205. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/0500000US42007.pdf +0 -0
  206. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/2014 Statistics.pdf +0 -0
  207. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/2019 Statistics.pdf +0 -0
  208. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/30.pdf +0 -0
  209. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  210. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/anexo_edital_6604_1743480-table.pdf +0 -0
  211. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/cia-doc.pdf +0 -0
  212. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/geometry.pdf +0 -0
  213. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/image.png +0 -0
  214. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/image.png.pdf +0 -0
  215. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/multicolumn.pdf +0 -0
  216. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/needs-ocr.pdf +0 -0
  217. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/red.pdf +0 -0
  218. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/tiny-ocr-2.pdf +0 -0
  219. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/tiny-ocr-3.pdf +0 -0
  220. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/tiny-ocr-small.jpg +0 -0
  221. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/tiny-ocr-wide.jpg +0 -0
  222. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/tiny-ocr.pdf +0 -0
  223. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/tiny.pdf +0 -0
  224. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/pdfs/word-counter.pdf +0 -0
  225. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/publish.sh +0 -0
  226. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/sample-screen.png +0 -0
  227. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/setup.cfg +0 -0
  228. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/conftest.py +0 -0
  229. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/exporters/test_paddleocr_exporter.py +0 -0
  230. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/test_core/test_containment_geometry.py +0 -0
  231. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/test_core/test_elements.py +0 -0
  232. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/test_core/test_loading.py +0 -0
  233. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/test_core/test_spatial.py +0 -0
  234. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/test_core/test_text_extraction.py +0 -0
  235. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/test_loading_original.py +0 -0
  236. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/tests/test_optional_deps.py +0 -0
  237. {natural_pdf-0.1.16 → natural_pdf-0.1.17}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.16
3
+ Version: 0.1.17
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1,438 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "725fe29c",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Describe Functionality\n",
9
+ "\n",
10
+ "The `describe()` and `inspect()` methods provide an easy way to understand the contents of your PDF elements without having to visualize them as images.\n",
11
+ "\n",
12
+ "## Basic Usage\n",
13
+ "\n",
14
+ "Get a summary of an entire page:"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 1,
20
+ "id": "5bc39925",
21
+ "metadata": {
22
+ "execution": {
23
+ "iopub.execute_input": "2025-06-15T16:10:40.070939Z",
24
+ "iopub.status.busy": "2025-06-15T16:10:40.070809Z",
25
+ "iopub.status.idle": "2025-06-15T16:10:45.409526Z",
26
+ "shell.execute_reply": "2025-06-15T16:10:45.409235Z"
27
+ }
28
+ },
29
+ "outputs": [
30
+ {
31
+ "name": "stderr",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "CropBox missing from /Page, defaulting to MediaBox\n"
35
+ ]
36
+ },
37
+ {
38
+ "data": {
39
+ "text/markdown": [
40
+ "## Page 1 Summary\n",
41
+ "\n",
42
+ "**Elements**:\n",
43
+ " - **text**: 44 elements\n",
44
+ " - **line**: 21 elements\n",
45
+ " - **rect**: 8 elements\n",
46
+ "\n",
47
+ "**Text Analysis**:\n",
48
+ " - **typography**:\n",
49
+ " - **fonts**:\n",
50
+ " - Helvetica: 44\n",
51
+ " - **sizes**:\n",
52
+ " - 10.0pt: 40\n",
53
+ " - 8.0pt: 3\n",
54
+ " - 12.0pt: 1\n",
55
+ " - styles: 9 bold\n",
56
+ " - **colors**:\n",
57
+ " - black: 43\n",
58
+ " - other: 1"
59
+ ],
60
+ "text/plain": [
61
+ "## Page 1 Summary\n",
62
+ "\n",
63
+ "**Elements**:\n",
64
+ " - **text**: 44 elements\n",
65
+ " - **line**: 21 elements\n",
66
+ " - **rect**: 8 elements\n",
67
+ "\n",
68
+ "**Text Analysis**:\n",
69
+ " - **typography**:\n",
70
+ " - **fonts**:\n",
71
+ " - Helvetica: 44\n",
72
+ " - **sizes**:\n",
73
+ " - 10.0pt: 40\n",
74
+ " - 8.0pt: 3\n",
75
+ " - 12.0pt: 1\n",
76
+ " - styles: 9 bold\n",
77
+ " - **colors**:\n",
78
+ " - black: 43\n",
79
+ " - other: 1"
80
+ ]
81
+ },
82
+ "execution_count": 1,
83
+ "metadata": {},
84
+ "output_type": "execute_result"
85
+ }
86
+ ],
87
+ "source": [
88
+ "from natural_pdf import PDF\n",
89
+ "\n",
90
+ "pdf = PDF(\"https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf\")\n",
91
+ "page = pdf.pages[0]\n",
92
+ "\n",
93
+ "page.describe()"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "markdown",
98
+ "id": "b0e3354c",
99
+ "metadata": {},
100
+ "source": [
101
+ "## Element collection summaries\n",
102
+ "\n",
103
+ "You can describe element collections on a page with `.describe()`."
104
+ ]
105
+ },
106
+ {
107
+ "cell_type": "code",
108
+ "execution_count": 2,
109
+ "id": "eac4e22b",
110
+ "metadata": {
111
+ "execution": {
112
+ "iopub.execute_input": "2025-06-15T16:10:45.411081Z",
113
+ "iopub.status.busy": "2025-06-15T16:10:45.410822Z",
114
+ "iopub.status.idle": "2025-06-15T16:10:45.413903Z",
115
+ "shell.execute_reply": "2025-06-15T16:10:45.413620Z"
116
+ }
117
+ },
118
+ "outputs": [
119
+ {
120
+ "data": {
121
+ "text/markdown": [
122
+ "## Collection Summary (44 elements)\n",
123
+ "\n",
124
+ "**Typography**:\n",
125
+ " - **fonts**:\n",
126
+ " - Helvetica: 44\n",
127
+ " - **sizes**:\n",
128
+ " - 10.0pt: 40\n",
129
+ " - 8.0pt: 3\n",
130
+ " - 12.0pt: 1\n",
131
+ " - **styles**: 9 bold\n",
132
+ " - **colors**:\n",
133
+ " - black: 43\n",
134
+ " - other: 1"
135
+ ],
136
+ "text/plain": [
137
+ "## Collection Summary (44 elements)\n",
138
+ "\n",
139
+ "**Typography**:\n",
140
+ " - **fonts**:\n",
141
+ " - Helvetica: 44\n",
142
+ " - **sizes**:\n",
143
+ " - 10.0pt: 40\n",
144
+ " - 8.0pt: 3\n",
145
+ " - 12.0pt: 1\n",
146
+ " - **styles**: 9 bold\n",
147
+ " - **colors**:\n",
148
+ " - black: 43\n",
149
+ " - other: 1"
150
+ ]
151
+ },
152
+ "execution_count": 2,
153
+ "metadata": {},
154
+ "output_type": "execute_result"
155
+ }
156
+ ],
157
+ "source": [
158
+ "# Describe all elements on the page\n",
159
+ "page.find_all('text').describe()"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 3,
165
+ "id": "503c2a31",
166
+ "metadata": {
167
+ "execution": {
168
+ "iopub.execute_input": "2025-06-15T16:10:45.415172Z",
169
+ "iopub.status.busy": "2025-06-15T16:10:45.415052Z",
170
+ "iopub.status.idle": "2025-06-15T16:10:45.417592Z",
171
+ "shell.execute_reply": "2025-06-15T16:10:45.417343Z"
172
+ }
173
+ },
174
+ "outputs": [
175
+ {
176
+ "data": {
177
+ "text/markdown": [
178
+ "## Collection Summary (8 elements)\n",
179
+ "\n",
180
+ "**Size Stats**:\n",
181
+ " - **width range**: 8-180\n",
182
+ " - **height range**: 8-35\n",
183
+ " - **avg area**: 844 sq pts\n",
184
+ "\n",
185
+ "**Styles**:\n",
186
+ " - **stroke widths**:\n",
187
+ " - 0.5: 7\n",
188
+ " - **colors**:\n",
189
+ " - black: 8"
190
+ ],
191
+ "text/plain": [
192
+ "## Collection Summary (8 elements)\n",
193
+ "\n",
194
+ "**Size Stats**:\n",
195
+ " - **width range**: 8-180\n",
196
+ " - **height range**: 8-35\n",
197
+ " - **avg area**: 844 sq pts\n",
198
+ "\n",
199
+ "**Styles**:\n",
200
+ " - **stroke widths**:\n",
201
+ " - 0.5: 7\n",
202
+ " - **colors**:\n",
203
+ " - black: 8"
204
+ ]
205
+ },
206
+ "execution_count": 3,
207
+ "metadata": {},
208
+ "output_type": "execute_result"
209
+ }
210
+ ],
211
+ "source": [
212
+ "# Describe all elements on the page\n",
213
+ "page.find_all('rect').describe()"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "markdown",
218
+ "id": "5b468a5e",
219
+ "metadata": {},
220
+ "source": [
221
+ "## Inspecting lists of elements\n",
222
+ "\n",
223
+ "For more detail, you can view specific details of element collections with `inspect()`."
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 4,
229
+ "id": "ea04905b",
230
+ "metadata": {
231
+ "execution": {
232
+ "iopub.execute_input": "2025-06-15T16:10:45.418792Z",
233
+ "iopub.status.busy": "2025-06-15T16:10:45.418688Z",
234
+ "iopub.status.idle": "2025-06-15T16:10:45.421581Z",
235
+ "shell.execute_reply": "2025-06-15T16:10:45.421321Z"
236
+ }
237
+ },
238
+ "outputs": [
239
+ {
240
+ "data": {
241
+ "text/markdown": [
242
+ "## Collection Inspection (44 elements)\n",
243
+ "\n",
244
+ "### Word Elements\n",
245
+ "\n",
246
+ "| text | x0 | top | x1 | bottom | font_family | size | bold | italic | source | confidence | color |\n",
247
+ "|------|------|------|------|------|------|------|------|------|------|------|------|\n",
248
+ "| Jungle Health and Safety Inspection Service | 385 | 36 | 542 | 44 | Helvetica | 8 | False | False | native | 1.00 | #000000 |\n",
249
+ "| INS-UP70N51NCL41R | 385 | 46 | 466 | 54 | Helvetica | 8 | False | False | native | 1.00 | #ff0000 |\n",
250
+ "| Site: | 50 | 84 | 74 | 94 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
251
+ "| Durham’s Meatpacking | 74 | 84 | 182 | 94 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
252
+ "| Chicago, Ill. | 182 | 84 | 235 | 94 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
253
+ "| Date: | 50 | 104 | 81 | 114 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
254
+ "| February 3, 1905 | 81 | 104 | 157 | 114 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
255
+ "| Violation Count: | 50 | 124 | 130 | 134 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
256
+ "| 7 | 130 | 124 | 136 | 134 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
257
+ "| Summary: | 50 | 144 | 102 | 154 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
258
+ "| Worst of any, however, were the fertilizer men, an... | 102 | 144 | 506 | 154 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
259
+ "| These people could not be shown to the visitor - f... | 50 | 160 | 512 | 170 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
260
+ "| visitor at a hundred yards, and as for the other m... | 50 | 176 | 491 | 186 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
261
+ "| some of which there were open vats near the level ... | 50 | 192 | 496 | 202 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
262
+ "| into the vats; and when they were fished out, ther... | 50 | 208 | 465 | 218 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
263
+ "| exhibiting - sometimes they would be overlooked fo... | 50 | 224 | 492 | 234 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
264
+ "| to the world as Durham’s Pure Leaf Lard! | 50 | 240 | 232 | 250 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
265
+ "| Violations | 50 | 372 | 107 | 384 | Helvetica | 12 | True | False | native | 1.00 | #000000 |\n",
266
+ "| Statute | 55 | 398 | 89 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
267
+ "| Description | 105 | 398 | 160 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
268
+ "| Level | 455 | 398 | 481 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
269
+ "| Repeat? | 505 | 398 | 544 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
270
+ "| 4.12.7 | 55 | 418 | 83 | 428 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
271
+ "| Unsanitary Working Conditions. | 105 | 418 | 245 | 428 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
272
+ "| Critical | 455 | 418 | 486 | 428 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
273
+ "| 5.8.3 | 55 | 438 | 77 | 448 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
274
+ "| Inadequate Protective Equipment. | 105 | 438 | 256 | 448 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
275
+ "| Serious | 455 | 438 | 489 | 448 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
276
+ "| 6.3.9 | 55 | 458 | 77 | 468 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
277
+ "| Ineffective Injury Prevention. | 105 | 458 | 231 | 468 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
278
+ "_Showing 30 of 44 elements (pass limit= to see more)_"
279
+ ],
280
+ "text/plain": [
281
+ "## Collection Inspection (44 elements)\n",
282
+ "\n",
283
+ "### Word Elements\n",
284
+ "\n",
285
+ "| text | x0 | top | x1 | bottom | font_family | size | bold | italic | source | confidence | color |\n",
286
+ "|------|------|------|------|------|------|------|------|------|------|------|------|\n",
287
+ "| Jungle Health and Safety Inspection Service | 385 | 36 | 542 | 44 | Helvetica | 8 | False | False | native | 1.00 | #000000 |\n",
288
+ "| INS-UP70N51NCL41R | 385 | 46 | 466 | 54 | Helvetica | 8 | False | False | native | 1.00 | #ff0000 |\n",
289
+ "| Site: | 50 | 84 | 74 | 94 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
290
+ "| Durham’s Meatpacking | 74 | 84 | 182 | 94 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
291
+ "| Chicago, Ill. | 182 | 84 | 235 | 94 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
292
+ "| Date: | 50 | 104 | 81 | 114 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
293
+ "| February 3, 1905 | 81 | 104 | 157 | 114 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
294
+ "| Violation Count: | 50 | 124 | 130 | 134 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
295
+ "| 7 | 130 | 124 | 136 | 134 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
296
+ "| Summary: | 50 | 144 | 102 | 154 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
297
+ "| Worst of any, however, were the fertilizer men, an... | 102 | 144 | 506 | 154 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
298
+ "| These people could not be shown to the visitor - f... | 50 | 160 | 512 | 170 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
299
+ "| visitor at a hundred yards, and as for the other m... | 50 | 176 | 491 | 186 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
300
+ "| some of which there were open vats near the level ... | 50 | 192 | 496 | 202 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
301
+ "| into the vats; and when they were fished out, ther... | 50 | 208 | 465 | 218 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
302
+ "| exhibiting - sometimes they would be overlooked fo... | 50 | 224 | 492 | 234 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
303
+ "| to the world as Durham’s Pure Leaf Lard! | 50 | 240 | 232 | 250 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
304
+ "| Violations | 50 | 372 | 107 | 384 | Helvetica | 12 | True | False | native | 1.00 | #000000 |\n",
305
+ "| Statute | 55 | 398 | 89 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
306
+ "| Description | 105 | 398 | 160 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
307
+ "| Level | 455 | 398 | 481 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
308
+ "| Repeat? | 505 | 398 | 544 | 408 | Helvetica | 10 | True | False | native | 1.00 | #000000 |\n",
309
+ "| 4.12.7 | 55 | 418 | 83 | 428 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
310
+ "| Unsanitary Working Conditions. | 105 | 418 | 245 | 428 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
311
+ "| Critical | 455 | 418 | 486 | 428 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
312
+ "| 5.8.3 | 55 | 438 | 77 | 448 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
313
+ "| Inadequate Protective Equipment. | 105 | 438 | 256 | 448 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
314
+ "| Serious | 455 | 438 | 489 | 448 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
315
+ "| 6.3.9 | 55 | 458 | 77 | 468 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
316
+ "| Ineffective Injury Prevention. | 105 | 458 | 231 | 468 | Helvetica | 10 | False | False | native | 1.00 | #000000 |\n",
317
+ "_Showing 30 of 44 elements (pass limit= to see more)_"
318
+ ]
319
+ },
320
+ "execution_count": 4,
321
+ "metadata": {},
322
+ "output_type": "execute_result"
323
+ }
324
+ ],
325
+ "source": [
326
+ "page.find_all('text').inspect()"
327
+ ]
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": 5,
332
+ "id": "06c8d813",
333
+ "metadata": {
334
+ "execution": {
335
+ "iopub.execute_input": "2025-06-15T16:10:45.422781Z",
336
+ "iopub.status.busy": "2025-06-15T16:10:45.422665Z",
337
+ "iopub.status.idle": "2025-06-15T16:10:45.425191Z",
338
+ "shell.execute_reply": "2025-06-15T16:10:45.424938Z"
339
+ }
340
+ },
341
+ "outputs": [
342
+ {
343
+ "data": {
344
+ "text/markdown": [
345
+ "## Collection Inspection (21 elements)\n",
346
+ "\n",
347
+ "### Line Elements\n",
348
+ "\n",
349
+ "| x0 | top | x1 | bottom | width | is_horizontal | is_vertical |\n",
350
+ "|------|------|------|------|------|------|------|\n",
351
+ "| 50 | 352 | 550 | 352 | 2 | True | False |\n",
352
+ "| 50 | 392 | 550 | 392 | 0 | True | False |\n",
353
+ "| 50 | 392 | 50 | 552 | 0 | False | True |\n",
354
+ "| 100 | 392 | 100 | 552 | 0 | False | True |\n",
355
+ "| 450 | 392 | 450 | 552 | 0 | False | True |\n",
356
+ "| 500 | 392 | 500 | 552 | 0 | False | True |\n",
357
+ "| 550 | 392 | 550 | 552 | 0 | False | True |\n",
358
+ "| 50 | 412 | 550 | 412 | 0 | True | False |\n",
359
+ "| 520 | 418 | 528 | 426 | 0 | False | False |\n",
360
+ "| 520 | 418 | 528 | 426 | 0 | False | False |\n",
361
+ "| 50 | 432 | 550 | 432 | 0 | True | False |\n",
362
+ "| 520 | 438 | 528 | 446 | 0 | False | False |\n",
363
+ "| 520 | 438 | 528 | 446 | 0 | False | False |\n",
364
+ "| 50 | 452 | 550 | 452 | 0 | True | False |\n",
365
+ "| 50 | 472 | 550 | 472 | 0 | True | False |\n",
366
+ "| 50 | 492 | 550 | 492 | 0 | True | False |\n",
367
+ "| 50 | 512 | 550 | 512 | 0 | True | False |\n",
368
+ "| 520 | 518 | 528 | 526 | 0 | False | False |\n",
369
+ "| 520 | 518 | 528 | 526 | 0 | False | False |\n",
370
+ "| 50 | 532 | 550 | 532 | 0 | True | False |\n",
371
+ "| 50 | 552 | 550 | 552 | 0 | True | False |"
372
+ ],
373
+ "text/plain": [
374
+ "## Collection Inspection (21 elements)\n",
375
+ "\n",
376
+ "### Line Elements\n",
377
+ "\n",
378
+ "| x0 | top | x1 | bottom | width | is_horizontal | is_vertical |\n",
379
+ "|------|------|------|------|------|------|------|\n",
380
+ "| 50 | 352 | 550 | 352 | 2 | True | False |\n",
381
+ "| 50 | 392 | 550 | 392 | 0 | True | False |\n",
382
+ "| 50 | 392 | 50 | 552 | 0 | False | True |\n",
383
+ "| 100 | 392 | 100 | 552 | 0 | False | True |\n",
384
+ "| 450 | 392 | 450 | 552 | 0 | False | True |\n",
385
+ "| 500 | 392 | 500 | 552 | 0 | False | True |\n",
386
+ "| 550 | 392 | 550 | 552 | 0 | False | True |\n",
387
+ "| 50 | 412 | 550 | 412 | 0 | True | False |\n",
388
+ "| 520 | 418 | 528 | 426 | 0 | False | False |\n",
389
+ "| 520 | 418 | 528 | 426 | 0 | False | False |\n",
390
+ "| 50 | 432 | 550 | 432 | 0 | True | False |\n",
391
+ "| 520 | 438 | 528 | 446 | 0 | False | False |\n",
392
+ "| 520 | 438 | 528 | 446 | 0 | False | False |\n",
393
+ "| 50 | 452 | 550 | 452 | 0 | True | False |\n",
394
+ "| 50 | 472 | 550 | 472 | 0 | True | False |\n",
395
+ "| 50 | 492 | 550 | 492 | 0 | True | False |\n",
396
+ "| 50 | 512 | 550 | 512 | 0 | True | False |\n",
397
+ "| 520 | 518 | 528 | 526 | 0 | False | False |\n",
398
+ "| 520 | 518 | 528 | 526 | 0 | False | False |\n",
399
+ "| 50 | 532 | 550 | 532 | 0 | True | False |\n",
400
+ "| 50 | 552 | 550 | 552 | 0 | True | False |"
401
+ ]
402
+ },
403
+ "execution_count": 5,
404
+ "metadata": {},
405
+ "output_type": "execute_result"
406
+ }
407
+ ],
408
+ "source": [
409
+ "page.find_all('line').inspect()"
410
+ ]
411
+ }
412
+ ],
413
+ "metadata": {
414
+ "jupytext": {
415
+ "cell_metadata_filter": "-all",
416
+ "main_language": "python",
417
+ "notebook_metadata_filter": "-all",
418
+ "text_representation": {
419
+ "extension": ".md",
420
+ "format_name": "markdown"
421
+ }
422
+ },
423
+ "language_info": {
424
+ "codemirror_mode": {
425
+ "name": "ipython",
426
+ "version": 3
427
+ },
428
+ "file_extension": ".py",
429
+ "mimetype": "text/x-python",
430
+ "name": "python",
431
+ "nbconvert_exporter": "python",
432
+ "pygments_lexer": "ipython3",
433
+ "version": "3.11.11"
434
+ }
435
+ },
436
+ "nbformat": 4,
437
+ "nbformat_minor": 5
438
+ }
@@ -0,0 +1,42 @@
1
+ # Describe Functionality
2
+
3
+ The `describe()` and `inspect()` methods provide an easy way to understand the contents of your PDF elements without having to visualize them as images.
4
+
5
+ ## Basic Usage
6
+
7
+ Get a summary of an entire page:
8
+
9
+ ```python
10
+ from natural_pdf import PDF
11
+
12
+ pdf = PDF("https://github.com/jsoma/natural-pdf/raw/refs/heads/main/pdfs/01-practice.pdf")
13
+ page = pdf.pages[0]
14
+
15
+ page.describe()
16
+ ```
17
+
18
+ ## Element collection summaries
19
+
20
+ You can describe element collections on a page with `.describe()`.
21
+
22
+ ```python
23
+ # Describe all text elements on the page
24
+ page.find_all('text').describe()
25
+ ```
26
+
27
+ ```python
28
+ # Describe all rect elements on the page
29
+ page.find_all('rect').describe()
30
+ ```
31
+
32
+ ## Inspecting lists of elements
33
+
34
+ For more detail, you can view a table of the properties of each element in a collection with `inspect()`.
35
+
36
+ ```python
37
+ page.find_all('text').inspect()
38
+ ```
39
+
40
+ ```python
41
+ page.find_all('line').inspect()
42
+ ```