natural-pdf 0.1.23__tar.gz → 0.1.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. {natural_pdf-0.1.23/natural_pdf.egg-info → natural_pdf-0.1.24}/PKG-INFO +2 -1
  2. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/extracting-clean-text/index.ipynb +177 -176
  3. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/extracting-clean-text/index.md +1 -0
  4. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/index.ipynb +197 -194
  5. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/index.md +0 -24
  6. natural_pdf-0.1.24/docs/tutorials/01-loading-and-extraction.ipynb +312 -0
  7. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/02-finding-elements.ipynb +42 -42
  8. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/03-extracting-blocks.ipynb +17 -17
  9. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/04-table-extraction.ipynb +30 -30
  10. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/05-excluding-content.ipynb +29 -29
  11. natural_pdf-0.1.24/docs/tutorials/06-document-qa.ipynb +445 -0
  12. natural_pdf-0.1.24/docs/tutorials/06-document-qa.md +96 -0
  13. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-layout-analysis.ipynb +42 -42
  14. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-working-with-regions.ipynb +58 -58
  15. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/08-spatial-navigation.ipynb +71 -71
  16. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/09-section-extraction.ipynb +109 -109
  17. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/10-form-field-extraction.ipynb +57 -57
  18. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/11-enhanced-table-processing.ipynb +119 -119
  19. natural_pdf-0.1.24/docs/tutorials/12-ocr-integration.ipynb +4733 -0
  20. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/13-semantic-search.ipynb +128 -128
  21. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/14-categorizing-documents.ipynb +505 -505
  22. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/mkdocs.yml +15 -21
  23. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/shape_detection_mixin.py +40 -0
  24. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/highlighting_service.py +4 -4
  25. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/page.py +16 -2
  26. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/base.py +11 -1
  27. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/summary.py +26 -0
  28. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/base.py +2 -2
  29. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/collections.py +139 -100
  30. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/region.py +133 -12
  31. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/text.py +15 -7
  32. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/region.py +116 -1
  33. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/qa/document_qa.py +162 -105
  34. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/text_extraction.py +34 -14
  35. {natural_pdf-0.1.23 → natural_pdf-0.1.24/natural_pdf.egg-info}/PKG-INFO +2 -1
  36. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/SOURCES.txt +1 -0
  37. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/requires.txt +1 -0
  38. natural_pdf-0.1.24/pdfs/1107231007033739008.pdf +0 -0
  39. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/needs-ocr.pdf +0 -0
  40. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pyproject.toml +1 -0
  41. natural_pdf-0.1.23/docs/tutorials/01-loading-and-extraction.ipynb +0 -312
  42. natural_pdf-0.1.23/docs/tutorials/06-document-qa.ipynb +0 -401
  43. natural_pdf-0.1.23/docs/tutorials/06-document-qa.md +0 -118
  44. natural_pdf-0.1.23/docs/tutorials/12-ocr-integration.ipynb +0 -4205
  45. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/analysis_framework.mdc +0 -0
  46. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/coding-style.mdc +0 -0
  47. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
  48. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/minimal-comments.mdc +0 -0
  49. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/natural-pdf-overview.mdc +0 -0
  50. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/user-friendly-library-code.mdc +0 -0
  51. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.github/workflows/docs.yml +0 -0
  52. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.gitignore +0 -0
  53. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.pre-commit-config.yaml +0 -0
  54. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/01-execute_notebooks.py +0 -0
  55. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/02-run_all_tutorials.sh +0 -0
  56. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/CLAUDE.md +0 -0
  57. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/LICENSE +0 -0
  58. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/MANIFEST.in +0 -0
  59. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/README.md +0 -0
  60. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/audit_packaging.py +0 -0
  61. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/check_run_md.sh +0 -0
  62. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/api/index.md +0 -0
  63. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/favicon.png +0 -0
  64. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/favicon.svg +0 -0
  65. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/javascripts/custom.js +0 -0
  66. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/logo.svg +0 -0
  67. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/sample-screen.png +0 -0
  68. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/social-preview.png +0 -0
  69. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/social-preview.svg +0 -0
  70. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/stylesheets/custom.css +0 -0
  71. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/categorizing-documents/index.md +0 -0
  72. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/data-extraction/index.md +0 -0
  73. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/describe/index.ipynb +0 -0
  74. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/describe/index.md +0 -0
  75. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/document-qa/index.ipynb +0 -0
  76. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/document-qa/index.md +0 -0
  77. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/element-selection/index.ipynb +0 -0
  78. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/element-selection/index.md +0 -0
  79. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/finetuning/index.md +0 -0
  80. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/table_1.csv +0 -0
  81. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/table_2.csv +0 -0
  82. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/table_3.csv +0 -0
  83. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/index.md +0 -0
  84. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/installation/index.md +0 -0
  85. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/interactive-widget/index.ipynb +0 -0
  86. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/interactive-widget/index.md +0 -0
  87. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/layout-analysis/index.ipynb +0 -0
  88. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/layout-analysis/index.md +0 -0
  89. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/loops-and-groups/index.ipynb +0 -0
  90. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/loops-and-groups/index.md +0 -0
  91. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/ocr/index.md +0 -0
  92. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/pdf-navigation/index.ipynb +0 -0
  93. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/pdf-navigation/index.md +0 -0
  94. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
  95. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/process-forms-and-invoices/index.ipynb +0 -0
  96. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/process-forms-and-invoices/index.md +0 -0
  97. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/quick-reference/index.ipynb +0 -0
  98. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/quick-reference/index.md +0 -0
  99. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/reflowing-pages/index.ipynb +0 -0
  100. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/reflowing-pages/index.md +0 -0
  101. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/regions/index.ipynb +0 -0
  102. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/regions/index.md +0 -0
  103. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tables/index.ipynb +0 -0
  104. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tables/index.md +0 -0
  105. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/text-analysis/index.ipynb +0 -0
  106. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/text-analysis/index.md +0 -0
  107. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/text-extraction/index.ipynb +0 -0
  108. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/01-loading-and-extraction.md +0 -0
  109. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/02-finding-elements.md +0 -0
  110. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/03-extracting-blocks.md +0 -0
  111. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/04-table-extraction.md +0 -0
  112. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/05-excluding-content.md +0 -0
  113. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-layout-analysis.md +0 -0
  114. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-working-with-regions.md +0 -0
  115. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/08-spatial-navigation.md +0 -0
  116. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/09-section-extraction.md +0 -0
  117. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/10-form-field-extraction.md +0 -0
  118. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/11-enhanced-table-processing.md +0 -0
  119. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/12-ocr-integration.md +0 -0
  120. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/13-semantic-search.md +0 -0
  121. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/14-categorizing-documents.md +0 -0
  122. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/visual-debugging/index.ipynb +0 -0
  123. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/visual-debugging/index.md +0 -0
  124. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/visual-debugging/region.png +0 -0
  125. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/__init__.py +0 -0
  126. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/__init__.py +0 -0
  127. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/__init__.py +0 -0
  128. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/base.py +0 -0
  129. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/docling.py +0 -0
  130. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/gemini.py +0 -0
  131. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
  132. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
  133. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/layout_options.py +0 -0
  134. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/paddle.py +0 -0
  135. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
  136. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/surya.py +0 -0
  137. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
  138. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/tatr.py +0 -0
  139. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/yolo.py +0 -0
  140. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/text_options.py +0 -0
  141. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/text_structure.py +0 -0
  142. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/utils.py +0 -0
  143. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/classification/manager.py +0 -0
  144. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/classification/mixin.py +0 -0
  145. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/classification/results.py +0 -0
  146. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/cli.py +0 -0
  147. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/collections/mixins.py +0 -0
  148. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/collections/pdf_collection.py +0 -0
  149. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/__init__.py +0 -0
  150. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/element_manager.py +0 -0
  151. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/pdf.py +0 -0
  152. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/__init__.py +0 -0
  153. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/elements.py +0 -0
  154. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/mixin.py +0 -0
  155. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/__init__.py +0 -0
  156. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/line.py +0 -0
  157. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/rect.py +0 -0
  158. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/export/mixin.py +0 -0
  159. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/__init__.py +0 -0
  160. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/base.py +0 -0
  161. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/data/__init__.py +0 -0
  162. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/data/pdf.ttf +0 -0
  163. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/data/sRGB.icc +0 -0
  164. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/hocr.py +0 -0
  165. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/hocr_font.py +0 -0
  166. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/original_pdf.py +0 -0
  167. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/paddleocr.py +0 -0
  168. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/searchable_pdf.py +0 -0
  169. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/extraction/manager.py +0 -0
  170. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/extraction/mixin.py +0 -0
  171. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/extraction/result.py +0 -0
  172. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/__init__.py +0 -0
  173. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/collections.py +0 -0
  174. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/element.py +0 -0
  175. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/flow.py +0 -0
  176. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/__init__.py +0 -0
  177. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine.py +0 -0
  178. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_doctr.py +0 -0
  179. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_easyocr.py +0 -0
  180. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_paddle.py +0 -0
  181. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_surya.py +0 -0
  182. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/ocr_factory.py +0 -0
  183. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/ocr_manager.py +0 -0
  184. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/ocr_options.py +0 -0
  185. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/utils.py +0 -0
  186. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/qa/__init__.py +0 -0
  187. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/qa/qa_result.py +0 -0
  188. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/__init__.py +0 -0
  189. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/lancedb_search_service.py +0 -0
  190. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/numpy_search_service.py +0 -0
  191. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/search_options.py +0 -0
  192. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/search_service_protocol.py +0 -0
  193. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/searchable_mixin.py +0 -0
  194. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/selectors/__init__.py +0 -0
  195. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/selectors/parser.py +0 -0
  196. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/__init__.py +0 -0
  197. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
  198. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/css/style.css +0 -0
  199. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/index.html +0 -0
  200. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/js/app.js +0 -0
  201. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/words.txt +0 -0
  202. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/__init__.py +0 -0
  203. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/debug.py +0 -0
  204. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/highlighting.py +0 -0
  205. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/identifiers.py +0 -0
  206. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/locks.py +0 -0
  207. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/packaging.py +0 -0
  208. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/reading_order.py +0 -0
  209. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/visualization.py +0 -0
  210. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/widgets/__init__.py +0 -0
  211. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/widgets/viewer.py +0 -0
  212. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/dependency_links.txt +0 -0
  213. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/entry_points.txt +0 -0
  214. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/top_level.txt +0 -0
  215. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/noxfile.py +0 -0
  216. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/.gitkeep +0 -0
  217. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/01-practice.pdf +0 -0
  218. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/0500000US42001.pdf +0 -0
  219. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/0500000US42007.pdf +0 -0
  220. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/2014 Statistics.pdf +0 -0
  221. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/2019 Statistics.pdf +0 -0
  222. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/30.pdf +0 -0
  223. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  224. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/anexo_edital_6604_1743480-table.pdf +0 -0
  225. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/appendix_fy2026.pdf +0 -0
  226. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/cia-doc.pdf +0 -0
  227. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/geometry.pdf +0 -0
  228. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/image.png +0 -0
  229. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/image.png.pdf +0 -0
  230. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/multicolumn.pdf +0 -0
  231. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/red.pdf +0 -0
  232. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-2.pdf +0 -0
  233. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-3.pdf +0 -0
  234. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-small.jpg +0 -0
  235. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-wide.jpg +0 -0
  236. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr.pdf +0 -0
  237. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny.pdf +0 -0
  238. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/word-counter.pdf +0 -0
  239. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/publish.sh +0 -0
  240. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/sample-screen.png +0 -0
  241. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/setup.cfg +0 -0
  242. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/test_install.sh +0 -0
  243. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/conftest.py +0 -0
  244. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/exporters/test_paddleocr_exporter.py +0 -0
  245. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_containment_geometry.py +0 -0
  246. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_elements.py +0 -0
  247. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_loading.py +0 -0
  248. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_spatial.py +0 -0
  249. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_text_extraction.py +0 -0
  250. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_loading_original.py +0 -0
  251. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_optional_deps.py +0 -0
  252. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_tutorials.py +0 -0
  253. {natural_pdf-0.1.23 → natural_pdf-0.1.24}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: natural-pdf
3
- Version: 0.1.23
3
+ Version: 0.1.24
4
4
  Summary: A more intuitive interface for working with PDFs
5
5
  Author-email: Jonathan Soma <jonathan.soma@gmail.com>
6
6
  License-Expression: MIT
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.9
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
+ Requires-Dist: markdown
14
15
  Requires-Dist: pandas
15
16
  Requires-Dist: pdfplumber
16
17
  Requires-Dist: colormath2
@@ -2,7 +2,7 @@
2
2
  "cells": [
3
3
  {
4
4
  "cell_type": "markdown",
5
- "id": "1f996be8",
5
+ "id": "7a7b153a",
6
6
  "metadata": {},
7
7
  "source": [
8
8
  "# Extract Clean Text Without Headers and Footers\n",
@@ -12,6 +12,7 @@
12
12
  "## The Problem\n",
13
13
  "\n",
14
14
  "PDFs often have repeated content on every page that you don't want:\n",
15
+ "\n",
15
16
  "- Company headers with logos and contact info\n",
16
17
  "- Page numbers and footers \n",
17
18
  "- \"CONFIDENTIAL\" watermarks\n",
@@ -27,13 +28,13 @@
27
28
  {
28
29
  "cell_type": "code",
29
30
  "execution_count": 1,
30
- "id": "2c480898",
31
+ "id": "4b445e12",
31
32
  "metadata": {
32
33
  "execution": {
33
- "iopub.execute_input": "2025-06-18T20:15:52.163345Z",
34
- "iopub.status.busy": "2025-06-18T20:15:52.163198Z",
35
- "iopub.status.idle": "2025-06-18T20:15:53.480466Z",
36
- "shell.execute_reply": "2025-06-18T20:15:53.480053Z"
34
+ "iopub.execute_input": "2025-06-18T22:38:58.176399Z",
35
+ "iopub.status.busy": "2025-06-18T22:38:58.176099Z",
36
+ "iopub.status.idle": "2025-06-18T22:38:59.808871Z",
37
+ "shell.execute_reply": "2025-06-18T22:38:59.808518Z"
37
38
  }
38
39
  },
39
40
  "outputs": [
@@ -65,7 +66,7 @@
65
66
  },
66
67
  {
67
68
  "cell_type": "markdown",
68
- "id": "6cd3e357",
69
+ "id": "09a3376a",
69
70
  "metadata": {},
70
71
  "source": [
71
72
  "## Exclude Specific Elements\n",
@@ -76,13 +77,13 @@
76
77
  {
77
78
  "cell_type": "code",
78
79
  "execution_count": 2,
79
- "id": "72918c74",
80
+ "id": "8b3dbafe",
80
81
  "metadata": {
81
82
  "execution": {
82
- "iopub.execute_input": "2025-06-18T20:15:53.482129Z",
83
- "iopub.status.busy": "2025-06-18T20:15:53.481941Z",
84
- "iopub.status.idle": "2025-06-18T20:15:53.487281Z",
85
- "shell.execute_reply": "2025-06-18T20:15:53.486884Z"
83
+ "iopub.execute_input": "2025-06-18T22:38:59.810631Z",
84
+ "iopub.status.busy": "2025-06-18T22:38:59.810426Z",
85
+ "iopub.status.idle": "2025-06-18T22:38:59.815644Z",
86
+ "shell.execute_reply": "2025-06-18T22:38:59.815329Z"
86
87
  }
87
88
  },
88
89
  "outputs": [
@@ -115,7 +116,7 @@
115
116
  },
116
117
  {
117
118
  "cell_type": "markdown",
118
- "id": "bf68ec3b",
119
+ "id": "8440ad60",
119
120
  "metadata": {},
120
121
  "source": [
121
122
  "## Apply Exclusions to All Pages\n",
@@ -126,13 +127,13 @@
126
127
  {
127
128
  "cell_type": "code",
128
129
  "execution_count": 3,
129
- "id": "a1bb3f07",
130
+ "id": "e8384235",
130
131
  "metadata": {
131
132
  "execution": {
132
- "iopub.execute_input": "2025-06-18T20:15:53.488771Z",
133
- "iopub.status.busy": "2025-06-18T20:15:53.488654Z",
134
- "iopub.status.idle": "2025-06-18T20:15:53.498685Z",
135
- "shell.execute_reply": "2025-06-18T20:15:53.498359Z"
133
+ "iopub.execute_input": "2025-06-18T22:38:59.817266Z",
134
+ "iopub.status.busy": "2025-06-18T22:38:59.817140Z",
135
+ "iopub.status.idle": "2025-06-18T22:38:59.830625Z",
136
+ "shell.execute_reply": "2025-06-18T22:38:59.830204Z"
136
137
  }
137
138
  },
138
139
  "outputs": [],
@@ -162,7 +163,7 @@
162
163
  },
163
164
  {
164
165
  "cell_type": "markdown",
165
- "id": "306b8fe3",
166
+ "id": "954a3a0f",
166
167
  "metadata": {},
167
168
  "source": [
168
169
  "## Remove Noise from Scanned Documents\n",
@@ -173,20 +174,20 @@
173
174
  {
174
175
  "cell_type": "code",
175
176
  "execution_count": 4,
176
- "id": "17c0cedb",
177
+ "id": "ced60e3b",
177
178
  "metadata": {
178
179
  "execution": {
179
- "iopub.execute_input": "2025-06-18T20:15:53.500235Z",
180
- "iopub.status.busy": "2025-06-18T20:15:53.500102Z",
181
- "iopub.status.idle": "2025-06-18T20:16:08.618376Z",
182
- "shell.execute_reply": "2025-06-18T20:16:08.610184Z"
180
+ "iopub.execute_input": "2025-06-18T22:38:59.832766Z",
181
+ "iopub.status.busy": "2025-06-18T22:38:59.832638Z",
182
+ "iopub.status.idle": "2025-06-18T22:39:11.855867Z",
183
+ "shell.execute_reply": "2025-06-18T22:39:11.855411Z"
183
184
  }
184
185
  },
185
186
  "outputs": [
186
187
  {
187
188
  "data": {
188
189
  "application/vnd.jupyter.widget-view+json": {
189
- "model_id": "c9cf1651db434731a910a7476f8e6a5c",
190
+ "model_id": "e9eb0ecfa6f7426689e78d2b231bd275",
190
191
  "version_major": 2,
191
192
  "version_minor": 0
192
193
  },
@@ -229,7 +230,7 @@
229
230
  },
230
231
  {
231
232
  "cell_type": "markdown",
232
- "id": "bdd93a5e",
233
+ "id": "15703602",
233
234
  "metadata": {},
234
235
  "source": [
235
236
  "## Handle Multi-Column Layouts\n",
@@ -240,13 +241,13 @@
240
241
  {
241
242
  "cell_type": "code",
242
243
  "execution_count": 5,
243
- "id": "bf7a7488",
244
+ "id": "86e7d78e",
244
245
  "metadata": {
245
246
  "execution": {
246
- "iopub.execute_input": "2025-06-18T20:16:08.621823Z",
247
- "iopub.status.busy": "2025-06-18T20:16:08.621499Z",
248
- "iopub.status.idle": "2025-06-18T20:16:08.629993Z",
249
- "shell.execute_reply": "2025-06-18T20:16:08.629209Z"
247
+ "iopub.execute_input": "2025-06-18T22:39:11.857709Z",
248
+ "iopub.status.busy": "2025-06-18T22:39:11.857491Z",
249
+ "iopub.status.idle": "2025-06-18T22:39:11.864896Z",
250
+ "shell.execute_reply": "2025-06-18T22:39:11.864609Z"
250
251
  }
251
252
  },
252
253
  "outputs": [],
@@ -264,7 +265,7 @@
264
265
  },
265
266
  {
266
267
  "cell_type": "markdown",
267
- "id": "86928804",
268
+ "id": "f8810b4a",
268
269
  "metadata": {},
269
270
  "source": [
270
271
  "## Visual Debugging\n",
@@ -275,13 +276,13 @@
275
276
  {
276
277
  "cell_type": "code",
277
278
  "execution_count": 6,
278
- "id": "b51dcfc5",
279
+ "id": "31f5270b",
279
280
  "metadata": {
280
281
  "execution": {
281
- "iopub.execute_input": "2025-06-18T20:16:08.632490Z",
282
- "iopub.status.busy": "2025-06-18T20:16:08.632311Z",
283
- "iopub.status.idle": "2025-06-18T20:16:08.672414Z",
284
- "shell.execute_reply": "2025-06-18T20:16:08.671270Z"
282
+ "iopub.execute_input": "2025-06-18T22:39:11.866356Z",
283
+ "iopub.status.busy": "2025-06-18T22:39:11.866260Z",
284
+ "iopub.status.idle": "2025-06-18T22:39:11.893575Z",
285
+ "shell.execute_reply": "2025-06-18T22:39:11.893289Z"
285
286
  }
286
287
  },
287
288
  "outputs": [
@@ -314,7 +315,7 @@
314
315
  },
315
316
  {
316
317
  "cell_type": "markdown",
317
- "id": "e20684d3",
318
+ "id": "a30c0fbd",
318
319
  "metadata": {},
319
320
  "source": [
320
321
  "## Compare Before and After"
@@ -323,13 +324,13 @@
323
324
  {
324
325
  "cell_type": "code",
325
326
  "execution_count": 7,
326
- "id": "b5a65578",
327
+ "id": "fda69817",
327
328
  "metadata": {
328
329
  "execution": {
329
- "iopub.execute_input": "2025-06-18T20:16:08.678101Z",
330
- "iopub.status.busy": "2025-06-18T20:16:08.677019Z",
331
- "iopub.status.idle": "2025-06-18T20:16:08.713381Z",
332
- "shell.execute_reply": "2025-06-18T20:16:08.712724Z"
330
+ "iopub.execute_input": "2025-06-18T22:39:11.894974Z",
331
+ "iopub.status.busy": "2025-06-18T22:39:11.894879Z",
332
+ "iopub.status.idle": "2025-06-18T22:39:11.913465Z",
333
+ "shell.execute_reply": "2025-06-18T22:39:11.913176Z"
333
334
  }
334
335
  },
335
336
  "outputs": [
@@ -355,7 +356,7 @@
355
356
  },
356
357
  {
357
358
  "cell_type": "markdown",
358
- "id": "30175f6f",
359
+ "id": "ba57b34d",
359
360
  "metadata": {
360
361
  "lines_to_next_cell": 0
361
362
  },
@@ -368,13 +369,13 @@
368
369
  {
369
370
  "cell_type": "code",
370
371
  "execution_count": 8,
371
- "id": "a6401f82",
372
+ "id": "022a2bc8",
372
373
  "metadata": {
373
374
  "execution": {
374
- "iopub.execute_input": "2025-06-18T20:16:08.716876Z",
375
- "iopub.status.busy": "2025-06-18T20:16:08.716393Z",
376
- "iopub.status.idle": "2025-06-18T20:16:08.725931Z",
377
- "shell.execute_reply": "2025-06-18T20:16:08.724275Z"
375
+ "iopub.execute_input": "2025-06-18T22:39:11.914788Z",
376
+ "iopub.status.busy": "2025-06-18T22:39:11.914678Z",
377
+ "iopub.status.idle": "2025-06-18T22:39:11.917338Z",
378
+ "shell.execute_reply": "2025-06-18T22:39:11.917074Z"
378
379
  }
379
380
  },
380
381
  "outputs": [
@@ -399,7 +400,7 @@
399
400
  },
400
401
  {
401
402
  "cell_type": "markdown",
402
- "id": "9b7c781b",
403
+ "id": "3ce0e740",
403
404
  "metadata": {
404
405
  "lines_to_next_cell": 0
405
406
  },
@@ -410,13 +411,13 @@
410
411
  {
411
412
  "cell_type": "code",
412
413
  "execution_count": 9,
413
- "id": "3daf1fad",
414
+ "id": "20528827",
414
415
  "metadata": {
415
416
  "execution": {
416
- "iopub.execute_input": "2025-06-18T20:16:08.734863Z",
417
- "iopub.status.busy": "2025-06-18T20:16:08.733502Z",
418
- "iopub.status.idle": "2025-06-18T20:16:08.746595Z",
419
- "shell.execute_reply": "2025-06-18T20:16:08.743942Z"
417
+ "iopub.execute_input": "2025-06-18T22:39:11.918678Z",
418
+ "iopub.status.busy": "2025-06-18T22:39:11.918574Z",
419
+ "iopub.status.idle": "2025-06-18T22:39:11.922988Z",
420
+ "shell.execute_reply": "2025-06-18T22:39:11.922738Z"
420
421
  }
421
422
  },
422
423
  "outputs": [],
@@ -435,7 +436,7 @@
435
436
  },
436
437
  {
437
438
  "cell_type": "markdown",
438
- "id": "81bde895",
439
+ "id": "17c92d0a",
439
440
  "metadata": {
440
441
  "lines_to_next_cell": 0
441
442
  },
@@ -446,13 +447,13 @@
446
447
  {
447
448
  "cell_type": "code",
448
449
  "execution_count": 10,
449
- "id": "7ca7cc84",
450
+ "id": "fd4a8869",
450
451
  "metadata": {
451
452
  "execution": {
452
- "iopub.execute_input": "2025-06-18T20:16:08.752229Z",
453
- "iopub.status.busy": "2025-06-18T20:16:08.751925Z",
454
- "iopub.status.idle": "2025-06-18T20:16:08.755722Z",
455
- "shell.execute_reply": "2025-06-18T20:16:08.755185Z"
453
+ "iopub.execute_input": "2025-06-18T22:39:11.924259Z",
454
+ "iopub.status.busy": "2025-06-18T22:39:11.924155Z",
455
+ "iopub.status.idle": "2025-06-18T22:39:11.926603Z",
456
+ "shell.execute_reply": "2025-06-18T22:39:11.926359Z"
456
457
  }
457
458
  },
458
459
  "outputs": [],
@@ -470,7 +471,7 @@
470
471
  },
471
472
  {
472
473
  "cell_type": "markdown",
473
- "id": "39143eed",
474
+ "id": "52a7e8d9",
474
475
  "metadata": {},
475
476
  "source": [
476
477
  "## When Things Go Wrong\n",
@@ -534,60 +535,25 @@
534
535
  "widgets": {
535
536
  "application/vnd.jupyter.widget-state+json": {
536
537
  "state": {
537
- "097bfb75fd0a446da84374ba7b85e11a": {
538
- "model_module": "@jupyter-widgets/base",
538
+ "1a1f51e5ec284f8290cc1694ca4a7220": {
539
+ "model_module": "@jupyter-widgets/controls",
539
540
  "model_module_version": "2.0.0",
540
- "model_name": "LayoutModel",
541
+ "model_name": "HTMLStyleModel",
541
542
  "state": {
542
- "_model_module": "@jupyter-widgets/base",
543
+ "_model_module": "@jupyter-widgets/controls",
543
544
  "_model_module_version": "2.0.0",
544
- "_model_name": "LayoutModel",
545
+ "_model_name": "HTMLStyleModel",
545
546
  "_view_count": null,
546
547
  "_view_module": "@jupyter-widgets/base",
547
548
  "_view_module_version": "2.0.0",
548
- "_view_name": "LayoutView",
549
- "align_content": null,
550
- "align_items": null,
551
- "align_self": null,
552
- "border_bottom": null,
553
- "border_left": null,
554
- "border_right": null,
555
- "border_top": null,
556
- "bottom": null,
557
- "display": null,
558
- "flex": null,
559
- "flex_flow": null,
560
- "grid_area": null,
561
- "grid_auto_columns": null,
562
- "grid_auto_flow": null,
563
- "grid_auto_rows": null,
564
- "grid_column": null,
565
- "grid_gap": null,
566
- "grid_row": null,
567
- "grid_template_areas": null,
568
- "grid_template_columns": null,
569
- "grid_template_rows": null,
570
- "height": null,
571
- "justify_content": null,
572
- "justify_items": null,
573
- "left": null,
574
- "margin": null,
575
- "max_height": null,
576
- "max_width": null,
577
- "min_height": null,
578
- "min_width": null,
579
- "object_fit": null,
580
- "object_position": null,
581
- "order": null,
582
- "overflow": null,
583
- "padding": null,
584
- "right": null,
585
- "top": null,
586
- "visibility": null,
587
- "width": null
549
+ "_view_name": "StyleView",
550
+ "background": null,
551
+ "description_width": "",
552
+ "font_size": null,
553
+ "text_color": null
588
554
  }
589
555
  },
590
- "15c46d207b404199a62c6419e8ef0f6d": {
556
+ "237bfe37f6fc4c439f2b63da28a550db": {
591
557
  "model_module": "@jupyter-widgets/controls",
592
558
  "model_module_version": "2.0.0",
593
559
  "model_name": "HTMLStyleModel",
@@ -605,33 +571,53 @@
605
571
  "text_color": null
606
572
  }
607
573
  },
608
- "1ef90e83e0d24ca6b8f5cc39d9b6b773": {
574
+ "50582330607a430d865a181181b5007f": {
609
575
  "model_module": "@jupyter-widgets/controls",
610
576
  "model_module_version": "2.0.0",
611
- "model_name": "FloatProgressModel",
577
+ "model_name": "HTMLModel",
612
578
  "state": {
613
579
  "_dom_classes": [],
614
580
  "_model_module": "@jupyter-widgets/controls",
615
581
  "_model_module_version": "2.0.0",
616
- "_model_name": "FloatProgressModel",
582
+ "_model_name": "HTMLModel",
617
583
  "_view_count": null,
618
584
  "_view_module": "@jupyter-widgets/controls",
619
585
  "_view_module_version": "2.0.0",
620
- "_view_name": "ProgressView",
621
- "bar_style": "",
586
+ "_view_name": "HTMLView",
622
587
  "description": "",
623
588
  "description_allow_html": false,
624
- "layout": "IPY_MODEL_a3cd9e0ba1654cf98d51dd3667d51383",
625
- "max": 1.0,
626
- "min": 0.0,
627
- "orientation": "horizontal",
628
- "style": "IPY_MODEL_c7d6485234b34d74aa7cd280db1646d0",
589
+ "layout": "IPY_MODEL_b91f16b6bc894b0a8be7a60b44f269e9",
590
+ "placeholder": "​",
591
+ "style": "IPY_MODEL_237bfe37f6fc4c439f2b63da28a550db",
629
592
  "tabbable": null,
630
593
  "tooltip": null,
631
- "value": 1.0
594
+ "value": "Rendering pages:   0%"
632
595
  }
633
596
  },
634
- "544573d91245420fb5414e19c105ad99": {
597
+ "5d305b10ac2a4300afaf36f2d2d943e6": {
598
+ "model_module": "@jupyter-widgets/controls",
599
+ "model_module_version": "2.0.0",
600
+ "model_name": "HTMLModel",
601
+ "state": {
602
+ "_dom_classes": [],
603
+ "_model_module": "@jupyter-widgets/controls",
604
+ "_model_module_version": "2.0.0",
605
+ "_model_name": "HTMLModel",
606
+ "_view_count": null,
607
+ "_view_module": "@jupyter-widgets/controls",
608
+ "_view_module_version": "2.0.0",
609
+ "_view_name": "HTMLView",
610
+ "description": "",
611
+ "description_allow_html": false,
612
+ "layout": "IPY_MODEL_646ab02e08a14a1ea56f7591045a4ca2",
613
+ "placeholder": "​",
614
+ "style": "IPY_MODEL_1a1f51e5ec284f8290cc1694ca4a7220",
615
+ "tabbable": null,
616
+ "tooltip": null,
617
+ "value": " 0/1 [00:00&lt;?, ?it/s]"
618
+ }
619
+ },
620
+ "646ab02e08a14a1ea56f7591045a4ca2": {
635
621
  "model_module": "@jupyter-widgets/base",
636
622
  "model_module_version": "2.0.0",
637
623
  "model_name": "LayoutModel",
@@ -684,7 +670,7 @@
684
670
  "width": null
685
671
  }
686
672
  },
687
- "727e684a0f744d72838ac50a118d732e": {
673
+ "7229c9c0dc3341fa82b086290967b4f8": {
688
674
  "model_module": "@jupyter-widgets/base",
689
675
  "model_module_version": "2.0.0",
690
676
  "model_name": "LayoutModel",
@@ -737,30 +723,33 @@
737
723
  "width": null
738
724
  }
739
725
  },
740
- "761c6efc2edb4b418afe189316228cff": {
726
+ "9d86d059f8ed49baaeb1c5a0da5e46d3": {
741
727
  "model_module": "@jupyter-widgets/controls",
742
728
  "model_module_version": "2.0.0",
743
- "model_name": "HTMLModel",
729
+ "model_name": "FloatProgressModel",
744
730
  "state": {
745
731
  "_dom_classes": [],
746
732
  "_model_module": "@jupyter-widgets/controls",
747
733
  "_model_module_version": "2.0.0",
748
- "_model_name": "HTMLModel",
734
+ "_model_name": "FloatProgressModel",
749
735
  "_view_count": null,
750
736
  "_view_module": "@jupyter-widgets/controls",
751
737
  "_view_module_version": "2.0.0",
752
- "_view_name": "HTMLView",
738
+ "_view_name": "ProgressView",
739
+ "bar_style": "",
753
740
  "description": "",
754
741
  "description_allow_html": false,
755
- "layout": "IPY_MODEL_544573d91245420fb5414e19c105ad99",
756
- "placeholder": "​",
757
- "style": "IPY_MODEL_15c46d207b404199a62c6419e8ef0f6d",
742
+ "layout": "IPY_MODEL_d2b7f5b8c87e4551999b5f31dcb70e5b",
743
+ "max": 1.0,
744
+ "min": 0.0,
745
+ "orientation": "horizontal",
746
+ "style": "IPY_MODEL_ba7920cb2e7149f7a1f44ce120abe075",
758
747
  "tabbable": null,
759
748
  "tooltip": null,
760
- "value": "Rendering pages:   0%"
749
+ "value": 1.0
761
750
  }
762
751
  },
763
- "a3cd9e0ba1654cf98d51dd3667d51383": {
752
+ "b91f16b6bc894b0a8be7a60b44f269e9": {
764
753
  "model_module": "@jupyter-widgets/base",
765
754
  "model_module_version": "2.0.0",
766
755
  "model_name": "LayoutModel",
@@ -813,7 +802,7 @@
813
802
  "width": null
814
803
  }
815
804
  },
816
- "c7d6485234b34d74aa7cd280db1646d0": {
805
+ "ba7920cb2e7149f7a1f44ce120abe075": {
817
806
  "model_module": "@jupyter-widgets/controls",
818
807
  "model_module_version": "2.0.0",
819
808
  "model_name": "ProgressStyleModel",
@@ -829,69 +818,81 @@
829
818
  "description_width": ""
830
819
  }
831
820
  },
832
- "c9cf1651db434731a910a7476f8e6a5c": {
833
- "model_module": "@jupyter-widgets/controls",
834
- "model_module_version": "2.0.0",
835
- "model_name": "HBoxModel",
836
- "state": {
837
- "_dom_classes": [],
838
- "_model_module": "@jupyter-widgets/controls",
839
- "_model_module_version": "2.0.0",
840
- "_model_name": "HBoxModel",
841
- "_view_count": null,
842
- "_view_module": "@jupyter-widgets/controls",
843
- "_view_module_version": "2.0.0",
844
- "_view_name": "HBoxView",
845
- "box_style": "",
846
- "children": [
847
- "IPY_MODEL_761c6efc2edb4b418afe189316228cff",
848
- "IPY_MODEL_1ef90e83e0d24ca6b8f5cc39d9b6b773",
849
- "IPY_MODEL_fea9ed1fcef84cbb98cdf5fe2a419aab"
850
- ],
851
- "layout": "IPY_MODEL_727e684a0f744d72838ac50a118d732e",
852
- "tabbable": null,
853
- "tooltip": null
854
- }
855
- },
856
- "ebbee06fd35f4d6b8f6f0ff25979b552": {
857
- "model_module": "@jupyter-widgets/controls",
821
+ "d2b7f5b8c87e4551999b5f31dcb70e5b": {
822
+ "model_module": "@jupyter-widgets/base",
858
823
  "model_module_version": "2.0.0",
859
- "model_name": "HTMLStyleModel",
824
+ "model_name": "LayoutModel",
860
825
  "state": {
861
- "_model_module": "@jupyter-widgets/controls",
826
+ "_model_module": "@jupyter-widgets/base",
862
827
  "_model_module_version": "2.0.0",
863
- "_model_name": "HTMLStyleModel",
828
+ "_model_name": "LayoutModel",
864
829
  "_view_count": null,
865
830
  "_view_module": "@jupyter-widgets/base",
866
831
  "_view_module_version": "2.0.0",
867
- "_view_name": "StyleView",
868
- "background": null,
869
- "description_width": "",
870
- "font_size": null,
871
- "text_color": null
832
+ "_view_name": "LayoutView",
833
+ "align_content": null,
834
+ "align_items": null,
835
+ "align_self": null,
836
+ "border_bottom": null,
837
+ "border_left": null,
838
+ "border_right": null,
839
+ "border_top": null,
840
+ "bottom": null,
841
+ "display": null,
842
+ "flex": null,
843
+ "flex_flow": null,
844
+ "grid_area": null,
845
+ "grid_auto_columns": null,
846
+ "grid_auto_flow": null,
847
+ "grid_auto_rows": null,
848
+ "grid_column": null,
849
+ "grid_gap": null,
850
+ "grid_row": null,
851
+ "grid_template_areas": null,
852
+ "grid_template_columns": null,
853
+ "grid_template_rows": null,
854
+ "height": null,
855
+ "justify_content": null,
856
+ "justify_items": null,
857
+ "left": null,
858
+ "margin": null,
859
+ "max_height": null,
860
+ "max_width": null,
861
+ "min_height": null,
862
+ "min_width": null,
863
+ "object_fit": null,
864
+ "object_position": null,
865
+ "order": null,
866
+ "overflow": null,
867
+ "padding": null,
868
+ "right": null,
869
+ "top": null,
870
+ "visibility": null,
871
+ "width": null
872
872
  }
873
873
  },
874
- "fea9ed1fcef84cbb98cdf5fe2a419aab": {
874
+ "e9eb0ecfa6f7426689e78d2b231bd275": {
875
875
  "model_module": "@jupyter-widgets/controls",
876
876
  "model_module_version": "2.0.0",
877
- "model_name": "HTMLModel",
877
+ "model_name": "HBoxModel",
878
878
  "state": {
879
879
  "_dom_classes": [],
880
880
  "_model_module": "@jupyter-widgets/controls",
881
881
  "_model_module_version": "2.0.0",
882
- "_model_name": "HTMLModel",
882
+ "_model_name": "HBoxModel",
883
883
  "_view_count": null,
884
884
  "_view_module": "@jupyter-widgets/controls",
885
885
  "_view_module_version": "2.0.0",
886
- "_view_name": "HTMLView",
887
- "description": "",
888
- "description_allow_html": false,
889
- "layout": "IPY_MODEL_097bfb75fd0a446da84374ba7b85e11a",
890
- "placeholder": "​",
891
- "style": "IPY_MODEL_ebbee06fd35f4d6b8f6f0ff25979b552",
886
+ "_view_name": "HBoxView",
887
+ "box_style": "",
888
+ "children": [
889
+ "IPY_MODEL_50582330607a430d865a181181b5007f",
890
+ "IPY_MODEL_9d86d059f8ed49baaeb1c5a0da5e46d3",
891
+ "IPY_MODEL_5d305b10ac2a4300afaf36f2d2d943e6"
892
+ ],
893
+ "layout": "IPY_MODEL_7229c9c0dc3341fa82b086290967b4f8",
892
894
  "tabbable": null,
893
- "tooltip": null,
894
- "value": " 0/1 [00:00&lt;?, ?it/s]"
895
+ "tooltip": null
895
896
  }
896
897
  }
897
898
  },
@@ -5,6 +5,7 @@ You've got a PDF where you need the main content, but every page has headers, fo
5
5
  ## The Problem
6
6
 
7
7
  PDFs often have repeated content on every page that you don't want:
8
+
8
9
  - Company headers with logos and contact info
9
10
  - Page numbers and footers
10
11
  - "CONFIDENTIAL" watermarks