natural-pdf 0.1.23__tar.gz → 0.1.24__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {natural_pdf-0.1.23/natural_pdf.egg-info → natural_pdf-0.1.24}/PKG-INFO +2 -1
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/extracting-clean-text/index.ipynb +177 -176
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/extracting-clean-text/index.md +1 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/index.ipynb +197 -194
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/index.md +0 -24
- natural_pdf-0.1.24/docs/tutorials/01-loading-and-extraction.ipynb +312 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/02-finding-elements.ipynb +42 -42
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/03-extracting-blocks.ipynb +17 -17
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/04-table-extraction.ipynb +30 -30
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/05-excluding-content.ipynb +29 -29
- natural_pdf-0.1.24/docs/tutorials/06-document-qa.ipynb +445 -0
- natural_pdf-0.1.24/docs/tutorials/06-document-qa.md +96 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-layout-analysis.ipynb +42 -42
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-working-with-regions.ipynb +58 -58
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/08-spatial-navigation.ipynb +71 -71
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/09-section-extraction.ipynb +109 -109
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/10-form-field-extraction.ipynb +57 -57
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/11-enhanced-table-processing.ipynb +119 -119
- natural_pdf-0.1.24/docs/tutorials/12-ocr-integration.ipynb +4733 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/13-semantic-search.ipynb +128 -128
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/14-categorizing-documents.ipynb +505 -505
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/mkdocs.yml +15 -21
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/shape_detection_mixin.py +40 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/highlighting_service.py +4 -4
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/page.py +16 -2
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/base.py +11 -1
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/summary.py +26 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/base.py +2 -2
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/collections.py +139 -100
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/region.py +133 -12
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/text.py +15 -7
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/region.py +116 -1
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/qa/document_qa.py +162 -105
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/text_extraction.py +34 -14
- {natural_pdf-0.1.23 → natural_pdf-0.1.24/natural_pdf.egg-info}/PKG-INFO +2 -1
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/SOURCES.txt +1 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/requires.txt +1 -0
- natural_pdf-0.1.24/pdfs/1107231007033739008.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/needs-ocr.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pyproject.toml +1 -0
- natural_pdf-0.1.23/docs/tutorials/01-loading-and-extraction.ipynb +0 -312
- natural_pdf-0.1.23/docs/tutorials/06-document-qa.ipynb +0 -401
- natural_pdf-0.1.23/docs/tutorials/06-document-qa.md +0 -118
- natural_pdf-0.1.23/docs/tutorials/12-ocr-integration.ipynb +0 -4205
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/analysis_framework.mdc +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/coding-style.mdc +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/minimal-comments.mdc +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/natural-pdf-overview.mdc +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.cursor/rules/user-friendly-library-code.mdc +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.github/workflows/docs.yml +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.gitignore +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/.pre-commit-config.yaml +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/01-execute_notebooks.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/02-run_all_tutorials.sh +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/CLAUDE.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/LICENSE +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/MANIFEST.in +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/README.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/audit_packaging.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/check_run_md.sh +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/api/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/favicon.png +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/favicon.svg +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/javascripts/custom.js +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/logo.svg +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/sample-screen.png +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/social-preview.png +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/social-preview.svg +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/assets/stylesheets/custom.css +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/categorizing-documents/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/data-extraction/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/describe/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/describe/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/document-qa/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/document-qa/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/element-selection/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/element-selection/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/finetuning/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/table_1.csv +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/table_2.csv +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/fix-messy-tables/table_3.csv +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/installation/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/interactive-widget/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/interactive-widget/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/layout-analysis/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/layout-analysis/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/loops-and-groups/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/loops-and-groups/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/ocr/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/pdf-navigation/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/pdf-navigation/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/process-forms-and-invoices/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/process-forms-and-invoices/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/quick-reference/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/quick-reference/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/reflowing-pages/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/reflowing-pages/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/regions/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/regions/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tables/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tables/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/text-analysis/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/text-analysis/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/text-extraction/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/01-loading-and-extraction.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/02-finding-elements.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/03-extracting-blocks.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/04-table-extraction.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/05-excluding-content.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-layout-analysis.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/07-working-with-regions.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/08-spatial-navigation.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/09-section-extraction.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/10-form-field-extraction.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/11-enhanced-table-processing.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/12-ocr-integration.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/13-semantic-search.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/tutorials/14-categorizing-documents.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/visual-debugging/index.ipynb +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/visual-debugging/index.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/docs/visual-debugging/region.png +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/base.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/docling.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/gemini.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/layout_options.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/paddle.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/surya.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/tatr.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/layout/yolo.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/text_options.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/text_structure.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/analyzers/utils.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/classification/manager.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/classification/mixin.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/classification/results.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/cli.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/collections/mixins.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/collections/pdf_collection.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/element_manager.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/core/pdf.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/elements.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/describe/mixin.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/line.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/elements/rect.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/export/mixin.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/base.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/data/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/data/pdf.ttf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/data/sRGB.icc +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/hocr.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/hocr_font.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/original_pdf.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/paddleocr.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/exporters/searchable_pdf.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/extraction/manager.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/extraction/mixin.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/extraction/result.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/collections.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/element.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/flows/flow.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_doctr.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_easyocr.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_paddle.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/engine_surya.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/ocr_factory.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/ocr_manager.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/ocr_options.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/ocr/utils.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/qa/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/qa/qa_result.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/lancedb_search_service.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/numpy_search_service.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/search_options.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/search_service_protocol.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/search/searchable_mixin.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/selectors/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/selectors/parser.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/css/style.css +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/index.html +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/js/app.js +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/templates/spa/words.txt +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/debug.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/highlighting.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/identifiers.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/locks.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/packaging.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/reading_order.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/utils/visualization.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/widgets/__init__.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf/widgets/viewer.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/dependency_links.txt +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/entry_points.txt +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/natural_pdf.egg-info/top_level.txt +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/noxfile.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/.gitkeep +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/01-practice.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/0500000US42001.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/0500000US42007.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/2014 Statistics.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/2019 Statistics.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/30.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/anexo_edital_6604_1743480-table.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/appendix_fy2026.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/cia-doc.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/geometry.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/image.png +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/image.png.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/multicolumn.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/red.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-2.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-3.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-small.jpg +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr-wide.jpg +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny-ocr.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/tiny.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/pdfs/word-counter.pdf +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/publish.sh +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/sample-screen.png +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/setup.cfg +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/test_install.sh +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/conftest.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/exporters/test_paddleocr_exporter.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_containment_geometry.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_elements.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_loading.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_spatial.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_core/test_text_extraction.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_loading_original.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_optional_deps.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/tests/test_tutorials.py +0 -0
- {natural_pdf-0.1.23 → natural_pdf-0.1.24}/uv.lock +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.24
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
|
|
11
11
|
Requires-Python: >=3.9
|
12
12
|
Description-Content-Type: text/markdown
|
13
13
|
License-File: LICENSE
|
14
|
+
Requires-Dist: markdown
|
14
15
|
Requires-Dist: pandas
|
15
16
|
Requires-Dist: pdfplumber
|
16
17
|
Requires-Dist: colormath2
|
@@ -2,7 +2,7 @@
|
|
2
2
|
"cells": [
|
3
3
|
{
|
4
4
|
"cell_type": "markdown",
|
5
|
-
"id": "
|
5
|
+
"id": "7a7b153a",
|
6
6
|
"metadata": {},
|
7
7
|
"source": [
|
8
8
|
"# Extract Clean Text Without Headers and Footers\n",
|
@@ -12,6 +12,7 @@
|
|
12
12
|
"## The Problem\n",
|
13
13
|
"\n",
|
14
14
|
"PDFs often have repeated content on every page that you don't want:\n",
|
15
|
+
"\n",
|
15
16
|
"- Company headers with logos and contact info\n",
|
16
17
|
"- Page numbers and footers \n",
|
17
18
|
"- \"CONFIDENTIAL\" watermarks\n",
|
@@ -27,13 +28,13 @@
|
|
27
28
|
{
|
28
29
|
"cell_type": "code",
|
29
30
|
"execution_count": 1,
|
30
|
-
"id": "
|
31
|
+
"id": "4b445e12",
|
31
32
|
"metadata": {
|
32
33
|
"execution": {
|
33
|
-
"iopub.execute_input": "2025-06-
|
34
|
-
"iopub.status.busy": "2025-06-
|
35
|
-
"iopub.status.idle": "2025-06-
|
36
|
-
"shell.execute_reply": "2025-06-
|
34
|
+
"iopub.execute_input": "2025-06-18T22:38:58.176399Z",
|
35
|
+
"iopub.status.busy": "2025-06-18T22:38:58.176099Z",
|
36
|
+
"iopub.status.idle": "2025-06-18T22:38:59.808871Z",
|
37
|
+
"shell.execute_reply": "2025-06-18T22:38:59.808518Z"
|
37
38
|
}
|
38
39
|
},
|
39
40
|
"outputs": [
|
@@ -65,7 +66,7 @@
|
|
65
66
|
},
|
66
67
|
{
|
67
68
|
"cell_type": "markdown",
|
68
|
-
"id": "
|
69
|
+
"id": "09a3376a",
|
69
70
|
"metadata": {},
|
70
71
|
"source": [
|
71
72
|
"## Exclude Specific Elements\n",
|
@@ -76,13 +77,13 @@
|
|
76
77
|
{
|
77
78
|
"cell_type": "code",
|
78
79
|
"execution_count": 2,
|
79
|
-
"id": "
|
80
|
+
"id": "8b3dbafe",
|
80
81
|
"metadata": {
|
81
82
|
"execution": {
|
82
|
-
"iopub.execute_input": "2025-06-
|
83
|
-
"iopub.status.busy": "2025-06-
|
84
|
-
"iopub.status.idle": "2025-06-
|
85
|
-
"shell.execute_reply": "2025-06-
|
83
|
+
"iopub.execute_input": "2025-06-18T22:38:59.810631Z",
|
84
|
+
"iopub.status.busy": "2025-06-18T22:38:59.810426Z",
|
85
|
+
"iopub.status.idle": "2025-06-18T22:38:59.815644Z",
|
86
|
+
"shell.execute_reply": "2025-06-18T22:38:59.815329Z"
|
86
87
|
}
|
87
88
|
},
|
88
89
|
"outputs": [
|
@@ -115,7 +116,7 @@
|
|
115
116
|
},
|
116
117
|
{
|
117
118
|
"cell_type": "markdown",
|
118
|
-
"id": "
|
119
|
+
"id": "8440ad60",
|
119
120
|
"metadata": {},
|
120
121
|
"source": [
|
121
122
|
"## Apply Exclusions to All Pages\n",
|
@@ -126,13 +127,13 @@
|
|
126
127
|
{
|
127
128
|
"cell_type": "code",
|
128
129
|
"execution_count": 3,
|
129
|
-
"id": "
|
130
|
+
"id": "e8384235",
|
130
131
|
"metadata": {
|
131
132
|
"execution": {
|
132
|
-
"iopub.execute_input": "2025-06-
|
133
|
-
"iopub.status.busy": "2025-06-
|
134
|
-
"iopub.status.idle": "2025-06-
|
135
|
-
"shell.execute_reply": "2025-06-
|
133
|
+
"iopub.execute_input": "2025-06-18T22:38:59.817266Z",
|
134
|
+
"iopub.status.busy": "2025-06-18T22:38:59.817140Z",
|
135
|
+
"iopub.status.idle": "2025-06-18T22:38:59.830625Z",
|
136
|
+
"shell.execute_reply": "2025-06-18T22:38:59.830204Z"
|
136
137
|
}
|
137
138
|
},
|
138
139
|
"outputs": [],
|
@@ -162,7 +163,7 @@
|
|
162
163
|
},
|
163
164
|
{
|
164
165
|
"cell_type": "markdown",
|
165
|
-
"id": "
|
166
|
+
"id": "954a3a0f",
|
166
167
|
"metadata": {},
|
167
168
|
"source": [
|
168
169
|
"## Remove Noise from Scanned Documents\n",
|
@@ -173,20 +174,20 @@
|
|
173
174
|
{
|
174
175
|
"cell_type": "code",
|
175
176
|
"execution_count": 4,
|
176
|
-
"id": "
|
177
|
+
"id": "ced60e3b",
|
177
178
|
"metadata": {
|
178
179
|
"execution": {
|
179
|
-
"iopub.execute_input": "2025-06-
|
180
|
-
"iopub.status.busy": "2025-06-
|
181
|
-
"iopub.status.idle": "2025-06-
|
182
|
-
"shell.execute_reply": "2025-06-
|
180
|
+
"iopub.execute_input": "2025-06-18T22:38:59.832766Z",
|
181
|
+
"iopub.status.busy": "2025-06-18T22:38:59.832638Z",
|
182
|
+
"iopub.status.idle": "2025-06-18T22:39:11.855867Z",
|
183
|
+
"shell.execute_reply": "2025-06-18T22:39:11.855411Z"
|
183
184
|
}
|
184
185
|
},
|
185
186
|
"outputs": [
|
186
187
|
{
|
187
188
|
"data": {
|
188
189
|
"application/vnd.jupyter.widget-view+json": {
|
189
|
-
"model_id": "
|
190
|
+
"model_id": "e9eb0ecfa6f7426689e78d2b231bd275",
|
190
191
|
"version_major": 2,
|
191
192
|
"version_minor": 0
|
192
193
|
},
|
@@ -229,7 +230,7 @@
|
|
229
230
|
},
|
230
231
|
{
|
231
232
|
"cell_type": "markdown",
|
232
|
-
"id": "
|
233
|
+
"id": "15703602",
|
233
234
|
"metadata": {},
|
234
235
|
"source": [
|
235
236
|
"## Handle Multi-Column Layouts\n",
|
@@ -240,13 +241,13 @@
|
|
240
241
|
{
|
241
242
|
"cell_type": "code",
|
242
243
|
"execution_count": 5,
|
243
|
-
"id": "
|
244
|
+
"id": "86e7d78e",
|
244
245
|
"metadata": {
|
245
246
|
"execution": {
|
246
|
-
"iopub.execute_input": "2025-06-
|
247
|
-
"iopub.status.busy": "2025-06-
|
248
|
-
"iopub.status.idle": "2025-06-
|
249
|
-
"shell.execute_reply": "2025-06-
|
247
|
+
"iopub.execute_input": "2025-06-18T22:39:11.857709Z",
|
248
|
+
"iopub.status.busy": "2025-06-18T22:39:11.857491Z",
|
249
|
+
"iopub.status.idle": "2025-06-18T22:39:11.864896Z",
|
250
|
+
"shell.execute_reply": "2025-06-18T22:39:11.864609Z"
|
250
251
|
}
|
251
252
|
},
|
252
253
|
"outputs": [],
|
@@ -264,7 +265,7 @@
|
|
264
265
|
},
|
265
266
|
{
|
266
267
|
"cell_type": "markdown",
|
267
|
-
"id": "
|
268
|
+
"id": "f8810b4a",
|
268
269
|
"metadata": {},
|
269
270
|
"source": [
|
270
271
|
"## Visual Debugging\n",
|
@@ -275,13 +276,13 @@
|
|
275
276
|
{
|
276
277
|
"cell_type": "code",
|
277
278
|
"execution_count": 6,
|
278
|
-
"id": "
|
279
|
+
"id": "31f5270b",
|
279
280
|
"metadata": {
|
280
281
|
"execution": {
|
281
|
-
"iopub.execute_input": "2025-06-
|
282
|
-
"iopub.status.busy": "2025-06-
|
283
|
-
"iopub.status.idle": "2025-06-
|
284
|
-
"shell.execute_reply": "2025-06-
|
282
|
+
"iopub.execute_input": "2025-06-18T22:39:11.866356Z",
|
283
|
+
"iopub.status.busy": "2025-06-18T22:39:11.866260Z",
|
284
|
+
"iopub.status.idle": "2025-06-18T22:39:11.893575Z",
|
285
|
+
"shell.execute_reply": "2025-06-18T22:39:11.893289Z"
|
285
286
|
}
|
286
287
|
},
|
287
288
|
"outputs": [
|
@@ -314,7 +315,7 @@
|
|
314
315
|
},
|
315
316
|
{
|
316
317
|
"cell_type": "markdown",
|
317
|
-
"id": "
|
318
|
+
"id": "a30c0fbd",
|
318
319
|
"metadata": {},
|
319
320
|
"source": [
|
320
321
|
"## Compare Before and After"
|
@@ -323,13 +324,13 @@
|
|
323
324
|
{
|
324
325
|
"cell_type": "code",
|
325
326
|
"execution_count": 7,
|
326
|
-
"id": "
|
327
|
+
"id": "fda69817",
|
327
328
|
"metadata": {
|
328
329
|
"execution": {
|
329
|
-
"iopub.execute_input": "2025-06-
|
330
|
-
"iopub.status.busy": "2025-06-
|
331
|
-
"iopub.status.idle": "2025-06-
|
332
|
-
"shell.execute_reply": "2025-06-
|
330
|
+
"iopub.execute_input": "2025-06-18T22:39:11.894974Z",
|
331
|
+
"iopub.status.busy": "2025-06-18T22:39:11.894879Z",
|
332
|
+
"iopub.status.idle": "2025-06-18T22:39:11.913465Z",
|
333
|
+
"shell.execute_reply": "2025-06-18T22:39:11.913176Z"
|
333
334
|
}
|
334
335
|
},
|
335
336
|
"outputs": [
|
@@ -355,7 +356,7 @@
|
|
355
356
|
},
|
356
357
|
{
|
357
358
|
"cell_type": "markdown",
|
358
|
-
"id": "
|
359
|
+
"id": "ba57b34d",
|
359
360
|
"metadata": {
|
360
361
|
"lines_to_next_cell": 0
|
361
362
|
},
|
@@ -368,13 +369,13 @@
|
|
368
369
|
{
|
369
370
|
"cell_type": "code",
|
370
371
|
"execution_count": 8,
|
371
|
-
"id": "
|
372
|
+
"id": "022a2bc8",
|
372
373
|
"metadata": {
|
373
374
|
"execution": {
|
374
|
-
"iopub.execute_input": "2025-06-
|
375
|
-
"iopub.status.busy": "2025-06-
|
376
|
-
"iopub.status.idle": "2025-06-
|
377
|
-
"shell.execute_reply": "2025-06-
|
375
|
+
"iopub.execute_input": "2025-06-18T22:39:11.914788Z",
|
376
|
+
"iopub.status.busy": "2025-06-18T22:39:11.914678Z",
|
377
|
+
"iopub.status.idle": "2025-06-18T22:39:11.917338Z",
|
378
|
+
"shell.execute_reply": "2025-06-18T22:39:11.917074Z"
|
378
379
|
}
|
379
380
|
},
|
380
381
|
"outputs": [
|
@@ -399,7 +400,7 @@
|
|
399
400
|
},
|
400
401
|
{
|
401
402
|
"cell_type": "markdown",
|
402
|
-
"id": "
|
403
|
+
"id": "3ce0e740",
|
403
404
|
"metadata": {
|
404
405
|
"lines_to_next_cell": 0
|
405
406
|
},
|
@@ -410,13 +411,13 @@
|
|
410
411
|
{
|
411
412
|
"cell_type": "code",
|
412
413
|
"execution_count": 9,
|
413
|
-
"id": "
|
414
|
+
"id": "20528827",
|
414
415
|
"metadata": {
|
415
416
|
"execution": {
|
416
|
-
"iopub.execute_input": "2025-06-
|
417
|
-
"iopub.status.busy": "2025-06-
|
418
|
-
"iopub.status.idle": "2025-06-
|
419
|
-
"shell.execute_reply": "2025-06-
|
417
|
+
"iopub.execute_input": "2025-06-18T22:39:11.918678Z",
|
418
|
+
"iopub.status.busy": "2025-06-18T22:39:11.918574Z",
|
419
|
+
"iopub.status.idle": "2025-06-18T22:39:11.922988Z",
|
420
|
+
"shell.execute_reply": "2025-06-18T22:39:11.922738Z"
|
420
421
|
}
|
421
422
|
},
|
422
423
|
"outputs": [],
|
@@ -435,7 +436,7 @@
|
|
435
436
|
},
|
436
437
|
{
|
437
438
|
"cell_type": "markdown",
|
438
|
-
"id": "
|
439
|
+
"id": "17c92d0a",
|
439
440
|
"metadata": {
|
440
441
|
"lines_to_next_cell": 0
|
441
442
|
},
|
@@ -446,13 +447,13 @@
|
|
446
447
|
{
|
447
448
|
"cell_type": "code",
|
448
449
|
"execution_count": 10,
|
449
|
-
"id": "
|
450
|
+
"id": "fd4a8869",
|
450
451
|
"metadata": {
|
451
452
|
"execution": {
|
452
|
-
"iopub.execute_input": "2025-06-
|
453
|
-
"iopub.status.busy": "2025-06-
|
454
|
-
"iopub.status.idle": "2025-06-
|
455
|
-
"shell.execute_reply": "2025-06-
|
453
|
+
"iopub.execute_input": "2025-06-18T22:39:11.924259Z",
|
454
|
+
"iopub.status.busy": "2025-06-18T22:39:11.924155Z",
|
455
|
+
"iopub.status.idle": "2025-06-18T22:39:11.926603Z",
|
456
|
+
"shell.execute_reply": "2025-06-18T22:39:11.926359Z"
|
456
457
|
}
|
457
458
|
},
|
458
459
|
"outputs": [],
|
@@ -470,7 +471,7 @@
|
|
470
471
|
},
|
471
472
|
{
|
472
473
|
"cell_type": "markdown",
|
473
|
-
"id": "
|
474
|
+
"id": "52a7e8d9",
|
474
475
|
"metadata": {},
|
475
476
|
"source": [
|
476
477
|
"## When Things Go Wrong\n",
|
@@ -534,60 +535,25 @@
|
|
534
535
|
"widgets": {
|
535
536
|
"application/vnd.jupyter.widget-state+json": {
|
536
537
|
"state": {
|
537
|
-
"
|
538
|
-
"model_module": "@jupyter-widgets/
|
538
|
+
"1a1f51e5ec284f8290cc1694ca4a7220": {
|
539
|
+
"model_module": "@jupyter-widgets/controls",
|
539
540
|
"model_module_version": "2.0.0",
|
540
|
-
"model_name": "
|
541
|
+
"model_name": "HTMLStyleModel",
|
541
542
|
"state": {
|
542
|
-
"_model_module": "@jupyter-widgets/
|
543
|
+
"_model_module": "@jupyter-widgets/controls",
|
543
544
|
"_model_module_version": "2.0.0",
|
544
|
-
"_model_name": "
|
545
|
+
"_model_name": "HTMLStyleModel",
|
545
546
|
"_view_count": null,
|
546
547
|
"_view_module": "@jupyter-widgets/base",
|
547
548
|
"_view_module_version": "2.0.0",
|
548
|
-
"_view_name": "
|
549
|
-
"
|
550
|
-
"
|
551
|
-
"
|
552
|
-
"
|
553
|
-
"border_left": null,
|
554
|
-
"border_right": null,
|
555
|
-
"border_top": null,
|
556
|
-
"bottom": null,
|
557
|
-
"display": null,
|
558
|
-
"flex": null,
|
559
|
-
"flex_flow": null,
|
560
|
-
"grid_area": null,
|
561
|
-
"grid_auto_columns": null,
|
562
|
-
"grid_auto_flow": null,
|
563
|
-
"grid_auto_rows": null,
|
564
|
-
"grid_column": null,
|
565
|
-
"grid_gap": null,
|
566
|
-
"grid_row": null,
|
567
|
-
"grid_template_areas": null,
|
568
|
-
"grid_template_columns": null,
|
569
|
-
"grid_template_rows": null,
|
570
|
-
"height": null,
|
571
|
-
"justify_content": null,
|
572
|
-
"justify_items": null,
|
573
|
-
"left": null,
|
574
|
-
"margin": null,
|
575
|
-
"max_height": null,
|
576
|
-
"max_width": null,
|
577
|
-
"min_height": null,
|
578
|
-
"min_width": null,
|
579
|
-
"object_fit": null,
|
580
|
-
"object_position": null,
|
581
|
-
"order": null,
|
582
|
-
"overflow": null,
|
583
|
-
"padding": null,
|
584
|
-
"right": null,
|
585
|
-
"top": null,
|
586
|
-
"visibility": null,
|
587
|
-
"width": null
|
549
|
+
"_view_name": "StyleView",
|
550
|
+
"background": null,
|
551
|
+
"description_width": "",
|
552
|
+
"font_size": null,
|
553
|
+
"text_color": null
|
588
554
|
}
|
589
555
|
},
|
590
|
-
"
|
556
|
+
"237bfe37f6fc4c439f2b63da28a550db": {
|
591
557
|
"model_module": "@jupyter-widgets/controls",
|
592
558
|
"model_module_version": "2.0.0",
|
593
559
|
"model_name": "HTMLStyleModel",
|
@@ -605,33 +571,53 @@
|
|
605
571
|
"text_color": null
|
606
572
|
}
|
607
573
|
},
|
608
|
-
"
|
574
|
+
"50582330607a430d865a181181b5007f": {
|
609
575
|
"model_module": "@jupyter-widgets/controls",
|
610
576
|
"model_module_version": "2.0.0",
|
611
|
-
"model_name": "
|
577
|
+
"model_name": "HTMLModel",
|
612
578
|
"state": {
|
613
579
|
"_dom_classes": [],
|
614
580
|
"_model_module": "@jupyter-widgets/controls",
|
615
581
|
"_model_module_version": "2.0.0",
|
616
|
-
"_model_name": "
|
582
|
+
"_model_name": "HTMLModel",
|
617
583
|
"_view_count": null,
|
618
584
|
"_view_module": "@jupyter-widgets/controls",
|
619
585
|
"_view_module_version": "2.0.0",
|
620
|
-
"_view_name": "
|
621
|
-
"bar_style": "",
|
586
|
+
"_view_name": "HTMLView",
|
622
587
|
"description": "",
|
623
588
|
"description_allow_html": false,
|
624
|
-
"layout": "
|
625
|
-
"
|
626
|
-
"
|
627
|
-
"orientation": "horizontal",
|
628
|
-
"style": "IPY_MODEL_c7d6485234b34d74aa7cd280db1646d0",
|
589
|
+
"layout": "IPY_MODEL_b91f16b6bc894b0a8be7a60b44f269e9",
|
590
|
+
"placeholder": "",
|
591
|
+
"style": "IPY_MODEL_237bfe37f6fc4c439f2b63da28a550db",
|
629
592
|
"tabbable": null,
|
630
593
|
"tooltip": null,
|
631
|
-
"value":
|
594
|
+
"value": "Rendering pages: 0%"
|
632
595
|
}
|
633
596
|
},
|
634
|
-
"
|
597
|
+
"5d305b10ac2a4300afaf36f2d2d943e6": {
|
598
|
+
"model_module": "@jupyter-widgets/controls",
|
599
|
+
"model_module_version": "2.0.0",
|
600
|
+
"model_name": "HTMLModel",
|
601
|
+
"state": {
|
602
|
+
"_dom_classes": [],
|
603
|
+
"_model_module": "@jupyter-widgets/controls",
|
604
|
+
"_model_module_version": "2.0.0",
|
605
|
+
"_model_name": "HTMLModel",
|
606
|
+
"_view_count": null,
|
607
|
+
"_view_module": "@jupyter-widgets/controls",
|
608
|
+
"_view_module_version": "2.0.0",
|
609
|
+
"_view_name": "HTMLView",
|
610
|
+
"description": "",
|
611
|
+
"description_allow_html": false,
|
612
|
+
"layout": "IPY_MODEL_646ab02e08a14a1ea56f7591045a4ca2",
|
613
|
+
"placeholder": "",
|
614
|
+
"style": "IPY_MODEL_1a1f51e5ec284f8290cc1694ca4a7220",
|
615
|
+
"tabbable": null,
|
616
|
+
"tooltip": null,
|
617
|
+
"value": " 0/1 [00:00<?, ?it/s]"
|
618
|
+
}
|
619
|
+
},
|
620
|
+
"646ab02e08a14a1ea56f7591045a4ca2": {
|
635
621
|
"model_module": "@jupyter-widgets/base",
|
636
622
|
"model_module_version": "2.0.0",
|
637
623
|
"model_name": "LayoutModel",
|
@@ -684,7 +670,7 @@
|
|
684
670
|
"width": null
|
685
671
|
}
|
686
672
|
},
|
687
|
-
"
|
673
|
+
"7229c9c0dc3341fa82b086290967b4f8": {
|
688
674
|
"model_module": "@jupyter-widgets/base",
|
689
675
|
"model_module_version": "2.0.0",
|
690
676
|
"model_name": "LayoutModel",
|
@@ -737,30 +723,33 @@
|
|
737
723
|
"width": null
|
738
724
|
}
|
739
725
|
},
|
740
|
-
"
|
726
|
+
"9d86d059f8ed49baaeb1c5a0da5e46d3": {
|
741
727
|
"model_module": "@jupyter-widgets/controls",
|
742
728
|
"model_module_version": "2.0.0",
|
743
|
-
"model_name": "
|
729
|
+
"model_name": "FloatProgressModel",
|
744
730
|
"state": {
|
745
731
|
"_dom_classes": [],
|
746
732
|
"_model_module": "@jupyter-widgets/controls",
|
747
733
|
"_model_module_version": "2.0.0",
|
748
|
-
"_model_name": "
|
734
|
+
"_model_name": "FloatProgressModel",
|
749
735
|
"_view_count": null,
|
750
736
|
"_view_module": "@jupyter-widgets/controls",
|
751
737
|
"_view_module_version": "2.0.0",
|
752
|
-
"_view_name": "
|
738
|
+
"_view_name": "ProgressView",
|
739
|
+
"bar_style": "",
|
753
740
|
"description": "",
|
754
741
|
"description_allow_html": false,
|
755
|
-
"layout": "
|
756
|
-
"
|
757
|
-
"
|
742
|
+
"layout": "IPY_MODEL_d2b7f5b8c87e4551999b5f31dcb70e5b",
|
743
|
+
"max": 1.0,
|
744
|
+
"min": 0.0,
|
745
|
+
"orientation": "horizontal",
|
746
|
+
"style": "IPY_MODEL_ba7920cb2e7149f7a1f44ce120abe075",
|
758
747
|
"tabbable": null,
|
759
748
|
"tooltip": null,
|
760
|
-
"value":
|
749
|
+
"value": 1.0
|
761
750
|
}
|
762
751
|
},
|
763
|
-
"
|
752
|
+
"b91f16b6bc894b0a8be7a60b44f269e9": {
|
764
753
|
"model_module": "@jupyter-widgets/base",
|
765
754
|
"model_module_version": "2.0.0",
|
766
755
|
"model_name": "LayoutModel",
|
@@ -813,7 +802,7 @@
|
|
813
802
|
"width": null
|
814
803
|
}
|
815
804
|
},
|
816
|
-
"
|
805
|
+
"ba7920cb2e7149f7a1f44ce120abe075": {
|
817
806
|
"model_module": "@jupyter-widgets/controls",
|
818
807
|
"model_module_version": "2.0.0",
|
819
808
|
"model_name": "ProgressStyleModel",
|
@@ -829,69 +818,81 @@
|
|
829
818
|
"description_width": ""
|
830
819
|
}
|
831
820
|
},
|
832
|
-
"
|
833
|
-
"model_module": "@jupyter-widgets/
|
834
|
-
"model_module_version": "2.0.0",
|
835
|
-
"model_name": "HBoxModel",
|
836
|
-
"state": {
|
837
|
-
"_dom_classes": [],
|
838
|
-
"_model_module": "@jupyter-widgets/controls",
|
839
|
-
"_model_module_version": "2.0.0",
|
840
|
-
"_model_name": "HBoxModel",
|
841
|
-
"_view_count": null,
|
842
|
-
"_view_module": "@jupyter-widgets/controls",
|
843
|
-
"_view_module_version": "2.0.0",
|
844
|
-
"_view_name": "HBoxView",
|
845
|
-
"box_style": "",
|
846
|
-
"children": [
|
847
|
-
"IPY_MODEL_761c6efc2edb4b418afe189316228cff",
|
848
|
-
"IPY_MODEL_1ef90e83e0d24ca6b8f5cc39d9b6b773",
|
849
|
-
"IPY_MODEL_fea9ed1fcef84cbb98cdf5fe2a419aab"
|
850
|
-
],
|
851
|
-
"layout": "IPY_MODEL_727e684a0f744d72838ac50a118d732e",
|
852
|
-
"tabbable": null,
|
853
|
-
"tooltip": null
|
854
|
-
}
|
855
|
-
},
|
856
|
-
"ebbee06fd35f4d6b8f6f0ff25979b552": {
|
857
|
-
"model_module": "@jupyter-widgets/controls",
|
821
|
+
"d2b7f5b8c87e4551999b5f31dcb70e5b": {
|
822
|
+
"model_module": "@jupyter-widgets/base",
|
858
823
|
"model_module_version": "2.0.0",
|
859
|
-
"model_name": "
|
824
|
+
"model_name": "LayoutModel",
|
860
825
|
"state": {
|
861
|
-
"_model_module": "@jupyter-widgets/
|
826
|
+
"_model_module": "@jupyter-widgets/base",
|
862
827
|
"_model_module_version": "2.0.0",
|
863
|
-
"_model_name": "
|
828
|
+
"_model_name": "LayoutModel",
|
864
829
|
"_view_count": null,
|
865
830
|
"_view_module": "@jupyter-widgets/base",
|
866
831
|
"_view_module_version": "2.0.0",
|
867
|
-
"_view_name": "
|
868
|
-
"
|
869
|
-
"
|
870
|
-
"
|
871
|
-
"
|
832
|
+
"_view_name": "LayoutView",
|
833
|
+
"align_content": null,
|
834
|
+
"align_items": null,
|
835
|
+
"align_self": null,
|
836
|
+
"border_bottom": null,
|
837
|
+
"border_left": null,
|
838
|
+
"border_right": null,
|
839
|
+
"border_top": null,
|
840
|
+
"bottom": null,
|
841
|
+
"display": null,
|
842
|
+
"flex": null,
|
843
|
+
"flex_flow": null,
|
844
|
+
"grid_area": null,
|
845
|
+
"grid_auto_columns": null,
|
846
|
+
"grid_auto_flow": null,
|
847
|
+
"grid_auto_rows": null,
|
848
|
+
"grid_column": null,
|
849
|
+
"grid_gap": null,
|
850
|
+
"grid_row": null,
|
851
|
+
"grid_template_areas": null,
|
852
|
+
"grid_template_columns": null,
|
853
|
+
"grid_template_rows": null,
|
854
|
+
"height": null,
|
855
|
+
"justify_content": null,
|
856
|
+
"justify_items": null,
|
857
|
+
"left": null,
|
858
|
+
"margin": null,
|
859
|
+
"max_height": null,
|
860
|
+
"max_width": null,
|
861
|
+
"min_height": null,
|
862
|
+
"min_width": null,
|
863
|
+
"object_fit": null,
|
864
|
+
"object_position": null,
|
865
|
+
"order": null,
|
866
|
+
"overflow": null,
|
867
|
+
"padding": null,
|
868
|
+
"right": null,
|
869
|
+
"top": null,
|
870
|
+
"visibility": null,
|
871
|
+
"width": null
|
872
872
|
}
|
873
873
|
},
|
874
|
-
"
|
874
|
+
"e9eb0ecfa6f7426689e78d2b231bd275": {
|
875
875
|
"model_module": "@jupyter-widgets/controls",
|
876
876
|
"model_module_version": "2.0.0",
|
877
|
-
"model_name": "
|
877
|
+
"model_name": "HBoxModel",
|
878
878
|
"state": {
|
879
879
|
"_dom_classes": [],
|
880
880
|
"_model_module": "@jupyter-widgets/controls",
|
881
881
|
"_model_module_version": "2.0.0",
|
882
|
-
"_model_name": "
|
882
|
+
"_model_name": "HBoxModel",
|
883
883
|
"_view_count": null,
|
884
884
|
"_view_module": "@jupyter-widgets/controls",
|
885
885
|
"_view_module_version": "2.0.0",
|
886
|
-
"_view_name": "
|
887
|
-
"
|
888
|
-
"
|
889
|
-
|
890
|
-
|
891
|
-
|
886
|
+
"_view_name": "HBoxView",
|
887
|
+
"box_style": "",
|
888
|
+
"children": [
|
889
|
+
"IPY_MODEL_50582330607a430d865a181181b5007f",
|
890
|
+
"IPY_MODEL_9d86d059f8ed49baaeb1c5a0da5e46d3",
|
891
|
+
"IPY_MODEL_5d305b10ac2a4300afaf36f2d2d943e6"
|
892
|
+
],
|
893
|
+
"layout": "IPY_MODEL_7229c9c0dc3341fa82b086290967b4f8",
|
892
894
|
"tabbable": null,
|
893
|
-
"tooltip": null
|
894
|
-
"value": " 0/1 [00:00<?, ?it/s]"
|
895
|
+
"tooltip": null
|
895
896
|
}
|
896
897
|
}
|
897
898
|
},
|
@@ -5,6 +5,7 @@ You've got a PDF where you need the main content, but every page has headers, fo
|
|
5
5
|
## The Problem
|
6
6
|
|
7
7
|
PDFs often have repeated content on every page that you don't want:
|
8
|
+
|
8
9
|
- Company headers with logos and contact info
|
9
10
|
- Page numbers and footers
|
10
11
|
- "CONFIDENTIAL" watermarks
|