natural-pdf 0.2.9__tar.gz → 0.2.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {natural_pdf-0.2.9/natural_pdf.egg-info → natural_pdf-0.2.11}/PKG-INFO +1 -1
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/guides.py +591 -3
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/cli.py +1 -1
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/region.py +61 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11/natural_pdf.egg-info}/PKG-INFO +1 -1
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides.py +135 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.cursor/rules/analysis_framework.mdc +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.cursor/rules/coding-style.mdc +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.cursor/rules/edit-md-instead-of-ipynb.mdc +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.cursor/rules/minimal-comments.mdc +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.cursor/rules/natural-pdf-overview.mdc +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.cursor/rules/user-friendly-library-code.mdc +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.github/workflows/ci.yml +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.github/workflows/docs.yml +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.github/workflows/nightly-tutorials.yml +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.gitignore +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/.pre-commit-config.yaml +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/01-execute_notebooks.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/02-run_all_tutorials.sh +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/CLAUDE.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/LICENSE +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/MANIFEST.in +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/README.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/audit_packaging.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/check_run_md.sh +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/api/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/favicon.png +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/favicon.svg +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/javascripts/custom.js +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/logo.svg +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/sample-screen.png +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/social-preview.png +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/social-preview.svg +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/assets/stylesheets/custom.css +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/categorizing-documents/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/data-extraction/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/describe/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/document-qa/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/element-selection/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/extracting-clean-text/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/finetuning/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/fix-messy-tables/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/fix-messy-tables/table_1.csv +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/fix-messy-tables/table_2.csv +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/fix-messy-tables/table_3.csv +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/installation/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/interactive-widget/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/layout-analysis/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/loops-and-groups/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/ocr/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/pdf-navigation/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/process-forms-and-invoices/extracted_form_data.csv +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/process-forms-and-invoices/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/quick-reference/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/reflowing-pages/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/regions/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tables/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/text-analysis/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/01-loading-and-extraction.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/02-finding-elements.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/03-extracting-blocks.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/04-table-extraction.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/05-excluding-content.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/06-document-qa.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/07-layout-analysis.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/07-working-with-regions.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/08-spatial-navigation.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/09-section-extraction.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/10-form-field-extraction.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/11-enhanced-table-processing.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/12-ocr-integration.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/13-semantic-search.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/tutorials/14-categorizing-documents.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/visual-debugging/index.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/docs/visual-debugging/region.png +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/mkdocs.yml +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/base.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/docling.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/gemini.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/layout_options.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/paddle.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/pdfplumber_table_finder.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/surya.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/table_structure_utils.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/tatr.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/layout/yolo.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/shape_detection_mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/text_options.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/text_structure.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/analyzers/utils.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/classification/manager.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/classification/mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/classification/results.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/collections/mixins.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/element_manager.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/highlighting_service.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/page.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/page_collection.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/page_groupby.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/pdf.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/pdf_collection.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/core/render_spec.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/describe/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/describe/base.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/describe/elements.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/describe/mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/describe/summary.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/base.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/element_collection.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/image.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/line.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/rect.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/elements/text.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/export/mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/base.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/data/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/data/pdf.ttf +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/data/sRGB.icc +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/hocr.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/hocr_font.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/original_pdf.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/paddleocr.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/exporters/searchable_pdf.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/extraction/manager.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/extraction/mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/extraction/result.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/flows/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/flows/collections.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/flows/element.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/flows/flow.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/flows/region.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/engine.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/engine_doctr.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/engine_easyocr.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/engine_paddle.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/engine_surya.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/ocr_factory.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/ocr_manager.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/ocr_options.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/ocr/utils.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/qa/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/qa/document_qa.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/qa/qa_result.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/search/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/search/lancedb_search_service.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/search/numpy_search_service.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/search/search_options.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/search/search_service_protocol.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/search/searchable_mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/selectors/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/selectors/parser.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/tables/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/tables/result.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/templates/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/templates/finetune/fine_tune_paddleocr.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/templates/spa/css/style.css +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/templates/spa/index.html +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/templates/spa/js/app.js +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/templates/spa/words.txt +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/text_mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/bidi_mirror.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/color_utils.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/debug.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/highlighting.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/identifiers.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/layout.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/locks.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/packaging.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/reading_order.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/text_extraction.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/utils/visualization.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/vision/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/vision/mixin.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/vision/results.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/vision/similarity.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/widgets/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf/widgets/viewer.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf.egg-info/SOURCES.txt +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf.egg-info/dependency_links.txt +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf.egg-info/entry_points.txt +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf.egg-info/requires.txt +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/natural_pdf.egg-info/top_level.txt +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/noxfile.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/memory_comparison.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/pdf_analyzer.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/performance_analysis.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/performance_results/image_heavy_snapshots.csv +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/performance_results/image_heavy_snapshots.json +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/performance_results/text_heavy_snapshots.csv +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/performance_results/text_heavy_snapshots.json +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/test_cleanup_methods.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/optimization/test_memory_fix.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/publish.sh +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/pyproject.toml +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/sample-screen.png +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/setup.cfg +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/conftest.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/exporters/test_paddleocr_exporter.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_annotate.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_arabic_performance.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_arabic_real_world.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_color_conversion.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_color_hex_display.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_core/test_containment_geometry.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_core/test_elements.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_core/test_loading.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_core/test_spatial.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_core/test_text_extraction.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_core/test_text_layer.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_crop_enhancements.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_crop_region_highlights.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_directional_defaults.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_dissolve.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_dissolve_cross_page_bug.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_dissolve_debug_issue.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_dissolve_real_world_issue.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_dissolve_single_elements.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_dissolve_vertical_offset_issue.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_document_qa.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_element_addition.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_element_collection_show_cols.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_element_collection_slicing.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_element_show_crop_highlights.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_empty_pseudo_class.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_exclusions.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_expand.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_extraction_error.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_extraction_mixin_fix.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_extraction_text_and_vision.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_extraction_working.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_find_similar.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_first_last_selectors.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_fix_get_sections_zero_height.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_flow_region_directional.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_get_sections_fix_comprehensive.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_get_sections_zero_height.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_groupby.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides_apply_exclusions.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides_apply_exclusions_simple.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides_extract_table.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides_extract_table_collections.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides_extract_table_exclusions.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides_extract_table_real.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_guides_integration.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_highlight_detection.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_highlight_detection_comprehensive.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_highlight_protocol.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_highlight_protocol_simple.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_highlight_regions.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_comprehensive.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_debug.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_final.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_final_verification.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_fix.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_mock.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_simple.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_types_pdf.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_verification.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_include_boundaries_with_real_text.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_loading_original.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_merge_connected.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_merge_connected_real_world.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_merge_method.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_multi_page_table_discovery.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_optional_deps.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_page_exclusion_lists.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_pdf_add_exclusion_elementcollection.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_region_show_crop_highlights.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_region_viewer.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_sections_end_only.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_sections_with_start_and_end.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_show_column_layout.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_show_edge_cases.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_show_exclusions.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_show_exclusions_feature.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_show_limit.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_skip_repeating_headers_multipage.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_slice_cache_reuse.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_slice_exclusion_fix.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_slice_exclusion_issue.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_slice_exclusion_mock.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_sliced_collection_exclusions.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_strikethrough_detection.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_table_result_header_mismatch.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_table_result_keep_blank.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_tiny_text_tables.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_tiny_text_tables_table.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_tutorials.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_underline_detection.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tests/test_update_text.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/todo/bad_pdf_analysis.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/todo/evaluation.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/IMPROVEMENTS_SUMMARY.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/LLM_NaturalPDF_CheatSheet.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/LLM_NaturalPDF_Workflows.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/README.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/__init__.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/analyser.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/collate_summaries.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/compile_attempts_markdown.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/eval_suite.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/evaluate_quality.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/export_enrichment_csv.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/extraction_decision_tree.md +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/llm_enrich.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/llm_enrich_with_retry.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/reporter.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/tools/bad_pdf_eval/utils.py +0 -0
- {natural_pdf-0.2.9 → natural_pdf-0.2.11}/uv.lock +0 -0
@@ -128,19 +128,59 @@ class GuidesList(UserList):
|
|
128
128
|
"""A list of guide coordinates that also provides methods for creating guides."""
|
129
129
|
|
130
130
|
def __init__(self, parent_guides: "Guides", axis: Literal["vertical", "horizontal"], data=None):
    """Create a guide list bound to its owning Guides object.

    Args:
        parent_guides: Guides object stored as this list's parent
        axis: Axis this list belongs to, "vertical" or "horizontal"
        data: Optional initial values; they are stored in sorted order
    """
    # Falsy input (None, empty sequence) yields an empty list.
    initial = sorted(data) if data else []
    super().__init__(initial)
    self._axis = axis
    self._parent = parent_guides
|
134
135
|
|
135
136
|
def __getitem__(self, i):
    """Return the item at index *i*, or a new list of the same class for a slice.

    Args:
        i: Integer index (negative values count from the end) or a slice

    Returns:
        The stored value for an integer index; for a slice, a new instance of
        this class sharing the same parent and axis.

    Raises:
        IndexError: If an integer index is out of range
    """
    if isinstance(i, slice):
        # Keep the parent/axis binding on the sliced result.
        return self.__class__(self._parent, self._axis, self.data[i])
    # The backing list already resolves negative indices and raises IndexError
    # when they are out of range. Converting them by hand turned e.g. -4 on a
    # 3-item list into -1 and silently returned the last element.
    return self.data[i]
|
143
147
|
|
148
|
+
def __setitem__(self, i, item):
    """Store *item* at position *i*, then re-sort so the list stays ordered."""
    values = self.data
    values[i] = item
    # The assigned value may not belong at position i, so restore sorted order.
    values.sort()
|
152
|
+
|
153
|
+
def append(self, item):
    """Add *item* to the list and re-sort so the list stays ordered."""
    values = self.data
    values.append(item)
    values.sort()
|
157
|
+
|
158
|
+
def extend(self, other):
    """Add every item from *other*, then re-sort so the list stays ordered.

    Args:
        other: Iterable of values to add; may be another UserList (including
            this list itself)
    """
    # Unwrap UserList arguments, as UserList.extend does. Extending the backing
    # list from the wrapper itself never terminates when other is self, because
    # the wrapper's index-based iterator keeps reaching the items being added.
    items = other.data if isinstance(other, UserList) else other
    self.data.extend(items)
    self.data.sort()
|
162
|
+
|
163
|
+
def insert(self, i, item):
    """Add *item* and re-sort so the list stays ordered.

    The requested position *i* is not used: the item is appended and the
    list is sorted, so the final position depends only on the value.
    """
    values = self.data
    values.append(item)
    values.sort()
|
167
|
+
|
168
|
+
def __iadd__(self, other):
    """Implement ``self += other``: add every item, re-sort, and return self.

    Args:
        other: Iterable of values to add; may be another UserList (including
            this list itself)

    Returns:
        This same list object, now containing the added items in sorted order.
    """
    # Unwrap UserList arguments, as UserList.__iadd__ does. Extending the backing
    # list from the wrapper itself never terminates when other is self, because
    # the wrapper's index-based iterator keeps reaching the items being added.
    items = other.data if isinstance(other, UserList) else other
    self.data.extend(items)
    self.data.sort()
    return self
|
173
|
+
|
174
|
+
@property
def data(self):
    """Return the underlying list held in ``self._data``."""
    return self._data

@data.setter
def data(self, value):
    """Replace the underlying list with a sorted copy of *value*.

    A falsy *value* (None, empty sequence) resets the list to empty.
    """
    if not value:
        self._data = []
        return
    self._data = sorted(value)
|
183
|
+
|
144
184
|
def from_content(
|
145
185
|
self,
|
146
186
|
markers: Union[str, List[str], "ElementCollection", Callable, None],
|
@@ -1842,6 +1882,370 @@ class Guides:
|
|
1842
1882
|
self.horizontal.pop(index)
|
1843
1883
|
return self
|
1844
1884
|
|
1885
|
+
# -------------------------------------------------------------------------
|
1886
|
+
# Region extraction properties
|
1887
|
+
# -------------------------------------------------------------------------
|
1888
|
+
|
1889
|
+
@property
def columns(self):
    """Return a ``_ColumnAccessor`` for these guides, enabling ``guides.columns[0]``."""
    accessor = _ColumnAccessor(self)
    return accessor
|
1893
|
+
|
1894
|
+
@property
def rows(self):
    """Return a ``_RowAccessor`` for these guides, enabling ``guides.rows[0]``."""
    accessor = _RowAccessor(self)
    return accessor
|
1898
|
+
|
1899
|
+
@property
def cells(self):
    """Return a ``_CellAccessor`` for these guides.

    Enables ``guides.cells[row][col]`` or ``guides.cells[row, col]``.
    """
    accessor = _CellAccessor(self)
    return accessor
|
1903
|
+
|
1904
|
+
# -------------------------------------------------------------------------
|
1905
|
+
# Region extraction methods (alternative API)
|
1906
|
+
# -------------------------------------------------------------------------
|
1907
|
+
|
1908
|
+
def column(self, index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Get a column region from the guides.

    A column spans two consecutive vertical guides horizontally; its vertical
    extent is taken from the bounds returned by ``self._get_context_bounds()``.

    Args:
        index: Column index (0-based)
        obj: Page or Region to create the column on (uses self.context if None)

    Returns:
        Region representing the specified column

    Raises:
        ValueError: If no target is available or bounds cannot be determined
        IndexError: If column index is out of range
        TypeError: If the target has neither a ``region`` nor a ``page`` attribute
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    # N guides delimit N - 1 columns; clamp so an empty guide list reports 0
    # columns in the error message instead of -1.
    column_count = max(len(self.vertical) - 1, 0)
    if index < 0 or index >= column_count:
        raise IndexError(f"Column index {index} out of range (have {column_count} columns)")

    # NOTE(review): bounds come from self._get_context_bounds(), not from obj --
    # confirm this is intended when obj is passed explicitly.
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    _, y0, _, y1 = bounds

    # Column boundaries are the two guides on either side of the column.
    x0 = self.vertical[index]
    x1 = self.vertical[index + 1]

    # Create region using absolute coordinates
    if hasattr(target, "region"):
        # Target has a region method (Page)
        return target.region(x0, y0, x1, y1)
    if hasattr(target, "page"):
        # Target is a Region, use its parent page
        # The coordinates from guides are already absolute
        return target.page.region(x0, y0, x1, y1)
    raise TypeError(f"Cannot create region on {type(target)}")
|
1951
|
+
|
1952
|
+
def row(self, index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Get a row region from the guides.

    A row spans two adjacent horizontal guides vertically; its left and
    right edges come from ``self._get_context_bounds()``.

    Args:
        index: Row index (0-based)
        obj: Page or Region to create the row on (uses self.context if None)

    Returns:
        Region representing the specified row

    Raises:
        ValueError: If no target is available or bounds cannot be determined
        IndexError: If row index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    # N guides delimit N-1 rows. Clamp at 0 so an empty guide list is
    # reported as "0 rows" rather than "-1 rows".
    num_rows = max(0, len(self.horizontal) - 1) if self.horizontal else 0
    if index < 0 or index >= num_rows:
        raise IndexError(f"Row index {index} out of range (have {num_rows} rows)")

    # Left/right edges come from the context bounds
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    x0, _, x1, _ = bounds

    # Vertical extent: the two guides that delimit this row
    y0 = self.horizontal[index]
    y1 = self.horizontal[index + 1]

    if hasattr(target, "region"):
        # Target can build regions itself
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Fall back to the object's page; coordinates are passed unchanged
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
1993
|
+
|
1994
|
+
def cell(self, row: int, col: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Get a cell region from the guides.

    The cell is bounded by vertical guides ``col`` / ``col + 1`` and
    horizontal guides ``row`` / ``row + 1``.

    Args:
        row: Row index (0-based)
        col: Column index (0-based)
        obj: Page or Region to create the cell on (uses self.context if None)

    Returns:
        Region representing the specified cell

    Raises:
        ValueError: If no target is available
        IndexError: If row or column index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    # N guides delimit N-1 cells per axis. Clamp at 0 so an empty guide list
    # is reported as a count of 0 rather than -1. The column is checked first.
    num_columns = max(0, len(self.vertical) - 1) if self.vertical else 0
    if col < 0 or col >= num_columns:
        raise IndexError(f"Column index {col} out of range (have {num_columns} columns)")
    num_rows = max(0, len(self.horizontal) - 1) if self.horizontal else 0
    if row < 0 or row >= num_rows:
        raise IndexError(f"Row index {row} out of range (have {num_rows} rows)")

    # Cell boundaries come straight from the surrounding guides
    x0 = self.vertical[col]
    x1 = self.vertical[col + 1]
    y0 = self.horizontal[row]
    y1 = self.horizontal[row + 1]

    if hasattr(target, "region"):
        # Target can build regions itself
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Fall back to the object's page; coordinates are passed unchanged
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
2036
|
+
|
2037
|
+
def left_of(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Get a region to the left of a vertical guide.

    The region runs from the left edge of the context bounds to the guide,
    and covers the full height of the context bounds.

    Args:
        guide_index: Vertical guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region to the left of the specified guide

    Raises:
        ValueError: If no target is available or bounds cannot be determined
        IndexError: If guide_index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    if not self.vertical or guide_index < 0 or guide_index >= len(self.vertical):
        raise IndexError(f"Guide index {guide_index} out of range")

    # Left edge and vertical extent come from the context bounds
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    x0, y0, _, y1 = bounds

    # The guide itself is the right edge
    x1 = self.vertical[guide_index]

    if hasattr(target, "region"):
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Same fallback as column()/row()/cell(): build the region on the
        # object's page when the object has no region() of its own.
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
2068
|
+
|
2069
|
+
def right_of(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Get a region to the right of a vertical guide.

    The region runs from the guide to the right edge of the context bounds,
    and covers the full height of the context bounds.

    Args:
        guide_index: Vertical guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region to the right of the specified guide

    Raises:
        ValueError: If no target is available or bounds cannot be determined
        IndexError: If guide_index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    if not self.vertical or guide_index < 0 or guide_index >= len(self.vertical):
        raise IndexError(f"Guide index {guide_index} out of range")

    # Right edge and vertical extent come from the context bounds
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    _, y0, x1, y1 = bounds

    # The guide itself is the left edge
    x0 = self.vertical[guide_index]

    if hasattr(target, "region"):
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Same fallback as column()/row()/cell(): build the region on the
        # object's page when the object has no region() of its own.
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
2100
|
+
|
2101
|
+
def above(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Get a region above a horizontal guide.

    The region runs from the top edge of the context bounds down to the
    guide, and covers the full width of the context bounds.

    Args:
        guide_index: Horizontal guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region above the specified guide

    Raises:
        ValueError: If no target is available or bounds cannot be determined
        IndexError: If guide_index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    if not self.horizontal or guide_index < 0 or guide_index >= len(self.horizontal):
        raise IndexError(f"Guide index {guide_index} out of range")

    # Top edge and horizontal extent come from the context bounds
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    x0, y0, x1, _ = bounds

    # The guide itself is the bottom edge
    y1 = self.horizontal[guide_index]

    if hasattr(target, "region"):
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Same fallback as column()/row()/cell(): build the region on the
        # object's page when the object has no region() of its own.
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
2132
|
+
|
2133
|
+
def below(self, guide_index: int, obj: Optional[Union["Page", "Region"]] = None) -> "Region":
    """
    Get a region below a horizontal guide.

    The region runs from the guide down to the bottom edge of the context
    bounds, and covers the full width of the context bounds.

    Args:
        guide_index: Horizontal guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region below the specified guide

    Raises:
        ValueError: If no target is available or bounds cannot be determined
        IndexError: If guide_index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    if not self.horizontal or guide_index < 0 or guide_index >= len(self.horizontal):
        raise IndexError(f"Guide index {guide_index} out of range")

    # Bottom edge and horizontal extent come from the context bounds
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    x0, _, x1, y1 = bounds

    # The guide itself is the top edge
    y0 = self.horizontal[guide_index]

    if hasattr(target, "region"):
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Same fallback as column()/row()/cell(): build the region on the
        # object's page when the object has no region() of its own.
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
2164
|
+
|
2165
|
+
def between_vertical(
    self, start_index: int, end_index: int, obj: Optional[Union["Page", "Region"]] = None
) -> "Region":
    """
    Get a region between two vertical guides.

    The region spans from guide ``start_index`` to guide ``end_index``
    horizontally and covers the full height of the context bounds.

    Args:
        start_index: Starting vertical guide index
        end_index: Ending vertical guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region between the specified guides

    Raises:
        ValueError: If no target is available, there are no vertical guides,
            start_index is not less than end_index, or bounds cannot be determined
        IndexError: If either index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    if not self.vertical:
        raise ValueError("No vertical guides available")
    guide_count = len(self.vertical)
    if start_index < 0 or start_index >= guide_count:
        raise IndexError(f"Start index {start_index} out of range")
    if end_index < 0 or end_index >= guide_count:
        raise IndexError(f"End index {end_index} out of range")
    if start_index >= end_index:
        raise ValueError("Start index must be less than end index")

    # Vertical extent comes from the context bounds
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    _, y0, _, y1 = bounds

    # Horizontal extent: the two chosen guides
    x0 = self.vertical[start_index]
    x1 = self.vertical[end_index]

    if hasattr(target, "region"):
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Same fallback as column()/row()/cell(): build the region on the
        # object's page when the object has no region() of its own.
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
2206
|
+
|
2207
|
+
def between_horizontal(
    self, start_index: int, end_index: int, obj: Optional[Union["Page", "Region"]] = None
) -> "Region":
    """
    Get a region between two horizontal guides.

    The region spans from guide ``start_index`` to guide ``end_index``
    vertically and covers the full width of the context bounds.

    Args:
        start_index: Starting horizontal guide index
        end_index: Ending horizontal guide index
        obj: Page or Region to create the region on (uses self.context if None)

    Returns:
        Region between the specified guides

    Raises:
        ValueError: If no target is available, there are no horizontal guides,
            start_index is not less than end_index, or bounds cannot be determined
        IndexError: If either index is out of range
        TypeError: If the target has neither a ``region`` method nor a ``page``
    """
    target = obj or self.context
    if target is None:
        raise ValueError("No context available for region creation")

    if not self.horizontal:
        raise ValueError("No horizontal guides available")
    guide_count = len(self.horizontal)
    if start_index < 0 or start_index >= guide_count:
        raise IndexError(f"Start index {start_index} out of range")
    if end_index < 0 or end_index >= guide_count:
        raise IndexError(f"End index {end_index} out of range")
    if start_index >= end_index:
        raise ValueError("Start index must be less than end index")

    # Horizontal extent comes from the context bounds
    bounds = self._get_context_bounds()
    if not bounds:
        raise ValueError("Could not determine bounds")
    x0, _, x1, _ = bounds

    # Vertical extent: the two chosen guides
    y0 = self.horizontal[start_index]
    y1 = self.horizontal[end_index]

    if hasattr(target, "region"):
        return target.region(x0, y0, x1, y1)
    elif hasattr(target, "page"):
        # Same fallback as column()/row()/cell(): build the region on the
        # object's page when the object has no region() of its own.
        return target.page.region(x0, y0, x1, y1)
    else:
        raise TypeError(f"Cannot create region on {type(target)}")
|
2248
|
+
|
1845
2249
|
# -------------------------------------------------------------------------
|
1846
2250
|
# Operations
|
1847
2251
|
# -------------------------------------------------------------------------
|
@@ -3825,3 +4229,187 @@ class Guides:
|
|
3825
4229
|
return "vertical"
|
3826
4230
|
else:
|
3827
4231
|
return "horizontal"
|
4232
|
+
|
4233
|
+
|
4234
|
+
# -------------------------------------------------------------------------
|
4235
|
+
# Accessor classes for property-based access
|
4236
|
+
# -------------------------------------------------------------------------
|
4237
|
+
|
4238
|
+
|
4239
|
+
class _ColumnAccessor:
    """Index-style view over the columns of a Guides object (``guides.columns[i]``)."""

    def __init__(self, guides: "Guides"):
        self._guides = guides

    def __len__(self):
        """Count the columns: one fewer than the vertical guides, never below zero."""
        column_count = len(self._guides.vertical) - 1
        return column_count if column_count > 0 else 0

    def __getitem__(self, index: Union[int, slice]) -> Union["Region", "ElementCollection"]:
        """Return one column for an integer index, or a collection of columns for a slice."""
        from natural_pdf.elements.element_collection import ElementCollection

        if not isinstance(index, slice):
            # A negative index counts back from the last column
            position = len(self) + index if index < 0 else index
            return self._guides.column(position)

        # slice.indices() resolves the slice against the current column count
        first, last, stride = index.indices(len(self))
        return ElementCollection([self._guides.column(i) for i in range(first, last, stride)])
|
4269
|
+
|
4270
|
+
|
4271
|
+
class _RowAccessor:
    """Index-style view over the rows of a Guides object (``guides.rows[i]``)."""

    def __init__(self, guides: "Guides"):
        self._guides = guides

    def __len__(self):
        """Count the rows: one fewer than the horizontal guides, never below zero."""
        row_count = len(self._guides.horizontal) - 1
        return row_count if row_count > 0 else 0

    def __getitem__(self, index: Union[int, slice]) -> Union["Region", "ElementCollection"]:
        """Return one row for an integer index, or a collection of rows for a slice."""
        from natural_pdf.elements.element_collection import ElementCollection

        if not isinstance(index, slice):
            # A negative index counts back from the last row
            position = len(self) + index if index < 0 else index
            return self._guides.row(position)

        # slice.indices() resolves the slice against the current row count
        first, last, stride = index.indices(len(self))
        return ElementCollection([self._guides.row(i) for i in range(first, last, stride)])
|
4301
|
+
|
4302
|
+
|
4303
|
+
class _CellAccessor:
    """Index-style view over cells: ``guides.cells[row][col]`` or ``guides.cells[row, col]``."""

    def __init__(self, guides: "Guides"):
        self._guides = guides

    def __getitem__(self, key) -> Union["Region", "_CellRowAccessor", "ElementCollection"]:
        """
        Look up one or more cells.

        Accepted keys:
        - ``cells[row, col]``  -> a single cell
        - ``cells[row][col]``  -> a single cell (via a per-row accessor)
        - ``cells[row, :]``    -> every cell of one row
        - ``cells[:, col]``    -> every cell of one column
        - ``cells[:, :]``      -> every cell
        - ``cells[row][:]``    -> every cell of one row (via a per-row accessor)
        - ``cells[a:b]``       -> every cell of the selected rows, flattened

        Raises:
            TypeError: If the key is not an int, a slice, or a 2-tuple.
        """
        from natural_pdf.elements.element_collection import ElementCollection

        guides = self._guides

        if isinstance(key, tuple) and len(key) == 2:
            row, col = key

            if not (isinstance(row, slice) or isinstance(col, slice)):
                # Two plain indices: resolve negatives, then fetch the one cell
                if row < 0:
                    row = len(guides.rows) + row
                if col < 0:
                    col = len(guides.columns) + col
                return guides.cell(row, col)

            # At least one axis is a slice: expand both axes to index sequences
            row_total = len(guides.rows)
            col_total = len(guides.columns)

            if isinstance(row, slice):
                row_span = range(*row.indices(row_total))
            else:
                row_span = [row_total + row if row < 0 else row]

            if isinstance(col, slice):
                col_span = range(*col.indices(col_total))
            else:
                col_span = [col_total + col if col < 0 else col]

            # Row-major order: all columns of a row before moving to the next row
            return ElementCollection([guides.cell(r, c) for r in row_span for c in col_span])

        if isinstance(key, slice):
            # A bare slice selects rows; their cells are returned flattened
            row_span = range(*key.indices(len(guides.rows)))
            return ElementCollection(
                [guides.cell(r, c) for r in row_span for c in range(len(guides.columns))]
            )

        if isinstance(key, int):
            # First half of cells[row][col]: hand back an accessor bound to the row
            row = len(guides.rows) + key if key < 0 else key
            return _CellRowAccessor(guides, row)

        raise TypeError(
            f"Cell indices must be integers, slices, or tuple of two integers/slices, got {type(key)}"
        )
|
4386
|
+
|
4387
|
+
|
4388
|
+
class _CellRowAccessor:
    """Second half of ``guides.cells[row][col]``: column lookup within a fixed row."""

    def __init__(self, guides: "Guides", row: int):
        self._guides = guides
        self._row = row

    def __getitem__(self, col: Union[int, slice]) -> Union["Region", "ElementCollection"]:
        """Return the cell at ``[row][col]``, or a collection of the row's cells for a slice."""
        from natural_pdf.elements.element_collection import ElementCollection

        guides = self._guides

        if not isinstance(col, slice):
            # A negative index counts back from the last column
            position = len(guides.columns) + col if col < 0 else col
            return guides.cell(self._row, position)

        # slice.indices() resolves the slice against the current column count
        first, last, stride = col.indices(len(guides.columns))
        return ElementCollection([guides.cell(self._row, c) for c in range(first, last, stride)])
|
@@ -16,7 +16,7 @@ INSTALL_RECIPES: Dict[str, list[str]] = {
|
|
16
16
|
"paddle": ["paddlepaddle>=3.0.0", "paddleocr>=3.0.1", "paddlex>=3.0.2", "pandas>=2.2.0"],
|
17
17
|
"numpy-high": ["numpy>=2.0"],
|
18
18
|
"numpy-low": ["numpy<1.27"],
|
19
|
-
"surya": ["surya-ocr
|
19
|
+
"surya": ["surya-ocr<0.15"],
|
20
20
|
"yolo": ["doclayout_yolo", "huggingface_hub>=0.29.3"],
|
21
21
|
"docling": ["docling"],
|
22
22
|
# light helpers
|