natural-pdf 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/.gitignore +5 -1
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/PKG-INFO +14 -1
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/README.md +4 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/regions/index.ipynb +124 -158
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/regions/index.md +2 -3
- natural_pdf-0.1.3/docs/tutorials/01-loading-and-extraction.ipynb +1658 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/02-finding-elements.ipynb +43 -47
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/03-extracting-blocks.ipynb +18 -22
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/04-table-extraction.ipynb +13 -17
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/05-excluding-content.ipynb +66 -39
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/06-document-qa.ipynb +29 -33
- natural_pdf-0.1.3/docs/tutorials/07-layout-analysis.ipynb +260 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/07-working-with-regions.ipynb +49 -53
- natural_pdf-0.1.3/docs/tutorials/08-spatial-navigation.ipynb +508 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/09-section-extraction.ipynb +98 -102
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/10-form-field-extraction.ipynb +51 -55
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/11-enhanced-table-processing.ipynb +7 -11
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/12-ocr-integration.ipynb +173 -65
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/12-ocr-integration.md +32 -0
- natural_pdf-0.1.3/docs/tutorials/13-semantic-search.ipynb +1908 -0
- natural_pdf-0.1.3/docs/tutorials/13-semantic-search.md +77 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/mkdocs.yml +2 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/__init__.py +33 -1
- natural_pdf-0.1.3/natural_pdf/collections/pdf_collection.py +259 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/page.py +97 -69
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/pdf.py +382 -171
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/region.py +3 -1
- natural_pdf-0.1.3/natural_pdf/exporters/__init__.py +1 -0
- natural_pdf-0.1.3/natural_pdf/exporters/searchable_pdf.py +252 -0
- natural_pdf-0.1.3/natural_pdf/search/__init__.py +94 -0
- natural_pdf-0.1.3/natural_pdf/search/haystack_search_service.py +520 -0
- natural_pdf-0.1.3/natural_pdf/search/haystack_utils.py +386 -0
- natural_pdf-0.1.3/natural_pdf/search/search_options.py +72 -0
- natural_pdf-0.1.3/natural_pdf/search/search_service_protocol.py +189 -0
- natural_pdf-0.1.3/natural_pdf/search/searchable_mixin.py +464 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/PKG-INFO +14 -1
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/SOURCES.txt +11 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/requires.txt +10 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pyproject.toml +12 -2
- natural_pdf-0.1.2/docs/tutorials/01-loading-and-extraction.ipynb +0 -1137
- natural_pdf-0.1.2/docs/tutorials/07-layout-analysis.ipynb +0 -264
- natural_pdf-0.1.2/docs/tutorials/08-spatial-navigation.ipynb +0 -512
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/.github/workflows/docs.yml +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/CLAUDE.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/LICENSE +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/MANIFEST.in +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/check_run_md.sh +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/api/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/favicon.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/favicon.svg +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/javascripts/custom.js +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/logo.svg +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/sample-screen.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/social-preview.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/social-preview.svg +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/assets/stylesheets/custom.css +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/document-qa/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/document-qa/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/element-selection/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/element-selection/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/installation/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/interactive-widget/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/interactive-widget/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/layout-analysis/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/layout-analysis/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/ocr/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/pdf-navigation/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/pdf-navigation/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tables/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tables/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-analysis/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-analysis/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-extraction/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/text-extraction/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/01-loading-and-extraction.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/02-finding-elements.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/03-extracting-blocks.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/04-table-extraction.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/05-excluding-content.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/06-document-qa.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/07-layout-analysis.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/07-working-with-regions.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/08-spatial-navigation.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/09-section-extraction.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/10-form-field-extraction.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/tutorials/11-enhanced-table-processing.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/visual-debugging/index.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/visual-debugging/index.md +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/docs/visual-debugging/region.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/execute_notebooks.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/base.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/docling.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/layout_options.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/paddle.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/surya.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/tatr.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/layout/yolo.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/text_options.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/text_structure.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/analyzers/utils.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/element_manager.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/core/highlighting_service.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/base.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/collections.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/line.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/rect.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/elements/text.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine_easyocr.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine_paddle.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/engine_surya.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/ocr_manager.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/ocr/ocr_options.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/qa/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/qa/document_qa.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/selectors/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/selectors/parser.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/templates/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/templates/ocr_debug.html +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/highlighting.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/reading_order.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/utils/visualization.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/widgets/__init__.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/widgets/frontend/viewer.js +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf/widgets/viewer.py +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/dependency_links.txt +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/natural_pdf.egg-info/top_level.txt +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/notebooks/Examples.ipynb +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/all_detected_regions.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/all_elements.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/basic_highlighting.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/chainable_layout.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/chained_analysis.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/color_names.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/color_names_with_boxes.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/conf_display_highlight_all.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/conf_display_highlight_layout.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/conf_display_layout_only.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/confidence_color_coded.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/debug_page_image.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/detected_table.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/dimension_analysis.txt +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/direct_ocr_debug.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/easyocr_debug_input.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/easyocr_results.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/easyocr_test_input.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/exclusion_optimization_regions.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/explicit_confidence_display.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/footer_overlap_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_styles.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_with_all_layouts.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_with_attrs.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_all_with_yolo.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_by_confidence.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_1.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_2.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_3.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_color_test_4.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_layout_method.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_multiple.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_no_attrs.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_region.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_single.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_specific_types.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_specific_types_with_boxes.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_specific_types_with_tables.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_colors.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_annotated.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_with_structure.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_with_structure_yolo.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_test_individual_with_tables.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/highlight_with_attrs.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_conf_default.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_conf_high.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_detection.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test2.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test3.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/layout_fix_test4.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/model_comparison.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/multiple_attributes_display.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_confidence_visualization.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_debug.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_debug_page.html +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_highlight_all_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_highlight_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_highlighted.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_simplified.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_threshold_comparison.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_visualization_clean.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_visualization_highlights.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/ocr_visualization_text.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_detection.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_polygons.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_sources.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_with_text.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddle_layout_without_text.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddleocr_highlights.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddleocr_results.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/paddleocr_test_input.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/page_1_for_ocr.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/page_4_for_ocr.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_exclusion_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_management_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_cropped.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_debug.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_full_page.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/region_ocr_highlighted.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/spatial_navigation.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/standard_highlight_all.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_no_ocr.csv +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_structure.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_structure_detail.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/table_with_ocr.csv +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_cells_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_ocr_table_test.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_regions.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/tatr_regions.txt +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/text_styles.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/titles_only.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_1200px.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_800px.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_default.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/width_with_scale.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/yolo_regions.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/output/yolo_regions.txt +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/.gitkeep +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/01-practice.pdf +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/0500000US42001.pdf +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/0500000US42007.pdf +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/2014 Statistics.pdf +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/2019 Statistics.pdf +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/pdfs/needs-ocr.pdf +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/publish.sh +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/run_all_tutorials.sh +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/sample-screen.png +0 -0
- {natural_pdf-0.1.2 → natural_pdf-0.1.3}/setup.cfg +0 -0
@@ -5,7 +5,11 @@ docs/tutorials/pdfs
|
|
5
5
|
install.sh
|
6
6
|
notebooks/Examples.md
|
7
7
|
transcript.md
|
8
|
-
|
8
|
+
natural_pdf_index
|
9
|
+
results
|
10
|
+
docs/tutorials/needs-ocr-searchable.pdf
|
11
|
+
sample.py
|
12
|
+
sample2.py
|
9
13
|
|
10
14
|
# Created by https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
|
11
15
|
# Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,visualstudiocode,jupyternotebooks
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.2
|
3
|
+
Version: 0.1.3
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -20,8 +20,14 @@ Requires-Dist: torch>=2.0.0
|
|
20
20
|
Requires-Dist: torchvision>=0.15.0
|
21
21
|
Requires-Dist: transformers>=4.30.0
|
22
22
|
Requires-Dist: huggingface_hub>=0.19.0
|
23
|
+
Requires-Dist: ocrmypdf>=16.0.0
|
24
|
+
Requires-Dist: pikepdf>=10.0.0
|
23
25
|
Provides-Extra: interactive
|
24
26
|
Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
|
27
|
+
Provides-Extra: haystack
|
28
|
+
Requires-Dist: haystack-ai>=2.0.0b5; extra == "haystack"
|
29
|
+
Requires-Dist: chroma-haystack; extra == "haystack"
|
30
|
+
Requires-Dist: sentence-transformers; extra == "haystack"
|
25
31
|
Provides-Extra: easyocr
|
26
32
|
Requires-Dist: easyocr; extra == "easyocr"
|
27
33
|
Provides-Extra: paddle
|
@@ -39,6 +45,9 @@ Requires-Dist: paddlepaddle; extra == "all"
|
|
39
45
|
Requires-Dist: paddleocr; extra == "all"
|
40
46
|
Requires-Dist: doclayout_yolo; extra == "all"
|
41
47
|
Requires-Dist: surya-ocr; extra == "all"
|
48
|
+
Requires-Dist: haystack-ai>=2.0.0b5; extra == "all"
|
49
|
+
Requires-Dist: chroma-haystack; extra == "all"
|
50
|
+
Requires-Dist: sentence-transformers; extra == "all"
|
42
51
|
Dynamic: license-file
|
43
52
|
|
44
53
|
# Natural PDF
|
@@ -69,6 +78,9 @@ pip install natural-pdf[paddle]
|
|
69
78
|
# Example: Install with interactive viewer support
|
70
79
|
pip install natural-pdf[interactive]
|
71
80
|
|
81
|
+
# Example: Install with semantic search support (Haystack)
|
82
|
+
pip install natural-pdf[haystack]
|
83
|
+
|
72
84
|
# Install everything
|
73
85
|
pip install natural-pdf[all]
|
74
86
|
```
|
@@ -117,6 +129,7 @@ Natural PDF offers a range of features for working with PDFs:
|
|
117
129
|
* **OCR Integration:** Extract text from scanned documents using engines like EasyOCR, PaddleOCR, or Surya.
|
118
130
|
* **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using AI models.
|
119
131
|
* **Document QA:** Ask natural language questions about your document's content.
|
132
|
+
* **Semantic Search:** Index PDFs and find relevant pages or documents based on semantic meaning using Haystack.
|
120
133
|
* **Visual Debugging:** Highlight elements and use an interactive viewer or save images to understand your selections.
|
121
134
|
|
122
135
|
## Learn More
|
@@ -26,6 +26,9 @@ pip install natural-pdf[paddle]
|
|
26
26
|
# Example: Install with interactive viewer support
|
27
27
|
pip install natural-pdf[interactive]
|
28
28
|
|
29
|
+
# Example: Install with semantic search support (Haystack)
|
30
|
+
pip install natural-pdf[haystack]
|
31
|
+
|
29
32
|
# Install everything
|
30
33
|
pip install natural-pdf[all]
|
31
34
|
```
|
@@ -74,6 +77,7 @@ Natural PDF offers a range of features for working with PDFs:
|
|
74
77
|
* **OCR Integration:** Extract text from scanned documents using engines like EasyOCR, PaddleOCR, or Surya.
|
75
78
|
* **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using AI models.
|
76
79
|
* **Document QA:** Ask natural language questions about your document's content.
|
80
|
+
* **Semantic Search:** Index PDFs and find relevant pages or documents based on semantic meaning using Haystack.
|
77
81
|
* **Visual Debugging:** Highlight elements and use an interactive viewer or save images to understand your selections.
|
78
82
|
|
79
83
|
## Learn More
|