natural-pdf 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/.github/workflows/docs.yml +3 -1
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/.gitignore +9 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/PKG-INFO +10 -10
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/README.md +9 -9
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/check_run_md.sh +2 -1
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/assets/stylesheets/custom.css +27 -0
- natural_pdf-0.1.1/docs/document-qa/index.ipynb +435 -0
- natural_pdf-0.1.1/docs/document-qa/index.md +79 -0
- natural_pdf-0.1.1/docs/element-selection/index.ipynb +915 -0
- natural_pdf-0.1.1/docs/element-selection/index.md +229 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/index.md +17 -28
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/installation/index.md +3 -3
- natural_pdf-0.1.1/docs/interactive-widget/index.ipynb +962 -0
- natural_pdf-0.1.1/docs/interactive-widget/index.md +12 -0
- natural_pdf-0.1.1/docs/layout-analysis/index.ipynb +818 -0
- natural_pdf-0.1.1/docs/layout-analysis/index.md +185 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/ocr/index.md +3 -13
- natural_pdf-0.1.1/docs/pdf-navigation/index.ipynb +314 -0
- natural_pdf-0.1.1/docs/pdf-navigation/index.md +97 -0
- natural_pdf-0.1.1/docs/regions/index.ipynb +850 -0
- natural_pdf-0.1.1/docs/regions/index.md +300 -0
- natural_pdf-0.1.1/docs/tables/index.ipynb +658 -0
- natural_pdf-0.1.1/docs/tables/index.md +144 -0
- natural_pdf-0.1.1/docs/text-analysis/index.ipynb +370 -0
- natural_pdf-0.1.1/docs/text-analysis/index.md +105 -0
- natural_pdf-0.1.1/docs/text-extraction/index.ipynb +1478 -0
- natural_pdf-0.1.1/docs/text-extraction/index.md +292 -0
- natural_pdf-0.1.1/docs/tutorials/01-loading-and-extraction.ipynb +1137 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/01-loading-and-extraction.md +15 -7
- natural_pdf-0.1.1/docs/tutorials/02-finding-elements.ipynb +344 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/02-finding-elements.md +18 -9
- natural_pdf-0.1.1/docs/tutorials/03-extracting-blocks.ipynb +151 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/03-extracting-blocks.md +10 -3
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/04-table-extraction.ipynb +36 -12
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/04-table-extraction.md +11 -4
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/05-excluding-content.ipynb +53 -28
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/05-excluding-content.md +11 -3
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/06-document-qa.ipynb +63 -39
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/06-document-qa.md +12 -5
- natural_pdf-0.1.1/docs/tutorials/07-layout-analysis.ipynb +264 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/07-layout-analysis.md +10 -3
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/07-working-with-regions.ipynb +87 -66
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/07-working-with-regions.md +6 -2
- natural_pdf-0.1.1/docs/tutorials/08-spatial-navigation.ipynb +512 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/08-spatial-navigation.md +6 -2
- natural_pdf-0.1.1/docs/tutorials/09-section-extraction.ipynb +2432 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/09-section-extraction.md +7 -3
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/10-form-field-extraction.ipynb +93 -72
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/10-form-field-extraction.md +6 -2
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/11-enhanced-table-processing.ipynb +24 -3
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/11-enhanced-table-processing.md +6 -2
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/12-ocr-integration.ipynb +100 -79
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/tutorials/12-ocr-integration.md +6 -2
- natural_pdf-0.1.1/docs/visual-debugging/index.ipynb +2970 -0
- natural_pdf-0.1.1/docs/visual-debugging/index.md +157 -0
- natural_pdf-0.1.1/docs/visual-debugging/region.png +0 -0
- natural_pdf-0.1.1/execute_notebooks.py +413 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/mkdocs.yml +56 -19
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/__init__.py +1 -1
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/core/highlighting_service.py +48 -17
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/core/page.py +92 -27
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/core/pdf.py +11 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/elements/base.py +99 -14
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/elements/collections.py +56 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/elements/region.py +4 -106
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/qa/document_qa.py +4 -3
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/selectors/parser.py +215 -1
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/utils/visualization.py +2 -2
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf.egg-info/PKG-INFO +10 -10
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf.egg-info/SOURCES.txt +12 -102
- natural_pdf-0.1.0/docs/document-qa/index.md +0 -375
- natural_pdf-0.1.0/docs/element-selection/index.md +0 -270
- natural_pdf-0.1.0/docs/explanations/index.md +0 -28
- natural_pdf-0.1.0/docs/explanations/ocr-challenges.md +0 -221
- natural_pdf-0.1.0/docs/explanations/pdf-extraction-challenges.md +0 -203
- natural_pdf-0.1.0/docs/explanations/pdf-fonts.md +0 -214
- natural_pdf-0.1.0/docs/interactive-widget/index.md +0 -0
- natural_pdf-0.1.0/docs/layout-analysis/index.md +0 -301
- natural_pdf-0.1.0/docs/pdf-navigation/index.md +0 -255
- natural_pdf-0.1.0/docs/regions/index.md +0 -302
- natural_pdf-0.1.0/docs/tables/index.md +0 -359
- natural_pdf-0.1.0/docs/text-analysis/index.md +0 -103
- natural_pdf-0.1.0/docs/text-extraction/index.md +0 -426
- natural_pdf-0.1.0/docs/tutorials/01-loading-and-extraction.ipynb +0 -291
- natural_pdf-0.1.0/docs/tutorials/02-finding-elements.ipynb +0 -318
- natural_pdf-0.1.0/docs/tutorials/03-extracting-blocks.ipynb +0 -127
- natural_pdf-0.1.0/docs/tutorials/07-layout-analysis.ipynb +0 -240
- natural_pdf-0.1.0/docs/tutorials/08-spatial-navigation.ipynb +0 -491
- natural_pdf-0.1.0/docs/tutorials/09-section-extraction.ipynb +0 -2418
- natural_pdf-0.1.0/docs/tutorials/README.ipynb +0 -83
- natural_pdf-0.1.0/docs/tutorials/README.md +0 -51
- natural_pdf-0.1.0/docs/visual-debugging/index.md +0 -223
- natural_pdf-0.1.0/examples/__init__.py +0 -3
- natural_pdf-0.1.0/examples/another_exclusion_example.py +0 -20
- natural_pdf-0.1.0/examples/basic_usage.py +0 -190
- natural_pdf-0.1.0/examples/boundary_exclusion_test.py +0 -137
- natural_pdf-0.1.0/examples/boundary_inclusion_fix_test.py +0 -157
- natural_pdf-0.1.0/examples/chainable_layout_example.py +0 -70
- natural_pdf-0.1.0/examples/color_basic_test.py +0 -49
- natural_pdf-0.1.0/examples/color_name_example.py +0 -71
- natural_pdf-0.1.0/examples/color_test.py +0 -62
- natural_pdf-0.1.0/examples/debug_ocr.py +0 -91
- natural_pdf-0.1.0/examples/direct_ocr_test.py +0 -148
- natural_pdf-0.1.0/examples/direct_paddle_test.py +0 -99
- natural_pdf-0.1.0/examples/direct_qa_example.py +0 -71
- natural_pdf-0.1.0/examples/docling_comprehensive_test.py +0 -325
- natural_pdf-0.1.0/examples/docling_example.py +0 -192
- natural_pdf-0.1.0/examples/docling_hierarchy_example.py +0 -230
- natural_pdf-0.1.0/examples/docling_text_sources.py +0 -241
- natural_pdf-0.1.0/examples/document_layout_analysis.py +0 -123
- natural_pdf-0.1.0/examples/document_qa_example.py +0 -185
- natural_pdf-0.1.0/examples/exclusion_count_debug.py +0 -128
- natural_pdf-0.1.0/examples/exclusion_debug.py +0 -107
- natural_pdf-0.1.0/examples/exclusion_example.py +0 -150
- natural_pdf-0.1.0/examples/exclusion_optimization_example.py +0 -190
- natural_pdf-0.1.0/examples/extract_text_test.py +0 -128
- natural_pdf-0.1.0/examples/font_aware_example.py +0 -101
- natural_pdf-0.1.0/examples/font_variant_example.py +0 -124
- natural_pdf-0.1.0/examples/footer_overlap_test.py +0 -124
- natural_pdf-0.1.0/examples/highlight_all_example.py +0 -82
- natural_pdf-0.1.0/examples/highlight_attributes_test.py +0 -114
- natural_pdf-0.1.0/examples/highlight_confidence_display.py +0 -122
- natural_pdf-0.1.0/examples/highlight_demo.py +0 -110
- natural_pdf-0.1.0/examples/highlight_float_test.py +0 -71
- natural_pdf-0.1.0/examples/highlight_test.py +0 -147
- natural_pdf-0.1.0/examples/highlighting_example.py +0 -123
- natural_pdf-0.1.0/examples/image_width_example.py +0 -84
- natural_pdf-0.1.0/examples/improved_api_example.py +0 -128
- natural_pdf-0.1.0/examples/improved_qa_example.py +0 -66
- natural_pdf-0.1.0/examples/layout_confidence_display_test.py +0 -65
- natural_pdf-0.1.0/examples/layout_confidence_test.py +0 -82
- natural_pdf-0.1.0/examples/layout_coordinate_debug.py +0 -258
- natural_pdf-0.1.0/examples/layout_highlight_test.py +0 -77
- natural_pdf-0.1.0/examples/logging_example.py +0 -70
- natural_pdf-0.1.0/examples/ocr_comprehensive.py +0 -193
- natural_pdf-0.1.0/examples/ocr_debug_example.py +0 -87
- natural_pdf-0.1.0/examples/ocr_default_test.py +0 -97
- natural_pdf-0.1.0/examples/ocr_engine_comparison.py +0 -235
- natural_pdf-0.1.0/examples/ocr_example.py +0 -89
- natural_pdf-0.1.0/examples/ocr_simplified_params.py +0 -79
- natural_pdf-0.1.0/examples/ocr_visualization.py +0 -102
- natural_pdf-0.1.0/examples/ocr_visualization_test.py +0 -121
- natural_pdf-0.1.0/examples/paddle_layout_example.py +0 -315
- natural_pdf-0.1.0/examples/paddle_layout_simple.py +0 -74
- natural_pdf-0.1.0/examples/paddleocr_example.py +0 -224
- natural_pdf-0.1.0/examples/page_collection_example.py +0 -103
- natural_pdf-0.1.0/examples/polygon_highlight_example.py +0 -83
- natural_pdf-0.1.0/examples/position_methods_example.py +0 -134
- natural_pdf-0.1.0/examples/position_output/position_methods.png +0 -0
- natural_pdf-0.1.0/examples/region_boundary_test.py +0 -73
- natural_pdf-0.1.0/examples/region_exclusion_test.py +0 -149
- natural_pdf-0.1.0/examples/region_expand_example.py +0 -109
- natural_pdf-0.1.0/examples/region_image_example.py +0 -116
- natural_pdf-0.1.0/examples/region_ocr_test.py +0 -119
- natural_pdf-0.1.0/examples/region_sections_example.py +0 -115
- natural_pdf-0.1.0/examples/school_books.py +0 -49
- natural_pdf-0.1.0/examples/school_books_all.py +0 -52
- natural_pdf-0.1.0/examples/scouring.py +0 -36
- natural_pdf-0.1.0/examples/section_extraction_example.py +0 -232
- natural_pdf-0.1.0/examples/section_output/headings.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_1.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_2.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_3.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_4.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_5.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_6.png +0 -0
- natural_pdf-0.1.0/examples/section_output/sections_no_grouping.png +0 -0
- natural_pdf-0.1.0/examples/section_output/sections_with_grouping.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_both.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_end.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_none.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_start.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/separators.png +0 -0
- natural_pdf-0.1.0/examples/simple_document_qa.py +0 -97
- natural_pdf-0.1.0/examples/spatial_navigation_example.py +0 -108
- natural_pdf-0.1.0/examples/start_end_output/elements.png +0 -0
- natural_pdf-0.1.0/examples/table_extraction_example.py +0 -135
- natural_pdf-0.1.0/examples/table_structure_detection.py +0 -155
- natural_pdf-0.1.0/examples/tatr_cells_test.py +0 -56
- natural_pdf-0.1.0/examples/tatr_ocr_table_test.py +0 -94
- natural_pdf-0.1.0/examples/text_search_example.py +0 -122
- natural_pdf-0.1.0/examples/text_style_example.py +0 -109
- natural_pdf-0.1.0/examples/tiny-text.py +0 -61
- natural_pdf-0.1.0/examples/until_boundaries_example.py +0 -156
- natural_pdf-0.1.0/examples/until_example.py +0 -112
- natural_pdf-0.1.0/examples/until_output/until_boundaries_headings.png +0 -0
- natural_pdf-0.1.0/examples/url_pdf_example.py +0 -45
- natural_pdf-0.1.0/examples/very_basics.py +0 -15
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/CLAUDE.md +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/LICENSE +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/MANIFEST.in +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/api/index.md +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/assets/favicon.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/assets/favicon.svg +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/assets/javascripts/custom.js +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/assets/logo.svg +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/assets/social-preview.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/docs/assets/social-preview.svg +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/base.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/docling.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/layout_options.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/paddle.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/surya.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/tatr.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/layout/yolo.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/text_options.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/text_structure.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/analyzers/utils.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/core/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/core/element_manager.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/elements/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/elements/line.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/elements/rect.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/elements/text.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/ocr/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/ocr/engine.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/ocr/engine_easyocr.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/ocr/engine_paddle.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/ocr/engine_surya.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/ocr/ocr_manager.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/ocr/ocr_options.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/qa/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/selectors/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/templates/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/templates/ocr_debug.html +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/utils/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/utils/highlighting.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/utils/reading_order.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/widgets/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/widgets/frontend/viewer.js +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf/widgets/viewer.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf.egg-info/dependency_links.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf.egg-info/requires.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/natural_pdf.egg-info/top_level.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/notebooks/Examples.ipynb +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/all_detected_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/all_elements.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/basic_highlighting.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/chainable_layout.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/chained_analysis.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/color_names.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/color_names_with_boxes.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/conf_display_highlight_all.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/conf_display_highlight_layout.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/conf_display_layout_only.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/confidence_color_coded.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/debug_page_image.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/detected_table.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/dimension_analysis.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/direct_ocr_debug.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/easyocr_debug_input.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/easyocr_results.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/easyocr_test_input.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/exclusion_optimization_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/explicit_confidence_display.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/footer_overlap_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_all.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_all_styles.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_all_with_all_layouts.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_all_with_attrs.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_all_with_yolo.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_by_confidence.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_color_test_1.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_color_test_2.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_color_test_3.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_color_test_4.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_layout_method.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_multiple.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_no_attrs.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_region.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_single.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_specific_types.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_specific_types_with_boxes.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_specific_types_with_tables.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_test_colors.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_test_individual.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_test_individual_annotated.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_test_individual_with_structure.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_test_individual_with_structure_yolo.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_test_individual_with_tables.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/highlight_with_attrs.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/layout_conf_default.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/layout_conf_high.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/layout_detection.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/layout_fix_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/layout_fix_test2.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/layout_fix_test3.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/layout_fix_test4.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/model_comparison.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/multiple_attributes_display.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_confidence_visualization.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_debug.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_debug_page.html +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_highlight_all_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_highlight_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_highlighted.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_simplified.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_threshold_comparison.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_visualization_clean.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_visualization_highlights.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/ocr_visualization_text.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddle_layout_detection.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddle_layout_polygons.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddle_layout_sources.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddle_layout_with_text.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddle_layout_without_text.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddleocr_highlights.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddleocr_results.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/paddleocr_test_input.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/page_1_for_ocr.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/page_4_for_ocr.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/region_exclusion_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/region_management_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/region_ocr_cropped.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/region_ocr_debug.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/region_ocr_full_page.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/region_ocr_highlighted.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/spatial_navigation.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/standard_highlight_all.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/table_no_ocr.csv +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/table_structure.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/table_structure_detail.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/table_with_ocr.csv +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/tatr_cells_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/tatr_ocr_table_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/tatr_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/tatr_regions.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/text_styles.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/titles_only.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/width_1200px.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/width_800px.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/width_default.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/width_with_scale.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/yolo_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/output/yolo_regions.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/.gitkeep +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/01-practice.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/0500000US42001.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/0500000US42007.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/2014 Statistics.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/2019 Statistics.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pdfs/needs-ocr.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/publish.sh +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/pyproject.toml +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/run_all_tutorials.sh +0 -0
- {natural_pdf-0.1.0 → natural_pdf-0.1.1}/setup.cfg +0 -0
@@ -8,6 +8,8 @@ on:
|
|
8
8
|
- 'docs/**'
|
9
9
|
- 'mkdocs.yml'
|
10
10
|
- '.github/workflows/docs.yml'
|
11
|
+
tags:
|
12
|
+
- 'v*'
|
11
13
|
|
12
14
|
permissions:
|
13
15
|
contents: write
|
@@ -27,7 +29,7 @@ jobs:
|
|
27
29
|
- name: Install dependencies
|
28
30
|
run: |
|
29
31
|
python -m pip install --upgrade pip
|
30
|
-
pip install mkdocs-material mkdocs pymdown-extensions mkdocstrings mkdocstrings-python mkdocs-jupyter
|
32
|
+
pip install mkdocs-material mkdocs pymdown-extensions mkdocstrings mkdocstrings-python mkdocs-jupyter mkdocs-exclude
|
31
33
|
pip install -e .
|
32
34
|
|
33
35
|
- name: Build docs
|
@@ -1,3 +1,12 @@
|
|
1
|
+
.notebook_cache.json
|
2
|
+
Untitled.ipynb
|
3
|
+
conversation.md
|
4
|
+
docs/tutorials/pdfs
|
5
|
+
install.sh
|
6
|
+
notebooks/Examples.md
|
7
|
+
transcript.md
|
8
|
+
|
9
|
+
|
1
10
|
# Created by https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
|
2
11
|
# Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,visualstudiocode,jupyternotebooks
|
3
12
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: natural-pdf
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: A more intuitive interface for working with PDFs
|
5
5
|
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
6
|
License-Expression: MIT
|
@@ -48,7 +48,7 @@ A friendly library for working with PDFs, built on top of [pdfplumber](https://g
|
|
48
48
|
Natural PDF lets you find and extract content from PDFs using simple code that makes sense.
|
49
49
|
|
50
50
|
- [Complete documentation here](https://jsoma.github.io/natural-pdf)
|
51
|
-
- [Live
|
51
|
+
- [Live demos here](https://colab.research.google.com/github/jsoma/natural-pdf/)
|
52
52
|
|
53
53
|
## Features
|
54
54
|
|
@@ -74,18 +74,16 @@ pip install natural-pdf
|
|
74
74
|
|
75
75
|
# Installs the core library along with required AI dependencies (PyTorch, Transformers)
|
76
76
|
```bash
|
77
|
-
# Install with support for specific OCR engines
|
78
|
-
pip install natural-pdf[easyocr]
|
79
|
-
pip install natural-pdf[paddle]
|
80
|
-
pip install natural-pdf[surya]
|
81
|
-
|
82
|
-
# Install with support for YOLO layout detection model
|
77
|
+
# Install with support for specific OCR and layout engines
|
78
|
+
pip install natural-pdf[easyocr]
|
79
|
+
pip install natural-pdf[paddle]
|
80
|
+
pip install natural-pdf[surya]
|
83
81
|
pip install natural-pdf[layout_yolo]
|
84
82
|
|
85
83
|
# Install with support for the interactive Jupyter widget
|
86
84
|
pip install natural-pdf[interactive]
|
87
85
|
|
88
|
-
#
|
86
|
+
# Just install everything
|
89
87
|
pip install natural-pdf[all]
|
90
88
|
```
|
91
89
|
|
@@ -119,6 +117,8 @@ clean_text = page.extract_text()
|
|
119
117
|
print(clean_text)
|
120
118
|
```
|
121
119
|
|
120
|
+
- [Complete documentation here](https://jsoma.github.io/natural-pdf)
|
121
|
+
|
122
122
|
## Selectors
|
123
123
|
|
124
124
|
The library supports CSS-like selectors for finding elements:
|
@@ -185,7 +185,7 @@ Exclusions work efficiently with different region types:
|
|
185
185
|
|
186
186
|
## OCR Integration
|
187
187
|
|
188
|
-
Extract text from scanned documents using OCR, with support for multiple engines (EasyOCR, PaddleOCR, Surya):
|
188
|
+
Extract text from scanned documents using OCR, with support for multiple engines ([EasyOCR](https://www.jaided.ai/easyocr/), [PaddleOCR](https://paddlepaddle.github.io/PaddleOCR/latest/en/index.html), [Surya](https://github.com/VikParuchuri/surya)):
|
189
189
|
|
190
190
|
```python
|
191
191
|
# Apply OCR using a specific engine (e.g., PaddleOCR)
|
@@ -5,7 +5,7 @@ A friendly library for working with PDFs, built on top of [pdfplumber](https://g
|
|
5
5
|
Natural PDF lets you find and extract content from PDFs using simple code that makes sense.
|
6
6
|
|
7
7
|
- [Complete documentation here](https://jsoma.github.io/natural-pdf)
|
8
|
-
- [Live
|
8
|
+
- [Live demos here](https://colab.research.google.com/github/jsoma/natural-pdf/)
|
9
9
|
|
10
10
|
## Features
|
11
11
|
|
@@ -31,18 +31,16 @@ pip install natural-pdf
|
|
31
31
|
|
32
32
|
# Installs the core library along with required AI dependencies (PyTorch, Transformers)
|
33
33
|
```bash
|
34
|
-
# Install with support for specific OCR engines
|
35
|
-
pip install natural-pdf[easyocr]
|
36
|
-
pip install natural-pdf[paddle]
|
37
|
-
pip install natural-pdf[surya]
|
38
|
-
|
39
|
-
# Install with support for YOLO layout detection model
|
34
|
+
# Install with support for specific OCR and layout engines
|
35
|
+
pip install natural-pdf[easyocr]
|
36
|
+
pip install natural-pdf[paddle]
|
37
|
+
pip install natural-pdf[surya]
|
40
38
|
pip install natural-pdf[layout_yolo]
|
41
39
|
|
42
40
|
# Install with support for the interactive Jupyter widget
|
43
41
|
pip install natural-pdf[interactive]
|
44
42
|
|
45
|
-
#
|
43
|
+
# Just install everything
|
46
44
|
pip install natural-pdf[all]
|
47
45
|
```
|
48
46
|
|
@@ -76,6 +74,8 @@ clean_text = page.extract_text()
|
|
76
74
|
print(clean_text)
|
77
75
|
```
|
78
76
|
|
77
|
+
- [Complete documentation here](https://jsoma.github.io/natural-pdf)
|
78
|
+
|
79
79
|
## Selectors
|
80
80
|
|
81
81
|
The library supports CSS-like selectors for finding elements:
|
@@ -142,7 +142,7 @@ Exclusions work efficiently with different region types:
|
|
142
142
|
|
143
143
|
## OCR Integration
|
144
144
|
|
145
|
-
Extract text from scanned documents using OCR, with support for multiple engines (EasyOCR, PaddleOCR, Surya):
|
145
|
+
Extract text from scanned documents using OCR, with support for multiple engines ([EasyOCR](https://www.jaided.ai/easyocr/), [PaddleOCR](https://paddlepaddle.github.io/PaddleOCR/latest/en/index.html), [Surya](https://github.com/VikParuchuri/surya)):
|
146
146
|
|
147
147
|
```python
|
148
148
|
# Apply OCR using a specific engine (e.g., PaddleOCR)
|
@@ -11,9 +11,10 @@ MARKDOWN_FILE=$1
|
|
11
11
|
NOTEBOOK_FILE="${MARKDOWN_FILE%.md}.ipynb"
|
12
12
|
|
13
13
|
echo "Converting $MARKDOWN_FILE to notebook..."
|
14
|
+
# Jupytext will now automatically add tags based on markdown metadata
|
14
15
|
jupytext --to ipynb "$MARKDOWN_FILE" || { echo "Conversion failed"; exit 1; }
|
15
16
|
|
16
17
|
echo "Executing notebook $NOTEBOOK_FILE..."
|
17
18
|
jupyter execute "$NOTEBOOK_FILE" --inplace || { echo "Execution failed"; exit 1; }
|
18
19
|
|
19
|
-
echo "Success! Notebook executed and results saved to $NOTEBOOK_FILE"
|
20
|
+
echo "Success! Notebook executed and results saved to $NOTEBOOK_FILE"
|
@@ -1,5 +1,32 @@
|
|
1
1
|
/* Natural PDF - Minimal Custom Styling */
|
2
2
|
|
3
|
+
.jp-InputPrompt, .jp-OutputPrompt {
|
4
|
+
display: none !important;
|
5
|
+
}
|
6
|
+
|
7
|
+
.jupyter-wrapper .CodeMirror {
|
8
|
+
font-size: 0.85em !important;
|
9
|
+
}
|
10
|
+
|
11
|
+
.highlight-ipynb pre {
|
12
|
+
white-space: pre-wrap !important;
|
13
|
+
word-wrap: break-word !important;
|
14
|
+
}
|
15
|
+
|
16
|
+
.CodeMirror pre {
|
17
|
+
white-space: pre-wrap !important;
|
18
|
+
word-wrap: break-word !important;
|
19
|
+
}
|
20
|
+
|
21
|
+
.jp-CodeMirrorEditor {
|
22
|
+
max-width: 100%;
|
23
|
+
overflow-x: auto;
|
24
|
+
}
|
25
|
+
|
26
|
+
.jupyter-wrapper{
|
27
|
+
--jp-code-font-size: 0.85em !important;
|
28
|
+
}
|
29
|
+
|
3
30
|
/* Typography improvements */
|
4
31
|
.md-typeset h1 {
|
5
32
|
font-weight: 400;
|