natural-pdf 0.1.0__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/.github/workflows/docs.yml +3 -1
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/.gitignore +9 -0
- natural_pdf-1.1.1/PKG-INFO +124 -0
- natural_pdf-1.1.1/README.md +81 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/check_run_md.sh +2 -1
- natural_pdf-1.1.1/docs/assets/sample-screen.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/assets/stylesheets/custom.css +27 -0
- natural_pdf-1.1.1/docs/document-qa/index.ipynb +435 -0
- natural_pdf-1.1.1/docs/document-qa/index.md +79 -0
- natural_pdf-1.1.1/docs/element-selection/index.ipynb +915 -0
- natural_pdf-1.1.1/docs/element-selection/index.md +229 -0
- natural_pdf-1.1.1/docs/index.md +170 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/installation/index.md +4 -5
- natural_pdf-1.1.1/docs/interactive-widget/index.ipynb +962 -0
- natural_pdf-1.1.1/docs/interactive-widget/index.md +12 -0
- natural_pdf-1.1.1/docs/layout-analysis/index.ipynb +818 -0
- natural_pdf-1.1.1/docs/layout-analysis/index.md +185 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/ocr/index.md +3 -13
- natural_pdf-1.1.1/docs/pdf-navigation/index.ipynb +314 -0
- natural_pdf-1.1.1/docs/pdf-navigation/index.md +97 -0
- natural_pdf-1.1.1/docs/regions/index.ipynb +850 -0
- natural_pdf-1.1.1/docs/regions/index.md +295 -0
- natural_pdf-1.1.1/docs/tables/index.ipynb +658 -0
- natural_pdf-1.1.1/docs/tables/index.md +144 -0
- natural_pdf-1.1.1/docs/text-analysis/index.ipynb +370 -0
- natural_pdf-1.1.1/docs/text-analysis/index.md +105 -0
- natural_pdf-1.1.1/docs/text-extraction/index.ipynb +1478 -0
- natural_pdf-1.1.1/docs/text-extraction/index.md +292 -0
- natural_pdf-1.1.1/docs/tutorials/01-loading-and-extraction.ipynb +1137 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/01-loading-and-extraction.md +15 -7
- natural_pdf-1.1.1/docs/tutorials/02-finding-elements.ipynb +344 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/02-finding-elements.md +18 -9
- natural_pdf-1.1.1/docs/tutorials/03-extracting-blocks.ipynb +151 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/03-extracting-blocks.md +10 -3
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/04-table-extraction.ipynb +36 -12
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/04-table-extraction.md +11 -4
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/05-excluding-content.ipynb +53 -28
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/05-excluding-content.md +24 -13
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/06-document-qa.ipynb +63 -39
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/06-document-qa.md +12 -5
- natural_pdf-1.1.1/docs/tutorials/07-layout-analysis.ipynb +264 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/07-layout-analysis.md +10 -3
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/07-working-with-regions.ipynb +87 -66
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/07-working-with-regions.md +6 -2
- natural_pdf-1.1.1/docs/tutorials/08-spatial-navigation.ipynb +512 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/08-spatial-navigation.md +6 -2
- natural_pdf-1.1.1/docs/tutorials/09-section-extraction.ipynb +2432 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/09-section-extraction.md +7 -3
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/10-form-field-extraction.ipynb +93 -72
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/10-form-field-extraction.md +6 -2
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/11-enhanced-table-processing.ipynb +24 -3
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/11-enhanced-table-processing.md +6 -2
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/12-ocr-integration.ipynb +100 -79
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/tutorials/12-ocr-integration.md +6 -2
- natural_pdf-1.1.1/docs/visual-debugging/index.ipynb +2970 -0
- natural_pdf-1.1.1/docs/visual-debugging/index.md +157 -0
- natural_pdf-1.1.1/docs/visual-debugging/region.png +0 -0
- natural_pdf-1.1.1/execute_notebooks.py +413 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/mkdocs.yml +56 -19
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/__init__.py +1 -1
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/core/highlighting_service.py +48 -17
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/core/page.py +92 -27
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/core/pdf.py +11 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/elements/base.py +99 -14
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/elements/collections.py +56 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/elements/region.py +4 -106
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/qa/document_qa.py +4 -3
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/selectors/parser.py +215 -1
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/utils/visualization.py +2 -2
- natural_pdf-1.1.1/natural_pdf.egg-info/PKG-INFO +124 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf.egg-info/SOURCES.txt +14 -102
- natural_pdf-1.1.1/notebooks/Examples.ipynb +1293 -0
- natural_pdf-1.1.1/sample-screen.png +0 -0
- natural_pdf-0.1.0/PKG-INFO +0 -295
- natural_pdf-0.1.0/README.md +0 -252
- natural_pdf-0.1.0/docs/document-qa/index.md +0 -375
- natural_pdf-0.1.0/docs/element-selection/index.md +0 -270
- natural_pdf-0.1.0/docs/explanations/index.md +0 -28
- natural_pdf-0.1.0/docs/explanations/ocr-challenges.md +0 -221
- natural_pdf-0.1.0/docs/explanations/pdf-extraction-challenges.md +0 -203
- natural_pdf-0.1.0/docs/explanations/pdf-fonts.md +0 -214
- natural_pdf-0.1.0/docs/index.md +0 -310
- natural_pdf-0.1.0/docs/interactive-widget/index.md +0 -0
- natural_pdf-0.1.0/docs/layout-analysis/index.md +0 -301
- natural_pdf-0.1.0/docs/pdf-navigation/index.md +0 -255
- natural_pdf-0.1.0/docs/regions/index.md +0 -302
- natural_pdf-0.1.0/docs/tables/index.md +0 -359
- natural_pdf-0.1.0/docs/text-analysis/index.md +0 -103
- natural_pdf-0.1.0/docs/text-extraction/index.md +0 -426
- natural_pdf-0.1.0/docs/tutorials/01-loading-and-extraction.ipynb +0 -291
- natural_pdf-0.1.0/docs/tutorials/02-finding-elements.ipynb +0 -318
- natural_pdf-0.1.0/docs/tutorials/03-extracting-blocks.ipynb +0 -127
- natural_pdf-0.1.0/docs/tutorials/07-layout-analysis.ipynb +0 -240
- natural_pdf-0.1.0/docs/tutorials/08-spatial-navigation.ipynb +0 -491
- natural_pdf-0.1.0/docs/tutorials/09-section-extraction.ipynb +0 -2418
- natural_pdf-0.1.0/docs/tutorials/README.ipynb +0 -83
- natural_pdf-0.1.0/docs/tutorials/README.md +0 -51
- natural_pdf-0.1.0/docs/visual-debugging/index.md +0 -223
- natural_pdf-0.1.0/examples/__init__.py +0 -3
- natural_pdf-0.1.0/examples/another_exclusion_example.py +0 -20
- natural_pdf-0.1.0/examples/basic_usage.py +0 -190
- natural_pdf-0.1.0/examples/boundary_exclusion_test.py +0 -137
- natural_pdf-0.1.0/examples/boundary_inclusion_fix_test.py +0 -157
- natural_pdf-0.1.0/examples/chainable_layout_example.py +0 -70
- natural_pdf-0.1.0/examples/color_basic_test.py +0 -49
- natural_pdf-0.1.0/examples/color_name_example.py +0 -71
- natural_pdf-0.1.0/examples/color_test.py +0 -62
- natural_pdf-0.1.0/examples/debug_ocr.py +0 -91
- natural_pdf-0.1.0/examples/direct_ocr_test.py +0 -148
- natural_pdf-0.1.0/examples/direct_paddle_test.py +0 -99
- natural_pdf-0.1.0/examples/direct_qa_example.py +0 -71
- natural_pdf-0.1.0/examples/docling_comprehensive_test.py +0 -325
- natural_pdf-0.1.0/examples/docling_example.py +0 -192
- natural_pdf-0.1.0/examples/docling_hierarchy_example.py +0 -230
- natural_pdf-0.1.0/examples/docling_text_sources.py +0 -241
- natural_pdf-0.1.0/examples/document_layout_analysis.py +0 -123
- natural_pdf-0.1.0/examples/document_qa_example.py +0 -185
- natural_pdf-0.1.0/examples/exclusion_count_debug.py +0 -128
- natural_pdf-0.1.0/examples/exclusion_debug.py +0 -107
- natural_pdf-0.1.0/examples/exclusion_example.py +0 -150
- natural_pdf-0.1.0/examples/exclusion_optimization_example.py +0 -190
- natural_pdf-0.1.0/examples/extract_text_test.py +0 -128
- natural_pdf-0.1.0/examples/font_aware_example.py +0 -101
- natural_pdf-0.1.0/examples/font_variant_example.py +0 -124
- natural_pdf-0.1.0/examples/footer_overlap_test.py +0 -124
- natural_pdf-0.1.0/examples/highlight_all_example.py +0 -82
- natural_pdf-0.1.0/examples/highlight_attributes_test.py +0 -114
- natural_pdf-0.1.0/examples/highlight_confidence_display.py +0 -122
- natural_pdf-0.1.0/examples/highlight_demo.py +0 -110
- natural_pdf-0.1.0/examples/highlight_float_test.py +0 -71
- natural_pdf-0.1.0/examples/highlight_test.py +0 -147
- natural_pdf-0.1.0/examples/highlighting_example.py +0 -123
- natural_pdf-0.1.0/examples/image_width_example.py +0 -84
- natural_pdf-0.1.0/examples/improved_api_example.py +0 -128
- natural_pdf-0.1.0/examples/improved_qa_example.py +0 -66
- natural_pdf-0.1.0/examples/layout_confidence_display_test.py +0 -65
- natural_pdf-0.1.0/examples/layout_confidence_test.py +0 -82
- natural_pdf-0.1.0/examples/layout_coordinate_debug.py +0 -258
- natural_pdf-0.1.0/examples/layout_highlight_test.py +0 -77
- natural_pdf-0.1.0/examples/logging_example.py +0 -70
- natural_pdf-0.1.0/examples/ocr_comprehensive.py +0 -193
- natural_pdf-0.1.0/examples/ocr_debug_example.py +0 -87
- natural_pdf-0.1.0/examples/ocr_default_test.py +0 -97
- natural_pdf-0.1.0/examples/ocr_engine_comparison.py +0 -235
- natural_pdf-0.1.0/examples/ocr_example.py +0 -89
- natural_pdf-0.1.0/examples/ocr_simplified_params.py +0 -79
- natural_pdf-0.1.0/examples/ocr_visualization.py +0 -102
- natural_pdf-0.1.0/examples/ocr_visualization_test.py +0 -121
- natural_pdf-0.1.0/examples/paddle_layout_example.py +0 -315
- natural_pdf-0.1.0/examples/paddle_layout_simple.py +0 -74
- natural_pdf-0.1.0/examples/paddleocr_example.py +0 -224
- natural_pdf-0.1.0/examples/page_collection_example.py +0 -103
- natural_pdf-0.1.0/examples/polygon_highlight_example.py +0 -83
- natural_pdf-0.1.0/examples/position_methods_example.py +0 -134
- natural_pdf-0.1.0/examples/position_output/position_methods.png +0 -0
- natural_pdf-0.1.0/examples/region_boundary_test.py +0 -73
- natural_pdf-0.1.0/examples/region_exclusion_test.py +0 -149
- natural_pdf-0.1.0/examples/region_expand_example.py +0 -109
- natural_pdf-0.1.0/examples/region_image_example.py +0 -116
- natural_pdf-0.1.0/examples/region_ocr_test.py +0 -119
- natural_pdf-0.1.0/examples/region_sections_example.py +0 -115
- natural_pdf-0.1.0/examples/school_books.py +0 -49
- natural_pdf-0.1.0/examples/school_books_all.py +0 -52
- natural_pdf-0.1.0/examples/scouring.py +0 -36
- natural_pdf-0.1.0/examples/section_extraction_example.py +0 -232
- natural_pdf-0.1.0/examples/section_output/headings.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_1.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_2.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_3.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_4.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_5.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_6.png +0 -0
- natural_pdf-0.1.0/examples/section_output/sections_no_grouping.png +0 -0
- natural_pdf-0.1.0/examples/section_output/sections_with_grouping.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_both.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_end.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_none.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_start.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/separators.png +0 -0
- natural_pdf-0.1.0/examples/simple_document_qa.py +0 -97
- natural_pdf-0.1.0/examples/spatial_navigation_example.py +0 -108
- natural_pdf-0.1.0/examples/start_end_output/elements.png +0 -0
- natural_pdf-0.1.0/examples/table_extraction_example.py +0 -135
- natural_pdf-0.1.0/examples/table_structure_detection.py +0 -155
- natural_pdf-0.1.0/examples/tatr_cells_test.py +0 -56
- natural_pdf-0.1.0/examples/tatr_ocr_table_test.py +0 -94
- natural_pdf-0.1.0/examples/text_search_example.py +0 -122
- natural_pdf-0.1.0/examples/text_style_example.py +0 -109
- natural_pdf-0.1.0/examples/tiny-text.py +0 -61
- natural_pdf-0.1.0/examples/until_boundaries_example.py +0 -156
- natural_pdf-0.1.0/examples/until_example.py +0 -112
- natural_pdf-0.1.0/examples/until_output/until_boundaries_headings.png +0 -0
- natural_pdf-0.1.0/examples/url_pdf_example.py +0 -45
- natural_pdf-0.1.0/examples/very_basics.py +0 -15
- natural_pdf-0.1.0/natural_pdf.egg-info/PKG-INFO +0 -295
- natural_pdf-0.1.0/notebooks/Examples.ipynb +0 -1166
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/CLAUDE.md +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/LICENSE +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/MANIFEST.in +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/api/index.md +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/assets/favicon.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/assets/favicon.svg +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/assets/javascripts/custom.js +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/assets/logo.svg +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/assets/social-preview.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/docs/assets/social-preview.svg +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/base.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/docling.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/layout_analyzer.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/layout_manager.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/layout_options.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/paddle.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/surya.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/tatr.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/layout/yolo.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/text_options.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/text_structure.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/analyzers/utils.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/core/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/core/element_manager.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/elements/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/elements/line.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/elements/rect.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/elements/text.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/ocr/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/ocr/engine.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/ocr/engine_easyocr.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/ocr/engine_paddle.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/ocr/engine_surya.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/ocr/ocr_manager.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/ocr/ocr_options.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/qa/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/selectors/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/templates/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/templates/ocr_debug.html +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/utils/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/utils/highlighting.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/utils/reading_order.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/widgets/__init__.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/widgets/frontend/viewer.js +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf/widgets/viewer.py +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf.egg-info/dependency_links.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf.egg-info/requires.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/natural_pdf.egg-info/top_level.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/all_detected_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/all_elements.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/basic_highlighting.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/chainable_layout.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/chained_analysis.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/color_names.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/color_names_with_boxes.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/conf_display_highlight_all.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/conf_display_highlight_layout.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/conf_display_layout_only.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/confidence_color_coded.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/debug_page_image.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/detected_table.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/dimension_analysis.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/direct_ocr_debug.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/easyocr_debug_input.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/easyocr_results.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/easyocr_test_input.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/exclusion_optimization_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/explicit_confidence_display.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/footer_overlap_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_all.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_all_styles.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_all_with_all_layouts.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_all_with_attrs.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_all_with_yolo.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_by_confidence.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_color_test_1.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_color_test_2.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_color_test_3.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_color_test_4.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_layout_method.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_multiple.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_no_attrs.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_region.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_single.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_specific_types.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_specific_types_with_boxes.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_specific_types_with_tables.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_test_colors.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_test_individual.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_test_individual_annotated.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_test_individual_with_structure.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_test_individual_with_structure_yolo.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_test_individual_with_tables.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/highlight_with_attrs.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/layout_conf_default.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/layout_conf_high.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/layout_detection.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/layout_fix_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/layout_fix_test2.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/layout_fix_test3.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/layout_fix_test4.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/model_comparison.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/multiple_attributes_display.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_confidence_visualization.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_debug.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_debug_page.html +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_highlight_all_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_highlight_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_highlighted.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_simplified.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_threshold_comparison.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_visualization_clean.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_visualization_highlights.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/ocr_visualization_text.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddle_layout_detection.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddle_layout_polygons.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddle_layout_sources.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddle_layout_with_text.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddle_layout_without_text.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddleocr_highlights.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddleocr_results.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/paddleocr_test_input.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/page_1_for_ocr.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/page_4_for_ocr.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/region_exclusion_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/region_management_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/region_ocr_cropped.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/region_ocr_debug.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/region_ocr_full_page.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/region_ocr_highlighted.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/spatial_navigation.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/standard_highlight_all.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/table_no_ocr.csv +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/table_structure.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/table_structure_detail.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/table_with_ocr.csv +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/tatr_cells_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/tatr_ocr_table_test.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/tatr_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/tatr_regions.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/text_styles.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/titles_only.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/width_1200px.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/width_800px.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/width_default.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/width_with_scale.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/yolo_regions.png +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/output/yolo_regions.txt +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/.gitkeep +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/01-practice.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/0500000US42001.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/0500000US42007.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/2014 Statistics.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/2019 Statistics.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pdfs/needs-ocr.pdf +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/publish.sh +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/pyproject.toml +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/run_all_tutorials.sh +0 -0
- {natural_pdf-0.1.0 → natural_pdf-1.1.1}/setup.cfg +0 -0
@@ -8,6 +8,8 @@ on:
|
|
8
8
|
- 'docs/**'
|
9
9
|
- 'mkdocs.yml'
|
10
10
|
- '.github/workflows/docs.yml'
|
11
|
+
tags:
|
12
|
+
- 'v*'
|
11
13
|
|
12
14
|
permissions:
|
13
15
|
contents: write
|
@@ -27,7 +29,7 @@ jobs:
|
|
27
29
|
- name: Install dependencies
|
28
30
|
run: |
|
29
31
|
python -m pip install --upgrade pip
|
30
|
-
pip install mkdocs-material mkdocs pymdown-extensions mkdocstrings mkdocstrings-python mkdocs-jupyter
|
32
|
+
pip install mkdocs-material mkdocs pymdown-extensions mkdocstrings mkdocstrings-python mkdocs-jupyter mkdocs-exclude
|
31
33
|
pip install -e .
|
32
34
|
|
33
35
|
- name: Build docs
|
@@ -1,3 +1,12 @@
|
|
1
|
+
.notebook_cache.json
|
2
|
+
Untitled.ipynb
|
3
|
+
conversation.md
|
4
|
+
docs/tutorials/pdfs
|
5
|
+
install.sh
|
6
|
+
notebooks/Examples.md
|
7
|
+
transcript.md
|
8
|
+
|
9
|
+
|
1
10
|
# Created by https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
|
2
11
|
# Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,visualstudiocode,jupyternotebooks
|
3
12
|
|
@@ -0,0 +1,124 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: natural-pdf
|
3
|
+
Version: 1.1.1
|
4
|
+
Summary: A more intuitive interface for working with PDFs
|
5
|
+
Author-email: Jonathan Soma <jonathan.soma@gmail.com>
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/jsoma/natural-pdf
|
8
|
+
Project-URL: Repository, https://github.com/jsoma/natural-pdf
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: Operating System :: OS Independent
|
11
|
+
Requires-Python: >=3.7
|
12
|
+
Description-Content-Type: text/markdown
|
13
|
+
License-File: LICENSE
|
14
|
+
Requires-Dist: pdfplumber>=0.7.0
|
15
|
+
Requires-Dist: Pillow>=8.0.0
|
16
|
+
Requires-Dist: colour>=0.1.5
|
17
|
+
Requires-Dist: numpy>=1.20.0
|
18
|
+
Requires-Dist: urllib3>=1.26.0
|
19
|
+
Requires-Dist: torch>=2.0.0
|
20
|
+
Requires-Dist: torchvision>=0.15.0
|
21
|
+
Requires-Dist: transformers>=4.30.0
|
22
|
+
Requires-Dist: huggingface_hub>=0.19.0
|
23
|
+
Provides-Extra: interactive
|
24
|
+
Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "interactive"
|
25
|
+
Provides-Extra: easyocr
|
26
|
+
Requires-Dist: easyocr; extra == "easyocr"
|
27
|
+
Provides-Extra: paddle
|
28
|
+
Requires-Dist: paddlepaddle; extra == "paddle"
|
29
|
+
Requires-Dist: paddleocr; extra == "paddle"
|
30
|
+
Provides-Extra: layout-yolo
|
31
|
+
Requires-Dist: doclayout_yolo; extra == "layout-yolo"
|
32
|
+
Provides-Extra: surya
|
33
|
+
Requires-Dist: surya-ocr; extra == "surya"
|
34
|
+
Provides-Extra: qa
|
35
|
+
Provides-Extra: all
|
36
|
+
Requires-Dist: ipywidgets<9.0.0,>=7.0.0; extra == "all"
|
37
|
+
Requires-Dist: easyocr; extra == "all"
|
38
|
+
Requires-Dist: paddlepaddle; extra == "all"
|
39
|
+
Requires-Dist: paddleocr; extra == "all"
|
40
|
+
Requires-Dist: doclayout_yolo; extra == "all"
|
41
|
+
Requires-Dist: surya-ocr; extra == "all"
|
42
|
+
Dynamic: license-file
|
43
|
+
|
44
|
+
# Natural PDF
|
45
|
+
|
46
|
+
A friendly library for working with PDFs, built on top of [pdfplumber](https://github.com/jsvine/pdfplumber).
|
47
|
+
|
48
|
+
Natural PDF lets you find and extract content from PDFs using simple code that makes sense.
|
49
|
+
|
50
|
+
- [Complete documentation here](https://jsoma.github.io/natural-pdf)
|
51
|
+
- [Live demos here](https://colab.research.google.com/github/jsoma/natural-pdf/)
|
52
|
+
|
53
|
+
<div style="max-width: 400px; margin: auto"><a href="sample-screen.png"><img src="sample-screen.png"></a></div>
|
54
|
+
|
55
|
+
## Installation
|
56
|
+
|
57
|
+
```bash
|
58
|
+
pip install natural-pdf
|
59
|
+
```
|
60
|
+
|
61
|
+
For optional features like specific OCR engines, layout analysis models, or the interactive Jupyter widget, you can install extras:
|
62
|
+
|
63
|
+
```bash
|
64
|
+
# Example: Install with EasyOCR support
|
65
|
+
pip install natural-pdf[easyocr]
|
66
|
+
pip install natural-pdf[surya]
|
67
|
+
pip install natural-pdf[paddle]
|
68
|
+
|
69
|
+
# Example: Install with interactive viewer support
|
70
|
+
pip install natural-pdf[interactive]
|
71
|
+
|
72
|
+
# Install everything
|
73
|
+
pip install natural-pdf[all]
|
74
|
+
```
|
75
|
+
|
76
|
+
See the [installation guide](https://jsoma.github.io/natural-pdf/installation/) for more details on extras.
|
77
|
+
|
78
|
+
## Quick Start
|
79
|
+
|
80
|
+
```python
|
81
|
+
from natural_pdf import PDF
|
82
|
+
|
83
|
+
# Open a PDF
|
84
|
+
pdf = PDF('document.pdf')
|
85
|
+
page = pdf.pages[0]
|
86
|
+
|
87
|
+
# Find elements using CSS-like selectors
|
88
|
+
heading = page.find('text:contains("Summary"):bold')
|
89
|
+
|
90
|
+
# Extract content below the heading
|
91
|
+
content = heading.below().extract_text()
|
92
|
+
print("Content below Summary:", content[:100] + "...")
|
93
|
+
|
94
|
+
# Exclude headers/footers automatically (example)
|
95
|
+
# You might define these based on common text or position
|
96
|
+
page.add_exclusion(page.find('text:contains("CONFIDENTIAL")').above())
|
97
|
+
page.add_exclusion(page.find_all('line')[-1].below())
|
98
|
+
|
99
|
+
# Extract clean text from the page
|
100
|
+
clean_text = page.extract_text()
|
101
|
+
print("\nClean page text:", clean_text[:200] + "...")
|
102
|
+
|
103
|
+
# Highlight the heading and view the page
|
104
|
+
heading.highlight(color='red')
|
105
|
+
page.to_image()
|
106
|
+
```
|
107
|
+
|
108
|
+
And as a fun bonus, `page.viewer()` will provide an interactive method to explore the PDF.
|
109
|
+
|
110
|
+
## Key Features
|
111
|
+
|
112
|
+
Natural PDF offers a range of features for working with PDFs:
|
113
|
+
|
114
|
+
* **CSS-like Selectors:** Find elements using intuitive query strings (`page.find('text:bold')`).
|
115
|
+
* **Spatial Navigation:** Select content relative to other elements (`heading.below()`, `element.select_until(...)`).
|
116
|
+
* **Text & Table Extraction:** Get clean text or structured table data, automatically handling exclusions.
|
117
|
+
* **OCR Integration:** Extract text from scanned documents using engines like EasyOCR, PaddleOCR, or Surya.
|
118
|
+
* **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using AI models.
|
119
|
+
* **Document QA:** Ask natural language questions about your document's content.
|
120
|
+
* **Visual Debugging:** Highlight elements and use an interactive viewer or save images to understand your selections.
|
121
|
+
|
122
|
+
## Learn More
|
123
|
+
|
124
|
+
Dive deeper into the features and explore advanced usage in the [**Complete Documentation**](https://jsoma.github.io/natural-pdf).
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# Natural PDF
|
2
|
+
|
3
|
+
A friendly library for working with PDFs, built on top of [pdfplumber](https://github.com/jsvine/pdfplumber).
|
4
|
+
|
5
|
+
Natural PDF lets you find and extract content from PDFs using simple code that makes sense.
|
6
|
+
|
7
|
+
- [Complete documentation here](https://jsoma.github.io/natural-pdf)
|
8
|
+
- [Live demos here](https://colab.research.google.com/github/jsoma/natural-pdf/)
|
9
|
+
|
10
|
+
<div style="max-width: 400px; margin: auto"><a href="sample-screen.png"><img src="sample-screen.png" alt="Natural PDF sample screenshot"></a></div>
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
```bash
|
15
|
+
pip install natural-pdf
|
16
|
+
```
|
17
|
+
|
18
|
+
For optional features like specific OCR engines, layout analysis models, or the interactive Jupyter widget, you can install extras:
|
19
|
+
|
20
|
+
```bash
|
21
|
+
# Example: Install with support for a specific OCR engine (EasyOCR, Surya, or PaddleOCR)
|
22
|
+
pip install natural-pdf[easyocr]
|
23
|
+
pip install natural-pdf[surya]
|
24
|
+
pip install natural-pdf[paddle]
|
25
|
+
|
26
|
+
# Example: Install with interactive viewer support
|
27
|
+
pip install natural-pdf[interactive]
|
28
|
+
|
29
|
+
# Install everything
|
30
|
+
pip install natural-pdf[all]
|
31
|
+
```
|
32
|
+
|
33
|
+
See the [installation guide](https://jsoma.github.io/natural-pdf/installation/) for more details on extras.
|
34
|
+
|
35
|
+
## Quick Start
|
36
|
+
|
37
|
+
```python
|
38
|
+
from natural_pdf import PDF
|
39
|
+
|
40
|
+
# Open a PDF
|
41
|
+
pdf = PDF('document.pdf')
|
42
|
+
page = pdf.pages[0]
|
43
|
+
|
44
|
+
# Find elements using CSS-like selectors
|
45
|
+
heading = page.find('text:contains("Summary"):bold')
|
46
|
+
|
47
|
+
# Extract content below the heading
|
48
|
+
content = heading.below().extract_text()
|
49
|
+
print("Content below Summary:", content[:100] + "...")
|
50
|
+
|
51
|
+
# Exclude headers/footers automatically (example)
|
52
|
+
# You might define these based on common text or position
|
53
|
+
page.add_exclusion(page.find('text:contains("CONFIDENTIAL")').above())
|
54
|
+
page.add_exclusion(page.find_all('line')[-1].below())
|
55
|
+
|
56
|
+
# Extract clean text from the page
|
57
|
+
clean_text = page.extract_text()
|
58
|
+
print("\nClean page text:", clean_text[:200] + "...")
|
59
|
+
|
60
|
+
# Highlight the heading and view the page
|
61
|
+
heading.highlight(color='red')
|
62
|
+
page.to_image()
|
63
|
+
```
|
64
|
+
|
65
|
+
And as a fun bonus, `page.viewer()` provides an interactive way to explore the PDF.
|
66
|
+
|
67
|
+
## Key Features
|
68
|
+
|
69
|
+
Natural PDF offers a range of features for working with PDFs:
|
70
|
+
|
71
|
+
* **CSS-like Selectors:** Find elements using intuitive query strings (`page.find('text:bold')`).
|
72
|
+
* **Spatial Navigation:** Select content relative to other elements (`heading.below()`, `element.select_until(...)`).
|
73
|
+
* **Text & Table Extraction:** Get clean text or structured table data, automatically handling exclusions.
|
74
|
+
* **OCR Integration:** Extract text from scanned documents using engines like EasyOCR, PaddleOCR, or Surya.
|
75
|
+
* **Layout Analysis:** Detect document structures (titles, paragraphs, tables) using AI models.
|
76
|
+
* **Document QA:** Ask natural language questions about your document's content.
|
77
|
+
* **Visual Debugging:** Highlight elements and use an interactive viewer or save images to understand your selections.
|
78
|
+
|
79
|
+
## Learn More
|
80
|
+
|
81
|
+
Dive deeper into the features and explore advanced usage in the [**Complete Documentation**](https://jsoma.github.io/natural-pdf).
|
@@ -11,9 +11,10 @@ MARKDOWN_FILE=$1
|
|
11
11
|
NOTEBOOK_FILE="${MARKDOWN_FILE%.md}.ipynb"
|
12
12
|
|
13
13
|
echo "Converting $MARKDOWN_FILE to notebook..."
|
14
|
+
# Jupytext automatically adds tags based on the markdown metadata
|
14
15
|
jupytext --to ipynb "$MARKDOWN_FILE" || { echo "Conversion failed"; exit 1; }
|
15
16
|
|
16
17
|
echo "Executing notebook $NOTEBOOK_FILE..."
|
17
18
|
jupyter execute "$NOTEBOOK_FILE" --inplace || { echo "Execution failed"; exit 1; }
|
18
19
|
|
19
|
-
echo "Success! Notebook executed and results saved to $NOTEBOOK_FILE"
|
20
|
+
echo "Success! Notebook executed and results saved to $NOTEBOOK_FILE"
|
Binary file
|
@@ -1,5 +1,32 @@
|
|
1
1
|
/* Natural PDF - Minimal Custom Styling */
|
2
2
|
|
3
|
+
.jp-InputPrompt, .jp-OutputPrompt {
|
4
|
+
display: none !important;
|
5
|
+
}
|
6
|
+
|
7
|
+
.jupyter-wrapper .CodeMirror {
|
8
|
+
font-size: 0.85em !important;
|
9
|
+
}
|
10
|
+
|
11
|
+
.highlight-ipynb pre {
|
12
|
+
white-space: pre-wrap !important;
|
13
|
+
word-wrap: break-word !important;
|
14
|
+
}
|
15
|
+
|
16
|
+
.CodeMirror pre {
|
17
|
+
white-space: pre-wrap !important;
|
18
|
+
word-wrap: break-word !important;
|
19
|
+
}
|
20
|
+
|
21
|
+
.jp-CodeMirrorEditor {
|
22
|
+
max-width: 100%;
|
23
|
+
overflow-x: auto;
|
24
|
+
}
|
25
|
+
|
26
|
+
.jupyter-wrapper{
|
27
|
+
--jp-code-font-size: 0.85em !important;
|
28
|
+
}
|
29
|
+
|
3
30
|
/* Typography improvements */
|
4
31
|
.md-typeset h1 {
|
5
32
|
font-weight: 400;
|