natural-pdf 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- natural_pdf-0.1.0/.github/workflows/docs.yml +40 -0
- natural_pdf-0.1.0/.gitignore +266 -0
- natural_pdf-0.1.0/CLAUDE.md +1128 -0
- natural_pdf-0.1.0/LICENSE +21 -0
- natural_pdf-0.1.0/MANIFEST.in +8 -0
- natural_pdf-0.1.0/PKG-INFO +295 -0
- natural_pdf-0.1.0/README.md +252 -0
- natural_pdf-0.1.0/check_run_md.sh +19 -0
- natural_pdf-0.1.0/docs/api/index.md +386 -0
- natural_pdf-0.1.0/docs/assets/favicon.png +3 -0
- natural_pdf-0.1.0/docs/assets/favicon.svg +3 -0
- natural_pdf-0.1.0/docs/assets/javascripts/custom.js +17 -0
- natural_pdf-0.1.0/docs/assets/logo.svg +3 -0
- natural_pdf-0.1.0/docs/assets/social-preview.png +17 -0
- natural_pdf-0.1.0/docs/assets/social-preview.svg +17 -0
- natural_pdf-0.1.0/docs/assets/stylesheets/custom.css +38 -0
- natural_pdf-0.1.0/docs/document-qa/index.md +375 -0
- natural_pdf-0.1.0/docs/element-selection/index.md +270 -0
- natural_pdf-0.1.0/docs/explanations/index.md +28 -0
- natural_pdf-0.1.0/docs/explanations/ocr-challenges.md +221 -0
- natural_pdf-0.1.0/docs/explanations/pdf-extraction-challenges.md +203 -0
- natural_pdf-0.1.0/docs/explanations/pdf-fonts.md +214 -0
- natural_pdf-0.1.0/docs/index.md +310 -0
- natural_pdf-0.1.0/docs/installation/index.md +70 -0
- natural_pdf-0.1.0/docs/interactive-widget/index.md +0 -0
- natural_pdf-0.1.0/docs/layout-analysis/index.md +301 -0
- natural_pdf-0.1.0/docs/ocr/index.md +232 -0
- natural_pdf-0.1.0/docs/pdf-navigation/index.md +255 -0
- natural_pdf-0.1.0/docs/regions/index.md +302 -0
- natural_pdf-0.1.0/docs/tables/index.md +359 -0
- natural_pdf-0.1.0/docs/text-analysis/index.md +103 -0
- natural_pdf-0.1.0/docs/text-extraction/index.md +426 -0
- natural_pdf-0.1.0/docs/tutorials/01-loading-and-extraction.ipynb +291 -0
- natural_pdf-0.1.0/docs/tutorials/01-loading-and-extraction.md +87 -0
- natural_pdf-0.1.0/docs/tutorials/02-finding-elements.ipynb +318 -0
- natural_pdf-0.1.0/docs/tutorials/02-finding-elements.md +140 -0
- natural_pdf-0.1.0/docs/tutorials/03-extracting-blocks.ipynb +127 -0
- natural_pdf-0.1.0/docs/tutorials/03-extracting-blocks.md +41 -0
- natural_pdf-0.1.0/docs/tutorials/04-table-extraction.ipynb +94 -0
- natural_pdf-0.1.0/docs/tutorials/04-table-extraction.md +43 -0
- natural_pdf-0.1.0/docs/tutorials/05-excluding-content.ipynb +222 -0
- natural_pdf-0.1.0/docs/tutorials/05-excluding-content.md +98 -0
- natural_pdf-0.1.0/docs/tutorials/06-document-qa.ipynb +312 -0
- natural_pdf-0.1.0/docs/tutorials/06-document-qa.md +84 -0
- natural_pdf-0.1.0/docs/tutorials/07-layout-analysis.ipynb +240 -0
- natural_pdf-0.1.0/docs/tutorials/07-layout-analysis.md +59 -0
- natural_pdf-0.1.0/docs/tutorials/07-working-with-regions.ipynb +392 -0
- natural_pdf-0.1.0/docs/tutorials/07-working-with-regions.md +147 -0
- natural_pdf-0.1.0/docs/tutorials/08-spatial-navigation.ipynb +491 -0
- natural_pdf-0.1.0/docs/tutorials/08-spatial-navigation.md +186 -0
- natural_pdf-0.1.0/docs/tutorials/09-section-extraction.ipynb +2418 -0
- natural_pdf-0.1.0/docs/tutorials/09-section-extraction.md +252 -0
- natural_pdf-0.1.0/docs/tutorials/10-form-field-extraction.ipynb +467 -0
- natural_pdf-0.1.0/docs/tutorials/10-form-field-extraction.md +197 -0
- natural_pdf-0.1.0/docs/tutorials/11-enhanced-table-processing.ipynb +37 -0
- natural_pdf-0.1.0/docs/tutorials/11-enhanced-table-processing.md +5 -0
- natural_pdf-0.1.0/docs/tutorials/12-ocr-integration.ipynb +506 -0
- natural_pdf-0.1.0/docs/tutorials/12-ocr-integration.md +165 -0
- natural_pdf-0.1.0/docs/tutorials/README.ipynb +83 -0
- natural_pdf-0.1.0/docs/tutorials/README.md +51 -0
- natural_pdf-0.1.0/docs/visual-debugging/index.md +223 -0
- natural_pdf-0.1.0/examples/__init__.py +3 -0
- natural_pdf-0.1.0/examples/another_exclusion_example.py +20 -0
- natural_pdf-0.1.0/examples/basic_usage.py +190 -0
- natural_pdf-0.1.0/examples/boundary_exclusion_test.py +137 -0
- natural_pdf-0.1.0/examples/boundary_inclusion_fix_test.py +157 -0
- natural_pdf-0.1.0/examples/chainable_layout_example.py +70 -0
- natural_pdf-0.1.0/examples/color_basic_test.py +49 -0
- natural_pdf-0.1.0/examples/color_name_example.py +71 -0
- natural_pdf-0.1.0/examples/color_test.py +62 -0
- natural_pdf-0.1.0/examples/debug_ocr.py +91 -0
- natural_pdf-0.1.0/examples/direct_ocr_test.py +148 -0
- natural_pdf-0.1.0/examples/direct_paddle_test.py +99 -0
- natural_pdf-0.1.0/examples/direct_qa_example.py +71 -0
- natural_pdf-0.1.0/examples/docling_comprehensive_test.py +325 -0
- natural_pdf-0.1.0/examples/docling_example.py +192 -0
- natural_pdf-0.1.0/examples/docling_hierarchy_example.py +230 -0
- natural_pdf-0.1.0/examples/docling_text_sources.py +241 -0
- natural_pdf-0.1.0/examples/document_layout_analysis.py +123 -0
- natural_pdf-0.1.0/examples/document_qa_example.py +185 -0
- natural_pdf-0.1.0/examples/exclusion_count_debug.py +128 -0
- natural_pdf-0.1.0/examples/exclusion_debug.py +107 -0
- natural_pdf-0.1.0/examples/exclusion_example.py +150 -0
- natural_pdf-0.1.0/examples/exclusion_optimization_example.py +190 -0
- natural_pdf-0.1.0/examples/extract_text_test.py +128 -0
- natural_pdf-0.1.0/examples/font_aware_example.py +101 -0
- natural_pdf-0.1.0/examples/font_variant_example.py +124 -0
- natural_pdf-0.1.0/examples/footer_overlap_test.py +124 -0
- natural_pdf-0.1.0/examples/highlight_all_example.py +82 -0
- natural_pdf-0.1.0/examples/highlight_attributes_test.py +114 -0
- natural_pdf-0.1.0/examples/highlight_confidence_display.py +122 -0
- natural_pdf-0.1.0/examples/highlight_demo.py +110 -0
- natural_pdf-0.1.0/examples/highlight_float_test.py +71 -0
- natural_pdf-0.1.0/examples/highlight_test.py +147 -0
- natural_pdf-0.1.0/examples/highlighting_example.py +123 -0
- natural_pdf-0.1.0/examples/image_width_example.py +84 -0
- natural_pdf-0.1.0/examples/improved_api_example.py +128 -0
- natural_pdf-0.1.0/examples/improved_qa_example.py +66 -0
- natural_pdf-0.1.0/examples/layout_confidence_display_test.py +65 -0
- natural_pdf-0.1.0/examples/layout_confidence_test.py +82 -0
- natural_pdf-0.1.0/examples/layout_coordinate_debug.py +258 -0
- natural_pdf-0.1.0/examples/layout_highlight_test.py +77 -0
- natural_pdf-0.1.0/examples/logging_example.py +70 -0
- natural_pdf-0.1.0/examples/ocr_comprehensive.py +193 -0
- natural_pdf-0.1.0/examples/ocr_debug_example.py +87 -0
- natural_pdf-0.1.0/examples/ocr_default_test.py +97 -0
- natural_pdf-0.1.0/examples/ocr_engine_comparison.py +235 -0
- natural_pdf-0.1.0/examples/ocr_example.py +89 -0
- natural_pdf-0.1.0/examples/ocr_simplified_params.py +79 -0
- natural_pdf-0.1.0/examples/ocr_visualization.py +102 -0
- natural_pdf-0.1.0/examples/ocr_visualization_test.py +121 -0
- natural_pdf-0.1.0/examples/paddle_layout_example.py +315 -0
- natural_pdf-0.1.0/examples/paddle_layout_simple.py +74 -0
- natural_pdf-0.1.0/examples/paddleocr_example.py +224 -0
- natural_pdf-0.1.0/examples/page_collection_example.py +103 -0
- natural_pdf-0.1.0/examples/polygon_highlight_example.py +83 -0
- natural_pdf-0.1.0/examples/position_methods_example.py +134 -0
- natural_pdf-0.1.0/examples/position_output/position_methods.png +0 -0
- natural_pdf-0.1.0/examples/region_boundary_test.py +73 -0
- natural_pdf-0.1.0/examples/region_exclusion_test.py +149 -0
- natural_pdf-0.1.0/examples/region_expand_example.py +109 -0
- natural_pdf-0.1.0/examples/region_image_example.py +116 -0
- natural_pdf-0.1.0/examples/region_ocr_test.py +119 -0
- natural_pdf-0.1.0/examples/region_sections_example.py +115 -0
- natural_pdf-0.1.0/examples/school_books.py +49 -0
- natural_pdf-0.1.0/examples/school_books_all.py +52 -0
- natural_pdf-0.1.0/examples/scouring.py +36 -0
- natural_pdf-0.1.0/examples/section_extraction_example.py +232 -0
- natural_pdf-0.1.0/examples/section_output/headings.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_1.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_2.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_3.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_4.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_5.png +0 -0
- natural_pdf-0.1.0/examples/section_output/section_6.png +0 -0
- natural_pdf-0.1.0/examples/section_output/sections_no_grouping.png +0 -0
- natural_pdf-0.1.0/examples/section_output/sections_with_grouping.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_both.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_end.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_none.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/sections_start.png +0 -0
- natural_pdf-0.1.0/examples/separator_output/separators.png +0 -0
- natural_pdf-0.1.0/examples/simple_document_qa.py +97 -0
- natural_pdf-0.1.0/examples/spatial_navigation_example.py +108 -0
- natural_pdf-0.1.0/examples/start_end_output/elements.png +0 -0
- natural_pdf-0.1.0/examples/table_extraction_example.py +135 -0
- natural_pdf-0.1.0/examples/table_structure_detection.py +155 -0
- natural_pdf-0.1.0/examples/tatr_cells_test.py +56 -0
- natural_pdf-0.1.0/examples/tatr_ocr_table_test.py +94 -0
- natural_pdf-0.1.0/examples/text_search_example.py +122 -0
- natural_pdf-0.1.0/examples/text_style_example.py +109 -0
- natural_pdf-0.1.0/examples/tiny-text.py +61 -0
- natural_pdf-0.1.0/examples/until_boundaries_example.py +156 -0
- natural_pdf-0.1.0/examples/until_example.py +112 -0
- natural_pdf-0.1.0/examples/until_output/until_boundaries_headings.png +0 -0
- natural_pdf-0.1.0/examples/url_pdf_example.py +45 -0
- natural_pdf-0.1.0/examples/very_basics.py +15 -0
- natural_pdf-0.1.0/mkdocs.yml +136 -0
- natural_pdf-0.1.0/natural_pdf/__init__.py +55 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/__init__.py +6 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/__init__.py +1 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/base.py +151 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/docling.py +247 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/layout_analyzer.py +166 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/layout_manager.py +200 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/layout_options.py +78 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/paddle.py +240 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/surya.py +151 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/tatr.py +251 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/layout/yolo.py +165 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/text_options.py +60 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/text_structure.py +270 -0
- natural_pdf-0.1.0/natural_pdf/analyzers/utils.py +57 -0
- natural_pdf-0.1.0/natural_pdf/core/__init__.py +3 -0
- natural_pdf-0.1.0/natural_pdf/core/element_manager.py +457 -0
- natural_pdf-0.1.0/natural_pdf/core/highlighting_service.py +698 -0
- natural_pdf-0.1.0/natural_pdf/core/page.py +1444 -0
- natural_pdf-0.1.0/natural_pdf/core/pdf.py +653 -0
- natural_pdf-0.1.0/natural_pdf/elements/__init__.py +3 -0
- natural_pdf-0.1.0/natural_pdf/elements/base.py +761 -0
- natural_pdf-0.1.0/natural_pdf/elements/collections.py +1345 -0
- natural_pdf-0.1.0/natural_pdf/elements/line.py +140 -0
- natural_pdf-0.1.0/natural_pdf/elements/rect.py +122 -0
- natural_pdf-0.1.0/natural_pdf/elements/region.py +1793 -0
- natural_pdf-0.1.0/natural_pdf/elements/text.py +304 -0
- natural_pdf-0.1.0/natural_pdf/ocr/__init__.py +56 -0
- natural_pdf-0.1.0/natural_pdf/ocr/engine.py +104 -0
- natural_pdf-0.1.0/natural_pdf/ocr/engine_easyocr.py +179 -0
- natural_pdf-0.1.0/natural_pdf/ocr/engine_paddle.py +204 -0
- natural_pdf-0.1.0/natural_pdf/ocr/engine_surya.py +171 -0
- natural_pdf-0.1.0/natural_pdf/ocr/ocr_manager.py +191 -0
- natural_pdf-0.1.0/natural_pdf/ocr/ocr_options.py +114 -0
- natural_pdf-0.1.0/natural_pdf/qa/__init__.py +3 -0
- natural_pdf-0.1.0/natural_pdf/qa/document_qa.py +396 -0
- natural_pdf-0.1.0/natural_pdf/selectors/__init__.py +4 -0
- natural_pdf-0.1.0/natural_pdf/selectors/parser.py +354 -0
- natural_pdf-0.1.0/natural_pdf/templates/__init__.py +1 -0
- natural_pdf-0.1.0/natural_pdf/templates/ocr_debug.html +517 -0
- natural_pdf-0.1.0/natural_pdf/utils/__init__.py +3 -0
- natural_pdf-0.1.0/natural_pdf/utils/highlighting.py +12 -0
- natural_pdf-0.1.0/natural_pdf/utils/reading_order.py +227 -0
- natural_pdf-0.1.0/natural_pdf/utils/visualization.py +223 -0
- natural_pdf-0.1.0/natural_pdf/widgets/__init__.py +4 -0
- natural_pdf-0.1.0/natural_pdf/widgets/frontend/viewer.js +88 -0
- natural_pdf-0.1.0/natural_pdf/widgets/viewer.py +765 -0
- natural_pdf-0.1.0/natural_pdf.egg-info/PKG-INFO +295 -0
- natural_pdf-0.1.0/natural_pdf.egg-info/SOURCES.txt +322 -0
- natural_pdf-0.1.0/natural_pdf.egg-info/dependency_links.txt +1 -0
- natural_pdf-0.1.0/natural_pdf.egg-info/requires.txt +35 -0
- natural_pdf-0.1.0/natural_pdf.egg-info/top_level.txt +1 -0
- natural_pdf-0.1.0/notebooks/Examples.ipynb +1166 -0
- natural_pdf-0.1.0/output/all_detected_regions.png +0 -0
- natural_pdf-0.1.0/output/all_elements.png +0 -0
- natural_pdf-0.1.0/output/basic_highlighting.png +0 -0
- natural_pdf-0.1.0/output/chainable_layout.png +0 -0
- natural_pdf-0.1.0/output/chained_analysis.png +0 -0
- natural_pdf-0.1.0/output/color_names.png +0 -0
- natural_pdf-0.1.0/output/color_names_with_boxes.png +0 -0
- natural_pdf-0.1.0/output/conf_display_highlight_all.png +0 -0
- natural_pdf-0.1.0/output/conf_display_highlight_layout.png +0 -0
- natural_pdf-0.1.0/output/conf_display_layout_only.png +0 -0
- natural_pdf-0.1.0/output/confidence_color_coded.png +0 -0
- natural_pdf-0.1.0/output/debug_page_image.png +0 -0
- natural_pdf-0.1.0/output/detected_table.png +0 -0
- natural_pdf-0.1.0/output/dimension_analysis.txt +48 -0
- natural_pdf-0.1.0/output/direct_ocr_debug.png +0 -0
- natural_pdf-0.1.0/output/easyocr_debug_input.png +0 -0
- natural_pdf-0.1.0/output/easyocr_results.png +0 -0
- natural_pdf-0.1.0/output/easyocr_test_input.png +0 -0
- natural_pdf-0.1.0/output/exclusion_optimization_regions.png +0 -0
- natural_pdf-0.1.0/output/explicit_confidence_display.png +0 -0
- natural_pdf-0.1.0/output/footer_overlap_test.png +0 -0
- natural_pdf-0.1.0/output/highlight_all.png +0 -0
- natural_pdf-0.1.0/output/highlight_all_styles.png +0 -0
- natural_pdf-0.1.0/output/highlight_all_with_all_layouts.png +0 -0
- natural_pdf-0.1.0/output/highlight_all_with_attrs.png +0 -0
- natural_pdf-0.1.0/output/highlight_all_with_yolo.png +0 -0
- natural_pdf-0.1.0/output/highlight_by_confidence.png +0 -0
- natural_pdf-0.1.0/output/highlight_color_test_1.png +0 -0
- natural_pdf-0.1.0/output/highlight_color_test_2.png +0 -0
- natural_pdf-0.1.0/output/highlight_color_test_3.png +0 -0
- natural_pdf-0.1.0/output/highlight_color_test_4.png +0 -0
- natural_pdf-0.1.0/output/highlight_layout_method.png +0 -0
- natural_pdf-0.1.0/output/highlight_multiple.png +0 -0
- natural_pdf-0.1.0/output/highlight_no_attrs.png +0 -0
- natural_pdf-0.1.0/output/highlight_region.png +0 -0
- natural_pdf-0.1.0/output/highlight_single.png +0 -0
- natural_pdf-0.1.0/output/highlight_specific_types.png +0 -0
- natural_pdf-0.1.0/output/highlight_specific_types_with_boxes.png +0 -0
- natural_pdf-0.1.0/output/highlight_specific_types_with_tables.png +0 -0
- natural_pdf-0.1.0/output/highlight_test.png +0 -0
- natural_pdf-0.1.0/output/highlight_test_colors.png +0 -0
- natural_pdf-0.1.0/output/highlight_test_individual.png +0 -0
- natural_pdf-0.1.0/output/highlight_test_individual_annotated.png +0 -0
- natural_pdf-0.1.0/output/highlight_test_individual_with_structure.png +0 -0
- natural_pdf-0.1.0/output/highlight_test_individual_with_structure_yolo.png +0 -0
- natural_pdf-0.1.0/output/highlight_test_individual_with_tables.png +0 -0
- natural_pdf-0.1.0/output/highlight_with_attrs.png +0 -0
- natural_pdf-0.1.0/output/layout_conf_default.png +0 -0
- natural_pdf-0.1.0/output/layout_conf_high.png +0 -0
- natural_pdf-0.1.0/output/layout_detection.png +0 -0
- natural_pdf-0.1.0/output/layout_fix_test.png +0 -0
- natural_pdf-0.1.0/output/layout_fix_test2.png +0 -0
- natural_pdf-0.1.0/output/layout_fix_test3.png +0 -0
- natural_pdf-0.1.0/output/layout_fix_test4.png +0 -0
- natural_pdf-0.1.0/output/model_comparison.png +0 -0
- natural_pdf-0.1.0/output/multiple_attributes_display.png +0 -0
- natural_pdf-0.1.0/output/ocr_confidence_visualization.png +0 -0
- natural_pdf-0.1.0/output/ocr_debug.png +0 -0
- natural_pdf-0.1.0/output/ocr_debug_page.html +517 -0
- natural_pdf-0.1.0/output/ocr_highlight_all_test.png +0 -0
- natural_pdf-0.1.0/output/ocr_highlight_test.png +0 -0
- natural_pdf-0.1.0/output/ocr_highlighted.png +0 -0
- natural_pdf-0.1.0/output/ocr_simplified.png +0 -0
- natural_pdf-0.1.0/output/ocr_threshold_comparison.png +0 -0
- natural_pdf-0.1.0/output/ocr_visualization_clean.png +0 -0
- natural_pdf-0.1.0/output/ocr_visualization_highlights.png +0 -0
- natural_pdf-0.1.0/output/ocr_visualization_text.png +0 -0
- natural_pdf-0.1.0/output/paddle_layout_detection.png +0 -0
- natural_pdf-0.1.0/output/paddle_layout_polygons.png +0 -0
- natural_pdf-0.1.0/output/paddle_layout_sources.png +0 -0
- natural_pdf-0.1.0/output/paddle_layout_with_text.png +0 -0
- natural_pdf-0.1.0/output/paddle_layout_without_text.png +0 -0
- natural_pdf-0.1.0/output/paddleocr_highlights.png +0 -0
- natural_pdf-0.1.0/output/paddleocr_results.png +0 -0
- natural_pdf-0.1.0/output/paddleocr_test_input.png +0 -0
- natural_pdf-0.1.0/output/page_1_for_ocr.png +0 -0
- natural_pdf-0.1.0/output/page_4_for_ocr.png +0 -0
- natural_pdf-0.1.0/output/region_exclusion_test.png +0 -0
- natural_pdf-0.1.0/output/region_management_test.png +0 -0
- natural_pdf-0.1.0/output/region_ocr_cropped.png +0 -0
- natural_pdf-0.1.0/output/region_ocr_debug.png +0 -0
- natural_pdf-0.1.0/output/region_ocr_full_page.png +0 -0
- natural_pdf-0.1.0/output/region_ocr_highlighted.png +0 -0
- natural_pdf-0.1.0/output/spatial_navigation.png +0 -0
- natural_pdf-0.1.0/output/standard_highlight_all.png +0 -0
- natural_pdf-0.1.0/output/table_no_ocr.csv +54 -0
- natural_pdf-0.1.0/output/table_structure.png +0 -0
- natural_pdf-0.1.0/output/table_structure_detail.png +0 -0
- natural_pdf-0.1.0/output/table_with_ocr.csv +54 -0
- natural_pdf-0.1.0/output/tatr_cells_test.png +0 -0
- natural_pdf-0.1.0/output/tatr_ocr_table_test.png +0 -0
- natural_pdf-0.1.0/output/tatr_regions.png +0 -0
- natural_pdf-0.1.0/output/tatr_regions.txt +16 -0
- natural_pdf-0.1.0/output/text_styles.png +0 -0
- natural_pdf-0.1.0/output/titles_only.png +0 -0
- natural_pdf-0.1.0/output/width_1200px.png +0 -0
- natural_pdf-0.1.0/output/width_800px.png +0 -0
- natural_pdf-0.1.0/output/width_default.png +0 -0
- natural_pdf-0.1.0/output/width_with_scale.png +0 -0
- natural_pdf-0.1.0/output/yolo_regions.png +0 -0
- natural_pdf-0.1.0/output/yolo_regions.txt +9 -0
- natural_pdf-0.1.0/pdfs/.gitkeep +0 -0
- natural_pdf-0.1.0/pdfs/01-practice.pdf +543 -0
- natural_pdf-0.1.0/pdfs/0500000US42001.pdf +0 -0
- natural_pdf-0.1.0/pdfs/0500000US42007.pdf +0 -0
- natural_pdf-0.1.0/pdfs/2014 Statistics.pdf +0 -0
- natural_pdf-0.1.0/pdfs/2019 Statistics.pdf +0 -0
- natural_pdf-0.1.0/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
- natural_pdf-0.1.0/pdfs/needs-ocr.pdf +0 -0
- natural_pdf-0.1.0/publish.sh +58 -0
- natural_pdf-0.1.0/pyproject.toml +81 -0
- natural_pdf-0.1.0/run_all_tutorials.sh +31 -0
- natural_pdf-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
name: Build and deploy docs
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches:
|
6
|
+
- main
|
7
|
+
paths:
|
8
|
+
- 'docs/**'
|
9
|
+
- 'mkdocs.yml'
|
10
|
+
- '.github/workflows/docs.yml'
|
11
|
+
|
12
|
+
permissions:
|
13
|
+
contents: write
|
14
|
+
|
15
|
+
jobs:
|
16
|
+
build-and-deploy:
|
17
|
+
runs-on: ubuntu-latest
|
18
|
+
steps:
|
19
|
+
- name: Checkout repository
|
20
|
+
uses: actions/checkout@v3
|
21
|
+
|
22
|
+
- name: Set up Python
|
23
|
+
uses: actions/setup-python@v4
|
24
|
+
with:
|
25
|
+
python-version: '3.10'
|
26
|
+
|
27
|
+
- name: Install dependencies
|
28
|
+
run: |
|
29
|
+
python -m pip install --upgrade pip
|
30
|
+
pip install mkdocs-material mkdocs pymdown-extensions mkdocstrings mkdocstrings-python mkdocs-jupyter
|
31
|
+
pip install -e .
|
32
|
+
|
33
|
+
- name: Build docs
|
34
|
+
run: mkdocs build
|
35
|
+
|
36
|
+
- name: Deploy to GitHub Pages
|
37
|
+
uses: JamesIves/github-pages-deploy-action@v4
|
38
|
+
with:
|
39
|
+
folder: site
|
40
|
+
branch: gh-pages
|
@@ -0,0 +1,266 @@
|
|
1
|
+
# Created by https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
|
2
|
+
# Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,visualstudiocode,jupyternotebooks
|
3
|
+
|
4
|
+
### JupyterNotebooks ###
|
5
|
+
# gitignore template for Jupyter Notebooks
|
6
|
+
# website: http://jupyter.org/
|
7
|
+
|
8
|
+
.ipynb_checkpoints
|
9
|
+
*/.ipynb_checkpoints/*
|
10
|
+
|
11
|
+
# IPython
|
12
|
+
profile_default/
|
13
|
+
ipython_config.py
|
14
|
+
|
15
|
+
# Remove previous ipynb_checkpoints
|
16
|
+
# git rm -r .ipynb_checkpoints/
|
17
|
+
|
18
|
+
### macOS ###
|
19
|
+
# General
|
20
|
+
.DS_Store
|
21
|
+
.AppleDouble
|
22
|
+
.LSOverride
|
23
|
+
|
24
|
+
# Icon must end with two \r
|
25
|
+
Icon
|
26
|
+
|
27
|
+
|
28
|
+
# Thumbnails
|
29
|
+
._*
|
30
|
+
|
31
|
+
# Files that might appear in the root of a volume
|
32
|
+
.DocumentRevisions-V100
|
33
|
+
.fseventsd
|
34
|
+
.Spotlight-V100
|
35
|
+
.TemporaryItems
|
36
|
+
.Trashes
|
37
|
+
.VolumeIcon.icns
|
38
|
+
.com.apple.timemachine.donotpresent
|
39
|
+
|
40
|
+
# Directories potentially created on remote AFP share
|
41
|
+
.AppleDB
|
42
|
+
.AppleDesktop
|
43
|
+
Network Trash Folder
|
44
|
+
Temporary Items
|
45
|
+
.apdisk
|
46
|
+
|
47
|
+
### macOS Patch ###
|
48
|
+
# iCloud generated files
|
49
|
+
*.icloud
|
50
|
+
|
51
|
+
### Python ###
|
52
|
+
# Byte-compiled / optimized / DLL files
|
53
|
+
__pycache__/
|
54
|
+
*.py[cod]
|
55
|
+
*$py.class
|
56
|
+
|
57
|
+
# C extensions
|
58
|
+
*.so
|
59
|
+
|
60
|
+
# Distribution / packaging
|
61
|
+
.Python
|
62
|
+
build/
|
63
|
+
develop-eggs/
|
64
|
+
dist/
|
65
|
+
downloads/
|
66
|
+
eggs/
|
67
|
+
.eggs/
|
68
|
+
lib/
|
69
|
+
lib64/
|
70
|
+
parts/
|
71
|
+
sdist/
|
72
|
+
var/
|
73
|
+
wheels/
|
74
|
+
share/python-wheels/
|
75
|
+
*.egg-info/
|
76
|
+
.installed.cfg
|
77
|
+
*.egg
|
78
|
+
MANIFEST
|
79
|
+
|
80
|
+
# PyInstaller
|
81
|
+
# Usually these files are written by a python script from a template
|
82
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
83
|
+
*.manifest
|
84
|
+
*.spec
|
85
|
+
|
86
|
+
# Installer logs
|
87
|
+
pip-log.txt
|
88
|
+
pip-delete-this-directory.txt
|
89
|
+
|
90
|
+
# Unit test / coverage reports
|
91
|
+
htmlcov/
|
92
|
+
.tox/
|
93
|
+
.nox/
|
94
|
+
.coverage
|
95
|
+
.coverage.*
|
96
|
+
.cache
|
97
|
+
nosetests.xml
|
98
|
+
coverage.xml
|
99
|
+
*.cover
|
100
|
+
*.py,cover
|
101
|
+
.hypothesis/
|
102
|
+
.pytest_cache/
|
103
|
+
cover/
|
104
|
+
|
105
|
+
# Translations
|
106
|
+
*.mo
|
107
|
+
*.pot
|
108
|
+
|
109
|
+
# Django stuff:
|
110
|
+
*.log
|
111
|
+
local_settings.py
|
112
|
+
db.sqlite3
|
113
|
+
db.sqlite3-journal
|
114
|
+
|
115
|
+
# Flask stuff:
|
116
|
+
instance/
|
117
|
+
.webassets-cache
|
118
|
+
|
119
|
+
# Scrapy stuff:
|
120
|
+
.scrapy
|
121
|
+
|
122
|
+
# Sphinx documentation
|
123
|
+
docs/_build/
|
124
|
+
|
125
|
+
# PyBuilder
|
126
|
+
.pybuilder/
|
127
|
+
target/
|
128
|
+
|
129
|
+
# Jupyter Notebook
|
130
|
+
|
131
|
+
# IPython
|
132
|
+
|
133
|
+
# pyenv
|
134
|
+
# For a library or package, you might want to ignore these files since the code is
|
135
|
+
# intended to run in multiple environments; otherwise, check them in:
|
136
|
+
# .python-version
|
137
|
+
|
138
|
+
# pipenv
|
139
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
140
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
141
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
142
|
+
# install all needed dependencies.
|
143
|
+
#Pipfile.lock
|
144
|
+
|
145
|
+
# poetry
|
146
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
147
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
148
|
+
# commonly ignored for libraries.
|
149
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
150
|
+
#poetry.lock
|
151
|
+
|
152
|
+
# pdm
|
153
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
154
|
+
#pdm.lock
|
155
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
156
|
+
# in version control.
|
157
|
+
# https://pdm.fming.dev/#use-with-ide
|
158
|
+
.pdm.toml
|
159
|
+
|
160
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
161
|
+
__pypackages__/
|
162
|
+
|
163
|
+
# Celery stuff
|
164
|
+
celerybeat-schedule
|
165
|
+
celerybeat.pid
|
166
|
+
|
167
|
+
# SageMath parsed files
|
168
|
+
*.sage.py
|
169
|
+
|
170
|
+
# Environments
|
171
|
+
.env
|
172
|
+
.venv
|
173
|
+
env/
|
174
|
+
venv/
|
175
|
+
ENV/
|
176
|
+
env.bak/
|
177
|
+
venv.bak/
|
178
|
+
|
179
|
+
# Spyder project settings
|
180
|
+
.spyderproject
|
181
|
+
.spyproject
|
182
|
+
|
183
|
+
# Rope project settings
|
184
|
+
.ropeproject
|
185
|
+
|
186
|
+
# mkdocs documentation
|
187
|
+
/site
|
188
|
+
|
189
|
+
# mypy
|
190
|
+
.mypy_cache/
|
191
|
+
.dmypy.json
|
192
|
+
dmypy.json
|
193
|
+
|
194
|
+
# Pyre type checker
|
195
|
+
.pyre/
|
196
|
+
|
197
|
+
# pytype static type analyzer
|
198
|
+
.pytype/
|
199
|
+
|
200
|
+
# Cython debug symbols
|
201
|
+
cython_debug/
|
202
|
+
|
203
|
+
# PyCharm
|
204
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
205
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
206
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
207
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
208
|
+
#.idea/
|
209
|
+
|
210
|
+
### Python Patch ###
|
211
|
+
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
212
|
+
poetry.toml
|
213
|
+
|
214
|
+
# ruff
|
215
|
+
.ruff_cache/
|
216
|
+
|
217
|
+
# LSP config files
|
218
|
+
pyrightconfig.json
|
219
|
+
|
220
|
+
### VisualStudioCode ###
|
221
|
+
.vscode/*
|
222
|
+
!.vscode/settings.json
|
223
|
+
!.vscode/tasks.json
|
224
|
+
!.vscode/launch.json
|
225
|
+
!.vscode/extensions.json
|
226
|
+
!.vscode/*.code-snippets
|
227
|
+
|
228
|
+
# Local History for Visual Studio Code
|
229
|
+
.history/
|
230
|
+
|
231
|
+
# Built Visual Studio Code Extensions
|
232
|
+
*.vsix
|
233
|
+
|
234
|
+
### VisualStudioCode Patch ###
|
235
|
+
# Ignore all local history of files
|
236
|
+
.history
|
237
|
+
.ionide
|
238
|
+
|
239
|
+
# End of https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
|
240
|
+
|
241
|
+
# Project-specific additions
|
242
|
+
# Only exclude large PDFs that we don't want to track
|
243
|
+
pdfs/Nigeria*.pdf
|
244
|
+
pdfs/HARRY*.pdf
|
245
|
+
# All other PDFs under pdfs/ stay tracked (no rule ignores them)
|
246
|
+
# Keep the pdfs/.gitkeep placeholder tracked so the pdfs/ directory exists in a fresh checkout
|
247
|
+
!pdfs/.gitkeep
|
248
|
+
|
249
|
+
# Output files
|
250
|
+
output/*
|
251
|
+
# Keep the output/.gitkeep placeholder tracked so the output/ directory exists in a fresh checkout
|
252
|
+
!output/.gitkeep
|
253
|
+
|
254
|
+
# MkDocs generated site (the "mkdocs documentation" section above already ignores /site)
|
255
|
+
site/
|
256
|
+
|
257
|
+
# Virtual environments
|
258
|
+
venv/
|
259
|
+
mkdocs-venv/
|
260
|
+
.venv/
|
261
|
+
env/
|
262
|
+
|
263
|
+
# PyPI distribution files
|
264
|
+
dist/
|
265
|
+
build/
|
266
|
+
*.egg-info/
|