natural-pdf 0.1.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. natural_pdf-0.1.0/.github/workflows/docs.yml +40 -0
  2. natural_pdf-0.1.0/.gitignore +266 -0
  3. natural_pdf-0.1.0/CLAUDE.md +1128 -0
  4. natural_pdf-0.1.0/LICENSE +21 -0
  5. natural_pdf-0.1.0/MANIFEST.in +8 -0
  6. natural_pdf-0.1.0/PKG-INFO +295 -0
  7. natural_pdf-0.1.0/README.md +252 -0
  8. natural_pdf-0.1.0/check_run_md.sh +19 -0
  9. natural_pdf-0.1.0/docs/api/index.md +386 -0
  10. natural_pdf-0.1.0/docs/assets/favicon.png +3 -0
  11. natural_pdf-0.1.0/docs/assets/favicon.svg +3 -0
  12. natural_pdf-0.1.0/docs/assets/javascripts/custom.js +17 -0
  13. natural_pdf-0.1.0/docs/assets/logo.svg +3 -0
  14. natural_pdf-0.1.0/docs/assets/social-preview.png +17 -0
  15. natural_pdf-0.1.0/docs/assets/social-preview.svg +17 -0
  16. natural_pdf-0.1.0/docs/assets/stylesheets/custom.css +38 -0
  17. natural_pdf-0.1.0/docs/document-qa/index.md +375 -0
  18. natural_pdf-0.1.0/docs/element-selection/index.md +270 -0
  19. natural_pdf-0.1.0/docs/explanations/index.md +28 -0
  20. natural_pdf-0.1.0/docs/explanations/ocr-challenges.md +221 -0
  21. natural_pdf-0.1.0/docs/explanations/pdf-extraction-challenges.md +203 -0
  22. natural_pdf-0.1.0/docs/explanations/pdf-fonts.md +214 -0
  23. natural_pdf-0.1.0/docs/index.md +310 -0
  24. natural_pdf-0.1.0/docs/installation/index.md +70 -0
  25. natural_pdf-0.1.0/docs/interactive-widget/index.md +0 -0
  26. natural_pdf-0.1.0/docs/layout-analysis/index.md +301 -0
  27. natural_pdf-0.1.0/docs/ocr/index.md +232 -0
  28. natural_pdf-0.1.0/docs/pdf-navigation/index.md +255 -0
  29. natural_pdf-0.1.0/docs/regions/index.md +302 -0
  30. natural_pdf-0.1.0/docs/tables/index.md +359 -0
  31. natural_pdf-0.1.0/docs/text-analysis/index.md +103 -0
  32. natural_pdf-0.1.0/docs/text-extraction/index.md +426 -0
  33. natural_pdf-0.1.0/docs/tutorials/01-loading-and-extraction.ipynb +291 -0
  34. natural_pdf-0.1.0/docs/tutorials/01-loading-and-extraction.md +87 -0
  35. natural_pdf-0.1.0/docs/tutorials/02-finding-elements.ipynb +318 -0
  36. natural_pdf-0.1.0/docs/tutorials/02-finding-elements.md +140 -0
  37. natural_pdf-0.1.0/docs/tutorials/03-extracting-blocks.ipynb +127 -0
  38. natural_pdf-0.1.0/docs/tutorials/03-extracting-blocks.md +41 -0
  39. natural_pdf-0.1.0/docs/tutorials/04-table-extraction.ipynb +94 -0
  40. natural_pdf-0.1.0/docs/tutorials/04-table-extraction.md +43 -0
  41. natural_pdf-0.1.0/docs/tutorials/05-excluding-content.ipynb +222 -0
  42. natural_pdf-0.1.0/docs/tutorials/05-excluding-content.md +98 -0
  43. natural_pdf-0.1.0/docs/tutorials/06-document-qa.ipynb +312 -0
  44. natural_pdf-0.1.0/docs/tutorials/06-document-qa.md +84 -0
  45. natural_pdf-0.1.0/docs/tutorials/07-layout-analysis.ipynb +240 -0
  46. natural_pdf-0.1.0/docs/tutorials/07-layout-analysis.md +59 -0
  47. natural_pdf-0.1.0/docs/tutorials/07-working-with-regions.ipynb +392 -0
  48. natural_pdf-0.1.0/docs/tutorials/07-working-with-regions.md +147 -0
  49. natural_pdf-0.1.0/docs/tutorials/08-spatial-navigation.ipynb +491 -0
  50. natural_pdf-0.1.0/docs/tutorials/08-spatial-navigation.md +186 -0
  51. natural_pdf-0.1.0/docs/tutorials/09-section-extraction.ipynb +2418 -0
  52. natural_pdf-0.1.0/docs/tutorials/09-section-extraction.md +252 -0
  53. natural_pdf-0.1.0/docs/tutorials/10-form-field-extraction.ipynb +467 -0
  54. natural_pdf-0.1.0/docs/tutorials/10-form-field-extraction.md +197 -0
  55. natural_pdf-0.1.0/docs/tutorials/11-enhanced-table-processing.ipynb +37 -0
  56. natural_pdf-0.1.0/docs/tutorials/11-enhanced-table-processing.md +5 -0
  57. natural_pdf-0.1.0/docs/tutorials/12-ocr-integration.ipynb +506 -0
  58. natural_pdf-0.1.0/docs/tutorials/12-ocr-integration.md +165 -0
  59. natural_pdf-0.1.0/docs/tutorials/README.ipynb +83 -0
  60. natural_pdf-0.1.0/docs/tutorials/README.md +51 -0
  61. natural_pdf-0.1.0/docs/visual-debugging/index.md +223 -0
  62. natural_pdf-0.1.0/examples/__init__.py +3 -0
  63. natural_pdf-0.1.0/examples/another_exclusion_example.py +20 -0
  64. natural_pdf-0.1.0/examples/basic_usage.py +190 -0
  65. natural_pdf-0.1.0/examples/boundary_exclusion_test.py +137 -0
  66. natural_pdf-0.1.0/examples/boundary_inclusion_fix_test.py +157 -0
  67. natural_pdf-0.1.0/examples/chainable_layout_example.py +70 -0
  68. natural_pdf-0.1.0/examples/color_basic_test.py +49 -0
  69. natural_pdf-0.1.0/examples/color_name_example.py +71 -0
  70. natural_pdf-0.1.0/examples/color_test.py +62 -0
  71. natural_pdf-0.1.0/examples/debug_ocr.py +91 -0
  72. natural_pdf-0.1.0/examples/direct_ocr_test.py +148 -0
  73. natural_pdf-0.1.0/examples/direct_paddle_test.py +99 -0
  74. natural_pdf-0.1.0/examples/direct_qa_example.py +71 -0
  75. natural_pdf-0.1.0/examples/docling_comprehensive_test.py +325 -0
  76. natural_pdf-0.1.0/examples/docling_example.py +192 -0
  77. natural_pdf-0.1.0/examples/docling_hierarchy_example.py +230 -0
  78. natural_pdf-0.1.0/examples/docling_text_sources.py +241 -0
  79. natural_pdf-0.1.0/examples/document_layout_analysis.py +123 -0
  80. natural_pdf-0.1.0/examples/document_qa_example.py +185 -0
  81. natural_pdf-0.1.0/examples/exclusion_count_debug.py +128 -0
  82. natural_pdf-0.1.0/examples/exclusion_debug.py +107 -0
  83. natural_pdf-0.1.0/examples/exclusion_example.py +150 -0
  84. natural_pdf-0.1.0/examples/exclusion_optimization_example.py +190 -0
  85. natural_pdf-0.1.0/examples/extract_text_test.py +128 -0
  86. natural_pdf-0.1.0/examples/font_aware_example.py +101 -0
  87. natural_pdf-0.1.0/examples/font_variant_example.py +124 -0
  88. natural_pdf-0.1.0/examples/footer_overlap_test.py +124 -0
  89. natural_pdf-0.1.0/examples/highlight_all_example.py +82 -0
  90. natural_pdf-0.1.0/examples/highlight_attributes_test.py +114 -0
  91. natural_pdf-0.1.0/examples/highlight_confidence_display.py +122 -0
  92. natural_pdf-0.1.0/examples/highlight_demo.py +110 -0
  93. natural_pdf-0.1.0/examples/highlight_float_test.py +71 -0
  94. natural_pdf-0.1.0/examples/highlight_test.py +147 -0
  95. natural_pdf-0.1.0/examples/highlighting_example.py +123 -0
  96. natural_pdf-0.1.0/examples/image_width_example.py +84 -0
  97. natural_pdf-0.1.0/examples/improved_api_example.py +128 -0
  98. natural_pdf-0.1.0/examples/improved_qa_example.py +66 -0
  99. natural_pdf-0.1.0/examples/layout_confidence_display_test.py +65 -0
  100. natural_pdf-0.1.0/examples/layout_confidence_test.py +82 -0
  101. natural_pdf-0.1.0/examples/layout_coordinate_debug.py +258 -0
  102. natural_pdf-0.1.0/examples/layout_highlight_test.py +77 -0
  103. natural_pdf-0.1.0/examples/logging_example.py +70 -0
  104. natural_pdf-0.1.0/examples/ocr_comprehensive.py +193 -0
  105. natural_pdf-0.1.0/examples/ocr_debug_example.py +87 -0
  106. natural_pdf-0.1.0/examples/ocr_default_test.py +97 -0
  107. natural_pdf-0.1.0/examples/ocr_engine_comparison.py +235 -0
  108. natural_pdf-0.1.0/examples/ocr_example.py +89 -0
  109. natural_pdf-0.1.0/examples/ocr_simplified_params.py +79 -0
  110. natural_pdf-0.1.0/examples/ocr_visualization.py +102 -0
  111. natural_pdf-0.1.0/examples/ocr_visualization_test.py +121 -0
  112. natural_pdf-0.1.0/examples/paddle_layout_example.py +315 -0
  113. natural_pdf-0.1.0/examples/paddle_layout_simple.py +74 -0
  114. natural_pdf-0.1.0/examples/paddleocr_example.py +224 -0
  115. natural_pdf-0.1.0/examples/page_collection_example.py +103 -0
  116. natural_pdf-0.1.0/examples/polygon_highlight_example.py +83 -0
  117. natural_pdf-0.1.0/examples/position_methods_example.py +134 -0
  118. natural_pdf-0.1.0/examples/position_output/position_methods.png +0 -0
  119. natural_pdf-0.1.0/examples/region_boundary_test.py +73 -0
  120. natural_pdf-0.1.0/examples/region_exclusion_test.py +149 -0
  121. natural_pdf-0.1.0/examples/region_expand_example.py +109 -0
  122. natural_pdf-0.1.0/examples/region_image_example.py +116 -0
  123. natural_pdf-0.1.0/examples/region_ocr_test.py +119 -0
  124. natural_pdf-0.1.0/examples/region_sections_example.py +115 -0
  125. natural_pdf-0.1.0/examples/school_books.py +49 -0
  126. natural_pdf-0.1.0/examples/school_books_all.py +52 -0
  127. natural_pdf-0.1.0/examples/scouring.py +36 -0
  128. natural_pdf-0.1.0/examples/section_extraction_example.py +232 -0
  129. natural_pdf-0.1.0/examples/section_output/headings.png +0 -0
  130. natural_pdf-0.1.0/examples/section_output/section_1.png +0 -0
  131. natural_pdf-0.1.0/examples/section_output/section_2.png +0 -0
  132. natural_pdf-0.1.0/examples/section_output/section_3.png +0 -0
  133. natural_pdf-0.1.0/examples/section_output/section_4.png +0 -0
  134. natural_pdf-0.1.0/examples/section_output/section_5.png +0 -0
  135. natural_pdf-0.1.0/examples/section_output/section_6.png +0 -0
  136. natural_pdf-0.1.0/examples/section_output/sections_no_grouping.png +0 -0
  137. natural_pdf-0.1.0/examples/section_output/sections_with_grouping.png +0 -0
  138. natural_pdf-0.1.0/examples/separator_output/sections_both.png +0 -0
  139. natural_pdf-0.1.0/examples/separator_output/sections_end.png +0 -0
  140. natural_pdf-0.1.0/examples/separator_output/sections_none.png +0 -0
  141. natural_pdf-0.1.0/examples/separator_output/sections_start.png +0 -0
  142. natural_pdf-0.1.0/examples/separator_output/separators.png +0 -0
  143. natural_pdf-0.1.0/examples/simple_document_qa.py +97 -0
  144. natural_pdf-0.1.0/examples/spatial_navigation_example.py +108 -0
  145. natural_pdf-0.1.0/examples/start_end_output/elements.png +0 -0
  146. natural_pdf-0.1.0/examples/table_extraction_example.py +135 -0
  147. natural_pdf-0.1.0/examples/table_structure_detection.py +155 -0
  148. natural_pdf-0.1.0/examples/tatr_cells_test.py +56 -0
  149. natural_pdf-0.1.0/examples/tatr_ocr_table_test.py +94 -0
  150. natural_pdf-0.1.0/examples/text_search_example.py +122 -0
  151. natural_pdf-0.1.0/examples/text_style_example.py +109 -0
  152. natural_pdf-0.1.0/examples/tiny-text.py +61 -0
  153. natural_pdf-0.1.0/examples/until_boundaries_example.py +156 -0
  154. natural_pdf-0.1.0/examples/until_example.py +112 -0
  155. natural_pdf-0.1.0/examples/until_output/until_boundaries_headings.png +0 -0
  156. natural_pdf-0.1.0/examples/url_pdf_example.py +45 -0
  157. natural_pdf-0.1.0/examples/very_basics.py +15 -0
  158. natural_pdf-0.1.0/mkdocs.yml +136 -0
  159. natural_pdf-0.1.0/natural_pdf/__init__.py +55 -0
  160. natural_pdf-0.1.0/natural_pdf/analyzers/__init__.py +6 -0
  161. natural_pdf-0.1.0/natural_pdf/analyzers/layout/__init__.py +1 -0
  162. natural_pdf-0.1.0/natural_pdf/analyzers/layout/base.py +151 -0
  163. natural_pdf-0.1.0/natural_pdf/analyzers/layout/docling.py +247 -0
  164. natural_pdf-0.1.0/natural_pdf/analyzers/layout/layout_analyzer.py +166 -0
  165. natural_pdf-0.1.0/natural_pdf/analyzers/layout/layout_manager.py +200 -0
  166. natural_pdf-0.1.0/natural_pdf/analyzers/layout/layout_options.py +78 -0
  167. natural_pdf-0.1.0/natural_pdf/analyzers/layout/paddle.py +240 -0
  168. natural_pdf-0.1.0/natural_pdf/analyzers/layout/surya.py +151 -0
  169. natural_pdf-0.1.0/natural_pdf/analyzers/layout/tatr.py +251 -0
  170. natural_pdf-0.1.0/natural_pdf/analyzers/layout/yolo.py +165 -0
  171. natural_pdf-0.1.0/natural_pdf/analyzers/text_options.py +60 -0
  172. natural_pdf-0.1.0/natural_pdf/analyzers/text_structure.py +270 -0
  173. natural_pdf-0.1.0/natural_pdf/analyzers/utils.py +57 -0
  174. natural_pdf-0.1.0/natural_pdf/core/__init__.py +3 -0
  175. natural_pdf-0.1.0/natural_pdf/core/element_manager.py +457 -0
  176. natural_pdf-0.1.0/natural_pdf/core/highlighting_service.py +698 -0
  177. natural_pdf-0.1.0/natural_pdf/core/page.py +1444 -0
  178. natural_pdf-0.1.0/natural_pdf/core/pdf.py +653 -0
  179. natural_pdf-0.1.0/natural_pdf/elements/__init__.py +3 -0
  180. natural_pdf-0.1.0/natural_pdf/elements/base.py +761 -0
  181. natural_pdf-0.1.0/natural_pdf/elements/collections.py +1345 -0
  182. natural_pdf-0.1.0/natural_pdf/elements/line.py +140 -0
  183. natural_pdf-0.1.0/natural_pdf/elements/rect.py +122 -0
  184. natural_pdf-0.1.0/natural_pdf/elements/region.py +1793 -0
  185. natural_pdf-0.1.0/natural_pdf/elements/text.py +304 -0
  186. natural_pdf-0.1.0/natural_pdf/ocr/__init__.py +56 -0
  187. natural_pdf-0.1.0/natural_pdf/ocr/engine.py +104 -0
  188. natural_pdf-0.1.0/natural_pdf/ocr/engine_easyocr.py +179 -0
  189. natural_pdf-0.1.0/natural_pdf/ocr/engine_paddle.py +204 -0
  190. natural_pdf-0.1.0/natural_pdf/ocr/engine_surya.py +171 -0
  191. natural_pdf-0.1.0/natural_pdf/ocr/ocr_manager.py +191 -0
  192. natural_pdf-0.1.0/natural_pdf/ocr/ocr_options.py +114 -0
  193. natural_pdf-0.1.0/natural_pdf/qa/__init__.py +3 -0
  194. natural_pdf-0.1.0/natural_pdf/qa/document_qa.py +396 -0
  195. natural_pdf-0.1.0/natural_pdf/selectors/__init__.py +4 -0
  196. natural_pdf-0.1.0/natural_pdf/selectors/parser.py +354 -0
  197. natural_pdf-0.1.0/natural_pdf/templates/__init__.py +1 -0
  198. natural_pdf-0.1.0/natural_pdf/templates/ocr_debug.html +517 -0
  199. natural_pdf-0.1.0/natural_pdf/utils/__init__.py +3 -0
  200. natural_pdf-0.1.0/natural_pdf/utils/highlighting.py +12 -0
  201. natural_pdf-0.1.0/natural_pdf/utils/reading_order.py +227 -0
  202. natural_pdf-0.1.0/natural_pdf/utils/visualization.py +223 -0
  203. natural_pdf-0.1.0/natural_pdf/widgets/__init__.py +4 -0
  204. natural_pdf-0.1.0/natural_pdf/widgets/frontend/viewer.js +88 -0
  205. natural_pdf-0.1.0/natural_pdf/widgets/viewer.py +765 -0
  206. natural_pdf-0.1.0/natural_pdf.egg-info/PKG-INFO +295 -0
  207. natural_pdf-0.1.0/natural_pdf.egg-info/SOURCES.txt +322 -0
  208. natural_pdf-0.1.0/natural_pdf.egg-info/dependency_links.txt +1 -0
  209. natural_pdf-0.1.0/natural_pdf.egg-info/requires.txt +35 -0
  210. natural_pdf-0.1.0/natural_pdf.egg-info/top_level.txt +1 -0
  211. natural_pdf-0.1.0/notebooks/Examples.ipynb +1166 -0
  212. natural_pdf-0.1.0/output/all_detected_regions.png +0 -0
  213. natural_pdf-0.1.0/output/all_elements.png +0 -0
  214. natural_pdf-0.1.0/output/basic_highlighting.png +0 -0
  215. natural_pdf-0.1.0/output/chainable_layout.png +0 -0
  216. natural_pdf-0.1.0/output/chained_analysis.png +0 -0
  217. natural_pdf-0.1.0/output/color_names.png +0 -0
  218. natural_pdf-0.1.0/output/color_names_with_boxes.png +0 -0
  219. natural_pdf-0.1.0/output/conf_display_highlight_all.png +0 -0
  220. natural_pdf-0.1.0/output/conf_display_highlight_layout.png +0 -0
  221. natural_pdf-0.1.0/output/conf_display_layout_only.png +0 -0
  222. natural_pdf-0.1.0/output/confidence_color_coded.png +0 -0
  223. natural_pdf-0.1.0/output/debug_page_image.png +0 -0
  224. natural_pdf-0.1.0/output/detected_table.png +0 -0
  225. natural_pdf-0.1.0/output/dimension_analysis.txt +48 -0
  226. natural_pdf-0.1.0/output/direct_ocr_debug.png +0 -0
  227. natural_pdf-0.1.0/output/easyocr_debug_input.png +0 -0
  228. natural_pdf-0.1.0/output/easyocr_results.png +0 -0
  229. natural_pdf-0.1.0/output/easyocr_test_input.png +0 -0
  230. natural_pdf-0.1.0/output/exclusion_optimization_regions.png +0 -0
  231. natural_pdf-0.1.0/output/explicit_confidence_display.png +0 -0
  232. natural_pdf-0.1.0/output/footer_overlap_test.png +0 -0
  233. natural_pdf-0.1.0/output/highlight_all.png +0 -0
  234. natural_pdf-0.1.0/output/highlight_all_styles.png +0 -0
  235. natural_pdf-0.1.0/output/highlight_all_with_all_layouts.png +0 -0
  236. natural_pdf-0.1.0/output/highlight_all_with_attrs.png +0 -0
  237. natural_pdf-0.1.0/output/highlight_all_with_yolo.png +0 -0
  238. natural_pdf-0.1.0/output/highlight_by_confidence.png +0 -0
  239. natural_pdf-0.1.0/output/highlight_color_test_1.png +0 -0
  240. natural_pdf-0.1.0/output/highlight_color_test_2.png +0 -0
  241. natural_pdf-0.1.0/output/highlight_color_test_3.png +0 -0
  242. natural_pdf-0.1.0/output/highlight_color_test_4.png +0 -0
  243. natural_pdf-0.1.0/output/highlight_layout_method.png +0 -0
  244. natural_pdf-0.1.0/output/highlight_multiple.png +0 -0
  245. natural_pdf-0.1.0/output/highlight_no_attrs.png +0 -0
  246. natural_pdf-0.1.0/output/highlight_region.png +0 -0
  247. natural_pdf-0.1.0/output/highlight_single.png +0 -0
  248. natural_pdf-0.1.0/output/highlight_specific_types.png +0 -0
  249. natural_pdf-0.1.0/output/highlight_specific_types_with_boxes.png +0 -0
  250. natural_pdf-0.1.0/output/highlight_specific_types_with_tables.png +0 -0
  251. natural_pdf-0.1.0/output/highlight_test.png +0 -0
  252. natural_pdf-0.1.0/output/highlight_test_colors.png +0 -0
  253. natural_pdf-0.1.0/output/highlight_test_individual.png +0 -0
  254. natural_pdf-0.1.0/output/highlight_test_individual_annotated.png +0 -0
  255. natural_pdf-0.1.0/output/highlight_test_individual_with_structure.png +0 -0
  256. natural_pdf-0.1.0/output/highlight_test_individual_with_structure_yolo.png +0 -0
  257. natural_pdf-0.1.0/output/highlight_test_individual_with_tables.png +0 -0
  258. natural_pdf-0.1.0/output/highlight_with_attrs.png +0 -0
  259. natural_pdf-0.1.0/output/layout_conf_default.png +0 -0
  260. natural_pdf-0.1.0/output/layout_conf_high.png +0 -0
  261. natural_pdf-0.1.0/output/layout_detection.png +0 -0
  262. natural_pdf-0.1.0/output/layout_fix_test.png +0 -0
  263. natural_pdf-0.1.0/output/layout_fix_test2.png +0 -0
  264. natural_pdf-0.1.0/output/layout_fix_test3.png +0 -0
  265. natural_pdf-0.1.0/output/layout_fix_test4.png +0 -0
  266. natural_pdf-0.1.0/output/model_comparison.png +0 -0
  267. natural_pdf-0.1.0/output/multiple_attributes_display.png +0 -0
  268. natural_pdf-0.1.0/output/ocr_confidence_visualization.png +0 -0
  269. natural_pdf-0.1.0/output/ocr_debug.png +0 -0
  270. natural_pdf-0.1.0/output/ocr_debug_page.html +517 -0
  271. natural_pdf-0.1.0/output/ocr_highlight_all_test.png +0 -0
  272. natural_pdf-0.1.0/output/ocr_highlight_test.png +0 -0
  273. natural_pdf-0.1.0/output/ocr_highlighted.png +0 -0
  274. natural_pdf-0.1.0/output/ocr_simplified.png +0 -0
  275. natural_pdf-0.1.0/output/ocr_threshold_comparison.png +0 -0
  276. natural_pdf-0.1.0/output/ocr_visualization_clean.png +0 -0
  277. natural_pdf-0.1.0/output/ocr_visualization_highlights.png +0 -0
  278. natural_pdf-0.1.0/output/ocr_visualization_text.png +0 -0
  279. natural_pdf-0.1.0/output/paddle_layout_detection.png +0 -0
  280. natural_pdf-0.1.0/output/paddle_layout_polygons.png +0 -0
  281. natural_pdf-0.1.0/output/paddle_layout_sources.png +0 -0
  282. natural_pdf-0.1.0/output/paddle_layout_with_text.png +0 -0
  283. natural_pdf-0.1.0/output/paddle_layout_without_text.png +0 -0
  284. natural_pdf-0.1.0/output/paddleocr_highlights.png +0 -0
  285. natural_pdf-0.1.0/output/paddleocr_results.png +0 -0
  286. natural_pdf-0.1.0/output/paddleocr_test_input.png +0 -0
  287. natural_pdf-0.1.0/output/page_1_for_ocr.png +0 -0
  288. natural_pdf-0.1.0/output/page_4_for_ocr.png +0 -0
  289. natural_pdf-0.1.0/output/region_exclusion_test.png +0 -0
  290. natural_pdf-0.1.0/output/region_management_test.png +0 -0
  291. natural_pdf-0.1.0/output/region_ocr_cropped.png +0 -0
  292. natural_pdf-0.1.0/output/region_ocr_debug.png +0 -0
  293. natural_pdf-0.1.0/output/region_ocr_full_page.png +0 -0
  294. natural_pdf-0.1.0/output/region_ocr_highlighted.png +0 -0
  295. natural_pdf-0.1.0/output/spatial_navigation.png +0 -0
  296. natural_pdf-0.1.0/output/standard_highlight_all.png +0 -0
  297. natural_pdf-0.1.0/output/table_no_ocr.csv +54 -0
  298. natural_pdf-0.1.0/output/table_structure.png +0 -0
  299. natural_pdf-0.1.0/output/table_structure_detail.png +0 -0
  300. natural_pdf-0.1.0/output/table_with_ocr.csv +54 -0
  301. natural_pdf-0.1.0/output/tatr_cells_test.png +0 -0
  302. natural_pdf-0.1.0/output/tatr_ocr_table_test.png +0 -0
  303. natural_pdf-0.1.0/output/tatr_regions.png +0 -0
  304. natural_pdf-0.1.0/output/tatr_regions.txt +16 -0
  305. natural_pdf-0.1.0/output/text_styles.png +0 -0
  306. natural_pdf-0.1.0/output/titles_only.png +0 -0
  307. natural_pdf-0.1.0/output/width_1200px.png +0 -0
  308. natural_pdf-0.1.0/output/width_800px.png +0 -0
  309. natural_pdf-0.1.0/output/width_default.png +0 -0
  310. natural_pdf-0.1.0/output/width_with_scale.png +0 -0
  311. natural_pdf-0.1.0/output/yolo_regions.png +0 -0
  312. natural_pdf-0.1.0/output/yolo_regions.txt +9 -0
  313. natural_pdf-0.1.0/pdfs/.gitkeep +0 -0
  314. natural_pdf-0.1.0/pdfs/01-practice.pdf +543 -0
  315. natural_pdf-0.1.0/pdfs/0500000US42001.pdf +0 -0
  316. natural_pdf-0.1.0/pdfs/0500000US42007.pdf +0 -0
  317. natural_pdf-0.1.0/pdfs/2014 Statistics.pdf +0 -0
  318. natural_pdf-0.1.0/pdfs/2019 Statistics.pdf +0 -0
  319. natural_pdf-0.1.0/pdfs/Atlanta_Public_Schools_GA_sample.pdf +0 -0
  320. natural_pdf-0.1.0/pdfs/needs-ocr.pdf +0 -0
  321. natural_pdf-0.1.0/publish.sh +58 -0
  322. natural_pdf-0.1.0/pyproject.toml +81 -0
  323. natural_pdf-0.1.0/run_all_tutorials.sh +31 -0
  324. natural_pdf-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,40 @@
1
+ name: Build and deploy docs
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - 'docs/**'
9
+ - 'mkdocs.yml'
10
+ - '.github/workflows/docs.yml'
11
+
12
+ permissions:
13
+ contents: write
14
+
15
+ jobs:
16
+ build-and-deploy:
17
+ runs-on: ubuntu-latest
18
+ steps:
19
+ - name: Checkout repository
20
+ uses: actions/checkout@v3
21
+
22
+ - name: Set up Python
23
+ uses: actions/setup-python@v4
24
+ with:
25
+ python-version: '3.10'
26
+
27
+ - name: Install dependencies
28
+ run: |
29
+ python -m pip install --upgrade pip
30
+ pip install mkdocs-material mkdocs pymdown-extensions mkdocstrings mkdocstrings-python mkdocs-jupyter
31
+ pip install -e .
32
+
33
+ - name: Build docs
34
+ run: mkdocs build
35
+
36
+ - name: Deploy to GitHub Pages
37
+ uses: JamesIves/github-pages-deploy-action@v4
38
+ with:
39
+ folder: site
40
+ branch: gh-pages
@@ -0,0 +1,266 @@
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,visualstudiocode,jupyternotebooks
3
+
4
+ ### JupyterNotebooks ###
5
+ # gitignore template for Jupyter Notebooks
6
+ # website: http://jupyter.org/
7
+
8
+ .ipynb_checkpoints
9
+ */.ipynb_checkpoints/*
10
+
11
+ # IPython
12
+ profile_default/
13
+ ipython_config.py
14
+
15
+ # Remove previous ipynb_checkpoints
16
+ # git rm -r .ipynb_checkpoints/
17
+
18
+ ### macOS ###
19
+ # General
20
+ .DS_Store
21
+ .AppleDouble
22
+ .LSOverride
23
+
24
+ # Icon must end with two \r
25
+ Icon
26
+
27
+
28
+ # Thumbnails
29
+ ._*
30
+
31
+ # Files that might appear in the root of a volume
32
+ .DocumentRevisions-V100
33
+ .fseventsd
34
+ .Spotlight-V100
35
+ .TemporaryItems
36
+ .Trashes
37
+ .VolumeIcon.icns
38
+ .com.apple.timemachine.donotpresent
39
+
40
+ # Directories potentially created on remote AFP share
41
+ .AppleDB
42
+ .AppleDesktop
43
+ Network Trash Folder
44
+ Temporary Items
45
+ .apdisk
46
+
47
+ ### macOS Patch ###
48
+ # iCloud generated files
49
+ *.icloud
50
+
51
+ ### Python ###
52
+ # Byte-compiled / optimized / DLL files
53
+ __pycache__/
54
+ *.py[cod]
55
+ *$py.class
56
+
57
+ # C extensions
58
+ *.so
59
+
60
+ # Distribution / packaging
61
+ .Python
62
+ build/
63
+ develop-eggs/
64
+ dist/
65
+ downloads/
66
+ eggs/
67
+ .eggs/
68
+ lib/
69
+ lib64/
70
+ parts/
71
+ sdist/
72
+ var/
73
+ wheels/
74
+ share/python-wheels/
75
+ *.egg-info/
76
+ .installed.cfg
77
+ *.egg
78
+ MANIFEST
79
+
80
+ # PyInstaller
81
+ # Usually these files are written by a python script from a template
82
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
83
+ *.manifest
84
+ *.spec
85
+
86
+ # Installer logs
87
+ pip-log.txt
88
+ pip-delete-this-directory.txt
89
+
90
+ # Unit test / coverage reports
91
+ htmlcov/
92
+ .tox/
93
+ .nox/
94
+ .coverage
95
+ .coverage.*
96
+ .cache
97
+ nosetests.xml
98
+ coverage.xml
99
+ *.cover
100
+ *.py,cover
101
+ .hypothesis/
102
+ .pytest_cache/
103
+ cover/
104
+
105
+ # Translations
106
+ *.mo
107
+ *.pot
108
+
109
+ # Django stuff:
110
+ *.log
111
+ local_settings.py
112
+ db.sqlite3
113
+ db.sqlite3-journal
114
+
115
+ # Flask stuff:
116
+ instance/
117
+ .webassets-cache
118
+
119
+ # Scrapy stuff:
120
+ .scrapy
121
+
122
+ # Sphinx documentation
123
+ docs/_build/
124
+
125
+ # PyBuilder
126
+ .pybuilder/
127
+ target/
128
+
129
+ # Jupyter Notebook
130
+
131
+ # IPython
132
+
133
+ # pyenv
134
+ # For a library or package, you might want to ignore these files since the code is
135
+ # intended to run in multiple environments; otherwise, check them in:
136
+ # .python-version
137
+
138
+ # pipenv
139
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
140
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
141
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
142
+ # install all needed dependencies.
143
+ #Pipfile.lock
144
+
145
+ # poetry
146
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
147
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
148
+ # commonly ignored for libraries.
149
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
150
+ #poetry.lock
151
+
152
+ # pdm
153
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
154
+ #pdm.lock
155
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
156
+ # in version control.
157
+ # https://pdm.fming.dev/#use-with-ide
158
+ .pdm.toml
159
+
160
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
161
+ __pypackages__/
162
+
163
+ # Celery stuff
164
+ celerybeat-schedule
165
+ celerybeat.pid
166
+
167
+ # SageMath parsed files
168
+ *.sage.py
169
+
170
+ # Environments
171
+ .env
172
+ .venv
173
+ env/
174
+ venv/
175
+ ENV/
176
+ env.bak/
177
+ venv.bak/
178
+
179
+ # Spyder project settings
180
+ .spyderproject
181
+ .spyproject
182
+
183
+ # Rope project settings
184
+ .ropeproject
185
+
186
+ # mkdocs documentation
187
+ /site
188
+
189
+ # mypy
190
+ .mypy_cache/
191
+ .dmypy.json
192
+ dmypy.json
193
+
194
+ # Pyre type checker
195
+ .pyre/
196
+
197
+ # pytype static type analyzer
198
+ .pytype/
199
+
200
+ # Cython debug symbols
201
+ cython_debug/
202
+
203
+ # PyCharm
204
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
205
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
206
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
207
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
208
+ #.idea/
209
+
210
+ ### Python Patch ###
211
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
212
+ poetry.toml
213
+
214
+ # ruff
215
+ .ruff_cache/
216
+
217
+ # LSP config files
218
+ pyrightconfig.json
219
+
220
+ ### VisualStudioCode ###
221
+ .vscode/*
222
+ !.vscode/settings.json
223
+ !.vscode/tasks.json
224
+ !.vscode/launch.json
225
+ !.vscode/extensions.json
226
+ !.vscode/*.code-snippets
227
+
228
+ # Local History for Visual Studio Code
229
+ .history/
230
+
231
+ # Built Visual Studio Code Extensions
232
+ *.vsix
233
+
234
+ ### VisualStudioCode Patch ###
235
+ # Ignore all local history of files
236
+ .history
237
+ .ionide
238
+
239
+ # End of https://www.toptal.com/developers/gitignore/api/python,macos,visualstudiocode,jupyternotebooks
240
+
241
+ # Project-specific additions
242
+ # Only exclude large PDFs that we don't want to track
243
+ pdfs/Nigeria*.pdf
244
+ pdfs/HARRY*.pdf
245
+ # But keep other PDFs
246
+ # Never ignore the .gitkeep placeholder, so the pdfs/ directory exists in a fresh clone
247
+ !pdfs/.gitkeep
248
+
249
+ # Output files
250
+ output/*
251
+ # Re-include the .gitkeep placeholder so the otherwise-ignored output/ directory exists in a fresh clone
252
+ !output/.gitkeep
253
+
254
+ # MkDocs generated site
255
+ site/
256
+
257
+ # Virtual environments
258
+ venv/
259
+ mkdocs-venv/
260
+ .venv/
261
+ env/
262
+
263
+ # PyPI distribution files
264
+ dist/
265
+ build/
266
+ *.egg-info/