deepdoctection 0.26__tar.gz → 0.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (238)
  1. {deepdoctection-0.26 → deepdoctection-0.27}/PKG-INFO +128 -9
  2. {deepdoctection-0.26 → deepdoctection-0.27}/README.md +7 -7
  3. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/__init__.py +7 -1
  4. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/analyzer/dd.py +15 -3
  5. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/configs/conf_dd_one.yaml +4 -0
  6. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/convert.py +5 -10
  7. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/image.py +2 -2
  8. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/view.py +38 -18
  9. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/save.py +3 -3
  10. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/d2detect.py +1 -2
  11. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/doctrocr.py +14 -9
  12. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/common.py +2 -3
  13. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +6 -6
  14. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +3 -3
  15. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -2
  16. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +5 -3
  17. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +3 -1
  18. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/predict.py +1 -0
  19. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/laylmstruct.py +2 -3
  20. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/context.py +2 -2
  21. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/file_utils.py +63 -26
  22. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/fs.py +6 -6
  23. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/pdf_utils.py +2 -2
  24. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/settings.py +8 -1
  25. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/transform.py +9 -9
  26. deepdoctection-0.27/deepdoctection/utils/viz.py +659 -0
  27. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/PKG-INFO +128 -9
  28. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/requires.txt +9 -10
  29. {deepdoctection-0.26 → deepdoctection-0.27}/setup.cfg +1 -2
  30. {deepdoctection-0.26 → deepdoctection-0.27}/setup.py +11 -9
  31. {deepdoctection-0.26 → deepdoctection-0.27}/tests/analyzer/test_dd.py +6 -57
  32. {deepdoctection-0.26 → deepdoctection-0.27}/tests/conftest.py +2 -0
  33. deepdoctection-0.26/deepdoctection/utils/viz.py +0 -340
  34. {deepdoctection-0.26 → deepdoctection-0.27}/LICENSE +0 -0
  35. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/analyzer/__init__.py +0 -0
  36. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/configs/__init__.py +0 -0
  37. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  38. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/__init__.py +0 -0
  39. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/base.py +0 -0
  40. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/common.py +0 -0
  41. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/custom.py +0 -0
  42. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/custom_serialize.py +0 -0
  43. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/parallel_map.py +0 -0
  44. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/serialize.py +0 -0
  45. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/stats.py +0 -0
  46. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/__init__.py +0 -0
  47. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/annotation.py +0 -0
  48. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/box.py +0 -0
  49. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/__init__.py +0 -0
  50. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/adapter.py +0 -0
  51. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/base.py +0 -0
  52. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/dataflow_builder.py +0 -0
  53. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/info.py +0 -0
  54. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/__init__.py +0 -0
  55. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  56. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  57. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/funsd.py +0 -0
  58. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  59. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/layouttest.py +0 -0
  60. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/publaynet.py +0 -0
  61. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  62. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  63. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  64. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/xfund.py +0 -0
  65. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  66. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  67. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/registry.py +0 -0
  68. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/__init__.py +0 -0
  69. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/accmetric.py +0 -0
  70. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/base.py +0 -0
  71. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/cocometric.py +0 -0
  72. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/eval.py +0 -0
  73. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/registry.py +0 -0
  74. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/tedsmetric.py +0 -0
  75. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/tp_eval_callback.py +0 -0
  76. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/__init__.py +0 -0
  77. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/base.py +0 -0
  78. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/deskew.py +0 -0
  79. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/fastlang.py +0 -0
  80. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/hfdetr.py +0 -0
  81. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/hflayoutlm.py +0 -0
  82. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/model.py +0 -0
  83. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pdftext.py +0 -0
  84. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pt/__init__.py +0 -0
  85. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pt/nms.py +0 -0
  86. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pt/ptutils.py +0 -0
  87. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tessocr.py +0 -0
  88. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/texocr.py +0 -0
  89. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/__init__.py +0 -0
  90. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tfutils.py +0 -0
  91. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpcompat.py +0 -0
  92. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  93. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  94. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  95. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  96. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  97. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  98. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  99. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  100. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  101. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  102. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  103. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tpdetect.py +0 -0
  104. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/__init__.py +0 -0
  105. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/cats.py +0 -0
  106. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/cocostruct.py +0 -0
  107. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/d2struct.py +0 -0
  108. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/hfstruct.py +0 -0
  109. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/maputils.py +0 -0
  110. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/match.py +0 -0
  111. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/misc.py +0 -0
  112. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/pascalstruct.py +0 -0
  113. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/prodigystruct.py +0 -0
  114. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/pubstruct.py +0 -0
  115. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/tpstruct.py +0 -0
  116. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/xfundstruct.py +0 -0
  117. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/__init__.py +0 -0
  118. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/anngen.py +0 -0
  119. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/base.py +0 -0
  120. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/cell.py +0 -0
  121. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/common.py +0 -0
  122. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/concurrency.py +0 -0
  123. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/doctectionpipe.py +0 -0
  124. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/language.py +0 -0
  125. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/layout.py +0 -0
  126. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/lm.py +0 -0
  127. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/order.py +0 -0
  128. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/refine.py +0 -0
  129. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/registry.py +0 -0
  130. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/segment.py +0 -0
  131. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/text.py +0 -0
  132. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/transform.py +0 -0
  133. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/py.typed +0 -0
  134. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/__init__.py +0 -0
  135. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/d2_frcnn_train.py +0 -0
  136. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/hf_detr_train.py +0 -0
  137. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  138. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/tp_frcnn_train.py +0 -0
  139. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/__init__.py +0 -0
  140. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/concurrency.py +0 -0
  141. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/detection_types.py +0 -0
  142. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/develop.py +0 -0
  143. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/identifier.py +0 -0
  144. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/logger.py +0 -0
  145. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/metacfg.py +0 -0
  146. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/systools.py +0 -0
  147. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/tqdm.py +0 -0
  148. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/utils.py +0 -0
  149. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/SOURCES.txt +0 -0
  150. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/dependency_links.txt +0 -0
  151. {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/top_level.txt +0 -0
  152. {deepdoctection-0.26 → deepdoctection-0.27}/tests/__init__.py +0 -0
  153. {deepdoctection-0.26 → deepdoctection-0.27}/tests/analyzer/__init__.py +0 -0
  154. {deepdoctection-0.26 → deepdoctection-0.27}/tests/data.py +0 -0
  155. {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/__init__.py +0 -0
  156. {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/conftest.py +0 -0
  157. {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_common.py +0 -0
  158. {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_custom.py +0 -0
  159. {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_custom_serialize.py +0 -0
  160. {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_parallel_map.py +0 -0
  161. {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_stats.py +0 -0
  162. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/__init__.py +0 -0
  163. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/conftest.py +0 -0
  164. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_annotation.py +0 -0
  165. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_box.py +0 -0
  166. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_convert.py +0 -0
  167. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_image.py +0 -0
  168. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_view.py +0 -0
  169. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/__init__.py +0 -0
  170. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/__init__.py +0 -0
  171. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/conftest.py +0 -0
  172. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_doclaynet.py +0 -0
  173. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_fintabnet.py +0 -0
  174. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_funsd.py +0 -0
  175. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_iiitar13k.py +0 -0
  176. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_layouttest.py +0 -0
  177. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_publaynet.py +0 -0
  178. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_pubtables1m.py +0 -0
  179. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_pubtabnet.py +0 -0
  180. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_rvlcdip.py +0 -0
  181. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/test_adapter.py +0 -0
  182. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/test_info.py +0 -0
  183. {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/test_registry.py +0 -0
  184. {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/__init__.py +0 -0
  185. {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/conftest.py +0 -0
  186. {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_accmetric.py +0 -0
  187. {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_cocometric.py +0 -0
  188. {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_eval.py +0 -0
  189. {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_registry.py +0 -0
  190. {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_tedsmetric.py +0 -0
  191. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/__init__.py +0 -0
  192. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/conftest.py +0 -0
  193. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/data.py +0 -0
  194. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_deskew.py +0 -0
  195. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_doctrocr.py +0 -0
  196. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_fastlang.py +0 -0
  197. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_hfdetr.py +0 -0
  198. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_hflayoutlm.py +0 -0
  199. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_pdftext.py +0 -0
  200. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_tessocr.py +0 -0
  201. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_texocr.py +0 -0
  202. {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_tpdetect.py +0 -0
  203. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/__init__.py +0 -0
  204. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/conftest.py +0 -0
  205. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/data.py +0 -0
  206. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_cats.py +0 -0
  207. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_cocostruct.py +0 -0
  208. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_d2struct.py +0 -0
  209. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_hfstruct.py +0 -0
  210. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_iiitar13k.py +0 -0
  211. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_laylmstruct.py +0 -0
  212. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_misc.py +0 -0
  213. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_prodigystruct.py +0 -0
  214. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_pubstruct.py +0 -0
  215. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_tpstruct.py +0 -0
  216. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_utils.py +0 -0
  217. {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_xfundstruct.py +0 -0
  218. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/__init__.py +0 -0
  219. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_anngen.py +0 -0
  220. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_cell.py +0 -0
  221. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_common.py +0 -0
  222. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_language.py +0 -0
  223. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_layout.py +0 -0
  224. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_lm.py +0 -0
  225. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_order.py +0 -0
  226. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_refine.py +0 -0
  227. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_registry.py +0 -0
  228. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_segment.py +0 -0
  229. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_text.py +0 -0
  230. {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_transform.py +0 -0
  231. {deepdoctection-0.26 → deepdoctection-0.27}/tests/test_utils.py +0 -0
  232. {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/__init__.py +0 -0
  233. {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/conftest.py +0 -0
  234. {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/test_d2_frcnn_train.py +0 -0
  235. {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/test_tp_frcnn_train.py +0 -0
  236. {deepdoctection-0.26 → deepdoctection-0.27}/tests_d2/__init__.py +0 -0
  237. {deepdoctection-0.26 → deepdoctection-0.27}/tests_d2/conftest.py +0 -0
  238. {deepdoctection-0.26 → deepdoctection-0.27}/tests_d2/test_d2detect.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.26
3
+ Version: 0.27
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -13,17 +13,136 @@ Classifier: Programming Language :: Python :: 3.8
13
13
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
14
  Requires-Python: >=3.8
15
15
  Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: catalogue==2.0.7
18
+ Requires-Dist: huggingface_hub>=0.12.0
19
+ Requires-Dist: importlib-metadata>=4.11.2
20
+ Requires-Dist: jsonlines==3.0.0
21
+ Requires-Dist: mock==4.0.3
22
+ Requires-Dist: networkx>=2.7.1
23
+ Requires-Dist: numpy>=1.21
24
+ Requires-Dist: packaging>=20.0
25
+ Requires-Dist: Pillow>=10.0.0
26
+ Requires-Dist: pypdf2
27
+ Requires-Dist: pyyaml==6.0
28
+ Requires-Dist: pyzmq>=16
29
+ Requires-Dist: termcolor>=1.1
30
+ Requires-Dist: tabulate>=0.7.7
31
+ Requires-Dist: tqdm==4.64.0
16
32
  Provides-Extra: tf
33
+ Requires-Dist: catalogue==2.0.7; extra == "tf"
34
+ Requires-Dist: huggingface_hub>=0.12.0; extra == "tf"
35
+ Requires-Dist: importlib-metadata>=4.11.2; extra == "tf"
36
+ Requires-Dist: jsonlines==3.0.0; extra == "tf"
37
+ Requires-Dist: mock==4.0.3; extra == "tf"
38
+ Requires-Dist: networkx>=2.7.1; extra == "tf"
39
+ Requires-Dist: numpy>=1.21; extra == "tf"
40
+ Requires-Dist: packaging>=20.0; extra == "tf"
41
+ Requires-Dist: Pillow>=10.0.0; extra == "tf"
42
+ Requires-Dist: pypdf2; extra == "tf"
43
+ Requires-Dist: pyyaml==6.0; extra == "tf"
44
+ Requires-Dist: pyzmq>=16; extra == "tf"
45
+ Requires-Dist: termcolor>=1.1; extra == "tf"
46
+ Requires-Dist: tabulate>=0.7.7; extra == "tf"
47
+ Requires-Dist: tqdm==4.64.0; extra == "tf"
48
+ Requires-Dist: tensorpack; extra == "tf"
49
+ Requires-Dist: protobuf==3.20.1; extra == "tf"
50
+ Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
51
+ Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
52
+ Requires-Dist: python-doctr==0.7.0; extra == "tf"
53
+ Requires-Dist: pycocotools>=2.0.2; extra == "tf"
54
+ Requires-Dist: boto3; extra == "tf"
55
+ Requires-Dist: pdfplumber>=0.7.1; extra == "tf"
56
+ Requires-Dist: fasttext; extra == "tf"
57
+ Requires-Dist: jdeskew; extra == "tf"
58
+ Requires-Dist: apted==1.0.3; extra == "tf"
59
+ Requires-Dist: distance==0.1.3; extra == "tf"
60
+ Requires-Dist: lxml>=4.9.1; extra == "tf"
17
61
  Provides-Extra: pt
62
+ Requires-Dist: catalogue==2.0.7; extra == "pt"
63
+ Requires-Dist: huggingface_hub>=0.12.0; extra == "pt"
64
+ Requires-Dist: importlib-metadata>=4.11.2; extra == "pt"
65
+ Requires-Dist: jsonlines==3.0.0; extra == "pt"
66
+ Requires-Dist: mock==4.0.3; extra == "pt"
67
+ Requires-Dist: networkx>=2.7.1; extra == "pt"
68
+ Requires-Dist: numpy>=1.21; extra == "pt"
69
+ Requires-Dist: packaging>=20.0; extra == "pt"
70
+ Requires-Dist: Pillow>=10.0.0; extra == "pt"
71
+ Requires-Dist: pypdf2; extra == "pt"
72
+ Requires-Dist: pyyaml==6.0; extra == "pt"
73
+ Requires-Dist: pyzmq>=16; extra == "pt"
74
+ Requires-Dist: termcolor>=1.1; extra == "pt"
75
+ Requires-Dist: tabulate>=0.7.7; extra == "pt"
76
+ Requires-Dist: tqdm==4.64.0; extra == "pt"
77
+ Requires-Dist: timm; extra == "pt"
78
+ Requires-Dist: transformers; extra == "pt"
79
+ Requires-Dist: python-doctr==0.7.0; extra == "pt"
80
+ Requires-Dist: boto3; extra == "pt"
81
+ Requires-Dist: pdfplumber>=0.7.1; extra == "pt"
82
+ Requires-Dist: fasttext; extra == "pt"
83
+ Requires-Dist: jdeskew; extra == "pt"
84
+ Requires-Dist: apted==1.0.3; extra == "pt"
85
+ Requires-Dist: distance==0.1.3; extra == "pt"
86
+ Requires-Dist: lxml>=4.9.1; extra == "pt"
18
87
  Provides-Extra: docs
88
+ Requires-Dist: tensorpack; extra == "docs"
89
+ Requires-Dist: boto3; extra == "docs"
90
+ Requires-Dist: transformers; extra == "docs"
91
+ Requires-Dist: pdfplumber>=0.7.1; extra == "docs"
92
+ Requires-Dist: lxml>=4.9.1; extra == "docs"
93
+ Requires-Dist: lxml-stubs; extra == "docs"
94
+ Requires-Dist: jdeskew; extra == "docs"
95
+ Requires-Dist: jinja2==3.0.3; extra == "docs"
96
+ Requires-Dist: mkdocs-material; extra == "docs"
97
+ Requires-Dist: mkdocstrings-python; extra == "docs"
98
+ Requires-Dist: griffe==0.25.0; extra == "docs"
19
99
  Provides-Extra: dev
100
+ Requires-Dist: click; extra == "dev"
101
+ Requires-Dist: black==23.7.0; extra == "dev"
102
+ Requires-Dist: isort; extra == "dev"
103
+ Requires-Dist: pylint==2.17.4; extra == "dev"
104
+ Requires-Dist: mypy==1.4.1; extra == "dev"
105
+ Requires-Dist: wandb; extra == "dev"
106
+ Requires-Dist: types-PyYAML; extra == "dev"
107
+ Requires-Dist: types-termcolor==1.1.3; extra == "dev"
108
+ Requires-Dist: types-tabulate; extra == "dev"
109
+ Requires-Dist: types-tqdm; extra == "dev"
110
+ Requires-Dist: lxml-stubs; extra == "dev"
111
+ Requires-Dist: types-Pillow; extra == "dev"
112
+ Requires-Dist: types-urllib3; extra == "dev"
20
113
  Provides-Extra: test
114
+ Requires-Dist: pytest; extra == "test"
115
+ Requires-Dist: pytest-cov; extra == "test"
21
116
  Provides-Extra: hf
22
- License-File: LICENSE
117
+ Requires-Dist: catalogue==2.0.7; extra == "hf"
118
+ Requires-Dist: huggingface_hub>=0.12.0; extra == "hf"
119
+ Requires-Dist: importlib-metadata>=4.11.2; extra == "hf"
120
+ Requires-Dist: jsonlines==3.0.0; extra == "hf"
121
+ Requires-Dist: mock==4.0.3; extra == "hf"
122
+ Requires-Dist: networkx>=2.7.1; extra == "hf"
123
+ Requires-Dist: numpy>=1.21; extra == "hf"
124
+ Requires-Dist: packaging>=20.0; extra == "hf"
125
+ Requires-Dist: Pillow>=10.0.0; extra == "hf"
126
+ Requires-Dist: pypdf2; extra == "hf"
127
+ Requires-Dist: pyyaml==6.0; extra == "hf"
128
+ Requires-Dist: pyzmq>=16; extra == "hf"
129
+ Requires-Dist: termcolor>=1.1; extra == "hf"
130
+ Requires-Dist: tabulate>=0.7.7; extra == "hf"
131
+ Requires-Dist: tqdm==4.64.0; extra == "hf"
132
+ Requires-Dist: timm; extra == "hf"
133
+ Requires-Dist: transformers; extra == "hf"
134
+ Requires-Dist: python-doctr==0.7.0; extra == "hf"
135
+ Requires-Dist: boto3; extra == "hf"
136
+ Requires-Dist: pdfplumber>=0.7.1; extra == "hf"
137
+ Requires-Dist: fasttext; extra == "hf"
138
+ Requires-Dist: jdeskew; extra == "hf"
139
+ Requires-Dist: apted==1.0.3; extra == "hf"
140
+ Requires-Dist: distance==0.1.3; extra == "hf"
141
+ Requires-Dist: lxml>=4.9.1; extra == "hf"
23
142
 
24
143
 
25
144
  <p align="center">
26
- <img src="https://github.com/deepdoctection/deepdoctection/blob/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
145
+ <img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
27
146
  <h3 align="center">
28
147
  A Document AI Package
29
148
  </h3>
@@ -123,20 +242,20 @@ plt.axis('off')
123
242
  plt.imshow(image)
124
243
  ```
125
244
 
126
- ![text](./docs/tutorials/_imgs/dd_rm_sample.png)
245
+ ![text](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_sample.png)
127
246
 
128
247
  ```
129
248
  HTML(page.tables[0].html)
130
249
  ```
131
250
 
132
- ![table](./docs/tutorials/_imgs/dd_rm_table.png)
251
+ ![table](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_table.png)
133
252
 
134
253
 
135
254
  ```
136
255
  print(page.text)
137
256
  ```
138
257
 
139
- ![table](./docs/tutorials/_imgs/dd_rm_text.png)
258
+ ![table](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_text.png)
140
259
 
141
260
 
142
261
  ## Documentation
@@ -150,14 +269,14 @@ documentation.
150
269
 
151
270
  ## Requirements
152
271
 
153
- ![requirements](./docs/tutorials/_imgs/requirements_deepdoctection.png)
272
+ ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection.png)
154
273
 
155
274
  Everything in the overview listed below the **deep**doctection layer are necessary requirements and have to be installed
156
275
  separately.
157
276
 
158
277
  - Linux or macOS. (Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available)
159
278
  - Python >= 3.8
160
- - 1.8 <= PyTorch < 2.0 **or** Tensorflow >= 2.9 and CUDA. If you want to run the models provided by Tensorpack a GPU is
279
+ - 1.12 <= PyTorch < 2.0 **or** Tensorflow >= 2.9 and CUDA. If you want to run the models provided by Tensorpack a GPU is
161
280
  required. You can run on PyTorch with a CPU only.
162
281
  - **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF documents into
163
282
  images.
@@ -230,7 +349,7 @@ pip install deepdoctection
230
349
  This will ignore all model libraries (layers above the **deep**doctection layer in the diagram) and you
231
350
  will be responsible to install them by yourself. Note, that you will not be able to run any pipeline with this setup.
232
351
 
233
- For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/manual/install.html).
352
+ For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
234
353
 
235
354
 
236
355
  ### Installation from source
@@ -1,6 +1,6 @@
1
1
 
2
2
  <p align="center">
3
- <img src="https://github.com/deepdoctection/deepdoctection/blob/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
3
+ <img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
4
4
  <h3 align="center">
5
5
  A Document AI Package
6
6
  </h3>
@@ -100,20 +100,20 @@ plt.axis('off')
100
100
  plt.imshow(image)
101
101
  ```
102
102
 
103
- ![text](./docs/tutorials/_imgs/dd_rm_sample.png)
103
+ ![text](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_sample.png)
104
104
 
105
105
  ```
106
106
  HTML(page.tables[0].html)
107
107
  ```
108
108
 
109
- ![table](./docs/tutorials/_imgs/dd_rm_table.png)
109
+ ![table](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_table.png)
110
110
 
111
111
 
112
112
  ```
113
113
  print(page.text)
114
114
  ```
115
115
 
116
- ![table](./docs/tutorials/_imgs/dd_rm_text.png)
116
+ ![table](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_text.png)
117
117
 
118
118
 
119
119
  ## Documentation
@@ -127,14 +127,14 @@ documentation.
127
127
 
128
128
  ## Requirements
129
129
 
130
- ![requirements](./docs/tutorials/_imgs/requirements_deepdoctection.png)
130
+ ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection.png)
131
131
 
132
132
  Everything in the overview listed below the **deep**doctection layer is a necessary requirement and has to be installed
133
133
  separately.
134
134
 
135
135
  - Linux or macOS. (Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available)
136
136
  - Python >= 3.8
137
- - 1.8 <= PyTorch < 2.0 **or** Tensorflow >= 2.9 and CUDA. If you want to run the models provided by Tensorpack a GPU is
137
+ - 1.12 <= PyTorch < 2.0 **or** Tensorflow >= 2.9 and CUDA. If you want to run the models provided by Tensorpack a GPU is
138
138
  required. You can run on PyTorch with a CPU only.
139
139
  - **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF documents into
140
140
  images.
@@ -207,7 +207,7 @@ pip install deepdoctection
207
207
  This will ignore all model libraries (layers above the **deep**doctection layer in the diagram) and you
208
208
  will be responsible for installing them yourself. Note that you will not be able to run any pipeline with this setup.
209
209
 
210
- For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/manual/install.html).
210
+ For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
211
211
 
212
212
 
213
213
  ### Installation from source
@@ -14,7 +14,7 @@ from packaging import version
14
14
  from .utils.file_utils import _LazyModule, get_tf_version, pytorch_available, tf_available
15
15
  from .utils.logger import logger
16
16
 
17
- __version__ = 0.26
17
+ __version__ = 0.27
18
18
 
19
19
  _IMPORT_STRUCTURE = {
20
20
  "analyzer": ["get_dd_analyzer", "build_analyzer"],
@@ -311,6 +311,10 @@ _IMPORT_STRUCTURE = {
311
311
  "get_fasttext_requirement",
312
312
  "wandb_available",
313
313
  "get_wandb_requirement",
314
+ "opencv_available",
315
+ "get_opencv_requirement",
316
+ "pillow_available",
317
+ "get_pillow_requirement",
314
318
  "load_image_from_file",
315
319
  "load_bytes_from_pdf_file",
316
320
  "get_load_image_func",
@@ -378,6 +382,7 @@ _IMPORT_STRUCTURE = {
378
382
  "draw_text",
379
383
  "draw_boxes",
380
384
  "interactive_imshow",
385
+ "viz_handler",
381
386
  ],
382
387
  }
383
388
 
@@ -403,6 +408,7 @@ if tf_available():
403
408
  except Exception: # pylint: disable=W0703
404
409
  pass
405
410
 
411
+
406
412
  # Direct imports for type-checking
407
413
  if TYPE_CHECKING:
408
414
  from .analyzer import *
@@ -36,7 +36,7 @@ from ..extern.tessocr import TesseractOcrDetector
36
36
  from ..extern.texocr import TextractOcrDetector
37
37
  from ..pipe.base import PipelineComponent
38
38
  from ..pipe.cell import DetectResultGenerator, SubImageLayoutService
39
- from ..pipe.common import MatchingService, PageParsingService
39
+ from ..pipe.common import AnnotationNmsService, MatchingService, PageParsingService
40
40
  from ..pipe.doctectionpipe import DoctectionPipe
41
41
  from ..pipe.layout import ImageLayoutService
42
42
  from ..pipe.order import TextOrderService
@@ -206,7 +206,7 @@ def _build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer
206
206
  profile = ModelCatalog.get_profile(weights)
207
207
  if profile.architecture is None:
208
208
  raise ValueError("model profile.architecture must be specified")
209
- return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE)
209
+ return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB)
210
210
  if cfg.OCR.USE_TEXTRACT:
211
211
  credentials_kwargs = {
212
212
  "aws_access_key_id": environ.get("ACCESS_KEY"),
@@ -225,7 +225,7 @@ def _build_doctr_word(cfg: AttrDict) -> DoctrTextlineDetector:
225
225
  raise ValueError("model profile.architecture must be specified")
226
226
  if profile.categories is None:
227
227
  raise ValueError("model profile.categories must be specified")
228
- return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE)
228
+ return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE, lib=cfg.LIB)
229
229
 
230
230
 
231
231
  def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
@@ -242,6 +242,17 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
242
242
  layout = _build_service(d_layout, cfg, "LAYOUT")
243
243
  pipe_component_list.append(layout)
244
244
 
245
+ # setup layout nms service
246
+ if cfg.LAYOUT_NMS_PAIRS.COMBINATIONS and cfg.USE_LAYOUT:
247
+ if not isinstance(cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, list) and not isinstance(
248
+ cfg.LAYOUT_NMS_PAIRS.COMBINATIONS[0], list
249
+ ):
250
+ raise ValueError("LAYOUT_NMS_PAIRS mus be a list of lists")
251
+ layout_nms_serivce = AnnotationNmsService(
252
+ cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, cfg.LAYOUT_NMS_PAIRS.THRESHOLDS, cfg.LAYOUT_NMS_PAIRS.PRIORITY
253
+ )
254
+ pipe_component_list.append(layout_nms_serivce)
255
+
245
256
  # setup tables service
246
257
  if cfg.USE_TABLE_SEGMENTATION:
247
258
  d_item = _build_detector(cfg, "ITEM")
@@ -302,6 +313,7 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
302
313
  )
303
314
  pipe_component_list.append(text)
304
315
 
316
+ if cfg.USE_PDF_MINER or cfg.USE_OCR:
305
317
  match = MatchingService(
306
318
  parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES,
307
319
  child_categories=LayoutType.word,
@@ -33,6 +33,10 @@ PT:
33
33
  WEIGHTS: cell/d2_model_1849999_cell_inf_only.pt
34
34
  WEIGHTS_TS: cell/d2_model_1849999_cell_inf_only.ts
35
35
  FILTER:
36
+ LAYOUT_NMS_PAIRS:
37
+ COMBINATIONS:
38
+ THRESHOLDS:
39
+ PRIORITY:
36
40
  SEGMENTATION:
37
41
  ASSIGNMENT_RULE: ioa
38
42
  THRESHOLD_ROWS: 0.4
@@ -25,7 +25,6 @@ from io import BytesIO
25
25
  from shutil import which
26
26
  from typing import Any, Optional, Union, no_type_check
27
27
 
28
- import cv2
29
28
  import numpy as np
30
29
  from numpy import uint8
31
30
  from numpy.typing import NDArray
@@ -34,6 +33,7 @@ from PyPDF2 import PdfReader
34
33
  from ..utils.detection_types import ImageType
35
34
  from ..utils.develop import deprecated
36
35
  from ..utils.pdf_utils import pdf_to_np_array
36
+ from ..utils.viz import viz_handler
37
37
 
38
38
  __all__ = [
39
39
  "convert_b64_to_np_array",
@@ -81,9 +81,8 @@ def convert_b64_to_np_array(image: str) -> ImageType:
81
81
  :param image: An image as base64 string.
82
82
  :return: numpy array.
83
83
  """
84
- np_array = np.fromstring(base64.b64decode(image), np.uint8) # type: ignore
85
- np_array = cv2.imdecode(np_array, cv2.IMREAD_COLOR).astype(np.float32)
86
- return np_array.astype(uint8)
84
+
85
+ return viz_handler.convert_b64_to_np(image).astype(uint8)
87
86
 
88
87
 
89
88
  def convert_np_array_to_b64(np_image: ImageType) -> str:
@@ -93,9 +92,7 @@ def convert_np_array_to_b64(np_image: ImageType) -> str:
93
92
  :param np_image: An image as numpy array.
94
93
  :return: An image as base64 string.
95
94
  """
96
- np_encode = cv2.imencode(".png", np_image)
97
- image = base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
98
- return image
95
+ return viz_handler.convert_np_to_b64(np_image)
99
96
 
100
97
 
101
98
  @no_type_check
@@ -106,9 +103,7 @@ def convert_np_array_to_b64_b(np_image: ImageType) -> bytes:
106
103
  :param np_image: An image as numpy array.
107
104
  :return: An image as base64 bytes.
108
105
  """
109
- np_encode = cv2.imencode(".png", np_image)
110
- b_image = np_encode[1].tobytes()
111
- return b_image
106
+ return viz_handler.encode(np_image)
112
107
 
113
108
 
114
109
  @deprecated("Use convert_pdf_bytes_to_np_array_v2", "2022-02-23")
@@ -626,8 +626,8 @@ class Image:
626
626
  self.remove_image_from_lower_hierachy()
627
627
  export_dict = self.as_dict()
628
628
  export_dict["location"] = str(export_dict["location"])
629
- if image_to_json and self.image is not None:
630
- export_dict["_image"] = convert_np_array_to_b64(self.image)
629
+ if not image_to_json:
630
+ export_dict["_image"] = None
631
631
  if dry:
632
632
  return export_dict
633
633
  with open(path_json, "w", encoding="UTF-8") as file:
@@ -23,7 +23,6 @@ simplify consumption
23
23
  from copy import copy
24
24
  from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union, no_type_check
25
25
 
26
- import cv2
27
26
  import numpy as np
28
27
 
29
28
  from ..utils.detection_types import ImageType, JsonDict, Pathlike
@@ -39,7 +38,7 @@ from ..utils.settings import (
39
38
  WordType,
40
39
  get_type,
41
40
  )
42
- from ..utils.viz import draw_boxes, interactive_imshow
41
+ from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
43
42
  from .annotation import ContainerAnnotation, ImageAnnotation, SummaryAnnotation, ann_from_dict
44
43
  from .box import BoundingBox
45
44
  from .image import Image
@@ -415,6 +414,17 @@ class Page(Image):
415
414
  text_container: ObjectTypes
416
415
  floating_text_block_categories: List[ObjectTypes]
417
416
  image_orig: Image
417
+ _attribute_names: Set[str] = {
418
+ "text",
419
+ "chunks",
420
+ "tables",
421
+ "layouts",
422
+ "words",
423
+ "file_name",
424
+ "location",
425
+ "document_id",
426
+ "page_number",
427
+ }
418
428
 
419
429
  @no_type_check
420
430
  def get_annotation(
@@ -734,7 +744,9 @@ class Page(Image):
734
744
  )
735
745
  else:
736
746
  img = draw_boxes(self.image, boxes, category_names_list)
737
- img = cv2.resize(img, None, fx=1.3, fy=1.3, interpolation=cv2.INTER_CUBIC)
747
+ scale_fx, scale_fy = 1.3, 1.3
748
+ scaled_width, scaled_height = int(self.width * scale_fx), int(self.height * scale_fy)
749
+ img = viz_handler.resize(img, scaled_width, scaled_height, "VIZ")
738
750
  else:
739
751
  img = self.image
740
752
 
@@ -744,24 +756,32 @@ class Page(Image):
744
756
  return img
745
757
  return None
746
758
 
747
- @staticmethod
748
- def get_attribute_names() -> Set[str]:
759
+ @classmethod
760
+ def get_attribute_names(cls) -> Set[str]:
749
761
  """
750
762
  :return: A set of registered attributes.
751
763
  """
752
- return set(PageType).union(
753
- {
754
- "text",
755
- "chunks",
756
- "tables",
757
- "layouts",
758
- "words",
759
- "file_name",
760
- "location",
761
- "document_id",
762
- "page_number",
763
- }
764
- )
764
+ return set(PageType).union(cls._attribute_names)
765
+
766
+ @classmethod
767
+ def add_attribute_name(cls, attribute_name: Union[str, ObjectTypes]) -> None:
768
+ """
769
+ Adding a custom attribute name to a Page class.
770
+
771
+ **Example:**
772
+
773
+ Page.add_attribute_name("foo")
774
+
775
+ page = Page.from_image(...)
776
+ print(page.foo)
777
+
778
+ Note, that the attribute must be registered as a valid `ObjectTypes`
779
+
780
+ :param attribute_name: attribute name to add
781
+ """
782
+
783
+ attribute_name = get_type(attribute_name)
784
+ cls._attribute_names.add(attribute_name.value)
765
785
 
766
786
  def save(
767
787
  self,
@@ -23,13 +23,12 @@ import json
23
23
  from pathlib import Path
24
24
  from typing import Optional
25
25
 
26
- from cv2 import imwrite
27
-
28
26
  from ..dataflow import DataFlow, MapData, SerializerJsonlines
29
27
  from ..datapoint.convert import convert_b64_to_np_array
30
28
  from ..datapoint.image import Image
31
29
  from ..utils.detection_types import JsonDict, Pathlike
32
30
  from ..utils.fs import mkdir_p
31
+ from ..utils.viz import viz_handler
33
32
 
34
33
 
35
34
  def dataflow_to_json(
@@ -84,7 +83,8 @@ def dataflow_to_json(
84
83
  target_file_png = path / "image" / (dp["file_name"].split(".")[0] + ".png")
85
84
  image = dp.pop("_image")
86
85
  image = convert_b64_to_np_array(image)
87
- imwrite(str(target_file_png), image)
86
+
87
+ viz_handler.write_image(str(target_file_png), image)
88
88
 
89
89
  with open(target_file, "w", encoding="UTF-8") as file:
90
90
  json.dump(dp, file)
@@ -23,7 +23,6 @@ from copy import copy
23
23
  from pathlib import Path
24
24
  from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence
25
25
 
26
- import cv2
27
26
  import numpy as np
28
27
 
29
28
  from ..utils.detection_types import ImageType, Requirement
@@ -130,7 +129,7 @@ def d2_jit_predict_image(
130
129
  keep = batched_nms(boxes, scores, class_masks, nms_thresh_class_agnostic).cpu()
131
130
 
132
131
  # The exported model does not contain the final resize step, so we need to add it manually here
133
- inverse_resizer = ResizeTransform(new_height, new_width, height, width, cv2.INTER_LINEAR)
132
+ inverse_resizer = ResizeTransform(new_height, new_width, height, width, "VIZ")
134
133
  np_boxes = np.reshape(boxes.cpu().numpy(), (-1, 2))
135
134
  np_boxes = inverse_resizer.apply_coords(np_boxes)
136
135
  np_boxes = np.reshape(np_boxes, (-1, 4))
@@ -62,14 +62,14 @@ def _set_device_str(device: Optional[str] = None) -> str:
62
62
  return device
63
63
 
64
64
 
65
- def _load_model(path_weights: str, doctr_predictor: Any, device: str) -> None:
66
- if pytorch_available():
65
+ def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str) -> None:
66
+ if lib == "PT" and pytorch_available():
67
67
  state_dict = torch.load(path_weights, map_location=device)
68
68
  for key in list(state_dict.keys()):
69
69
  state_dict["model." + key] = state_dict.pop(key)
70
70
  doctr_predictor.load_state_dict(state_dict)
71
71
  doctr_predictor.to(device)
72
- elif tf_available():
72
+ elif lib == "TF" and tf_available():
73
73
  # Unzip the archive
74
74
  params_path = Path(path_weights).parent
75
75
  is_zip_path = path_weights.endswith(".zip")
@@ -99,7 +99,7 @@ def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor",
99
99
  DetectionResult(
100
100
  box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.word
101
101
  )
102
- for box in raw_output[0]
102
+ for box in raw_output[0]["words"]
103
103
  ]
104
104
  return detection_results
105
105
 
@@ -173,7 +173,9 @@ class DoctrTextlineDetector(ObjectDetector):
173
173
  path_weights: str,
174
174
  categories: Mapping[str, TypeOrStr],
175
175
  device: Optional[Literal["cpu", "cuda"]] = None,
176
+ lib: str = "TF",
176
177
  ) -> None:
178
+ self.lib = lib
177
179
  self.name = "doctr_text_detector"
178
180
  self.architecture = architecture
179
181
  self.path_weights = path_weights
@@ -205,14 +207,14 @@ class DoctrTextlineDetector(ObjectDetector):
205
207
  raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")
206
208
 
207
209
  def clone(self) -> PredictorBase:
208
- return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input)
210
+ return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input, self.lib)
209
211
 
210
212
  def possible_categories(self) -> List[ObjectTypes]:
211
213
  return [LayoutType.word]
212
214
 
213
215
  def load_model(self) -> None:
214
216
  """Loading model weights"""
215
- _load_model(self.path_weights, self.doctr_predictor, self.device)
217
+ _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
216
218
 
217
219
 
218
220
  class DoctrTextRecognizer(TextRecognizer):
@@ -252,7 +254,10 @@ class DoctrTextRecognizer(TextRecognizer):
252
254
 
253
255
  """
254
256
 
255
- def __init__(self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None) -> None:
257
+ def __init__(
258
+ self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None, lib: str = "TF"
259
+ ) -> None:
260
+ self.lib = lib
256
261
  self.name = "doctr_text_recognizer"
257
262
  self.architecture = architecture
258
263
  self.path_weights = path_weights
@@ -281,8 +286,8 @@ class DoctrTextRecognizer(TextRecognizer):
281
286
  raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextRecognizer")
282
287
 
283
288
  def clone(self) -> PredictorBase:
284
- return self.__class__(self.architecture, self.path_weights, self.device_input)
289
+ return self.__class__(self.architecture, self.path_weights, self.device_input, self.lib)
285
290
 
286
291
  def load_model(self) -> None:
287
292
  """Loading model weights"""
288
- _load_model(self.path_weights, self.doctr_predictor, self.device)
293
+ _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
@@ -10,7 +10,6 @@ This file is modified from
10
10
  """
11
11
 
12
12
 
13
- import cv2
14
13
  import numpy as np
15
14
  from tensorpack.dataflow.imgaug import ImageAugmentor, ResizeTransform # pylint: disable=E0401
16
15
 
@@ -25,11 +24,11 @@ class CustomResize(ImageAugmentor):
25
24
  Try resizing the shortest edge to a certain number while avoiding the longest edge to exceed max_size.
26
25
  """
27
26
 
28
- def __init__(self, short_edge_length, max_size, interp=cv2.INTER_LINEAR):
27
+ def __init__(self, short_edge_length, max_size, interp=1):
29
28
  """
30
29
  :param short_edge_length: a [min, max] interval from which to sample the shortest edge length.
31
30
  :param max_size: maximum allowed longest edge length.
32
- :param interp: cv2 interpolation mode
31
+ :param interp: Interpolation mode. We use Tensorpack's internal `ResizeTransform`, that always requires OpenCV
33
32
  """
34
33
  super().__init__()
35
34
  if isinstance(short_edge_length, int):