deepdoctection 0.29__tar.gz → 0.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (240)
  1. {deepdoctection-0.29 → deepdoctection-0.31}/PKG-INFO +39 -61
  2. {deepdoctection-0.29 → deepdoctection-0.31}/README.md +3 -1
  3. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/__init__.py +6 -2
  4. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/analyzer/dd.py +13 -8
  5. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/base.py +0 -19
  6. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/custom.py +6 -5
  7. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/custom_serialize.py +20 -5
  8. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/parallel_map.py +22 -17
  9. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/serialize.py +5 -4
  10. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/stats.py +5 -5
  11. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/annotation.py +35 -14
  12. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/box.py +9 -6
  13. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/convert.py +3 -1
  14. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/image.py +66 -29
  15. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/view.py +62 -24
  16. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/adapter.py +4 -5
  17. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/base.py +87 -14
  18. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/dataflow_builder.py +1 -1
  19. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/info.py +2 -2
  20. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/fintabnet.py +3 -3
  21. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/layouttest.py +2 -7
  22. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtabnet.py +3 -3
  23. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/accmetric.py +7 -5
  24. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/base.py +5 -4
  25. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/eval.py +9 -7
  26. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/tedsmetric.py +9 -3
  27. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/tp_eval_callback.py +8 -7
  28. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/base.py +39 -13
  29. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/d2detect.py +164 -64
  30. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/deskew.py +32 -7
  31. deepdoctection-0.31/deepdoctection/extern/doctrocr.py +532 -0
  32. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/fastlang.py +45 -7
  33. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/hfdetr.py +90 -33
  34. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/hflayoutlm.py +109 -22
  35. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/model.py +30 -11
  36. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pdftext.py +2 -1
  37. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pt/ptutils.py +3 -2
  38. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tessocr.py +134 -22
  39. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/texocr.py +4 -2
  40. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpcompat.py +4 -4
  41. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
  42. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tpdetect.py +50 -23
  43. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/d2struct.py +1 -1
  44. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/hfstruct.py +1 -1
  45. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/laylmstruct.py +1 -1
  46. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/maputils.py +19 -5
  47. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/prodigystruct.py +15 -13
  48. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/pubstruct.py +10 -10
  49. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/tpstruct.py +1 -1
  50. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/anngen.py +35 -8
  51. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/base.py +53 -19
  52. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/cell.py +29 -8
  53. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/common.py +12 -4
  54. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/doctectionpipe.py +4 -3
  55. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/language.py +3 -2
  56. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/layout.py +3 -2
  57. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/lm.py +2 -2
  58. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/order.py +67 -39
  59. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/refine.py +18 -10
  60. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/segment.py +34 -20
  61. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/text.py +14 -8
  62. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/transform.py +16 -8
  63. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/d2_frcnn_train.py +17 -14
  64. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/hf_detr_train.py +13 -9
  65. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/hf_layoutlm_train.py +31 -19
  66. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/__init__.py +3 -0
  67. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/concurrency.py +1 -1
  68. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/context.py +5 -5
  69. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/develop.py +2 -2
  70. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/env_info.py +64 -27
  71. deepdoctection-0.31/deepdoctection/utils/error.py +84 -0
  72. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/file_utils.py +28 -17
  73. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/fs.py +16 -14
  74. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/logger.py +43 -19
  75. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/pdf_utils.py +14 -7
  76. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/settings.py +5 -1
  77. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/transform.py +1 -1
  78. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/utils.py +0 -6
  79. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/viz.py +83 -14
  80. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/PKG-INFO +39 -61
  81. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/SOURCES.txt +1 -0
  82. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/requires.txt +32 -60
  83. {deepdoctection-0.29 → deepdoctection-0.31}/setup.py +24 -22
  84. deepdoctection-0.31/tests/analyzer/test_dd.py +202 -0
  85. {deepdoctection-0.29 → deepdoctection-0.31}/tests/conftest.py +0 -1
  86. {deepdoctection-0.29 → deepdoctection-0.31}/tests/data.py +88 -48
  87. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_image.py +50 -4
  88. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/conftest.py +1 -1
  89. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_funsd.py +2 -2
  90. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/test_info.py +1 -1
  91. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/conftest.py +10 -1
  92. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/data.py +2 -0
  93. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_deskew.py +13 -3
  94. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_doctrocr.py +46 -2
  95. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_hfdetr.py +2 -2
  96. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_tessocr.py +62 -3
  97. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/data.py +6 -6
  98. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_prodigystruct.py +1 -1
  99. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_anngen.py +6 -6
  100. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_cell.py +21 -0
  101. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_layout.py +1 -0
  102. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_segment.py +1 -1
  103. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_text.py +4 -0
  104. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_transform.py +2 -0
  105. {deepdoctection-0.29 → deepdoctection-0.31}/tests_d2/test_d2detect.py +1 -1
  106. deepdoctection-0.29/deepdoctection/extern/doctrocr.py +0 -293
  107. deepdoctection-0.29/tests/analyzer/test_dd.py +0 -200
  108. {deepdoctection-0.29 → deepdoctection-0.31}/LICENSE +0 -0
  109. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/analyzer/__init__.py +0 -0
  110. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/configs/__init__.py +0 -0
  111. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  112. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  113. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/__init__.py +0 -0
  114. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/common.py +0 -0
  115. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/__init__.py +0 -0
  116. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/__init__.py +0 -0
  117. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/__init__.py +0 -0
  118. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  119. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/funsd.py +0 -0
  120. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  121. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/publaynet.py +0 -0
  122. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  123. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  124. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/xfund.py +0 -0
  125. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  126. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  127. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/registry.py +0 -0
  128. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/save.py +0 -0
  129. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/__init__.py +0 -0
  130. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/cocometric.py +0 -0
  131. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/registry.py +0 -0
  132. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/__init__.py +0 -0
  133. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pt/__init__.py +0 -0
  134. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pt/nms.py +0 -0
  135. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/__init__.py +0 -0
  136. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tfutils.py +0 -0
  137. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  138. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  139. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  140. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  141. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  142. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  143. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  144. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  145. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  146. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  147. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  148. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  149. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  150. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  151. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  152. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  153. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  154. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/__init__.py +0 -0
  155. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/cats.py +0 -0
  156. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/cocostruct.py +0 -0
  157. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/match.py +0 -0
  158. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/misc.py +0 -0
  159. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/pascalstruct.py +0 -0
  160. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/xfundstruct.py +0 -0
  161. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/__init__.py +0 -0
  162. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/concurrency.py +0 -0
  163. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/registry.py +0 -0
  164. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/py.typed +0 -0
  165. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/__init__.py +0 -0
  166. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/tp_frcnn_train.py +0 -0
  167. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/detection_types.py +0 -0
  168. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/identifier.py +0 -0
  169. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/metacfg.py +0 -0
  170. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/tqdm.py +0 -0
  171. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/dependency_links.txt +0 -0
  172. {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/top_level.txt +0 -0
  173. {deepdoctection-0.29 → deepdoctection-0.31}/setup.cfg +0 -0
  174. {deepdoctection-0.29 → deepdoctection-0.31}/tests/__init__.py +0 -0
  175. {deepdoctection-0.29 → deepdoctection-0.31}/tests/analyzer/__init__.py +0 -0
  176. {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/__init__.py +0 -0
  177. {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/conftest.py +0 -0
  178. {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_common.py +0 -0
  179. {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_custom.py +0 -0
  180. {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_custom_serialize.py +0 -0
  181. {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_parallel_map.py +0 -0
  182. {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_stats.py +0 -0
  183. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/__init__.py +0 -0
  184. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/conftest.py +0 -0
  185. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_annotation.py +0 -0
  186. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_box.py +0 -0
  187. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_convert.py +0 -0
  188. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_view.py +0 -0
  189. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/__init__.py +0 -0
  190. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/__init__.py +0 -0
  191. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_doclaynet.py +0 -0
  192. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_fintabnet.py +0 -0
  193. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_iiitar13k.py +0 -0
  194. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_layouttest.py +0 -0
  195. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_publaynet.py +0 -0
  196. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_pubtables1m.py +0 -0
  197. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_pubtabnet.py +0 -0
  198. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_rvlcdip.py +0 -0
  199. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/test_adapter.py +0 -0
  200. {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/test_registry.py +0 -0
  201. {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/__init__.py +0 -0
  202. {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/conftest.py +0 -0
  203. {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_accmetric.py +0 -0
  204. {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_cocometric.py +0 -0
  205. {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_eval.py +0 -0
  206. {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_registry.py +0 -0
  207. {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_tedsmetric.py +0 -0
  208. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/__init__.py +0 -0
  209. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_fastlang.py +0 -0
  210. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_hflayoutlm.py +0 -0
  211. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_pdftext.py +0 -0
  212. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_texocr.py +0 -0
  213. {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_tpdetect.py +0 -0
  214. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/__init__.py +0 -0
  215. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/conftest.py +0 -0
  216. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_cats.py +0 -0
  217. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_cocostruct.py +0 -0
  218. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_d2struct.py +0 -0
  219. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_hfstruct.py +0 -0
  220. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_iiitar13k.py +0 -0
  221. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_laylmstruct.py +0 -0
  222. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_misc.py +0 -0
  223. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_pubstruct.py +0 -0
  224. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_tpstruct.py +0 -0
  225. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_utils.py +0 -0
  226. {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_xfundstruct.py +0 -0
  227. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/__init__.py +0 -0
  228. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_common.py +0 -0
  229. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_language.py +0 -0
  230. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_lm.py +0 -0
  231. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_order.py +0 -0
  232. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_refine.py +0 -0
  233. {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_registry.py +0 -0
  234. {deepdoctection-0.29 → deepdoctection-0.31}/tests/test_utils.py +0 -0
  235. {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/__init__.py +0 -0
  236. {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/conftest.py +0 -0
  237. {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/test_d2_frcnn_train.py +0 -0
  238. {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/test_tp_frcnn_train.py +0 -0
  239. {deepdoctection-0.29 → deepdoctection-0.31}/tests_d2/__init__.py +0 -0
  240. {deepdoctection-0.29 → deepdoctection-0.31}/tests_d2/conftest.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.29
3
+ Version: 0.31
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -10,42 +10,45 @@ Classifier: License :: OSI Approved :: Apache Software License
10
10
  Classifier: Natural Language :: English
11
11
  Classifier: Operating System :: POSIX :: Linux
12
12
  Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
13
16
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
17
  Requires-Python: >=3.8
15
18
  Description-Content-Type: text/markdown
16
19
  License-File: LICENSE
17
- Requires-Dist: catalogue==2.0.7
20
+ Requires-Dist: catalogue==2.0.10
18
21
  Requires-Dist: huggingface_hub>=0.12.0
19
- Requires-Dist: importlib-metadata>=4.11.2
20
- Requires-Dist: jsonlines==3.0.0
22
+ Requires-Dist: importlib-metadata>=5.0.0
23
+ Requires-Dist: jsonlines==3.1.0
21
24
  Requires-Dist: mock==4.0.3
22
25
  Requires-Dist: networkx>=2.7.1
23
26
  Requires-Dist: numpy>=1.21
24
27
  Requires-Dist: packaging>=20.0
25
28
  Requires-Dist: Pillow>=10.0.0
26
29
  Requires-Dist: pypdf>=3.16.0
27
- Requires-Dist: pyyaml==6.0
30
+ Requires-Dist: pyyaml>=6.0.1
28
31
  Requires-Dist: pyzmq>=16
29
32
  Requires-Dist: termcolor>=1.1
30
33
  Requires-Dist: tabulate>=0.7.7
31
34
  Requires-Dist: tqdm==4.64.0
32
35
  Provides-Extra: tf
33
- Requires-Dist: catalogue==2.0.7; extra == "tf"
36
+ Requires-Dist: catalogue==2.0.10; extra == "tf"
34
37
  Requires-Dist: huggingface_hub>=0.12.0; extra == "tf"
35
- Requires-Dist: importlib-metadata>=4.11.2; extra == "tf"
36
- Requires-Dist: jsonlines==3.0.0; extra == "tf"
38
+ Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
39
+ Requires-Dist: jsonlines==3.1.0; extra == "tf"
37
40
  Requires-Dist: mock==4.0.3; extra == "tf"
38
41
  Requires-Dist: networkx>=2.7.1; extra == "tf"
39
42
  Requires-Dist: numpy>=1.21; extra == "tf"
40
43
  Requires-Dist: packaging>=20.0; extra == "tf"
41
44
  Requires-Dist: Pillow>=10.0.0; extra == "tf"
42
45
  Requires-Dist: pypdf>=3.16.0; extra == "tf"
43
- Requires-Dist: pyyaml==6.0; extra == "tf"
46
+ Requires-Dist: pyyaml>=6.0.1; extra == "tf"
44
47
  Requires-Dist: pyzmq>=16; extra == "tf"
45
48
  Requires-Dist: termcolor>=1.1; extra == "tf"
46
49
  Requires-Dist: tabulate>=0.7.7; extra == "tf"
47
50
  Requires-Dist: tqdm==4.64.0; extra == "tf"
48
- Requires-Dist: tensorpack; extra == "tf"
51
+ Requires-Dist: tensorpack==0.11; extra == "tf"
49
52
  Requires-Dist: protobuf==3.20.1; extra == "tf"
50
53
  Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
51
54
  Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
@@ -53,47 +56,47 @@ Requires-Dist: python-doctr==0.7.0; extra == "tf"
53
56
  Requires-Dist: pycocotools>=2.0.2; extra == "tf"
54
57
  Requires-Dist: boto3; extra == "tf"
55
58
  Requires-Dist: pdfplumber>=0.7.1; extra == "tf"
56
- Requires-Dist: fasttext; extra == "tf"
57
- Requires-Dist: jdeskew; extra == "tf"
59
+ Requires-Dist: fasttext==0.9.2; extra == "tf"
60
+ Requires-Dist: jdeskew>=0.2.2; extra == "tf"
58
61
  Requires-Dist: apted==1.0.3; extra == "tf"
59
62
  Requires-Dist: distance==0.1.3; extra == "tf"
60
63
  Requires-Dist: lxml>=4.9.1; extra == "tf"
61
64
  Provides-Extra: pt
62
- Requires-Dist: catalogue==2.0.7; extra == "pt"
65
+ Requires-Dist: catalogue==2.0.10; extra == "pt"
63
66
  Requires-Dist: huggingface_hub>=0.12.0; extra == "pt"
64
- Requires-Dist: importlib-metadata>=4.11.2; extra == "pt"
65
- Requires-Dist: jsonlines==3.0.0; extra == "pt"
67
+ Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
68
+ Requires-Dist: jsonlines==3.1.0; extra == "pt"
66
69
  Requires-Dist: mock==4.0.3; extra == "pt"
67
70
  Requires-Dist: networkx>=2.7.1; extra == "pt"
68
71
  Requires-Dist: numpy>=1.21; extra == "pt"
69
72
  Requires-Dist: packaging>=20.0; extra == "pt"
70
73
  Requires-Dist: Pillow>=10.0.0; extra == "pt"
71
74
  Requires-Dist: pypdf>=3.16.0; extra == "pt"
72
- Requires-Dist: pyyaml==6.0; extra == "pt"
75
+ Requires-Dist: pyyaml>=6.0.1; extra == "pt"
73
76
  Requires-Dist: pyzmq>=16; extra == "pt"
74
77
  Requires-Dist: termcolor>=1.1; extra == "pt"
75
78
  Requires-Dist: tabulate>=0.7.7; extra == "pt"
76
79
  Requires-Dist: tqdm==4.64.0; extra == "pt"
77
- Requires-Dist: timm; extra == "pt"
80
+ Requires-Dist: timm>=0.9.16; extra == "pt"
78
81
  Requires-Dist: transformers>=4.36.0; extra == "pt"
79
- Requires-Dist: accelerate; extra == "pt"
82
+ Requires-Dist: accelerate>=0.29.1; extra == "pt"
80
83
  Requires-Dist: python-doctr==0.7.0; extra == "pt"
81
84
  Requires-Dist: boto3; extra == "pt"
82
85
  Requires-Dist: pdfplumber>=0.7.1; extra == "pt"
83
- Requires-Dist: fasttext; extra == "pt"
84
- Requires-Dist: jdeskew; extra == "pt"
86
+ Requires-Dist: fasttext==0.9.2; extra == "pt"
87
+ Requires-Dist: jdeskew>=0.2.2; extra == "pt"
85
88
  Requires-Dist: apted==1.0.3; extra == "pt"
86
89
  Requires-Dist: distance==0.1.3; extra == "pt"
87
90
  Requires-Dist: lxml>=4.9.1; extra == "pt"
88
91
  Provides-Extra: docs
89
- Requires-Dist: tensorpack; extra == "docs"
92
+ Requires-Dist: tensorpack==0.11; extra == "docs"
90
93
  Requires-Dist: boto3; extra == "docs"
91
94
  Requires-Dist: transformers>=4.36.0; extra == "docs"
92
- Requires-Dist: accelerate; extra == "docs"
95
+ Requires-Dist: accelerate>=0.29.1; extra == "docs"
93
96
  Requires-Dist: pdfplumber>=0.7.1; extra == "docs"
94
97
  Requires-Dist: lxml>=4.9.1; extra == "docs"
95
- Requires-Dist: lxml-stubs; extra == "docs"
96
- Requires-Dist: jdeskew; extra == "docs"
98
+ Requires-Dist: lxml-stubs>=0.5.1; extra == "docs"
99
+ Requires-Dist: jdeskew>=0.2.2; extra == "docs"
97
100
  Requires-Dist: jinja2==3.0.3; extra == "docs"
98
101
  Requires-Dist: mkdocs-material; extra == "docs"
99
102
  Requires-Dist: mkdocstrings-python; extra == "docs"
@@ -102,47 +105,20 @@ Provides-Extra: dev
102
105
  Requires-Dist: python-dotenv==1.0.0; extra == "dev"
103
106
  Requires-Dist: click; extra == "dev"
104
107
  Requires-Dist: black==23.7.0; extra == "dev"
105
- Requires-Dist: isort; extra == "dev"
108
+ Requires-Dist: isort==5.13.2; extra == "dev"
106
109
  Requires-Dist: pylint==2.17.4; extra == "dev"
107
110
  Requires-Dist: mypy==1.4.1; extra == "dev"
108
111
  Requires-Dist: wandb; extra == "dev"
109
- Requires-Dist: types-PyYAML; extra == "dev"
110
- Requires-Dist: types-termcolor==1.1.3; extra == "dev"
111
- Requires-Dist: types-tabulate; extra == "dev"
112
- Requires-Dist: types-tqdm; extra == "dev"
113
- Requires-Dist: lxml-stubs; extra == "dev"
114
- Requires-Dist: types-Pillow; extra == "dev"
115
- Requires-Dist: types-urllib3; extra == "dev"
112
+ Requires-Dist: types-PyYAML>=6.0.12.12; extra == "dev"
113
+ Requires-Dist: types-termcolor>=1.1.3; extra == "dev"
114
+ Requires-Dist: types-tabulate>=0.9.0.3; extra == "dev"
115
+ Requires-Dist: types-tqdm>=4.66.0.5; extra == "dev"
116
+ Requires-Dist: lxml-stubs>=0.5.1; extra == "dev"
117
+ Requires-Dist: types-Pillow>=10.2.0.20240406; extra == "dev"
118
+ Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
116
119
  Provides-Extra: test
117
- Requires-Dist: pytest; extra == "test"
120
+ Requires-Dist: pytest==8.0.2; extra == "test"
118
121
  Requires-Dist: pytest-cov; extra == "test"
119
- Provides-Extra: hf
120
- Requires-Dist: catalogue==2.0.7; extra == "hf"
121
- Requires-Dist: huggingface_hub>=0.12.0; extra == "hf"
122
- Requires-Dist: importlib-metadata>=4.11.2; extra == "hf"
123
- Requires-Dist: jsonlines==3.0.0; extra == "hf"
124
- Requires-Dist: mock==4.0.3; extra == "hf"
125
- Requires-Dist: networkx>=2.7.1; extra == "hf"
126
- Requires-Dist: numpy>=1.21; extra == "hf"
127
- Requires-Dist: packaging>=20.0; extra == "hf"
128
- Requires-Dist: Pillow>=10.0.0; extra == "hf"
129
- Requires-Dist: pypdf>=3.16.0; extra == "hf"
130
- Requires-Dist: pyyaml==6.0; extra == "hf"
131
- Requires-Dist: pyzmq>=16; extra == "hf"
132
- Requires-Dist: termcolor>=1.1; extra == "hf"
133
- Requires-Dist: tabulate>=0.7.7; extra == "hf"
134
- Requires-Dist: tqdm==4.64.0; extra == "hf"
135
- Requires-Dist: timm; extra == "hf"
136
- Requires-Dist: transformers>=4.36.0; extra == "hf"
137
- Requires-Dist: accelerate; extra == "hf"
138
- Requires-Dist: python-doctr==0.7.0; extra == "hf"
139
- Requires-Dist: boto3; extra == "hf"
140
- Requires-Dist: pdfplumber>=0.7.1; extra == "hf"
141
- Requires-Dist: fasttext; extra == "hf"
142
- Requires-Dist: jdeskew; extra == "hf"
143
- Requires-Dist: apted==1.0.3; extra == "hf"
144
- Requires-Dist: distance==0.1.3; extra == "hf"
145
- Requires-Dist: lxml>=4.9.1; extra == "hf"
146
122
 
147
123
 
148
124
  <p align="center">
@@ -188,7 +164,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
188
164
  Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
189
165
  [docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more infos.
190
166
  - Document layout analysis and table recognition now runs with Torchscript (CPU) as well and Detectron2 is
191
- not required anymore for basic inference.
167
+ not required anymore for basic inference.
168
+ - [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
169
+ (not contained in the built-in Analyzer).
192
170
 
193
171
  **deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
194
172
  post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
@@ -42,7 +42,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
42
42
  Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
43
43
  [docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more information.
44
44
  - Document layout analysis and table recognition now run with TorchScript (CPU) as well, and Detectron2 is
45
- not required anymore for basic inference.
45
+ not required anymore for basic inference.
46
+ - [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
47
+ (not contained in the built-in Analyzer).
46
48
 
47
49
  **deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
48
50
  post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
@@ -27,7 +27,7 @@ from .utils.logger import logger
27
27
 
28
28
  # pylint: enable=wrong-import-position
29
29
 
30
- __version__ = 0.29
30
+ __version__ = 0.31
31
31
 
32
32
  _IMPORT_STRUCTURE = {
33
33
  "analyzer": [
@@ -179,6 +179,7 @@ _IMPORT_STRUCTURE = {
179
179
  "Jdeskewer",
180
180
  "DoctrTextlineDetector",
181
181
  "DoctrTextRecognizer",
182
+ "DocTrRotationTransformer",
182
183
  "FasttextLangDetector",
183
184
  "HFDetrDerivedDetector",
184
185
  "HFLayoutLmTokenClassifierBase",
@@ -194,6 +195,7 @@ _IMPORT_STRUCTURE = {
194
195
  "ModelDownloadManager",
195
196
  "PdfPlumberTextDetector",
196
197
  "TesseractOcrDetector",
198
+ "TesseractRotationTransformer",
197
199
  "TextractOcrDetector",
198
200
  "TPFrcnnDetector",
199
201
  ],
@@ -279,7 +281,7 @@ _IMPORT_STRUCTURE = {
279
281
  "PubtablesSegmentationService",
280
282
  "SegmentationResult",
281
283
  "TextExtractionService",
282
- "SimpleTransformPipelineComponent",
284
+ "SimpleTransformService",
283
285
  ],
284
286
  "train": [
285
287
  "D2Trainer",
@@ -343,6 +345,8 @@ _IMPORT_STRUCTURE = {
343
345
  "get_opencv_requirement",
344
346
  "pillow_available",
345
347
  "get_pillow_requirement",
348
+ "spacy_available",
349
+ "get_spacy_requirement",
346
350
  "load_image_from_file",
347
351
  "load_bytes_from_pdf_file",
348
352
  "get_load_image_func",
@@ -54,7 +54,7 @@ from ..utils.file_utils import (
54
54
  tf_available,
55
55
  )
56
56
  from ..utils.fs import get_configs_dir_path, get_package_path, mkdir_p
57
- from ..utils.logger import logger
57
+ from ..utils.logger import LoggingRecord, logger
58
58
  from ..utils.metacfg import AttrDict, set_config_by_yaml
59
59
  from ..utils.settings import CellType, LayoutType
60
60
  from ..utils.transform import PadTransform
@@ -113,11 +113,12 @@ def config_sanity_checks(cfg: AttrDict) -> None:
113
113
  """Some config sanity checks"""
114
114
  if cfg.USE_PDF_MINER and cfg.USE_OCR and cfg.OCR.USE_DOCTR:
115
115
  raise ValueError("Configuration USE_PDF_MINER= True and USE_OCR=True and USE_DOCTR=True is not allowed")
116
- if cfg.OCR.USE_TESSERACT + cfg.OCR.USE_DOCTR + cfg.OCR.USE_TEXTRACT != 1:
117
- raise ValueError(
118
- "Choose either OCR.USE_TESSERACT=True or OCR.USE_DOCTR=True or OCR.USE_TEXTRACT=True and set the other two "
119
- "to False. Only one OCR system can be activated."
120
- )
116
+ if cfg.USE_OCR:
117
+ if cfg.OCR.USE_TESSERACT + cfg.OCR.USE_DOCTR + cfg.OCR.USE_TEXTRACT != 1:
118
+ raise ValueError(
119
+ "Choose either OCR.USE_TESSERACT=True or OCR.USE_DOCTR=True or OCR.USE_TEXTRACT=True "
120
+ "and set the other two to False. Only one OCR system can be activated."
121
+ )
121
122
 
122
123
 
123
124
  def build_detector(
@@ -231,9 +232,13 @@ def build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer,
231
232
  weights = cfg.OCR.WEIGHTS.DOCTR_RECOGNITION.TF if cfg.LIB == "TF" else cfg.OCR.WEIGHTS.DOCTR_RECOGNITION.PT
232
233
  weights_path = ModelDownloadManager.maybe_download_weights_and_configs(weights)
233
234
  profile = ModelCatalog.get_profile(weights)
235
+ # get_full_path_configs will complete the path even if the model is not registered
236
+ config_path = ModelCatalog.get_full_path_configs(weights) if profile.config is not None else None
234
237
  if profile.architecture is None:
235
238
  raise ValueError("model profile.architecture must be specified")
236
- return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB)
239
+ return DoctrTextRecognizer(
240
+ profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB, path_config_json=config_path
241
+ )
237
242
  if cfg.OCR.USE_TEXTRACT:
238
243
  credentials_kwargs = {
239
244
  "aws_access_key_id": environ.get("ACCESS_KEY"),
@@ -445,7 +450,7 @@ def get_dd_analyzer(
445
450
  cfg.update_args(config_overwrite)
446
451
 
447
452
  config_sanity_checks(cfg)
448
- logger.info("Config: \n %s", str(cfg), cfg.to_dict())
453
+ logger.info(LoggingRecord(f"Config: \n {str(cfg)}", cfg.to_dict())) # type: ignore
449
454
 
450
455
  # will silent all TP logging while building the tower
451
456
  if tensorpack_available():
@@ -17,25 +17,6 @@ from typing import Any, Iterator, no_type_check
17
17
  from ..utils.utils import get_rng
18
18
 
19
19
 
20
- class DataFlowTerminated(BaseException):
21
- """
22
- An exception indicating that the DataFlow is unable to produce any more
23
- data, i.e. something wrong happened so that calling `__iter__`
24
- cannot give a valid iterator anymore.
25
- In most DataFlow this will never be raised.
26
- """
27
-
28
-
29
- class DataFlowResetStateNotCalled(BaseException):
30
- """
31
- An exception indicating that `reset_state()` has not been called before starting
32
- iteration.
33
- """
34
-
35
- def __init__(self) -> None:
36
- super().__init__("Iterating a dataflow requires .reset_state() to be called first")
37
-
38
-
39
20
  class DataFlowReentrantGuard:
40
21
  """
41
22
  A tool to enforce non-reentrancy.
@@ -25,10 +25,11 @@ from typing import Any, Callable, Iterable, Iterator, List, Optional
25
25
 
26
26
  import numpy as np
27
27
 
28
- from ..utils.logger import logger
28
+ from ..utils.error import DataFlowResetStateNotCalledError
29
+ from ..utils.logger import LoggingRecord, logger
29
30
  from ..utils.tqdm import get_tqdm
30
31
  from ..utils.utils import get_rng
31
- from .base import DataFlow, DataFlowReentrantGuard, DataFlowResetStateNotCalled, ProxyDataFlow
32
+ from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
32
33
  from .serialize import DataFromIterable, DataFromList
33
34
 
34
35
  __all__ = ["CacheData", "CustomDataFromList", "CustomDataFromIterable"]
@@ -65,7 +66,7 @@ class CacheData(ProxyDataFlow):
65
66
 
66
67
  def __iter__(self) -> Iterator[Any]:
67
68
  if self._guard is None:
68
- raise DataFlowResetStateNotCalled()
69
+ raise DataFlowResetStateNotCalledError()
69
70
 
70
71
  with self._guard:
71
72
  if self.buffer:
@@ -139,10 +140,10 @@ class CustomDataFromList(DataFromList):
139
140
 
140
141
  def __iter__(self) -> Iterator[Any]:
141
142
  if self.rng is None:
142
- raise DataFlowResetStateNotCalled()
143
+ raise DataFlowResetStateNotCalledError()
143
144
  if self.rebalance_func is not None:
144
145
  lst_tmp = self.rebalance_func(self.lst)
145
- logger.info("subset size after re-balancing: %s", len(lst_tmp))
146
+ logger.info(LoggingRecord(f"CustomDataFromList: subset size after re-balancing: {len(lst_tmp)}"))
146
147
  else:
147
148
  lst_tmp = self.lst
148
149
 
@@ -23,16 +23,20 @@ import itertools
23
23
  import json
24
24
  import os
25
25
  from collections import defaultdict
26
+ from pathlib import Path
26
27
  from typing import DefaultDict, Dict, List, Optional, Sequence, Union
27
28
 
28
29
  from jsonlines import Reader, Writer
30
+ from tabulate import tabulate
31
+ from termcolor import colored
29
32
 
30
33
  from ..utils.context import timed_operation
31
34
  from ..utils.detection_types import JsonDict, Pathlike
35
+ from ..utils.error import FileExtensionError
32
36
  from ..utils.identifier import get_uuid_from_str
33
37
  from ..utils.pdf_utils import PDFStreamer
34
38
  from ..utils.tqdm import get_tqdm
35
- from ..utils.utils import FileExtensionError, is_file_extension
39
+ from ..utils.utils import is_file_extension
36
40
  from .base import DataFlow
37
41
  from .common import FlattenData, JoinData, MapData
38
42
  from .custom import CacheData, CustomDataFromIterable, CustomDataFromList
@@ -186,6 +190,11 @@ class SerializerFiles:
186
190
  df2: DataFlow
187
191
  df3: DataFlow
188
192
 
193
+ if isinstance(path, str):
194
+ path = Path(path)
195
+ if not path.exists():
196
+ raise NotADirectoryError(f"The path {path} to the directory or file does not exist")
197
+
189
198
  if shuffle:
190
199
  sort = False
191
200
  it1 = os.walk(path, topdown=False)
@@ -217,7 +226,7 @@ class SerializerFiles:
217
226
  """
218
227
  Not implemented
219
228
  """
220
- raise NotImplementedError
229
+ raise NotImplementedError()
221
230
 
222
231
 
223
232
  class CocoParser:
@@ -277,8 +286,14 @@ class CocoParser:
277
286
  """
278
287
  Print information about the annotation file.
279
288
  """
289
+ rows = []
280
290
  for key, value in self.dataset["info"].items():
281
- print(f"{key}: {value}")
291
+ row = [key, value]
292
+ rows.append(row)
293
+
294
+ header = ["key", "value"]
295
+ table = tabulate(rows, headers=header, tablefmt="fancy_grid", stralign="left", numalign="left")
296
+ print(colored(table, "cyan"))
282
297
 
283
298
  def get_ann_ids(
284
299
  self,
@@ -493,7 +508,7 @@ class SerializerCoco:
493
508
  """
494
509
  Not implemented
495
510
  """
496
- raise NotImplementedError
511
+ raise NotImplementedError()
497
512
 
498
513
 
499
514
  class SerializerPdfDoc:
@@ -541,7 +556,7 @@ class SerializerPdfDoc:
541
556
  """
542
557
  Not implemented
543
558
  """
544
- raise NotImplementedError
559
+ raise NotImplementedError()
545
560
 
546
561
  @staticmethod
547
562
  def split(path: Pathlike, path_target: Optional[Pathlike] = None, max_datapoint: Optional[int] = None) -> None:
@@ -28,8 +28,9 @@ from typing import Any, Callable, Iterator, List, no_type_check
28
28
  import zmq
29
29
 
30
30
  from ..utils.concurrency import StoppableThread, enable_death_signal, start_proc_mask_signal
31
- from ..utils.logger import logger
32
- from .base import DataFlow, DataFlowReentrantGuard, DataFlowTerminated, ProxyDataFlow
31
+ from ..utils.error import DataFlowTerminatedError
32
+ from ..utils.logger import LoggingRecord, logger
33
+ from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
33
34
  from .common import RepeatedData
34
35
  from .serialize import PickleSerializer
35
36
 
@@ -48,15 +49,15 @@ def _zmq_catch_error(name):
48
49
  try:
49
50
  yield
50
51
  except zmq.ContextTerminated as exc:
51
- logger.info("[%s] Context terminated.", name)
52
- raise DataFlowTerminated() from exc
52
+ logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Context terminated."))
53
+ raise DataFlowTerminatedError() from exc
53
54
  except zmq.ZMQError as exc:
54
55
  if exc.errno == errno.ENOTSOCK: # socket closed
55
- logger.info("[%s] Socket closed.", name)
56
- raise DataFlowTerminated() from exc
57
- raise ValueError from exc
56
+ logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Socket closed."))
57
+ raise DataFlowTerminatedError() from exc
58
+ raise ValueError() from exc
58
59
  except Exception as exc:
59
- raise ValueError from exc
60
+ raise ValueError() from exc
60
61
 
61
62
 
62
63
  @no_type_check
@@ -78,8 +79,8 @@ def _get_pipe_name(name):
78
79
  class _ParallelMapData(ProxyDataFlow, ABC):
79
80
  def __init__(self, df: DataFlow, buffer_size: int, strict: bool = False) -> None:
80
81
  super().__init__(df)
81
- if not buffer_size:
82
- raise ValueError("buffer_size must be a positive number")
82
+ if buffer_size <= 0:
83
+ raise ValueError(f"buffer_size must be a positive number, got {buffer_size}")
83
84
  self._buffer_size = buffer_size
84
85
  self._buffer_occupancy = 0 # actual #elements in buffer, only useful in strict mode
85
86
  self._strict = strict
@@ -95,12 +96,12 @@ class _ParallelMapData(ProxyDataFlow, ABC):
95
96
  @no_type_check
96
97
  @abstractmethod
97
98
  def _recv(self):
98
- raise NotImplementedError
99
+ raise NotImplementedError()
99
100
 
100
101
  @no_type_check
101
102
  @abstractmethod
102
103
  def _send(self, dp: Any):
103
- raise NotImplementedError
104
+ raise NotImplementedError()
104
105
 
105
106
  @no_type_check
106
107
  def _recv_filter_none(self):
@@ -312,7 +313,8 @@ class _MultiProcessZMQDataFlow(DataFlow, ABC):
312
313
  for x in self._procs:
313
314
  x.terminate()
314
315
  x.join(5)
315
- logger.info("%s successfully cleaned-up.", type(self).__name__)
316
+ logger.info(LoggingRecord(f"_MultiProcessZMQDataFlow [{type(self).__name__}] successfully cleaned-up."))
317
+
316
318
  except Exception: # pylint: disable=W0703
317
319
  pass
318
320
 
@@ -323,9 +325,12 @@ def _bind_guard(sock, name):
323
325
  sock.bind(name)
324
326
  except zmq.ZMQError:
325
327
  logger.error(
326
- "ZMQError in socket.bind('{name}'). Perhaps you're using pipes on a non-local file system. "
327
- "See documentation of MultiProcessRunnerZMQ for more information."
328
+ LoggingRecord(
329
+ f"ZMQError in socket.bind('{name}'). Perhaps you're using pipes on a non-local file system. "
330
+ "See documentation of MultiProcessRunnerZMQ for more information."
331
+ )
328
332
  )
333
+
329
334
  raise
330
335
 
331
336
 
@@ -394,8 +399,8 @@ class MultiProcessMapData(_ParallelMapData, _MultiProcessZMQDataFlow):
394
399
 
395
400
  _ParallelMapData.__init__(self, df, buffer_size, strict)
396
401
  _MultiProcessZMQDataFlow.__init__(self)
397
- if not num_proc:
398
- raise ValueError("num_proc must be a positive number")
402
+ if num_proc <= 0:
403
+ raise ValueError(f"num_proc must be a positive number, got {num_proc}")
399
404
  self.num_proc = num_proc
400
405
  self.map_func = map_func
401
406
  self._strict = strict
@@ -16,7 +16,8 @@ from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
16
16
 
17
17
  import numpy as np
18
18
 
19
- from .base import DataFlow, DataFlowResetStateNotCalled, RNGDataFlow
19
+ from ..utils.error import DataFlowResetStateNotCalledError
20
+ from .base import DataFlow, RNGDataFlow
20
21
 
21
22
 
22
23
  class DataFromList(RNGDataFlow):
@@ -44,7 +45,7 @@ class DataFromList(RNGDataFlow):
44
45
  for k in idxs:
45
46
  yield self.lst[k]
46
47
  else:
47
- raise DataFlowResetStateNotCalled()
48
+ raise DataFlowResetStateNotCalledError()
48
49
 
49
50
 
50
51
  class DataFromIterable(DataFlow):
@@ -63,7 +64,7 @@ class DataFromIterable(DataFlow):
63
64
 
64
65
  def __len__(self) -> int:
65
66
  if self._len is None:
66
- raise NotImplementedError
67
+ raise NotImplementedError()
67
68
  return self._len
68
69
 
69
70
  def __iter__(self) -> Iterator[Any]:
@@ -107,7 +108,7 @@ class FakeData(RNGDataFlow):
107
108
 
108
109
  def __iter__(self) -> Iterator[Any]:
109
110
  if self.rng is None:
110
- raise DataFlowResetStateNotCalled()
111
+ raise DataFlowResetStateNotCalledError()
111
112
  if self.random:
112
113
  for _ in range(self._size):
113
114
  val = []
@@ -23,7 +23,7 @@ from typing import Any, Optional, Tuple, Union
23
23
  import numpy as np
24
24
  import numpy.typing as npt
25
25
 
26
- from ..utils.logger import logger
26
+ from ..utils.logger import LoggingRecord, logger
27
27
  from ..utils.tqdm import get_tqdm
28
28
  from .base import DataFlow, ProxyDataFlow
29
29
 
@@ -95,7 +95,7 @@ class MeanFromDataFlow(ProxyDataFlow):
95
95
  self.df.reset_state()
96
96
  itr = iter(self.df)
97
97
 
98
- logger.info("Calculating mean")
98
+ logger.info(LoggingRecord("Calculating mean"))
99
99
 
100
100
  len_df: Optional[int]
101
101
  try:
@@ -139,7 +139,7 @@ class MeanFromDataFlow(ProxyDataFlow):
139
139
  if n == self.max_datapoints:
140
140
  break
141
141
 
142
- logger.info("Mean from %s datapoints along axis %s: %s", n, self.axis, self.mean)
142
+ logger.info(LoggingRecord(f"Mean from {n} datapoints along axis {self.axis}: {self.mean}"))
143
143
 
144
144
  return self.mean
145
145
 
@@ -216,7 +216,7 @@ class StdFromDataFlow(ProxyDataFlow):
216
216
  self.df.reset_state()
217
217
  itr = iter(self.df)
218
218
 
219
- logger.info("Calculating standard deviation")
219
+ logger.info(LoggingRecord("Calculating standard deviation"))
220
220
  try:
221
221
  len_df = len(self.df)
222
222
  except NotImplementedError:
@@ -266,6 +266,6 @@ class StdFromDataFlow(ProxyDataFlow):
266
266
  var = (ex2 - (ex * ex) / n) / (n - 1)
267
267
  self.std = np.sqrt(var)
268
268
 
269
- logger.info("Standard deviation from %s datapoints along axis %s: %s", n, self.axis, self.std)
269
+ logger.info(LoggingRecord(f"Standard deviation from {n} datapoints along axis {self.axis}: {self.std}"))
270
270
 
271
271
  return self.std