deepdoctection 0.30__tar.gz → 0.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (238)
  1. {deepdoctection-0.30 → deepdoctection-0.31}/PKG-INFO +33 -58
  2. {deepdoctection-0.30 → deepdoctection-0.31}/README.md +3 -1
  3. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/__init__.py +4 -2
  4. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/analyzer/dd.py +6 -5
  5. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/base.py +0 -19
  6. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/custom.py +4 -3
  7. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/custom_serialize.py +14 -5
  8. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/parallel_map.py +12 -11
  9. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/serialize.py +5 -4
  10. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/annotation.py +33 -12
  11. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/box.py +1 -4
  12. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/convert.py +3 -1
  13. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/image.py +66 -29
  14. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/view.py +57 -25
  15. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/adapter.py +1 -1
  16. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/base.py +83 -10
  17. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/dataflow_builder.py +1 -1
  18. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/info.py +2 -2
  19. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/layouttest.py +2 -7
  20. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/accmetric.py +1 -1
  21. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/base.py +5 -4
  22. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/eval.py +2 -2
  23. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/tp_eval_callback.py +5 -4
  24. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/base.py +39 -13
  25. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/d2detect.py +164 -64
  26. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/deskew.py +32 -7
  27. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/doctrocr.py +227 -39
  28. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/fastlang.py +45 -7
  29. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/hfdetr.py +90 -33
  30. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/hflayoutlm.py +109 -22
  31. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pdftext.py +2 -1
  32. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pt/ptutils.py +3 -2
  33. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tessocr.py +134 -22
  34. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/texocr.py +2 -0
  35. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpcompat.py +4 -4
  36. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
  37. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tpdetect.py +50 -23
  38. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/d2struct.py +1 -1
  39. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/hfstruct.py +1 -1
  40. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/laylmstruct.py +1 -1
  41. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/maputils.py +13 -2
  42. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/prodigystruct.py +1 -1
  43. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/pubstruct.py +10 -10
  44. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/tpstruct.py +1 -1
  45. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/anngen.py +35 -8
  46. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/base.py +53 -19
  47. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/cell.py +29 -8
  48. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/common.py +12 -4
  49. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/doctectionpipe.py +2 -2
  50. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/language.py +3 -2
  51. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/layout.py +3 -2
  52. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/lm.py +2 -2
  53. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/refine.py +18 -10
  54. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/segment.py +21 -16
  55. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/text.py +14 -8
  56. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/transform.py +16 -9
  57. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/d2_frcnn_train.py +15 -12
  58. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/hf_detr_train.py +8 -6
  59. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/hf_layoutlm_train.py +16 -11
  60. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/__init__.py +3 -0
  61. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/concurrency.py +1 -1
  62. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/context.py +2 -2
  63. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/env_info.py +55 -22
  64. deepdoctection-0.31/deepdoctection/utils/error.py +84 -0
  65. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/file_utils.py +4 -15
  66. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/fs.py +7 -7
  67. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/pdf_utils.py +5 -4
  68. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/settings.py +5 -1
  69. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/transform.py +1 -1
  70. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/utils.py +0 -6
  71. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/viz.py +44 -2
  72. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/PKG-INFO +33 -58
  73. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/SOURCES.txt +1 -0
  74. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/requires.txt +29 -57
  75. {deepdoctection-0.30 → deepdoctection-0.31}/setup.py +20 -21
  76. {deepdoctection-0.30 → deepdoctection-0.31}/tests/conftest.py +0 -1
  77. {deepdoctection-0.30 → deepdoctection-0.31}/tests/data.py +88 -48
  78. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_image.py +50 -4
  79. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_funsd.py +2 -2
  80. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/test_info.py +1 -1
  81. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/conftest.py +10 -1
  82. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/data.py +2 -0
  83. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_deskew.py +13 -3
  84. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_doctrocr.py +46 -2
  85. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_hfdetr.py +2 -2
  86. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_tessocr.py +62 -3
  87. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/data.py +6 -6
  88. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_anngen.py +6 -6
  89. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_cell.py +21 -0
  90. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_layout.py +1 -0
  91. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_text.py +4 -0
  92. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_transform.py +2 -0
  93. {deepdoctection-0.30 → deepdoctection-0.31}/tests_d2/test_d2detect.py +1 -1
  94. {deepdoctection-0.30 → deepdoctection-0.31}/LICENSE +0 -0
  95. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/analyzer/__init__.py +0 -0
  96. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/configs/__init__.py +0 -0
  97. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  98. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  99. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/__init__.py +0 -0
  100. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/common.py +0 -0
  101. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/stats.py +0 -0
  102. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/__init__.py +0 -0
  103. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/__init__.py +0 -0
  104. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/__init__.py +0 -0
  105. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  106. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  107. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/funsd.py +0 -0
  108. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  109. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/publaynet.py +0 -0
  110. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  111. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  112. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  113. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/xfund.py +0 -0
  114. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  115. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  116. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/registry.py +0 -0
  117. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/save.py +0 -0
  118. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/__init__.py +0 -0
  119. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/cocometric.py +0 -0
  120. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/registry.py +0 -0
  121. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/tedsmetric.py +0 -0
  122. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/__init__.py +0 -0
  123. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/model.py +0 -0
  124. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pt/__init__.py +0 -0
  125. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pt/nms.py +0 -0
  126. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/__init__.py +0 -0
  127. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tfutils.py +0 -0
  128. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  129. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  130. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  131. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  132. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  133. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  134. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  135. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  136. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  137. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  138. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  139. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  140. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  141. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  142. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  143. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  144. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  145. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/__init__.py +0 -0
  146. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/cats.py +0 -0
  147. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/cocostruct.py +0 -0
  148. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/match.py +0 -0
  149. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/misc.py +0 -0
  150. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/pascalstruct.py +0 -0
  151. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/xfundstruct.py +0 -0
  152. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/__init__.py +0 -0
  153. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/concurrency.py +0 -0
  154. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/order.py +0 -0
  155. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/registry.py +0 -0
  156. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/py.typed +0 -0
  157. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/__init__.py +0 -0
  158. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/tp_frcnn_train.py +0 -0
  159. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/detection_types.py +0 -0
  160. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/develop.py +0 -0
  161. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/identifier.py +0 -0
  162. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/logger.py +0 -0
  163. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/metacfg.py +0 -0
  164. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/tqdm.py +0 -0
  165. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/dependency_links.txt +0 -0
  166. {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/top_level.txt +0 -0
  167. {deepdoctection-0.30 → deepdoctection-0.31}/setup.cfg +0 -0
  168. {deepdoctection-0.30 → deepdoctection-0.31}/tests/__init__.py +0 -0
  169. {deepdoctection-0.30 → deepdoctection-0.31}/tests/analyzer/__init__.py +0 -0
  170. {deepdoctection-0.30 → deepdoctection-0.31}/tests/analyzer/test_dd.py +0 -0
  171. {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/__init__.py +0 -0
  172. {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/conftest.py +0 -0
  173. {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_common.py +0 -0
  174. {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_custom.py +0 -0
  175. {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_custom_serialize.py +0 -0
  176. {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_parallel_map.py +0 -0
  177. {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_stats.py +0 -0
  178. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/__init__.py +0 -0
  179. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/conftest.py +0 -0
  180. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_annotation.py +0 -0
  181. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_box.py +0 -0
  182. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_convert.py +0 -0
  183. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_view.py +0 -0
  184. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/__init__.py +0 -0
  185. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/__init__.py +0 -0
  186. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/conftest.py +0 -0
  187. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_doclaynet.py +0 -0
  188. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_fintabnet.py +0 -0
  189. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_iiitar13k.py +0 -0
  190. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_layouttest.py +0 -0
  191. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_publaynet.py +0 -0
  192. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_pubtables1m.py +0 -0
  193. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_pubtabnet.py +0 -0
  194. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_rvlcdip.py +0 -0
  195. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/test_adapter.py +0 -0
  196. {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/test_registry.py +0 -0
  197. {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/__init__.py +0 -0
  198. {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/conftest.py +0 -0
  199. {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_accmetric.py +0 -0
  200. {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_cocometric.py +0 -0
  201. {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_eval.py +0 -0
  202. {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_registry.py +0 -0
  203. {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_tedsmetric.py +0 -0
  204. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/__init__.py +0 -0
  205. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_fastlang.py +0 -0
  206. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_hflayoutlm.py +0 -0
  207. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_pdftext.py +0 -0
  208. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_texocr.py +0 -0
  209. {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_tpdetect.py +0 -0
  210. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/__init__.py +0 -0
  211. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/conftest.py +0 -0
  212. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_cats.py +0 -0
  213. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_cocostruct.py +0 -0
  214. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_d2struct.py +0 -0
  215. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_hfstruct.py +0 -0
  216. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_iiitar13k.py +0 -0
  217. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_laylmstruct.py +0 -0
  218. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_misc.py +0 -0
  219. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_prodigystruct.py +0 -0
  220. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_pubstruct.py +0 -0
  221. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_tpstruct.py +0 -0
  222. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_utils.py +0 -0
  223. {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_xfundstruct.py +0 -0
  224. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/__init__.py +0 -0
  225. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_common.py +0 -0
  226. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_language.py +0 -0
  227. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_lm.py +0 -0
  228. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_order.py +0 -0
  229. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_refine.py +0 -0
  230. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_registry.py +0 -0
  231. {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_segment.py +0 -0
  232. {deepdoctection-0.30 → deepdoctection-0.31}/tests/test_utils.py +0 -0
  233. {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/__init__.py +0 -0
  234. {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/conftest.py +0 -0
  235. {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/test_d2_frcnn_train.py +0 -0
  236. {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/test_tp_frcnn_train.py +0 -0
  237. {deepdoctection-0.30 → deepdoctection-0.31}/tests_d2/__init__.py +0 -0
  238. {deepdoctection-0.30 → deepdoctection-0.31}/tests_d2/conftest.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.30
3
+ Version: 0.31
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -17,9 +17,9 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
17
  Requires-Python: >=3.8
18
18
  Description-Content-Type: text/markdown
19
19
  License-File: LICENSE
20
- Requires-Dist: catalogue==2.0.7
20
+ Requires-Dist: catalogue==2.0.10
21
21
  Requires-Dist: huggingface_hub>=0.12.0
22
- Requires-Dist: importlib-metadata>=4.11.2
22
+ Requires-Dist: importlib-metadata>=5.0.0
23
23
  Requires-Dist: jsonlines==3.1.0
24
24
  Requires-Dist: mock==4.0.3
25
25
  Requires-Dist: networkx>=2.7.1
@@ -27,15 +27,15 @@ Requires-Dist: numpy>=1.21
27
27
  Requires-Dist: packaging>=20.0
28
28
  Requires-Dist: Pillow>=10.0.0
29
29
  Requires-Dist: pypdf>=3.16.0
30
- Requires-Dist: pyyaml==6.0
30
+ Requires-Dist: pyyaml>=6.0.1
31
31
  Requires-Dist: pyzmq>=16
32
32
  Requires-Dist: termcolor>=1.1
33
33
  Requires-Dist: tabulate>=0.7.7
34
34
  Requires-Dist: tqdm==4.64.0
35
35
  Provides-Extra: tf
36
- Requires-Dist: catalogue==2.0.7; extra == "tf"
36
+ Requires-Dist: catalogue==2.0.10; extra == "tf"
37
37
  Requires-Dist: huggingface_hub>=0.12.0; extra == "tf"
38
- Requires-Dist: importlib-metadata>=4.11.2; extra == "tf"
38
+ Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
39
39
  Requires-Dist: jsonlines==3.1.0; extra == "tf"
40
40
  Requires-Dist: mock==4.0.3; extra == "tf"
41
41
  Requires-Dist: networkx>=2.7.1; extra == "tf"
@@ -43,12 +43,12 @@ Requires-Dist: numpy>=1.21; extra == "tf"
43
43
  Requires-Dist: packaging>=20.0; extra == "tf"
44
44
  Requires-Dist: Pillow>=10.0.0; extra == "tf"
45
45
  Requires-Dist: pypdf>=3.16.0; extra == "tf"
46
- Requires-Dist: pyyaml==6.0; extra == "tf"
46
+ Requires-Dist: pyyaml>=6.0.1; extra == "tf"
47
47
  Requires-Dist: pyzmq>=16; extra == "tf"
48
48
  Requires-Dist: termcolor>=1.1; extra == "tf"
49
49
  Requires-Dist: tabulate>=0.7.7; extra == "tf"
50
50
  Requires-Dist: tqdm==4.64.0; extra == "tf"
51
- Requires-Dist: tensorpack; extra == "tf"
51
+ Requires-Dist: tensorpack==0.11; extra == "tf"
52
52
  Requires-Dist: protobuf==3.20.1; extra == "tf"
53
53
  Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
54
54
  Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
@@ -56,15 +56,15 @@ Requires-Dist: python-doctr==0.7.0; extra == "tf"
56
56
  Requires-Dist: pycocotools>=2.0.2; extra == "tf"
57
57
  Requires-Dist: boto3; extra == "tf"
58
58
  Requires-Dist: pdfplumber>=0.7.1; extra == "tf"
59
- Requires-Dist: fasttext; extra == "tf"
60
- Requires-Dist: jdeskew; extra == "tf"
59
+ Requires-Dist: fasttext==0.9.2; extra == "tf"
60
+ Requires-Dist: jdeskew>=0.2.2; extra == "tf"
61
61
  Requires-Dist: apted==1.0.3; extra == "tf"
62
62
  Requires-Dist: distance==0.1.3; extra == "tf"
63
63
  Requires-Dist: lxml>=4.9.1; extra == "tf"
64
64
  Provides-Extra: pt
65
- Requires-Dist: catalogue==2.0.7; extra == "pt"
65
+ Requires-Dist: catalogue==2.0.10; extra == "pt"
66
66
  Requires-Dist: huggingface_hub>=0.12.0; extra == "pt"
67
- Requires-Dist: importlib-metadata>=4.11.2; extra == "pt"
67
+ Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
68
68
  Requires-Dist: jsonlines==3.1.0; extra == "pt"
69
69
  Requires-Dist: mock==4.0.3; extra == "pt"
70
70
  Requires-Dist: networkx>=2.7.1; extra == "pt"
@@ -72,31 +72,31 @@ Requires-Dist: numpy>=1.21; extra == "pt"
72
72
  Requires-Dist: packaging>=20.0; extra == "pt"
73
73
  Requires-Dist: Pillow>=10.0.0; extra == "pt"
74
74
  Requires-Dist: pypdf>=3.16.0; extra == "pt"
75
- Requires-Dist: pyyaml==6.0; extra == "pt"
75
+ Requires-Dist: pyyaml>=6.0.1; extra == "pt"
76
76
  Requires-Dist: pyzmq>=16; extra == "pt"
77
77
  Requires-Dist: termcolor>=1.1; extra == "pt"
78
78
  Requires-Dist: tabulate>=0.7.7; extra == "pt"
79
79
  Requires-Dist: tqdm==4.64.0; extra == "pt"
80
- Requires-Dist: timm; extra == "pt"
80
+ Requires-Dist: timm>=0.9.16; extra == "pt"
81
81
  Requires-Dist: transformers>=4.36.0; extra == "pt"
82
- Requires-Dist: accelerate; extra == "pt"
82
+ Requires-Dist: accelerate>=0.29.1; extra == "pt"
83
83
  Requires-Dist: python-doctr==0.7.0; extra == "pt"
84
84
  Requires-Dist: boto3; extra == "pt"
85
85
  Requires-Dist: pdfplumber>=0.7.1; extra == "pt"
86
- Requires-Dist: fasttext; extra == "pt"
87
- Requires-Dist: jdeskew; extra == "pt"
86
+ Requires-Dist: fasttext==0.9.2; extra == "pt"
87
+ Requires-Dist: jdeskew>=0.2.2; extra == "pt"
88
88
  Requires-Dist: apted==1.0.3; extra == "pt"
89
89
  Requires-Dist: distance==0.1.3; extra == "pt"
90
90
  Requires-Dist: lxml>=4.9.1; extra == "pt"
91
91
  Provides-Extra: docs
92
- Requires-Dist: tensorpack; extra == "docs"
92
+ Requires-Dist: tensorpack==0.11; extra == "docs"
93
93
  Requires-Dist: boto3; extra == "docs"
94
94
  Requires-Dist: transformers>=4.36.0; extra == "docs"
95
- Requires-Dist: accelerate; extra == "docs"
95
+ Requires-Dist: accelerate>=0.29.1; extra == "docs"
96
96
  Requires-Dist: pdfplumber>=0.7.1; extra == "docs"
97
97
  Requires-Dist: lxml>=4.9.1; extra == "docs"
98
- Requires-Dist: lxml-stubs; extra == "docs"
99
- Requires-Dist: jdeskew; extra == "docs"
98
+ Requires-Dist: lxml-stubs>=0.5.1; extra == "docs"
99
+ Requires-Dist: jdeskew>=0.2.2; extra == "docs"
100
100
  Requires-Dist: jinja2==3.0.3; extra == "docs"
101
101
  Requires-Dist: mkdocs-material; extra == "docs"
102
102
  Requires-Dist: mkdocstrings-python; extra == "docs"
@@ -105,47 +105,20 @@ Provides-Extra: dev
105
105
  Requires-Dist: python-dotenv==1.0.0; extra == "dev"
106
106
  Requires-Dist: click; extra == "dev"
107
107
  Requires-Dist: black==23.7.0; extra == "dev"
108
- Requires-Dist: isort; extra == "dev"
108
+ Requires-Dist: isort==5.13.2; extra == "dev"
109
109
  Requires-Dist: pylint==2.17.4; extra == "dev"
110
110
  Requires-Dist: mypy==1.4.1; extra == "dev"
111
111
  Requires-Dist: wandb; extra == "dev"
112
- Requires-Dist: types-PyYAML; extra == "dev"
113
- Requires-Dist: types-termcolor==1.1.3; extra == "dev"
114
- Requires-Dist: types-tabulate; extra == "dev"
115
- Requires-Dist: types-tqdm; extra == "dev"
116
- Requires-Dist: lxml-stubs; extra == "dev"
117
- Requires-Dist: types-Pillow; extra == "dev"
118
- Requires-Dist: types-urllib3; extra == "dev"
112
+ Requires-Dist: types-PyYAML>=6.0.12.12; extra == "dev"
113
+ Requires-Dist: types-termcolor>=1.1.3; extra == "dev"
114
+ Requires-Dist: types-tabulate>=0.9.0.3; extra == "dev"
115
+ Requires-Dist: types-tqdm>=4.66.0.5; extra == "dev"
116
+ Requires-Dist: lxml-stubs>=0.5.1; extra == "dev"
117
+ Requires-Dist: types-Pillow>=10.2.0.20240406; extra == "dev"
118
+ Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
119
119
  Provides-Extra: test
120
- Requires-Dist: pytest; extra == "test"
120
+ Requires-Dist: pytest==8.0.2; extra == "test"
121
121
  Requires-Dist: pytest-cov; extra == "test"
122
- Provides-Extra: hf
123
- Requires-Dist: catalogue==2.0.7; extra == "hf"
124
- Requires-Dist: huggingface_hub>=0.12.0; extra == "hf"
125
- Requires-Dist: importlib-metadata>=4.11.2; extra == "hf"
126
- Requires-Dist: jsonlines==3.1.0; extra == "hf"
127
- Requires-Dist: mock==4.0.3; extra == "hf"
128
- Requires-Dist: networkx>=2.7.1; extra == "hf"
129
- Requires-Dist: numpy>=1.21; extra == "hf"
130
- Requires-Dist: packaging>=20.0; extra == "hf"
131
- Requires-Dist: Pillow>=10.0.0; extra == "hf"
132
- Requires-Dist: pypdf>=3.16.0; extra == "hf"
133
- Requires-Dist: pyyaml==6.0; extra == "hf"
134
- Requires-Dist: pyzmq>=16; extra == "hf"
135
- Requires-Dist: termcolor>=1.1; extra == "hf"
136
- Requires-Dist: tabulate>=0.7.7; extra == "hf"
137
- Requires-Dist: tqdm==4.64.0; extra == "hf"
138
- Requires-Dist: timm; extra == "hf"
139
- Requires-Dist: transformers>=4.36.0; extra == "hf"
140
- Requires-Dist: accelerate; extra == "hf"
141
- Requires-Dist: python-doctr==0.7.0; extra == "hf"
142
- Requires-Dist: boto3; extra == "hf"
143
- Requires-Dist: pdfplumber>=0.7.1; extra == "hf"
144
- Requires-Dist: fasttext; extra == "hf"
145
- Requires-Dist: jdeskew; extra == "hf"
146
- Requires-Dist: apted==1.0.3; extra == "hf"
147
- Requires-Dist: distance==0.1.3; extra == "hf"
148
- Requires-Dist: lxml>=4.9.1; extra == "hf"
149
122
 
150
123
 
151
124
  <p align="center">
@@ -191,7 +164,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
191
164
  Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
192
165
  [docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more infos.
193
166
  - Document layout analysis and table recognition now runs with Torchscript (CPU) as well and Detectron2 is
194
- not required anymore for basic inference.
167
+ not required anymore for basic inference.
168
+ - [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
169
+ (not contained in the built-in Analyzer).
195
170
 
196
171
  **deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
197
172
  post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
@@ -42,7 +42,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
42
42
  Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
43
43
  [docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more infos.
44
44
  - Document layout analysis and table recognition now runs with Torchscript (CPU) as well and Detectron2 is
45
- not required anymore for basic inference.
45
+ not required anymore for basic inference.
46
+ - [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
47
+ (not contained in the built-in Analyzer).
46
48
 
47
49
  **deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
48
50
  post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
@@ -27,7 +27,7 @@ from .utils.logger import logger
27
27
 
28
28
  # pylint: enable=wrong-import-position
29
29
 
30
- __version__ = 0.30
30
+ __version__ = 0.31
31
31
 
32
32
  _IMPORT_STRUCTURE = {
33
33
  "analyzer": [
@@ -179,6 +179,7 @@ _IMPORT_STRUCTURE = {
179
179
  "Jdeskewer",
180
180
  "DoctrTextlineDetector",
181
181
  "DoctrTextRecognizer",
182
+ "DocTrRotationTransformer",
182
183
  "FasttextLangDetector",
183
184
  "HFDetrDerivedDetector",
184
185
  "HFLayoutLmTokenClassifierBase",
@@ -194,6 +195,7 @@ _IMPORT_STRUCTURE = {
194
195
  "ModelDownloadManager",
195
196
  "PdfPlumberTextDetector",
196
197
  "TesseractOcrDetector",
198
+ "TesseractRotationTransformer",
197
199
  "TextractOcrDetector",
198
200
  "TPFrcnnDetector",
199
201
  ],
@@ -279,7 +281,7 @@ _IMPORT_STRUCTURE = {
279
281
  "PubtablesSegmentationService",
280
282
  "SegmentationResult",
281
283
  "TextExtractionService",
282
- "SimpleTransformPipelineComponent",
284
+ "SimpleTransformService",
283
285
  ],
284
286
  "train": [
285
287
  "D2Trainer",
@@ -113,11 +113,12 @@ def config_sanity_checks(cfg: AttrDict) -> None:
113
113
  """Some config sanity checks"""
114
114
  if cfg.USE_PDF_MINER and cfg.USE_OCR and cfg.OCR.USE_DOCTR:
115
115
  raise ValueError("Configuration USE_PDF_MINER= True and USE_OCR=True and USE_DOCTR=True is not allowed")
116
- if cfg.OCR.USE_TESSERACT + cfg.OCR.USE_DOCTR + cfg.OCR.USE_TEXTRACT != 1:
117
- raise ValueError(
118
- "Choose either OCR.USE_TESSERACT=True or OCR.USE_DOCTR=True or OCR.USE_TEXTRACT=True and set the other two "
119
- "to False. Only one OCR system can be activated."
120
- )
116
+ if cfg.USE_OCR:
117
+ if cfg.OCR.USE_TESSERACT + cfg.OCR.USE_DOCTR + cfg.OCR.USE_TEXTRACT != 1:
118
+ raise ValueError(
119
+ "Choose either OCR.USE_TESSERACT=True or OCR.USE_DOCTR=True or OCR.USE_TEXTRACT=True "
120
+ "and set the other two to False. Only one OCR system can be activated."
121
+ )
121
122
 
122
123
 
123
124
  def build_detector(
@@ -17,25 +17,6 @@ from typing import Any, Iterator, no_type_check
17
17
  from ..utils.utils import get_rng
18
18
 
19
19
 
20
- class DataFlowTerminated(BaseException):
21
- """
22
- An exception indicating that the DataFlow is unable to produce any more
23
- data, i.e. something wrong happened so that calling `__iter__`
24
- cannot give a valid iterator anymore.
25
- In most DataFlow this will never be raised.
26
- """
27
-
28
-
29
- class DataFlowResetStateNotCalled(BaseException):
30
- """
31
- An exception indicating that `reset_state()` has not been called before starting
32
- iteration.
33
- """
34
-
35
- def __init__(self) -> None:
36
- super().__init__("Iterating a dataflow requires .reset_state() to be called first")
37
-
38
-
39
20
  class DataFlowReentrantGuard:
40
21
  """
41
22
  A tool to enforce non-reentrancy.
@@ -25,10 +25,11 @@ from typing import Any, Callable, Iterable, Iterator, List, Optional
25
25
 
26
26
  import numpy as np
27
27
 
28
+ from ..utils.error import DataFlowResetStateNotCalledError
28
29
  from ..utils.logger import LoggingRecord, logger
29
30
  from ..utils.tqdm import get_tqdm
30
31
  from ..utils.utils import get_rng
31
- from .base import DataFlow, DataFlowReentrantGuard, DataFlowResetStateNotCalled, ProxyDataFlow
32
+ from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
32
33
  from .serialize import DataFromIterable, DataFromList
33
34
 
34
35
  __all__ = ["CacheData", "CustomDataFromList", "CustomDataFromIterable"]
@@ -65,7 +66,7 @@ class CacheData(ProxyDataFlow):
65
66
 
66
67
  def __iter__(self) -> Iterator[Any]:
67
68
  if self._guard is None:
68
- raise DataFlowResetStateNotCalled()
69
+ raise DataFlowResetStateNotCalledError()
69
70
 
70
71
  with self._guard:
71
72
  if self.buffer:
@@ -139,7 +140,7 @@ class CustomDataFromList(DataFromList):
139
140
 
140
141
  def __iter__(self) -> Iterator[Any]:
141
142
  if self.rng is None:
142
- raise DataFlowResetStateNotCalled()
143
+ raise DataFlowResetStateNotCalledError()
143
144
  if self.rebalance_func is not None:
144
145
  lst_tmp = self.rebalance_func(self.lst)
145
146
  logger.info(LoggingRecord(f"CustomDataFromList: subset size after re-balancing: {len(lst_tmp)}"))
@@ -27,13 +27,16 @@ from pathlib import Path
27
27
  from typing import DefaultDict, Dict, List, Optional, Sequence, Union
28
28
 
29
29
  from jsonlines import Reader, Writer
30
+ from tabulate import tabulate
31
+ from termcolor import colored
30
32
 
31
33
  from ..utils.context import timed_operation
32
34
  from ..utils.detection_types import JsonDict, Pathlike
35
+ from ..utils.error import FileExtensionError
33
36
  from ..utils.identifier import get_uuid_from_str
34
37
  from ..utils.pdf_utils import PDFStreamer
35
38
  from ..utils.tqdm import get_tqdm
36
- from ..utils.utils import FileExtensionError, is_file_extension
39
+ from ..utils.utils import is_file_extension
37
40
  from .base import DataFlow
38
41
  from .common import FlattenData, JoinData, MapData
39
42
  from .custom import CacheData, CustomDataFromIterable, CustomDataFromList
@@ -223,7 +226,7 @@ class SerializerFiles:
223
226
  """
224
227
  Not implemented
225
228
  """
226
- raise NotImplementedError
229
+ raise NotImplementedError()
227
230
 
228
231
 
229
232
  class CocoParser:
@@ -283,8 +286,14 @@ class CocoParser:
283
286
  """
284
287
  Print information about the annotation file.
285
288
  """
289
+ rows = []
286
290
  for key, value in self.dataset["info"].items():
287
- print(f"{key}: {value}")
291
+ row = [key, value]
292
+ rows.append(row)
293
+
294
+ header = ["key", "value"]
295
+ table = tabulate(rows, headers=header, tablefmt="fancy_grid", stralign="left", numalign="left")
296
+ print(colored(table, "cyan"))
288
297
 
289
298
  def get_ann_ids(
290
299
  self,
@@ -499,7 +508,7 @@ class SerializerCoco:
499
508
  """
500
509
  Not implemented
501
510
  """
502
- raise NotImplementedError
511
+ raise NotImplementedError()
503
512
 
504
513
 
505
514
  class SerializerPdfDoc:
@@ -547,7 +556,7 @@ class SerializerPdfDoc:
547
556
  """
548
557
  Not implemented
549
558
  """
550
- raise NotImplementedError
559
+ raise NotImplementedError()
551
560
 
552
561
  @staticmethod
553
562
  def split(path: Pathlike, path_target: Optional[Pathlike] = None, max_datapoint: Optional[int] = None) -> None:
@@ -28,8 +28,9 @@ from typing import Any, Callable, Iterator, List, no_type_check
28
28
  import zmq
29
29
 
30
30
  from ..utils.concurrency import StoppableThread, enable_death_signal, start_proc_mask_signal
31
+ from ..utils.error import DataFlowTerminatedError
31
32
  from ..utils.logger import LoggingRecord, logger
32
- from .base import DataFlow, DataFlowReentrantGuard, DataFlowTerminated, ProxyDataFlow
33
+ from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
33
34
  from .common import RepeatedData
34
35
  from .serialize import PickleSerializer
35
36
 
@@ -49,14 +50,14 @@ def _zmq_catch_error(name):
49
50
  yield
50
51
  except zmq.ContextTerminated as exc:
51
52
  logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Context terminated."))
52
- raise DataFlowTerminated() from exc
53
+ raise DataFlowTerminatedError() from exc
53
54
  except zmq.ZMQError as exc:
54
55
  if exc.errno == errno.ENOTSOCK: # socket closed
55
56
  logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Socket closed."))
56
- raise DataFlowTerminated() from exc
57
- raise ValueError from exc
57
+ raise DataFlowTerminatedError() from exc
58
+ raise ValueError() from exc
58
59
  except Exception as exc:
59
- raise ValueError from exc
60
+ raise ValueError() from exc
60
61
 
61
62
 
62
63
  @no_type_check
@@ -78,8 +79,8 @@ def _get_pipe_name(name):
78
79
  class _ParallelMapData(ProxyDataFlow, ABC):
79
80
  def __init__(self, df: DataFlow, buffer_size: int, strict: bool = False) -> None:
80
81
  super().__init__(df)
81
- if not buffer_size:
82
- raise ValueError("buffer_size must be a positive number")
82
+ if buffer_size <= 0:
83
+ raise ValueError(f"buffer_size must be a positive number, got {buffer_size}")
83
84
  self._buffer_size = buffer_size
84
85
  self._buffer_occupancy = 0 # actual #elements in buffer, only useful in strict mode
85
86
  self._strict = strict
@@ -95,12 +96,12 @@ class _ParallelMapData(ProxyDataFlow, ABC):
95
96
  @no_type_check
96
97
  @abstractmethod
97
98
  def _recv(self):
98
- raise NotImplementedError
99
+ raise NotImplementedError()
99
100
 
100
101
  @no_type_check
101
102
  @abstractmethod
102
103
  def _send(self, dp: Any):
103
- raise NotImplementedError
104
+ raise NotImplementedError()
104
105
 
105
106
  @no_type_check
106
107
  def _recv_filter_none(self):
@@ -398,8 +399,8 @@ class MultiProcessMapData(_ParallelMapData, _MultiProcessZMQDataFlow):
398
399
 
399
400
  _ParallelMapData.__init__(self, df, buffer_size, strict)
400
401
  _MultiProcessZMQDataFlow.__init__(self)
401
- if not num_proc:
402
- raise ValueError("num_proc must be a positive number")
402
+ if num_proc <= 0:
403
+ raise ValueError(f"num_proc must be a positive number, got {num_proc}")
403
404
  self.num_proc = num_proc
404
405
  self.map_func = map_func
405
406
  self._strict = strict
@@ -16,7 +16,8 @@ from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
16
16
 
17
17
  import numpy as np
18
18
 
19
- from .base import DataFlow, DataFlowResetStateNotCalled, RNGDataFlow
19
+ from ..utils.error import DataFlowResetStateNotCalledError
20
+ from .base import DataFlow, RNGDataFlow
20
21
 
21
22
 
22
23
  class DataFromList(RNGDataFlow):
@@ -44,7 +45,7 @@ class DataFromList(RNGDataFlow):
44
45
  for k in idxs:
45
46
  yield self.lst[k]
46
47
  else:
47
- raise DataFlowResetStateNotCalled()
48
+ raise DataFlowResetStateNotCalledError()
48
49
 
49
50
 
50
51
  class DataFromIterable(DataFlow):
@@ -63,7 +64,7 @@ class DataFromIterable(DataFlow):
63
64
 
64
65
  def __len__(self) -> int:
65
66
  if self._len is None:
66
- raise NotImplementedError
67
+ raise NotImplementedError()
67
68
  return self._len
68
69
 
69
70
  def __iter__(self) -> Iterator[Any]:
@@ -107,7 +108,7 @@ class FakeData(RNGDataFlow):
107
108
 
108
109
  def __iter__(self) -> Iterator[Any]:
109
110
  if self.rng is None:
110
- raise DataFlowResetStateNotCalled()
111
+ raise DataFlowResetStateNotCalledError()
111
112
  if self.random:
112
113
  for _ in range(self._size):
113
114
  val = []
@@ -24,6 +24,7 @@ from dataclasses import dataclass, field
24
24
  from typing import Any, Dict, List, Optional, Union, no_type_check
25
25
 
26
26
  from ..utils.detection_types import JsonDict
27
+ from ..utils.error import AnnotationError, UUIDError
27
28
  from ..utils.identifier import get_uuid, is_uuid_like
28
29
  from ..utils.logger import LoggingRecord, logger
29
30
  from ..utils.settings import DefaultType, ObjectTypes, SummaryType, TypeOrStr, get_type
@@ -36,7 +37,16 @@ def ann_from_dict(cls, **kwargs):
36
37
  """
37
38
  A factory function to create subclasses of annotations from a given dict
38
39
  """
39
- ann = cls(kwargs.get("external_id"), kwargs.get("category_name"), kwargs.get("category_id"), kwargs.get("score"))
40
+ _init_kwargs = {
41
+ "external_id": kwargs.get("external_id"),
42
+ "category_name": kwargs.get("category_name"),
43
+ "category_id": kwargs.get("category_id"),
44
+ "score": kwargs.get("score"),
45
+ "service_id": kwargs.get("service_id"),
46
+ "model_id": kwargs.get("model_id"),
47
+ "session_id": kwargs.get("session_id"),
48
+ }
49
+ ann = cls(**_init_kwargs)
40
50
  ann.active = kwargs.get("active")
41
51
  ann._annotation_id = kwargs.get("_annotation_id") # pylint: disable=W0212
42
52
  if isinstance(kwargs.get("sub_categories"), dict):
@@ -74,11 +84,17 @@ class Annotation(ABC):
74
84
  id will not depend on the defining attributes.
75
85
 
76
86
  `_annotation_id`: Unique id for annotations. Will always be given as string representation of a md5-hash.
87
+ `service_id`: Service that generated the annotation. This will be the name of a pipeline component
88
+ `model_id`: Model that generated the annotation. This will be the name of particular model
89
+ `session_id`: Session id for the annotation. This will be the id of the session in which the annotation was created.
77
90
  """
78
91
 
79
92
  active: bool = field(default=True, init=False, repr=True)
80
93
  external_id: Optional[Union[str, int]] = field(default=None, init=True, repr=False)
81
94
  _annotation_id: Optional[str] = field(default=None, init=False, repr=True)
95
+ service_id: Optional[str] = field(default=None)
96
+ model_id: Optional[str] = field(default=None)
97
+ session_id: Optional[str] = field(default=None)
82
98
 
83
99
  def __post_init__(self) -> None:
84
100
  """
@@ -101,7 +117,7 @@ class Annotation(ABC):
101
117
  """
102
118
  if self._annotation_id:
103
119
  return self._annotation_id
104
- raise ValueError("Dump annotation first or pass external_id to create an annotation id")
120
+ raise AnnotationError("Dump annotation first or pass external_id to create an annotation id")
105
121
 
106
122
  @annotation_id.setter
107
123
  def annotation_id(self, input_id: str) -> None:
@@ -109,13 +125,13 @@ class Annotation(ABC):
109
125
  annotation_id setter
110
126
  """
111
127
  if self._annotation_id is not None:
112
- raise AssertionError("Annotation_id already defined and cannot be reset")
128
+ raise AnnotationError("Annotation_id already defined and cannot be reset")
113
129
  if is_uuid_like(input_id):
114
130
  self._annotation_id = input_id
115
131
  elif isinstance(input_id, property):
116
132
  pass
117
133
  else:
118
- raise ValueError("Annotation_id must be uuid3 string")
134
+ raise AnnotationError("Annotation_id must be uuid3 string")
119
135
 
120
136
  @abstractmethod
121
137
  def get_defining_attributes(self) -> List[str]:
@@ -126,13 +142,13 @@ class Annotation(ABC):
126
142
 
127
143
  :return: A list of attributes.
128
144
  """
129
- raise NotImplementedError
145
+ raise NotImplementedError()
130
146
 
131
147
  def _assert_attributes_have_str(self, state_id: bool = False) -> None:
132
148
  defining_attributes = self.get_state_attributes() if state_id else self.get_defining_attributes()
133
149
  for attr in defining_attributes:
134
150
  if not hasattr(eval("self." + attr), "__str__"): # pylint: disable=W0123
135
- raise AttributeError(f"Attribute {attr} must have __str__ method")
151
+ raise AnnotationError(f"Attribute {attr} must have __str__ method")
136
152
 
137
153
  @staticmethod
138
154
  def set_annotation_id(annotation: "CategoryAnnotation", *container_id_context: Optional[str]) -> str:
@@ -179,7 +195,7 @@ class Annotation(ABC):
179
195
 
180
196
  :return: Annotation instance
181
197
  """
182
- raise NotImplementedError
198
+ raise NotImplementedError()
183
199
 
184
200
  @staticmethod
185
201
  @abstractmethod
@@ -189,7 +205,7 @@ class Annotation(ABC):
189
205
 
190
206
  :return: A list of attributes.
191
207
  """
192
- raise NotImplementedError
208
+ raise NotImplementedError()
193
209
 
194
210
  @property
195
211
  def state_id(self) -> str:
@@ -290,7 +306,12 @@ class CategoryAnnotation(Annotation):
290
306
  """
291
307
 
292
308
  if sub_category_name in self.sub_categories:
293
- raise KeyError(f"{sub_category_name} as sub category already defined for " f"{self.annotation_id}")
309
+ raise AnnotationError(
310
+ f"sub category {sub_category_name} already defined: "
311
+ f"annotation_id: {self.annotation_id}, "
312
+ f"category_name: {self.category_name}, "
313
+ f"category_id: {self.category_id}"
314
+ )
294
315
 
295
316
  if self._annotation_id is not None:
296
317
  if annotation._annotation_id is None: # pylint: disable=W0212
@@ -333,7 +354,7 @@ class CategoryAnnotation(Annotation):
333
354
  :param annotation_id: An annotation id
334
355
  """
335
356
  if not is_uuid_like(annotation_id):
336
- raise ValueError("Annotation_id must be uuid")
357
+ raise UUIDError("Annotation_id must be uuid")
337
358
 
338
359
  key_type = get_type(key)
339
360
  if key not in self.relationships:
@@ -436,14 +457,14 @@ class ImageAnnotation(CategoryAnnotation):
436
457
  box = self.bounding_box
437
458
  if box:
438
459
  return box
439
- raise ValueError(f"bounding_box has not been initialized for {self.annotation_id}")
460
+ raise AnnotationError(f"bounding_box has not been initialized for {self.annotation_id}")
440
461
 
441
462
  def get_summary(self, key: ObjectTypes) -> CategoryAnnotation:
442
463
  """Get summary sub categories from `image`. Raises `ValueError` if `key` is not available"""
443
464
  if self.image:
444
465
  if self.image.summary:
445
466
  return self.image.summary.get_sub_category(key)
446
- raise ValueError(f"Summary does not exist for {self.annotation_id} and key: {key}")
467
+ raise AnnotationError(f"Summary does not exist for {self.annotation_id} and key: {key}")
447
468
 
448
469
 
449
470
  @dataclass
@@ -28,6 +28,7 @@ import numpy.typing as npt
28
28
  from numpy import float32
29
29
 
30
30
  from ..utils.detection_types import ImageType
31
+ from ..utils.error import BoundingBoxError
31
32
  from ..utils.file_utils import cocotools_available
32
33
  from ..utils.logger import LoggingRecord, logger
33
34
 
@@ -140,10 +141,6 @@ def iou(boxes1: npt.NDArray[float32], boxes2: npt.NDArray[float32]) -> npt.NDArr
140
141
  return np_iou(boxes1, boxes2)
141
142
 
142
143
 
143
- class BoundingBoxError(BaseException):
144
- """Special exception only for `BoundingBox`"""
145
-
146
-
147
144
  @dataclass
148
145
  class BoundingBox:
149
146
  """
@@ -32,6 +32,7 @@ from pypdf import PdfReader
32
32
 
33
33
  from ..utils.detection_types import ImageType
34
34
  from ..utils.develop import deprecated
35
+ from ..utils.error import DependencyError
35
36
  from ..utils.pdf_utils import pdf_to_np_array
36
37
  from ..utils.viz import viz_handler
37
38
 
@@ -121,7 +122,8 @@ def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -
121
122
  """
122
123
  from pdf2image import convert_from_bytes # type: ignore # pylint: disable=C0415, E0401
123
124
 
124
- assert which("pdftoppm") is not None, "convert_pdf_bytes_to_np_array requires poppler to be installed"
125
+ if which("pdftoppm") is None:
126
+ raise DependencyError("convert_pdf_bytes_to_np_array requires poppler to be installed")
125
127
 
126
128
  with BytesIO(pdf_bytes) as pdf_file:
127
129
  pdf = PdfReader(pdf_file).pages[0]