deepdoctection 0.43__tar.gz → 0.43.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic; more details are available on the original diff page.

Files changed (155)
  1. {deepdoctection-0.43 → deepdoctection-0.43.3}/PKG-INFO +5 -6
  2. {deepdoctection-0.43 → deepdoctection-0.43.3}/README.md +4 -5
  3. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/__init__.py +3 -2
  4. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/analyzer/config.py +15 -0
  5. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datapoint/image.py +2 -2
  6. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datapoint/view.py +19 -5
  7. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/base.py +2 -0
  8. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/doctrocr.py +5 -2
  9. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/texocr.py +1 -1
  10. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/xfundstruct.py +1 -1
  11. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/anngen.py +14 -1
  12. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/order.py +9 -6
  13. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/context.py +1 -1
  14. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/develop.py +1 -0
  15. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/env_info.py +1 -1
  16. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/file_utils.py +0 -9
  17. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/settings.py +3 -1
  18. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection.egg-info/PKG-INFO +5 -6
  19. {deepdoctection-0.43 → deepdoctection-0.43.3}/LICENSE +0 -0
  20. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/analyzer/__init__.py +0 -0
  21. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/analyzer/dd.py +0 -0
  22. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/analyzer/factory.py +0 -0
  23. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/configs/__init__.py +0 -0
  24. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  25. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  26. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/configs/profiles.jsonl +0 -0
  27. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/__init__.py +0 -0
  28. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/base.py +0 -0
  29. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/common.py +0 -0
  30. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/custom.py +0 -0
  31. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/custom_serialize.py +0 -0
  32. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/parallel_map.py +0 -0
  33. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/serialize.py +0 -0
  34. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/dataflow/stats.py +0 -0
  35. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datapoint/__init__.py +0 -0
  36. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datapoint/annotation.py +0 -0
  37. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datapoint/box.py +0 -0
  38. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datapoint/convert.py +0 -0
  39. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/__init__.py +0 -0
  40. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/adapter.py +0 -0
  41. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/base.py +0 -0
  42. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/dataflow_builder.py +0 -0
  43. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/info.py +0 -0
  44. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/__init__.py +0 -0
  45. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  46. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  47. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/funsd.py +0 -0
  48. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  49. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/layouttest.py +0 -0
  50. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/publaynet.py +0 -0
  51. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  52. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  53. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  54. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/xfund.py +0 -0
  55. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  56. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  57. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/registry.py +0 -0
  58. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/datasets/save.py +0 -0
  59. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/__init__.py +0 -0
  60. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/accmetric.py +0 -0
  61. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/base.py +0 -0
  62. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/cocometric.py +0 -0
  63. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/eval.py +0 -0
  64. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/registry.py +0 -0
  65. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/tedsmetric.py +0 -0
  66. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/eval/tp_eval_callback.py +0 -0
  67. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/__init__.py +0 -0
  68. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/d2detect.py +0 -0
  69. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/deskew.py +0 -0
  70. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/fastlang.py +0 -0
  71. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/hfdetr.py +0 -0
  72. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/hflayoutlm.py +0 -0
  73. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/hflm.py +0 -0
  74. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/model.py +0 -0
  75. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/pdftext.py +0 -0
  76. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/pt/__init__.py +0 -0
  77. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/pt/nms.py +0 -0
  78. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/pt/ptutils.py +0 -0
  79. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tessocr.py +0 -0
  80. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/__init__.py +0 -0
  81. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tfutils.py +0 -0
  82. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpcompat.py +0 -0
  83. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  84. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  85. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  86. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  87. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  88. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  89. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  90. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  91. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  92. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  93. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  94. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  95. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  96. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  97. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  98. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  99. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  100. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  101. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/extern/tpdetect.py +0 -0
  102. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/__init__.py +0 -0
  103. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/cats.py +0 -0
  104. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/cocostruct.py +0 -0
  105. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/d2struct.py +0 -0
  106. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/hfstruct.py +0 -0
  107. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/laylmstruct.py +0 -0
  108. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/maputils.py +0 -0
  109. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/match.py +0 -0
  110. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/misc.py +0 -0
  111. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/pascalstruct.py +0 -0
  112. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/prodigystruct.py +0 -0
  113. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/pubstruct.py +0 -0
  114. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/mapper/tpstruct.py +0 -0
  115. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/__init__.py +0 -0
  116. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/base.py +0 -0
  117. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/common.py +0 -0
  118. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/concurrency.py +0 -0
  119. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/doctectionpipe.py +0 -0
  120. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/language.py +0 -0
  121. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/layout.py +0 -0
  122. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/lm.py +0 -0
  123. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/refine.py +0 -0
  124. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/registry.py +0 -0
  125. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/segment.py +0 -0
  126. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/sub_layout.py +0 -0
  127. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/text.py +0 -0
  128. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/pipe/transform.py +0 -0
  129. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/py.typed +0 -0
  130. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/train/__init__.py +0 -0
  131. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/train/d2_frcnn_train.py +0 -0
  132. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/train/hf_detr_train.py +0 -0
  133. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  134. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/train/tp_frcnn_train.py +0 -0
  135. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/__init__.py +0 -0
  136. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/concurrency.py +0 -0
  137. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/error.py +0 -0
  138. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/fs.py +0 -0
  139. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/identifier.py +0 -0
  140. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/logger.py +0 -0
  141. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/metacfg.py +0 -0
  142. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/mocks.py +0 -0
  143. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/pdf_utils.py +0 -0
  144. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/tqdm.py +0 -0
  145. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/transform.py +0 -0
  146. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/types.py +0 -0
  147. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/utils.py +0 -0
  148. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection/utils/viz.py +0 -0
  149. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection.egg-info/SOURCES.txt +0 -0
  150. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection.egg-info/dependency_links.txt +0 -0
  151. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection.egg-info/requires.txt +0 -0
  152. {deepdoctection-0.43 → deepdoctection-0.43.3}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.43 → deepdoctection-0.43.3}/setup.cfg +0 -0
  154. {deepdoctection-0.43 → deepdoctection-0.43.3}/setup.py +0 -0
  155. {deepdoctection-0.43 → deepdoctection-0.43.3}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43
3
+ Version: 0.43.3
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -192,7 +192,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
192
192
  - Fine-tuning and evaluation tools.
193
193
  - Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
194
194
 
195
- Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb)
195
+ Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
196
196
  for an easy start.
197
197
 
198
198
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
@@ -245,11 +245,11 @@ alt="text" width="40%">
245
245
 
246
246
  ## Requirements
247
247
 
248
- ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection_220525.png)
248
+ ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
249
249
 
250
250
  - Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
251
251
  - Python >= 3.9
252
- - 1.13 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
252
+ - 2.2 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
253
253
  Tensorflow support will be stopped from Python 3.11 onwards.
254
254
  - To fine-tune models, a GPU is recommended.
255
255
 
@@ -344,8 +344,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
344
344
 
345
345
  ### Running a Docker container from Docker hub
346
346
 
347
- Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
348
- com/r/deepdoctection/deepdoctection).
347
+ Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
349
348
 
350
349
  ```
351
350
  docker pull deepdoctection/deepdoctection:<release_tag>
@@ -49,7 +49,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
49
49
  - Fine-tuning and evaluation tools.
50
50
  - Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
51
51
 
52
- Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb)
52
+ Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
53
53
  for an easy start.
54
54
 
55
55
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
@@ -102,11 +102,11 @@ alt="text" width="40%">
102
102
 
103
103
  ## Requirements
104
104
 
105
- ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection_220525.png)
105
+ ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
106
106
 
107
107
  - Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
108
108
  - Python >= 3.9
109
- - 1.13 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
109
+ - 2.2 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
110
110
  Tensorflow support will be stopped from Python 3.11 onwards.
111
111
  - To fine-tune models, a GPU is recommended.
112
112
 
@@ -201,8 +201,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
201
201
 
202
202
  ### Running a Docker container from Docker hub
203
203
 
204
- Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
205
- com/r/deepdoctection/deepdoctection).
204
+ Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
206
205
 
207
206
  ```
208
207
  docker pull deepdoctection/deepdoctection:<release_tag>
@@ -25,11 +25,11 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.43"
28
+ __version__ = "0.43.3"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
32
- "configs": [],
32
+ "configs": ["update_cfg_from_defaults"],
33
33
  "dataflow": [
34
34
  "DataFlowTerminated",
35
35
  "DataFlowResetStateNotCalled",
@@ -98,6 +98,7 @@ _IMPORT_STRUCTURE = {
98
98
  "List",
99
99
  "Cell",
100
100
  "Table",
101
+ "IMAGE_DEFAULTS",
101
102
  "Page",
102
103
  ],
103
104
  "datasets": [
@@ -902,3 +902,18 @@ cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutType.CAPTION]
902
902
  # Freezes the configuration to make it immutable.
903
903
  # This prevents accidental modification at runtime.
904
904
  cfg.freeze()
905
+
906
+ def update_cfg_from_defaults() -> None:
907
+ """
908
+ Update the configuration with current values from IMAGE_DEFAULTS.
909
+ """
910
+ cfg.freeze(False)
911
+
912
+ # Update all dependent fields from IMAGE_DEFAULTS
913
+ cfg.TEXT_CONTAINER = IMAGE_DEFAULTS.TEXT_CONTAINER
914
+ cfg.WORD_MATCHING.PARENTAL_CATEGORIES = IMAGE_DEFAULTS.TEXT_BLOCK_CATEGORIES
915
+ cfg.TEXT_ORDERING.TEXT_BLOCK_CATEGORIES = IMAGE_DEFAULTS.TEXT_BLOCK_CATEGORIES
916
+ cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK_CATEGORIES = IMAGE_DEFAULTS.FLOATING_TEXT_BLOCK_CATEGORIES
917
+
918
+ # Re-freeze the configuration
919
+ cfg.freeze()
@@ -479,8 +479,8 @@ class Image:
479
479
 
480
480
  def remove(
481
481
  self,
482
- annotation_ids: Optional[Union[str, list[str]]] = None,
483
- service_ids: Optional[Union[str, list[str]]] = None,
482
+ annotation_ids: Optional[Union[str, Sequence[str]]] = None,
483
+ service_ids: Optional[Union[str, Sequence[str]]] = None,
484
484
  ) -> None:
485
485
  """
486
486
  Instead of removing consider deactivating annotations.
@@ -183,7 +183,7 @@ class Word(ImageAnnotationBaseView):
183
183
  attr_names = (
184
184
  set(WordType)
185
185
  .union(super().get_attribute_names())
186
- .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
186
+ .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK, Relationships.LINK})
187
187
  )
188
188
  return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
189
189
 
@@ -773,6 +773,7 @@ class Page(Image):
773
773
  "figures",
774
774
  "residual_layouts",
775
775
  "document_summary",
776
+ "document_mapping",
776
777
  }
777
778
  include_residual_text_container: bool = True
778
779
 
@@ -1389,19 +1390,32 @@ class Page(Image):
1389
1390
  include_residual_text_container=include_residual_text_container,
1390
1391
  )
1391
1392
 
1392
- def get_token(self) -> list[Mapping[str, str]]:
1393
+ def get_entities(self) -> list[Mapping[str, str]]:
1393
1394
  """
1394
1395
  Returns:
1395
- A list of tuples with word and non default token tags
1396
+ A list of dicts with the following structure:
1397
+
1398
+ ```python
1399
+ {
1400
+ "word": str, # word characters
1401
+ "entity": str, # token tag
1402
+ "annotation_id": str, # annotation id of the word
1403
+ "successor_annotation_id": Optional[str] # annotation_id of the successor word, if any
1404
+ }
1405
+ ```
1406
+
1396
1407
  """
1397
1408
  block_with_order = self._order("layouts")
1398
1409
  all_words = []
1399
1410
  for block in block_with_order:
1400
1411
  all_words.extend(block.get_ordered_words()) # type: ignore
1401
1412
  return [
1402
- {"word": word.CHARACTERS, "entity": word.TOKEN_TAG}
1413
+ {"word": word.characters,
1414
+ "entity": word.token_tag.value,
1415
+ "annotation_id": word.annotation_id,
1416
+ "successor_annotation_id": word.successor[0].annotation_id if word.successor else None}
1403
1417
  for word in all_words
1404
- if word.TOKEN_TAG not in (TokenClasses.OTHER, None)
1418
+ if word.token_tag not in (TokenClasses.OTHER, None)
1405
1419
  ]
1406
1420
 
1407
1421
  def __copy__(self) -> Page:
@@ -502,6 +502,7 @@ class TokenClassResult:
502
502
  semantic_name: semantic name
503
503
  bio_tag: bio tag
504
504
  score: prediction score
505
+ successor_uuid: uuid of the next token in the sequence
505
506
  """
506
507
 
507
508
  uuid: str
@@ -512,6 +513,7 @@ class TokenClassResult:
512
513
  bio_tag: ObjectTypes = DefaultType.DEFAULT_TYPE
513
514
  score: Optional[float] = None
514
515
  token_id: Optional[int] = None
516
+ successor_uuid: Optional[str] = None
515
517
 
516
518
 
517
519
  @dataclass
@@ -269,7 +269,10 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
269
269
  if self.lib == "PT":
270
270
  self.device = get_torch_device(device)
271
271
 
272
- self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device, self.lib)
272
+ self.doctr_predictor = self.get_wrapped_model(self.architecture,
273
+ self.path_weights,
274
+ self.device,
275
+ self.lib)
273
276
 
274
277
  def predict(self, np_img: PixelValues) -> list[DetectionResult]:
275
278
  """
@@ -424,7 +427,7 @@ class DoctrTextRecognizer(TextRecognizer):
424
427
  return _get_doctr_requirements()
425
428
 
426
429
  def clone(self) -> DoctrTextRecognizer:
427
- return self.__class__(self.architecture, self.path_weights, self.device, self.lib)
430
+ return self.__class__(self.architecture, self.path_weights, self.device, self.lib, self.path_config_json)
428
431
 
429
432
  @staticmethod
430
433
  def load_model(
@@ -60,7 +60,7 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
60
60
  return all_results
61
61
 
62
62
 
63
- def predict_text(np_img: PixelValues, client: boto3.client, text_lines: bool) -> list[DetectionResult]: # type: ignore
63
+ def predict_text(np_img: PixelValues, client: boto3.client, text_lines: bool) -> list[DetectionResult]:
64
64
  """
65
65
  Calls AWS Textract client (`detect_document_text`) and returns plain OCR results.
66
66
  AWS account required.
@@ -200,7 +200,7 @@ def xfund_to_image(
200
200
  ann_ids.extend(entity_id_to_ann_id[linked_entity])
201
201
  for ann_id in ann_ids:
202
202
  if ann_id != word.annotation_id:
203
- word.dump_relationship(Relationships.SEMANTIC_ENTITY_LINK, ann_id)
203
+ word.dump_relationship(Relationships.LINK, ann_id)
204
204
 
205
205
  if mapping_context.context_error:
206
206
  return None
@@ -19,7 +19,7 @@
19
19
  Datapoint manager
20
20
  """
21
21
  from dataclasses import asdict
22
- from typing import Optional, Union
22
+ from typing import Optional, Union, Sequence
23
23
 
24
24
  import numpy as np
25
25
 
@@ -372,6 +372,19 @@ class DatapointManager:
372
372
  return None
373
373
  return ann.annotation_id
374
374
 
375
+ def remove_annotations(self, annotation_ids: Sequence[str]) -> None:
376
+ """
377
+ Removes the annotation by the given `annotation_id`.
378
+
379
+ Args:
380
+ annotation_ids: The `annotation_id` to remove.
381
+ """
382
+ self.assert_datapoint_passed()
383
+ self.datapoint.remove(annotation_ids)
384
+ for ann_id in annotation_ids:
385
+ if ann_id in self._cache_anns:
386
+ self._cache_anns.pop(ann_id)
387
+
375
388
  def deactivate_annotation(self, annotation_id: str) -> None:
376
389
  """
377
390
  Deactivates the annotation by the given `annotation_id`.
@@ -738,10 +738,8 @@ class TextOrderService(TextLineServiceMixin):
738
738
  text_block_anns.extend(residual_text_container_anns)
739
739
  for text_block_ann in text_block_anns:
740
740
  self.order_text_in_text_block(text_block_ann)
741
- floating_text_block_anns_to_order = [
742
- ann for ann in text_block_anns if ann.category_name in self.floating_text_block_categories
743
- ]
744
- self.order_blocks(floating_text_block_anns_to_order)
741
+ floating_text_block_anns = dp.get_annotation(category_names=self.floating_text_block_categories)
742
+ self.order_blocks(floating_text_block_anns)
745
743
  self._create_columns()
746
744
 
747
745
  def _create_columns(self) -> None:
@@ -803,9 +801,14 @@ class TextOrderService(TextLineServiceMixin):
803
801
  if self.include_residual_text_container:
804
802
  add_category.append(LayoutType.LINE)
805
803
 
806
- assert set(self.floating_text_block_categories) <= set(
804
+ if set(self.floating_text_block_categories) <= set(
807
805
  self.text_block_categories + tuple(add_category)
808
- ), "floating_text_block_categories must be a subset of text_block_categories"
806
+ ):
807
+ logger.warning("In most cases floating_text_block_categories must be a subset of text_block_categories. "
808
+ "Adding categories to floating_text_block_categories, that do not belong to "
809
+ "text_block_categories makes only sense for categories set have CHILD relationships with"
810
+ " annotations that belong to text_block_categories.")
811
+
809
812
 
810
813
  def get_meta_annotation(self) -> MetaAnnotation:
811
814
  add_category = [self.text_container]
@@ -38,7 +38,7 @@ __all__ = ["timeout_manager", "save_tmp_file", "timed_operation"]
38
38
 
39
39
 
40
40
  @contextmanager
41
- def timeout_manager(proc: Any, seconds: Optional[int] = None) -> Iterator[str]: # type: ignore
41
+ def timeout_manager(proc: Any, seconds: Optional[int] = None) -> Iterator[str]:
42
42
  """
43
43
  Manager for time handling while some process is being called.
44
44
 
@@ -71,6 +71,7 @@ def log_deprecated(name: str, text: str, eos: str = "", max_num_warnings: Option
71
71
  logger.info(LoggingRecord(f"[Deprecated] {info_msg}"))
72
72
 
73
73
 
74
+
74
75
  def deprecated(
75
76
  text: str = "", eos: str = "", max_num_warnings: Optional[int] = None
76
77
  ) -> Callable[[Callable[..., T]], Callable[..., T]]:
@@ -462,7 +462,7 @@ def pt_info(data: KeyValEnvInfos) -> KeyValEnvInfos:
462
462
  data.append(("torchvision arch flags", msg))
463
463
  except (ImportError, AttributeError):
464
464
  data.append(("torchvision._C", "Not found"))
465
- except AttributeError:
465
+ except (AttributeError, ModuleNotFoundError):
466
466
  data.append(("torchvision", "unknown"))
467
467
 
468
468
  return data
@@ -12,7 +12,6 @@ import importlib.util
12
12
  import multiprocessing as mp
13
13
  import string
14
14
  import subprocess
15
- import sys
16
15
  from os import environ, path
17
16
  from shutil import which
18
17
  from types import ModuleType
@@ -22,7 +21,6 @@ import importlib_metadata
22
21
  from packaging import version
23
22
 
24
23
  from .error import DependencyError
25
- from .logger import LoggingRecord, logger
26
24
  from .metacfg import AttrDict
27
25
  from .types import PathLikeOrStr, Requirement
28
26
 
@@ -662,13 +660,6 @@ def get_doctr_requirement() -> Requirement:
662
660
  On macOS, if `poppler` is not available, this function will recursively check the requirement.
663
661
  It is not yet known how to check whether `pango`, `gdk-pixbuf`, and `libffi` are installed.
664
662
  """
665
- if sys.platform == "darwin":
666
- if not get_poppler_version():
667
- return get_doctr_requirement()
668
- # don't know yet how to check whether pango gdk-pixbuf libffi are installed
669
- logger.info(
670
- LoggingRecord("package requires weasyprint. Check that poppler pango gdk-pixbuf libffi are installed")
671
- )
672
663
  return "doctr", doctr_available(), _DOCTR_ERR_MSG
673
664
 
674
665
 
@@ -80,6 +80,7 @@ class SummaryType(ObjectTypes):
80
80
 
81
81
  SUMMARY = "summary"
82
82
  DOCUMENT_SUMMARY = "document_summary"
83
+ DOCUMENT_MAPPING = "document_mapping"
83
84
 
84
85
 
85
86
  @object_types_registry.register("DocumentType")
@@ -228,8 +229,9 @@ class Relationships(ObjectTypes):
228
229
 
229
230
  CHILD = "child"
230
231
  READING_ORDER = "reading_order"
231
- SEMANTIC_ENTITY_LINK = "semantic_entity_link"
232
+ LINK = "link"
232
233
  LAYOUT_LINK = "layout_link"
234
+ SUCCESSOR = "successor"
233
235
 
234
236
 
235
237
  @object_types_registry.register("Languages")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43
3
+ Version: 0.43.3
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -192,7 +192,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
192
192
  - Fine-tuning and evaluation tools.
193
193
  - Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
194
194
 
195
- Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb)
195
+ Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
196
196
  for an easy start.
197
197
 
198
198
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
@@ -245,11 +245,11 @@ alt="text" width="40%">
245
245
 
246
246
  ## Requirements
247
247
 
248
- ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection_220525.png)
248
+ ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
249
249
 
250
250
  - Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
251
251
  - Python >= 3.9
252
- - 1.13 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
252
+ - 2.2 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
253
253
  Tensorflow support will be stopped from Python 3.11 onwards.
254
254
  - To fine-tune models, a GPU is recommended.
255
255
 
@@ -344,8 +344,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
344
344
 
345
345
  ### Running a Docker container from Docker hub
346
346
 
347
- Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
348
- com/r/deepdoctection/deepdoctection).
347
+ Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
349
348
 
350
349
  ```
351
350
  docker pull deepdoctection/deepdoctection:<release_tag>
File without changes
File without changes
File without changes