deepdoctection 0.34__tar.gz → 0.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (155)
  1. {deepdoctection-0.34 → deepdoctection-0.35}/PKG-INFO +17 -11
  2. {deepdoctection-0.34 → deepdoctection-0.35}/README.md +10 -7
  3. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/__init__.py +6 -10
  4. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/analyzer/__init__.py +1 -0
  5. deepdoctection-0.35/deepdoctection/analyzer/_config.py +150 -0
  6. deepdoctection-0.35/deepdoctection/analyzer/dd.py +154 -0
  7. deepdoctection-0.35/deepdoctection/analyzer/factory.py +522 -0
  8. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/configs/conf_dd_one.yaml +1 -0
  9. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datapoint/annotation.py +1 -1
  10. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datapoint/convert.py +6 -4
  11. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datapoint/image.py +16 -6
  12. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datapoint/view.py +1 -0
  13. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/pdftext.py +96 -5
  14. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tessocr.py +1 -0
  15. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/env_info.py +30 -1
  16. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/file_utils.py +19 -0
  17. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/metacfg.py +12 -0
  18. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/pdf_utils.py +86 -3
  19. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection.egg-info/PKG-INFO +17 -11
  20. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection.egg-info/SOURCES.txt +2 -0
  21. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection.egg-info/requires.txt +6 -3
  22. {deepdoctection-0.34 → deepdoctection-0.35}/setup.cfg +4 -0
  23. {deepdoctection-0.34 → deepdoctection-0.35}/setup.py +3 -1
  24. deepdoctection-0.34/deepdoctection/analyzer/dd.py +0 -478
  25. {deepdoctection-0.34 → deepdoctection-0.35}/LICENSE +0 -0
  26. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/configs/__init__.py +0 -0
  27. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  28. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/__init__.py +0 -0
  29. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/base.py +0 -0
  30. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/common.py +0 -0
  31. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/custom.py +0 -0
  32. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/custom_serialize.py +0 -0
  33. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/parallel_map.py +0 -0
  34. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/serialize.py +0 -0
  35. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/dataflow/stats.py +0 -0
  36. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datapoint/__init__.py +0 -0
  37. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datapoint/box.py +0 -0
  38. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/__init__.py +0 -0
  39. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/adapter.py +0 -0
  40. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/base.py +0 -0
  41. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/dataflow_builder.py +0 -0
  42. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/info.py +0 -0
  43. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/__init__.py +0 -0
  44. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  45. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  46. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/funsd.py +0 -0
  47. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  48. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/layouttest.py +0 -0
  49. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/publaynet.py +0 -0
  50. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  51. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  52. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  53. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/xfund.py +0 -0
  54. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  55. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  56. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/registry.py +0 -0
  57. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/datasets/save.py +0 -0
  58. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/__init__.py +0 -0
  59. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/accmetric.py +0 -0
  60. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/base.py +0 -0
  61. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/cocometric.py +0 -0
  62. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/eval.py +0 -0
  63. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/registry.py +0 -0
  64. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/tedsmetric.py +0 -0
  65. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/eval/tp_eval_callback.py +0 -0
  66. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/__init__.py +0 -0
  67. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/base.py +0 -0
  68. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/d2detect.py +0 -0
  69. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/deskew.py +0 -0
  70. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/doctrocr.py +0 -0
  71. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/fastlang.py +0 -0
  72. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/hfdetr.py +0 -0
  73. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/hflayoutlm.py +0 -0
  74. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/hflm.py +0 -0
  75. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/model.py +0 -0
  76. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/pt/__init__.py +0 -0
  77. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/pt/nms.py +0 -0
  78. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/pt/ptutils.py +0 -0
  79. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/texocr.py +0 -0
  80. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/__init__.py +0 -0
  81. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tfutils.py +0 -0
  82. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpcompat.py +0 -0
  83. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  84. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  85. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  86. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  87. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  88. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  89. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  90. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  91. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  92. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  93. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  94. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  95. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  96. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  97. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  98. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  99. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  100. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  101. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/extern/tpdetect.py +0 -0
  102. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/__init__.py +0 -0
  103. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/cats.py +0 -0
  104. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/cocostruct.py +0 -0
  105. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/d2struct.py +0 -0
  106. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/hfstruct.py +0 -0
  107. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/laylmstruct.py +0 -0
  108. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/maputils.py +0 -0
  109. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/match.py +0 -0
  110. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/misc.py +0 -0
  111. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/pascalstruct.py +0 -0
  112. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/prodigystruct.py +0 -0
  113. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/pubstruct.py +0 -0
  114. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/tpstruct.py +0 -0
  115. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/mapper/xfundstruct.py +0 -0
  116. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/__init__.py +0 -0
  117. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/anngen.py +0 -0
  118. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/base.py +0 -0
  119. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/common.py +0 -0
  120. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/concurrency.py +0 -0
  121. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/doctectionpipe.py +0 -0
  122. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/language.py +0 -0
  123. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/layout.py +0 -0
  124. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/lm.py +0 -0
  125. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/order.py +0 -0
  126. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/refine.py +0 -0
  127. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/registry.py +0 -0
  128. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/segment.py +0 -0
  129. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/sub_layout.py +0 -0
  130. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/text.py +0 -0
  131. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/pipe/transform.py +0 -0
  132. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/py.typed +0 -0
  133. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/train/__init__.py +0 -0
  134. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/train/d2_frcnn_train.py +0 -0
  135. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/train/hf_detr_train.py +0 -0
  136. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  137. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/train/tp_frcnn_train.py +0 -0
  138. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/__init__.py +0 -0
  139. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/concurrency.py +0 -0
  140. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/context.py +0 -0
  141. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/develop.py +0 -0
  142. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/error.py +0 -0
  143. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/fs.py +0 -0
  144. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/identifier.py +0 -0
  145. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/logger.py +0 -0
  146. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/mocks.py +0 -0
  147. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/settings.py +0 -0
  148. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/tqdm.py +0 -0
  149. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/transform.py +0 -0
  150. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/types.py +0 -0
  151. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/utils.py +0 -0
  152. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection/utils/viz.py +0 -0
  153. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection.egg-info/dependency_links.txt +0 -0
  154. {deepdoctection-0.34 → deepdoctection-0.35}/deepdoctection.egg-info/top_level.txt +0 -0
  155. {deepdoctection-0.34 → deepdoctection-0.35}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.34
3
+ Version: 0.35
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
19
  Requires-Dist: catalogue==2.0.10
20
- Requires-Dist: huggingface_hub>=0.12.0
20
+ Requires-Dist: huggingface_hub<0.26,>=0.12.0
21
21
  Requires-Dist: importlib-metadata>=5.0.0
22
22
  Requires-Dist: jsonlines==3.1.0
23
23
  Requires-Dist: lazy-imports==0.3.1
@@ -27,6 +27,7 @@ Requires-Dist: numpy<2.0,>=1.21
27
27
  Requires-Dist: packaging>=20.0
28
28
  Requires-Dist: Pillow>=10.0.0
29
29
  Requires-Dist: pypdf>=3.16.0
30
+ Requires-Dist: pypdfium2>=4.30.0
30
31
  Requires-Dist: pyyaml>=6.0.1
31
32
  Requires-Dist: pyzmq>=16
32
33
  Requires-Dist: scipy>=1.13.1
@@ -35,7 +36,7 @@ Requires-Dist: tabulate>=0.7.7
35
36
  Requires-Dist: tqdm==4.64.0
36
37
  Provides-Extra: tf
37
38
  Requires-Dist: catalogue==2.0.10; extra == "tf"
38
- Requires-Dist: huggingface_hub>=0.12.0; extra == "tf"
39
+ Requires-Dist: huggingface_hub<0.26,>=0.12.0; extra == "tf"
39
40
  Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
40
41
  Requires-Dist: jsonlines==3.1.0; extra == "tf"
41
42
  Requires-Dist: lazy-imports==0.3.1; extra == "tf"
@@ -45,6 +46,7 @@ Requires-Dist: numpy<2.0,>=1.21; extra == "tf"
45
46
  Requires-Dist: packaging>=20.0; extra == "tf"
46
47
  Requires-Dist: Pillow>=10.0.0; extra == "tf"
47
48
  Requires-Dist: pypdf>=3.16.0; extra == "tf"
49
+ Requires-Dist: pypdfium2>=4.30.0; extra == "tf"
48
50
  Requires-Dist: pyyaml>=6.0.1; extra == "tf"
49
51
  Requires-Dist: pyzmq>=16; extra == "tf"
50
52
  Requires-Dist: scipy>=1.13.1; extra == "tf"
@@ -66,7 +68,7 @@ Requires-Dist: distance==0.1.3; extra == "tf"
66
68
  Requires-Dist: lxml>=4.9.1; extra == "tf"
67
69
  Provides-Extra: pt
68
70
  Requires-Dist: catalogue==2.0.10; extra == "pt"
69
- Requires-Dist: huggingface_hub>=0.12.0; extra == "pt"
71
+ Requires-Dist: huggingface_hub<0.26,>=0.12.0; extra == "pt"
70
72
  Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
71
73
  Requires-Dist: jsonlines==3.1.0; extra == "pt"
72
74
  Requires-Dist: lazy-imports==0.3.1; extra == "pt"
@@ -76,6 +78,7 @@ Requires-Dist: numpy<2.0,>=1.21; extra == "pt"
76
78
  Requires-Dist: packaging>=20.0; extra == "pt"
77
79
  Requires-Dist: Pillow>=10.0.0; extra == "pt"
78
80
  Requires-Dist: pypdf>=3.16.0; extra == "pt"
81
+ Requires-Dist: pypdfium2>=4.30.0; extra == "pt"
79
82
  Requires-Dist: pyyaml>=6.0.1; extra == "pt"
80
83
  Requires-Dist: pyzmq>=16; extra == "pt"
81
84
  Requires-Dist: scipy>=1.13.1; extra == "pt"
@@ -172,9 +175,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
172
175
  - Document layout analysis and table recognition now runs with
173
176
  [**Torchscript**](https://pytorch.org/docs/stable/jit.html) (CPU) as well and [**Detectron2**](https://github.com/facebookresearch/detectron2/tree/main/detectron2) is not required
174
177
  anymore for basic inference.
175
- - [**new**] More angle predictors for determining the rotation of a document based on [**Tesseract**](https://github.com/tesseract-ocr/tesseract) and [**DocTr**](https://github.com/mindee/doctr)
178
+ - More angle predictors for determining the rotation of a document based on [**Tesseract**](https://github.com/tesseract-ocr/tesseract) and [**DocTr**](https://github.com/mindee/doctr)
176
179
  (not contained in the built-in Analyzer).
177
- - [**new**] Token classification with [**LiLT**](https://github.com/jpWang/LiLT) via
180
+ - Token classification with [**LiLT**](https://github.com/jpWang/LiLT) via
178
181
  [**transformers**](https://github.com/huggingface/transformers).
179
182
  We have added a model wrapper for token classification with LiLT and added a some LiLT models to the model catalog
180
183
  that seem to look promising, especially if you want to train a model on non-english data. The training script for
@@ -263,7 +266,7 @@ documentation.
263
266
 
264
267
  ## Requirements
265
268
 
266
- ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection.png)
269
+ ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection_081124.png)
267
270
 
268
271
  Everything in the overview listed below the **deep**doctection layer are necessary requirements and have to be installed
269
272
  separately.
@@ -272,13 +275,16 @@ separately.
272
275
  - Python >= 3.9
273
276
  - 1.13 <= PyTorch **or** 2.11 <= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
274
277
  In general, if you want to train or fine-tune models, a GPU is required.
275
- - **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF documents into
276
- images.
278
+
277
279
  - With respect to the Deep Learning framework, you must decide between [Tensorflow](https://www.tensorflow.org/install?hl=en)
278
280
  and [PyTorch](https://pytorch.org/get-started/locally/).
279
281
  - [Tesseract](https://github.com/tesseract-ocr/tesseract) OCR engine will be used through a Python wrapper. The core
280
282
  engine has to be installed separately.
281
283
 
284
+
285
+ - For release `v.0.34.0` and below **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF
286
+ documents into images. For release `v.0.35.0` this dependency will be optional.
287
+
282
288
  The following overview shows the availability of the models in conjunction with the DL framework.
283
289
 
284
290
  | Task | PyTorch | Torchscript | Tensorflow |
@@ -396,8 +402,8 @@ to develop this framework.
396
402
  ## Problems
397
403
 
398
404
  We try hard to eliminate bugs. We also know that the code is not free of issues. We welcome all issues relevant to this
399
- repo and try to address them as quickly as possible. Bug fixes or enhancements will be deployed in a new release every 4
400
- to 6 weeks.
405
+ repo and try to address them as quickly as possible. Bug fixes or enhancements will be deployed in a new release every 10
406
+ to 12 weeks.
401
407
 
402
408
  ## If you like **deep**doctection ...
403
409
 
@@ -45,9 +45,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
45
45
  - Document layout analysis and table recognition now runs with
46
46
  [**Torchscript**](https://pytorch.org/docs/stable/jit.html) (CPU) as well and [**Detectron2**](https://github.com/facebookresearch/detectron2/tree/main/detectron2) is not required
47
47
  anymore for basic inference.
48
- - [**new**] More angle predictors for determining the rotation of a document based on [**Tesseract**](https://github.com/tesseract-ocr/tesseract) and [**DocTr**](https://github.com/mindee/doctr)
48
+ - More angle predictors for determining the rotation of a document based on [**Tesseract**](https://github.com/tesseract-ocr/tesseract) and [**DocTr**](https://github.com/mindee/doctr)
49
49
  (not contained in the built-in Analyzer).
50
- - [**new**] Token classification with [**LiLT**](https://github.com/jpWang/LiLT) via
50
+ - Token classification with [**LiLT**](https://github.com/jpWang/LiLT) via
51
51
  [**transformers**](https://github.com/huggingface/transformers).
52
52
  We have added a model wrapper for token classification with LiLT and added a some LiLT models to the model catalog
53
53
  that seem to look promising, especially if you want to train a model on non-english data. The training script for
@@ -136,7 +136,7 @@ documentation.
136
136
 
137
137
  ## Requirements
138
138
 
139
- ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection.png)
139
+ ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection_081124.png)
140
140
 
141
141
  Everything in the overview listed below the **deep**doctection layer are necessary requirements and have to be installed
142
142
  separately.
@@ -145,13 +145,16 @@ separately.
145
145
  - Python >= 3.9
146
146
  - 1.13 <= PyTorch **or** 2.11 <= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
147
147
  In general, if you want to train or fine-tune models, a GPU is required.
148
- - **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF documents into
149
- images.
148
+
150
149
  - With respect to the Deep Learning framework, you must decide between [Tensorflow](https://www.tensorflow.org/install?hl=en)
151
150
  and [PyTorch](https://pytorch.org/get-started/locally/).
152
151
  - [Tesseract](https://github.com/tesseract-ocr/tesseract) OCR engine will be used through a Python wrapper. The core
153
152
  engine has to be installed separately.
154
153
 
154
+
155
+ - For release `v.0.34.0` and below **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF
156
+ documents into images. For release `v.0.35.0` this dependency will be optional.
157
+
155
158
  The following overview shows the availability of the models in conjunction with the DL framework.
156
159
 
157
160
  | Task | PyTorch | Torchscript | Tensorflow |
@@ -269,8 +272,8 @@ to develop this framework.
269
272
  ## Problems
270
273
 
271
274
  We try hard to eliminate bugs. We also know that the code is not free of issues. We welcome all issues relevant to this
272
- repo and try to address them as quickly as possible. Bug fixes or enhancements will be deployed in a new release every 4
273
- to 6 weeks.
275
+ repo and try to address them as quickly as possible. Bug fixes or enhancements will be deployed in a new release every 10
276
+ to 12 weeks.
274
277
 
275
278
  ## If you like **deep**doctection ...
276
279
 
@@ -18,25 +18,19 @@ if importlib.util.find_spec("dotenv") is not None:
18
18
  import sys
19
19
  from typing import TYPE_CHECKING
20
20
 
21
- from .utils.env_info import collect_env_info
21
+ from .utils.env_info import auto_select_pdf_render_framework, collect_env_info
22
22
  from .utils.file_utils import _LazyModule, get_tf_version, pytorch_available, tf_available
23
23
  from .utils.logger import LoggingRecord, logger
24
24
 
25
25
  # pylint: enable=wrong-import-position
26
26
 
27
- __version__ = 0.34
27
+ __version__ = 0.35
28
28
 
29
29
  _IMPORT_STRUCTURE = {
30
30
  "analyzer": [
31
31
  "config_sanity_checks",
32
- "build_detector",
33
- "build_padder",
34
- "build_service",
35
- "build_sub_image_service",
36
- "build_ocr",
37
- "build_doctr_word",
38
32
  "get_dd_analyzer",
39
- "build_analyzer",
33
+ "ServiceFactory"
40
34
  ],
41
35
  "configs": [],
42
36
  "dataflow": [
@@ -197,6 +191,7 @@ _IMPORT_STRUCTURE = {
197
191
  "print_model_infos",
198
192
  "ModelDownloadManager",
199
193
  "PdfPlumberTextDetector",
194
+ "Pdfmium2TextDetector",
200
195
  "TesseractOcrDetector",
201
196
  "TesseractRotationTransformer",
202
197
  "TextractOcrDetector",
@@ -304,6 +299,7 @@ _IMPORT_STRUCTURE = {
304
299
  "timed_operation",
305
300
  "collect_env_info",
306
301
  "auto_select_viz_library",
302
+ "auto_select_pdf_render_framework",
307
303
  "get_tensorflow_requirement",
308
304
  "tf_addons_available",
309
305
  "get_tf_addons_requirements",
@@ -427,7 +423,7 @@ _IMPORT_STRUCTURE = {
427
423
  # Setting some environment variables so that standard functions can be invoked with available hardware
428
424
  env_info = collect_env_info()
429
425
  logger.debug(LoggingRecord(msg=env_info))
430
-
426
+ auto_select_pdf_render_framework()
431
427
 
432
428
  # Direct imports for type-checking
433
429
  if TYPE_CHECKING:
@@ -20,3 +20,4 @@ Package for pre-built pipelines
20
20
  """
21
21
 
22
22
  from .dd import *
23
+ from .factory import *
@@ -0,0 +1,150 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File: config.py
3
+
4
+ # Copyright 2024 Dr. Janis Meyer. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ """Pipeline configuration for deepdoctection analyzer. Do not change the defaults in this file. """
19
+
20
+ from ..utils.metacfg import AttrDict
21
+ from ..utils.settings import CellType, LayoutType
22
+
23
+ cfg = AttrDict()
24
+
25
+ cfg.LANGUAGE = None
26
+ cfg.LIB = None
27
+ cfg.DEVICE = None
28
+ cfg.USE_ROTATOR = False
29
+ cfg.USE_LAYOUT = True
30
+ cfg.USE_TABLE_SEGMENTATION = True
31
+
32
+ cfg.TF.LAYOUT.WEIGHTS = "layout/model-800000_inf_only.data-00000-of-00001"
33
+ cfg.TF.LAYOUT.FILTER = None
34
+
35
+
36
+ cfg.TF.CELL.WEIGHTS = "cell/model-1800000_inf_only.data-00000-of-00001"
37
+ cfg.TF.CELL.FILTER = None
38
+
39
+
40
+ cfg.TF.ITEM.WEIGHTS = "item/model-1620000_inf_only.data-00000-of-00001"
41
+ cfg.TF.ITEM.FILTER = None
42
+
43
+ cfg.PT.LAYOUT.WEIGHTS = "layout/d2_model_0829999_layout_inf_only.pt"
44
+ cfg.PT.LAYOUT.WEIGHTS_TS = "layout/d2_model_0829999_layout_inf_only.ts"
45
+ cfg.PT.LAYOUT.FILTER = None
46
+ cfg.PT.LAYOUT.PAD.TOP = 60
47
+ cfg.PT.LAYOUT.PAD.RIGHT = 60
48
+ cfg.PT.LAYOUT.PAD.BOTTOM = 60
49
+ cfg.PT.LAYOUT.PAD.LEFT = 60
50
+
51
+ cfg.PT.ITEM.WEIGHTS = "item/d2_model_1639999_item_inf_only.pt"
52
+ cfg.PT.ITEM.WEIGHTS_TS = "item/d2_model_1639999_item_inf_only.ts"
53
+ cfg.PT.ITEM.FILTER = None
54
+ cfg.PT.ITEM.PAD.TOP = 60
55
+ cfg.PT.ITEM.PAD.RIGHT = 60
56
+ cfg.PT.ITEM.PAD.BOTTOM = 60
57
+ cfg.PT.ITEM.PAD.LEFT = 60
58
+
59
+ cfg.PT.CELL.WEIGHTS = "cell/d2_model_1849999_cell_inf_only.pt"
60
+ cfg.PT.CELL.WEIGHTS_TS = "cell/d2_model_1849999_cell_inf_only.ts"
61
+ cfg.PT.CELL.FILTER = None
62
+
63
+ cfg.USE_LAYOUT_NMS = False
64
+ cfg.LAYOUT_NMS_PAIRS.COMBINATIONS = None
65
+ cfg.LAYOUT_NMS_PAIRS.THRESHOLDS = None
66
+ cfg.LAYOUT_NMS_PAIRS.PRIORITY = None
67
+
68
+ cfg.SEGMENTATION.ASSIGNMENT_RULE = "ioa"
69
+ cfg.SEGMENTATION.THRESHOLD_ROWS = 0.4
70
+ cfg.SEGMENTATION.THRESHOLD_COLS = 0.4
71
+ cfg.SEGMENTATION.FULL_TABLE_TILING = True
72
+ cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS = 0.001
73
+ cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS = 0.001
74
+ cfg.SEGMENTATION.CELL_CATEGORY_ID = 12
75
+ cfg.SEGMENTATION.TABLE_NAME = LayoutType.TABLE
76
+ cfg.SEGMENTATION.PUBTABLES_CELL_NAMES = [
77
+ CellType.SPANNING,
78
+ CellType.ROW_HEADER,
79
+ CellType.COLUMN_HEADER,
80
+ CellType.PROJECTED_ROW_HEADER,
81
+ LayoutType.CELL,
82
+ ]
83
+ cfg.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES = [
84
+ CellType.SPANNING,
85
+ CellType.ROW_HEADER,
86
+ CellType.COLUMN_HEADER,
87
+ CellType.PROJECTED_ROW_HEADER,
88
+ ]
89
+ cfg.SEGMENTATION.PUBTABLES_ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
90
+ cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
91
+ cfg.SEGMENTATION.CELL_NAMES = [CellType.HEADER, CellType.BODY, LayoutType.CELL]
92
+ cfg.SEGMENTATION.ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
93
+ cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
94
+
95
+ cfg.SEGMENTATION.STRETCH_RULE = "equal"
96
+
97
+ cfg.USE_TABLE_REFINEMENT = True
98
+ cfg.USE_PDF_MINER = False
99
+
100
+ cfg.PDF_MINER.X_TOLERANCE = 3
101
+ cfg.PDF_MINER.Y_TOLERANCE = 3
102
+
103
+ cfg.USE_OCR = True
104
+
105
+ cfg.OCR.USE_TESSERACT = True
106
+ cfg.OCR.USE_DOCTR = False
107
+ cfg.OCR.USE_TEXTRACT = False
108
+ cfg.OCR.CONFIG.TESSERACT = "dd/conf_tesseract.yaml"
109
+
110
+ cfg.OCR.WEIGHTS.DOCTR_WORD.TF = "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip"
111
+ cfg.OCR.WEIGHTS.DOCTR_WORD.PT = "doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt"
112
+ cfg.OCR.WEIGHTS.DOCTR_RECOGNITION.TF = "doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip"
113
+ cfg.OCR.WEIGHTS.DOCTR_RECOGNITION.PT = "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt"
114
+
115
+ cfg.TEXT_CONTAINER = LayoutType.WORD
116
+ cfg.WORD_MATCHING.PARENTAL_CATEGORIES = [
117
+ LayoutType.TEXT,
118
+ LayoutType.TITLE,
119
+ LayoutType.LIST,
120
+ LayoutType.CELL,
121
+ CellType.COLUMN_HEADER,
122
+ CellType.PROJECTED_ROW_HEADER,
123
+ CellType.SPANNING,
124
+ CellType.ROW_HEADER,
125
+ ]
126
+ cfg.WORD_MATCHING.RULE = "ioa"
127
+ cfg.WORD_MATCHING.THRESHOLD = 0.6
128
+ cfg.WORD_MATCHING.MAX_PARENT_ONLY = True
129
+
130
+ cfg.TEXT_ORDERING.TEXT_BLOCK_CATEGORIES = [
131
+ LayoutType.TEXT,
132
+ LayoutType.TITLE,
133
+ LayoutType.LIST,
134
+ LayoutType.CELL,
135
+ CellType.COLUMN_HEADER,
136
+ CellType.PROJECTED_ROW_HEADER,
137
+ CellType.SPANNING,
138
+ CellType.ROW_HEADER,
139
+ ]
140
+ cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK_CATEGORIES = [
141
+ LayoutType.TEXT,
142
+ LayoutType.TITLE,
143
+ LayoutType.LIST,
144
+ ]
145
+ cfg.TEXT_ORDERING.INCLUDE_RESIDUAL_TEXT_CONTAINER = False
146
+ cfg.TEXT_ORDERING.STARTING_POINT_TOLERANCE = 0.005
147
+ cfg.TEXT_ORDERING.BROKEN_LINE_TOLERANCE = 0.003
148
+ cfg.TEXT_ORDERING.HEIGHT_TOLERANCE = 2.0
149
+ cfg.TEXT_ORDERING.PARAGRAPH_BREAK = 0.035
150
+ cfg.freeze()
@@ -0,0 +1,154 @@
1
+ # -*- coding: utf-8 -*-
2
+ # File: dd.py
3
+
4
+ # Copyright 2021 Dr. Janis Meyer. All rights reserved.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+
18
+ """
19
+ Module for **deep**doctection analyzer.
20
+
21
+ -factory build_analyzer for a given config
22
+
23
+ -user factory with a reduced config setting
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ from typing import Optional
30
+
31
+ from ..extern.pt.ptutils import get_torch_device
32
+ from ..extern.tp.tfutils import disable_tp_layer_logging, get_tf_device
33
+ from ..pipe.doctectionpipe import DoctectionPipe
34
+ from ..utils.env_info import ENV_VARS_TRUE
35
+ from ..utils.error import DependencyError
36
+ from ..utils.file_utils import tensorpack_available
37
+ from ..utils.fs import get_configs_dir_path, get_package_path, maybe_copy_config_to_cache
38
+ from ..utils.logger import LoggingRecord, logger
39
+ from ..utils.metacfg import set_config_by_yaml
40
+ from ..utils.types import PathLikeOrStr
41
+ from ._config import cfg
42
+ from .factory import ServiceFactory
43
+
44
# Names exported by `from ... import *`: the two public functions defined in this module.
__all__ = ["config_sanity_checks", "get_dd_analyzer"]
48
+
49
# Default config files; `get_dd_analyzer` hands these to `maybe_copy_config_to_cache`
# together with the package path.
_DD_ONE = "deepdoctection/configs/conf_dd_one.yaml"
_TESSERACT = "deepdoctection/configs/conf_tesseract.yaml"

# Selectable options, grouped by component.
_MODEL_CHOICES = {
    "layout": [
        "layout/d2_model_0829999_layout_inf_only.pt",
        "xrf_layout/model_final_inf_only.pt",
        "microsoft/table-transformer-detection/pytorch_model.bin",
    ],
    "segmentation": [
        "item/model-1620000_inf_only.data-00000-of-00001",
        "xrf_item/model_final_inf_only.pt",
        "microsoft/table-transformer-structure-recognition/pytorch_model.bin",
        "deepdoctection/tatr_tab_struct_v2/pytorch_model.bin",
    ],
    "ocr": ["Tesseract", "DocTr", "Textract"],
    "doctr_word": ["doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt"],
    "doctr_recognition": [
        "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt",
        "doctr/crnn_vgg16_bn/pt/pytorch_model.bin",
    ],
    "llm": ["gpt-3.5-turbo", "gpt-4"],
    # Keyed by the entries of "segmentation"; the value is a second model path or None.
    # NOTE(review): presumably the cell model paired with the item model - confirm against the consumer.
    "segmentation_choices": {
        "item/model-1620000_inf_only.data-00000-of-00001": "cell/model-1800000_inf_only.data-00000-of-00001",
        "xrf_item/model_final_inf_only.pt": "xrf_cell/model_final_inf_only.pt",
        "microsoft/table-transformer-structure-recognition/pytorch_model.bin": None,
        "deepdoctection/tatr_tab_struct_v2/pytorch_model.bin": None,
    },
}
77
+
78
+
79
def config_sanity_checks() -> None:
    """
    Check the text extraction settings of the module-level config `cfg` for contradictions.

    :raises ValueError: if `USE_PDF_MINER`, `USE_OCR` and `OCR.USE_DOCTR` are all enabled, or if `USE_OCR` is
                        enabled and the number of enabled OCR engines (`OCR.USE_TESSERACT`, `OCR.USE_DOCTR`,
                        `OCR.USE_TEXTRACT`) is not exactly one.
    """
    if cfg.USE_PDF_MINER and cfg.USE_OCR and cfg.OCR.USE_DOCTR:
        raise ValueError("Configuration USE_PDF_MINER= True and USE_OCR=True and USE_DOCTR=True is not allowed")

    # Without OCR there is nothing left to validate
    if not cfg.USE_OCR:
        return

    # The flags are added up, so the sum is the number of enabled engines
    enabled_engines = cfg.OCR.USE_TESSERACT + cfg.OCR.USE_DOCTR + cfg.OCR.USE_TEXTRACT
    if enabled_engines != 1:
        raise ValueError(
            "Choose either OCR.USE_TESSERACT=True or OCR.USE_DOCTR=True or OCR.USE_TEXTRACT=True "
            "and set the other two to False. Only one OCR system can be activated."
        )
89
+
90
+
91
def get_dd_analyzer(
    reset_config_file: bool = True,
    config_overwrite: Optional[list[str]] = None,
    path_config_file: Optional[PathLikeOrStr] = None,
) -> DoctectionPipe:
    """
    Factory function for creating the built-in **deep**doctection analyzer.

    The Standard Analyzer is a pipeline that comprises the following analysis components:

    - Document layout analysis

    - Table segmentation

    - Text extraction/OCR

    - Reading order

    We refer to the various notebooks and docs for running an analyzer and changing the configs.

    The deep learning framework is chosen from the environment variable `DD_USE_TF`: if its value is one of
    `ENV_VARS_TRUE` Tensorflow is used, in every other case (including an unset variable) PyTorch is used.

    :param reset_config_file: This will copy the `.yaml` file with default variables to the `.cache` and therefore
                              resetting all configurations if set to `True`.
    :param config_overwrite: Passing a list of string arguments and values to overwrite the `.yaml` configuration with
                             highest priority, e.g. ["USE_TABLE_SEGMENTATION=False",
                                                     "USE_OCR=False",
                                                     "TF.LAYOUT.WEIGHTS=my_fancy_pytorch_model"]
    :param path_config_file: Path to a custom config file. Can be outside of the .cache directory. If given, it is
                             loaded instead of the default config file.
    :return: A DoctectionPipe instance with given configs
    :raises ValueError: if the resulting configuration does not pass `config_sanity_checks`
    """
    config_overwrite = [] if config_overwrite is None else config_overwrite

    # Exactly two outcomes are possible here, hence a plain if/else: PyTorch is the fallback whenever
    # DD_USE_TF is not set to a truthy value.
    if os.environ.get("DD_USE_TF", "0") in ENV_VARS_TRUE:
        lib = "TF"
        device = get_tf_device()
    else:
        lib = "PT"
        device = get_torch_device()

    # The default configs are handed to maybe_copy_config_to_cache even when a custom config file is passed
    dd_one_config_path = maybe_copy_config_to_cache(
        get_package_path(), get_configs_dir_path() / "dd", _DD_ONE, reset_config_file
    )
    maybe_copy_config_to_cache(get_package_path(), get_configs_dir_path() / "dd", _TESSERACT)

    # Set up of the configuration and logging
    file_cfg = set_config_by_yaml(dd_one_config_path if not path_config_file else path_config_file)
    cfg.freeze(freezed=False)
    cfg.overwrite_config(file_cfg)

    # Unfreeze once more before adding the runtime values.
    # NOTE(review): presumably guards against overwrite_config changing the frozen state - confirm.
    cfg.freeze(freezed=False)
    cfg.LANGUAGE = None
    cfg.LIB = lib
    cfg.DEVICE = device
    cfg.freeze()

    # Explicit overwrites are applied last and therefore win over the values of the config file
    if config_overwrite:
        cfg.update_args(config_overwrite)

    config_sanity_checks()
    logger.info(LoggingRecord(f"Config: \n {str(cfg)}", cfg.to_dict()))  # type: ignore

    # silences all Tensorpack layer logging while building the tower
    if tensorpack_available():
        disable_tp_layer_logging()

    return ServiceFactory.build_analyzer(cfg)