deepdoctection 0.39__tar.gz → 0.39.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. More details are available on the original diff page.

Files changed (154)
  1. {deepdoctection-0.39 → deepdoctection-0.39.2}/PKG-INFO +5 -5
  2. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/__init__.py +1 -1
  3. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/analyzer/_config.py +1 -0
  4. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/analyzer/factory.py +5 -1
  5. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datapoint/view.py +29 -0
  6. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/base.py +37 -5
  7. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/common.py +3 -3
  8. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/lm.py +5 -6
  9. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/train/hf_detr_train.py +1 -1
  10. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/train/hf_layoutlm_train.py +1 -3
  11. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection.egg-info/PKG-INFO +5 -5
  12. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection.egg-info/requires.txt +4 -4
  13. {deepdoctection-0.39 → deepdoctection-0.39.2}/setup.py +3 -3
  14. {deepdoctection-0.39 → deepdoctection-0.39.2}/LICENSE +0 -0
  15. {deepdoctection-0.39 → deepdoctection-0.39.2}/README.md +0 -0
  16. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/analyzer/__init__.py +0 -0
  17. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/analyzer/dd.py +0 -0
  18. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/configs/__init__.py +0 -0
  19. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  20. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  21. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/__init__.py +0 -0
  22. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/base.py +0 -0
  23. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/common.py +0 -0
  24. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/custom.py +0 -0
  25. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/custom_serialize.py +0 -0
  26. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/parallel_map.py +0 -0
  27. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/serialize.py +0 -0
  28. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/dataflow/stats.py +0 -0
  29. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datapoint/__init__.py +0 -0
  30. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datapoint/annotation.py +0 -0
  31. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datapoint/box.py +0 -0
  32. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datapoint/convert.py +0 -0
  33. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datapoint/image.py +0 -0
  34. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/__init__.py +0 -0
  35. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/adapter.py +0 -0
  36. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/base.py +0 -0
  37. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/dataflow_builder.py +0 -0
  38. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/info.py +0 -0
  39. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/__init__.py +0 -0
  40. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  41. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  42. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/funsd.py +0 -0
  43. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  44. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/layouttest.py +0 -0
  45. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/publaynet.py +0 -0
  46. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  47. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  48. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  49. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/xfund.py +0 -0
  50. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  51. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  52. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/registry.py +0 -0
  53. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/datasets/save.py +0 -0
  54. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/__init__.py +0 -0
  55. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/accmetric.py +0 -0
  56. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/base.py +0 -0
  57. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/cocometric.py +0 -0
  58. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/eval.py +0 -0
  59. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/registry.py +0 -0
  60. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/tedsmetric.py +0 -0
  61. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/eval/tp_eval_callback.py +0 -0
  62. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/__init__.py +0 -0
  63. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/base.py +0 -0
  64. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/d2detect.py +0 -0
  65. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/deskew.py +0 -0
  66. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/doctrocr.py +0 -0
  67. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/fastlang.py +0 -0
  68. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/hfdetr.py +0 -0
  69. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/hflayoutlm.py +0 -0
  70. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/hflm.py +0 -0
  71. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/model.py +0 -0
  72. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/pdftext.py +0 -0
  73. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/pt/__init__.py +0 -0
  74. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/pt/nms.py +0 -0
  75. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/pt/ptutils.py +0 -0
  76. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tessocr.py +0 -0
  77. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/texocr.py +0 -0
  78. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/__init__.py +0 -0
  79. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tfutils.py +0 -0
  80. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpcompat.py +0 -0
  81. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  82. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  83. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  84. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  85. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  86. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  87. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  88. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  89. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  90. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  91. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  92. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  93. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  94. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  95. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  96. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  97. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  98. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  99. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/extern/tpdetect.py +0 -0
  100. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/__init__.py +0 -0
  101. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/cats.py +0 -0
  102. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/cocostruct.py +0 -0
  103. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/d2struct.py +0 -0
  104. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/hfstruct.py +0 -0
  105. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/laylmstruct.py +0 -0
  106. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/maputils.py +0 -0
  107. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/match.py +0 -0
  108. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/misc.py +0 -0
  109. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/pascalstruct.py +0 -0
  110. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/prodigystruct.py +0 -0
  111. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/pubstruct.py +0 -0
  112. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/tpstruct.py +0 -0
  113. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/mapper/xfundstruct.py +0 -0
  114. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/__init__.py +0 -0
  115. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/anngen.py +0 -0
  116. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/concurrency.py +0 -0
  117. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/doctectionpipe.py +0 -0
  118. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/language.py +0 -0
  119. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/layout.py +0 -0
  120. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/order.py +0 -0
  121. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/refine.py +0 -0
  122. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/registry.py +0 -0
  123. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/segment.py +0 -0
  124. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/sub_layout.py +0 -0
  125. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/text.py +0 -0
  126. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/pipe/transform.py +0 -0
  127. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/py.typed +0 -0
  128. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/train/__init__.py +0 -0
  129. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/train/d2_frcnn_train.py +0 -0
  130. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/train/tp_frcnn_train.py +0 -0
  131. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/__init__.py +0 -0
  132. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/concurrency.py +0 -0
  133. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/context.py +0 -0
  134. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/develop.py +0 -0
  135. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/env_info.py +0 -0
  136. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/error.py +0 -0
  137. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/file_utils.py +0 -0
  138. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/fs.py +0 -0
  139. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/identifier.py +0 -0
  140. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/logger.py +0 -0
  141. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/metacfg.py +0 -0
  142. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/mocks.py +0 -0
  143. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/pdf_utils.py +0 -0
  144. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/settings.py +1 -1
  145. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/tqdm.py +0 -0
  146. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/transform.py +0 -0
  147. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/types.py +0 -0
  148. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/utils.py +0 -0
  149. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection/utils/viz.py +0 -0
  150. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection.egg-info/SOURCES.txt +0 -0
  151. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection.egg-info/dependency_links.txt +0 -0
  152. {deepdoctection-0.39 → deepdoctection-0.39.2}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.39 → deepdoctection-0.39.2}/setup.cfg +0 -0
  154. {deepdoctection-0.39 → deepdoctection-0.39.2}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39
3
+ Version: 0.39.2
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -61,7 +61,7 @@ Requires-Dist: python-doctr==0.8.1; extra == "tf"
61
61
  Requires-Dist: pycocotools>=2.0.2; extra == "tf"
62
62
  Requires-Dist: boto3==1.34.102; extra == "tf"
63
63
  Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
64
- Requires-Dist: fasttext==0.9.2; extra == "tf"
64
+ Requires-Dist: fasttext-wheel; extra == "tf"
65
65
  Requires-Dist: jdeskew>=0.2.2; extra == "tf"
66
66
  Requires-Dist: apted==1.0.3; extra == "tf"
67
67
  Requires-Dist: distance==0.1.3; extra == "tf"
@@ -86,12 +86,12 @@ Requires-Dist: termcolor>=1.1; extra == "pt"
86
86
  Requires-Dist: tabulate>=0.7.7; extra == "pt"
87
87
  Requires-Dist: tqdm==4.64.0; extra == "pt"
88
88
  Requires-Dist: timm>=0.9.16; extra == "pt"
89
- Requires-Dist: transformers>=4.36.0; extra == "pt"
89
+ Requires-Dist: transformers>=4.48.0; extra == "pt"
90
90
  Requires-Dist: accelerate>=0.29.1; extra == "pt"
91
91
  Requires-Dist: python-doctr==0.8.1; extra == "pt"
92
92
  Requires-Dist: boto3==1.34.102; extra == "pt"
93
93
  Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
94
- Requires-Dist: fasttext==0.9.2; extra == "pt"
94
+ Requires-Dist: fasttext-wheel; extra == "pt"
95
95
  Requires-Dist: jdeskew>=0.2.2; extra == "pt"
96
96
  Requires-Dist: apted==1.0.3; extra == "pt"
97
97
  Requires-Dist: distance==0.1.3; extra == "pt"
@@ -99,7 +99,7 @@ Requires-Dist: lxml>=4.9.1; extra == "pt"
99
99
  Provides-Extra: docs
100
100
  Requires-Dist: tensorpack==0.11; extra == "docs"
101
101
  Requires-Dist: boto3==1.34.102; extra == "docs"
102
- Requires-Dist: transformers>=4.36.0; extra == "docs"
102
+ Requires-Dist: transformers>=4.48.0; extra == "docs"
103
103
  Requires-Dist: accelerate>=0.29.1; extra == "docs"
104
104
  Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
105
105
  Requires-Dist: lxml>=4.9.1; extra == "docs"
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.39"
28
+ __version__ = "0.39.2"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -40,6 +40,7 @@ cfg.TF.CELL.FILTER = None
40
40
  cfg.TF.ITEM.WEIGHTS = "item/model-1620000_inf_only.data-00000-of-00001"
41
41
  cfg.TF.ITEM.FILTER = None
42
42
 
43
+ cfg.PT.ENFORCE_WEIGHTS = False
43
44
  cfg.PT.LAYOUT.WEIGHTS = "layout/d2_model_0829999_layout_inf_only.pt"
44
45
  cfg.PT.LAYOUT.WEIGHTS_TS = "layout/d2_model_0829999_layout_inf_only.ts"
45
46
  cfg.PT.LAYOUT.FILTER = None
@@ -98,7 +98,11 @@ class ServiceFactory:
98
98
  weights = (
99
99
  getattr(config.TF, mode).WEIGHTS
100
100
  if config.LIB == "TF"
101
- else (getattr(config.PT, mode).WEIGHTS if detectron2_available() else getattr(config.PT, mode).WEIGHTS_TS)
101
+ else (
102
+ getattr(config.PT, mode).WEIGHTS
103
+ if detectron2_available() or config.PT.ENFORCE_WEIGHTS
104
+ else getattr(config.PT, mode).WEIGHTS_TS
105
+ )
102
106
  )
103
107
  filter_categories = (
104
108
  getattr(getattr(config.TF, mode), "FILTER")
@@ -407,6 +407,35 @@ class Table(Layout):
407
407
  col_anns = self.base_page.get_annotation(annotation_ids=all_relation_ids, category_names=[LayoutType.COLUMN])
408
408
  return col_anns
409
409
 
410
+ def row(self, row_number: int) -> list[ImageAnnotationBaseView]:
411
+ """
412
+ Get a list of cells in a row.
413
+ """
414
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
415
+ all_cells = self.base_page.get_annotation(
416
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
417
+ )
418
+ row_cells = list(
419
+ filter(lambda c: row_number in (c.row_number, c.row_number + c.row_span - 1), all_cells) # type: ignore
420
+ )
421
+ row_cells.sort(key=lambda c: c.column_number) # type: ignore
422
+ return row_cells # type: ignore
423
+
424
+ def column(self, column_number: int) -> list[ImageAnnotationBaseView]:
425
+ """
426
+ Get a list of cells in a column.
427
+ """
428
+ all_relation_ids = self.get_relationship(Relationships.CHILD)
429
+ all_cells = self.base_page.get_annotation(
430
+ category_names=[LayoutType.CELL, CellType.SPANNING], annotation_ids=all_relation_ids
431
+ )
432
+ column_cells = list(
433
+ filter(lambda c: column_number in # type: ignore
434
+ (c.column_number, c.column_number + c.column_span - 1), all_cells) # type: ignore
435
+ )
436
+ column_cells.sort(key=lambda c: c.row_number) # type: ignore
437
+ return column_cells # type: ignore
438
+
410
439
  @property
411
440
  def html(self) -> HTML:
412
441
  """
@@ -24,7 +24,7 @@ from __future__ import annotations
24
24
  from abc import ABC, abstractmethod
25
25
  from collections import defaultdict
26
26
  from dataclasses import dataclass, field
27
- from typing import Any, Mapping, Optional, Union
27
+ from typing import Any, Callable, Mapping, Optional, Union
28
28
  from uuid import uuid1
29
29
 
30
30
  from ..dataflow import DataFlow, MapData
@@ -33,6 +33,7 @@ from ..mapper.misc import curry
33
33
  from ..utils.context import timed_operation
34
34
  from ..utils.identifier import get_uuid_from_str
35
35
  from ..utils.settings import ObjectTypes
36
+ from ..utils.types import DP
36
37
  from .anngen import DatapointManager
37
38
 
38
39
 
@@ -76,6 +77,30 @@ class PipelineComponent(ABC):
76
77
  self.service_id = self.get_service_id()
77
78
  self.dp_manager = DatapointManager(self.service_id, model_id)
78
79
  self.timer_on = False
80
+ self.filter_func: Callable[[DP], bool] = lambda dp: False
81
+
82
+ def set_inbound_filter(self, filter_func: Callable[[DP], bool]) -> None:
83
+ """
84
+ Set a filter function to decide, if an image of the inbound dataflow should be passed to self.serve.
85
+ The filter function should return a boolean value. If the function returns True, the image will not be processed
86
+ by this pipeline component.
87
+
88
+ **Example:**
89
+
90
+ ```python
91
+ def do_not_process_tables(dp: Image) -> bool:
92
+ if "table" not in dp.get_categories_from_current_state():
93
+ return True
94
+ return False
95
+
96
+ layout_component = ImageLayoutService(...)
97
+ layout_component.set_inbound_filter(do_not_process_tables)
98
+ ```
99
+
100
+
101
+ :param filter_func: A function that takes an image datapoint and returns a boolean value
102
+ """
103
+ self.filter_func = filter_func # type: ignore
79
104
 
80
105
  @abstractmethod
81
106
  def serve(self, dp: Image) -> None:
@@ -92,6 +117,11 @@ class PipelineComponent(ABC):
92
117
  """
93
118
  raise NotImplementedError()
94
119
 
120
+ def _pass_datapoint(self, dp: Image) -> None:
121
+ self.dp_manager.datapoint = dp
122
+ if not self.filter_func(dp):
123
+ self.serve(dp)
124
+
95
125
  def pass_datapoint(self, dp: Image) -> Image:
96
126
  """
97
127
  Acceptance, handover to dp_manager, transformation and forwarding of dp. To measure the time, use
@@ -103,11 +133,9 @@ class PipelineComponent(ABC):
103
133
  """
104
134
  if self.timer_on:
105
135
  with timed_operation(self.__class__.__name__):
106
- self.dp_manager.datapoint = dp
107
- self.serve(dp)
136
+ self._pass_datapoint(dp)
108
137
  else:
109
- self.dp_manager.datapoint = dp
110
- self.serve(dp)
138
+ self._pass_datapoint(dp)
111
139
  return self.dp_manager.datapoint
112
140
 
113
141
  def predict_dataflow(self, df: DataFlow) -> DataFlow:
@@ -205,6 +233,7 @@ class Pipeline(ABC):
205
233
 
206
234
  **Example:**
207
235
 
236
+ ```python
208
237
  layout = LayoutPipeComponent(layout_detector ...)
209
238
  text = TextExtractPipeComponent(text_detector ...)
210
239
  simple_pipe = MyPipeline(pipeline_component = [layout, text])
@@ -212,6 +241,7 @@ class Pipeline(ABC):
212
241
 
213
242
  for page in doc_dataflow:
214
243
  print(page)
244
+ ```
215
245
 
216
246
  In doing so, page contains all document structures determined via the pipeline (either directly from the Image core
217
247
  model or already processed further).
@@ -225,10 +255,12 @@ class Pipeline(ABC):
225
255
 
226
256
  **Example:**
227
257
 
258
+ ```python
228
259
  pipe = MyPipeline(pipeline_component = [layout, text])
229
260
  pipe.set_session_id = True
230
261
 
231
262
  df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
263
+ ```
232
264
  """
233
265
 
234
266
  def __init__(self, pipeline_component_list: list[PipelineComponent]) -> None:
@@ -349,8 +349,8 @@ class AnnotationNmsService(PipelineComponent):
349
349
  def __init__(
350
350
  self,
351
351
  nms_pairs: Sequence[Sequence[TypeOrStr]],
352
- thresholds: Union[float, list[float]],
353
- priority: Optional[list[Union[Optional[TypeOrStr]]]] = None,
352
+ thresholds: Union[float, Sequence[float]],
353
+ priority: Optional[Sequence[Union[Optional[TypeOrStr]]]] = None,
354
354
  ):
355
355
  """
356
356
  :param nms_pairs: Groups of categories, either as string or by `ObjectType`.
@@ -362,7 +362,7 @@ class AnnotationNmsService(PipelineComponent):
362
362
  self.threshold = [thresholds for _ in self.nms_pairs]
363
363
  else:
364
364
  assert len(self.nms_pairs) == len(thresholds), "Sequences of nms_pairs and thresholds must have same length"
365
- self.threshold = thresholds
365
+ self.threshold = thresholds # type: ignore
366
366
  if priority:
367
367
  assert len(self.nms_pairs) == len(priority), "Sequences of nms_pairs and priority must have same length"
368
368
 
@@ -265,7 +265,7 @@ class LMSequenceClassifierService(PipelineComponent):
265
265
  padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
266
266
  truncation: bool = True,
267
267
  return_overflowing_tokens: bool = False,
268
- use_other_as_default_category: bool = False
268
+ use_other_as_default_category: bool = False,
269
269
  ) -> None:
270
270
  """
271
271
  :param tokenizer: Tokenizer, typing allows currently anything. This will be changed in the future
@@ -309,11 +309,10 @@ class LMSequenceClassifierService(PipelineComponent):
309
309
  lm_output = None
310
310
  if lm_input is None:
311
311
  if self.use_other_as_default_category:
312
- class_id = self.language_model.categories.get_categories(as_dict=True,
313
- name_as_key=True).get(TokenClasses.OTHER, 1)
314
- lm_output = SequenceClassResult(class_name=TokenClasses.OTHER,
315
- class_id = class_id,
316
- score=-1.)
312
+ class_id = self.language_model.categories.get_categories(as_dict=True, name_as_key=True).get(
313
+ TokenClasses.OTHER, 1
314
+ )
315
+ lm_output = SequenceClassResult(class_name=TokenClasses.OTHER, class_id=class_id, score=-1.0)
317
316
  else:
318
317
  lm_output = self.language_model.predict(**lm_input)
319
318
  if lm_output:
@@ -73,7 +73,7 @@ class DetrDerivedTrainer(Trainer):
73
73
  model: Union[PreTrainedModel, nn.Module],
74
74
  args: TrainingArguments,
75
75
  data_collator: DetrDataCollator,
76
- train_dataset: Dataset[Any],
76
+ train_dataset: DatasetAdapter,
77
77
  ):
78
78
  self.evaluator: Optional[Evaluator] = None
79
79
  self.build_eval_kwargs: Optional[dict[str, Any]] = None
@@ -499,9 +499,7 @@ def train_hf_layoutlm(
499
499
  )
500
500
  pipeline_component_cls = pipeline_component_registry.get(pipeline_component_name)
501
501
  if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
502
- pipeline_component = pipeline_component_cls(tokenizer_fast,
503
- dd_model,
504
- use_other_as_default_category=True)
502
+ pipeline_component = pipeline_component_cls(tokenizer_fast, dd_model, use_other_as_default_category=True)
505
503
  else:
506
504
  pipeline_component = pipeline_component_cls(
507
505
  tokenizer_fast,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39
3
+ Version: 0.39.2
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -61,7 +61,7 @@ Requires-Dist: python-doctr==0.8.1; extra == "tf"
61
61
  Requires-Dist: pycocotools>=2.0.2; extra == "tf"
62
62
  Requires-Dist: boto3==1.34.102; extra == "tf"
63
63
  Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
64
- Requires-Dist: fasttext==0.9.2; extra == "tf"
64
+ Requires-Dist: fasttext-wheel; extra == "tf"
65
65
  Requires-Dist: jdeskew>=0.2.2; extra == "tf"
66
66
  Requires-Dist: apted==1.0.3; extra == "tf"
67
67
  Requires-Dist: distance==0.1.3; extra == "tf"
@@ -86,12 +86,12 @@ Requires-Dist: termcolor>=1.1; extra == "pt"
86
86
  Requires-Dist: tabulate>=0.7.7; extra == "pt"
87
87
  Requires-Dist: tqdm==4.64.0; extra == "pt"
88
88
  Requires-Dist: timm>=0.9.16; extra == "pt"
89
- Requires-Dist: transformers>=4.36.0; extra == "pt"
89
+ Requires-Dist: transformers>=4.48.0; extra == "pt"
90
90
  Requires-Dist: accelerate>=0.29.1; extra == "pt"
91
91
  Requires-Dist: python-doctr==0.8.1; extra == "pt"
92
92
  Requires-Dist: boto3==1.34.102; extra == "pt"
93
93
  Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
94
- Requires-Dist: fasttext==0.9.2; extra == "pt"
94
+ Requires-Dist: fasttext-wheel; extra == "pt"
95
95
  Requires-Dist: jdeskew>=0.2.2; extra == "pt"
96
96
  Requires-Dist: apted==1.0.3; extra == "pt"
97
97
  Requires-Dist: distance==0.1.3; extra == "pt"
@@ -99,7 +99,7 @@ Requires-Dist: lxml>=4.9.1; extra == "pt"
99
99
  Provides-Extra: docs
100
100
  Requires-Dist: tensorpack==0.11; extra == "docs"
101
101
  Requires-Dist: boto3==1.34.102; extra == "docs"
102
- Requires-Dist: transformers>=4.36.0; extra == "docs"
102
+ Requires-Dist: transformers>=4.48.0; extra == "docs"
103
103
  Requires-Dist: accelerate>=0.29.1; extra == "docs"
104
104
  Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
105
105
  Requires-Dist: lxml>=4.9.1; extra == "docs"
@@ -36,7 +36,7 @@ types-urllib3>=1.26.25.14
36
36
  [docs]
37
37
  tensorpack==0.11
38
38
  boto3==1.34.102
39
- transformers>=4.36.0
39
+ transformers>=4.48.0
40
40
  accelerate>=0.29.1
41
41
  pdfplumber>=0.11.0
42
42
  lxml>=4.9.1
@@ -67,12 +67,12 @@ termcolor>=1.1
67
67
  tabulate>=0.7.7
68
68
  tqdm==4.64.0
69
69
  timm>=0.9.16
70
- transformers>=4.36.0
70
+ transformers>=4.48.0
71
71
  accelerate>=0.29.1
72
72
  python-doctr==0.8.1
73
73
  boto3==1.34.102
74
74
  pdfplumber>=0.11.0
75
- fasttext==0.9.2
75
+ fasttext-wheel
76
76
  jdeskew>=0.2.2
77
77
  apted==1.0.3
78
78
  distance==0.1.3
@@ -109,7 +109,7 @@ python-doctr==0.8.1
109
109
  pycocotools>=2.0.2
110
110
  boto3==1.34.102
111
111
  pdfplumber>=0.11.0
112
- fasttext==0.9.2
112
+ fasttext-wheel
113
113
  jdeskew>=0.2.2
114
114
  apted==1.0.3
115
115
  distance==0.1.3
@@ -78,7 +78,7 @@ _DEPS = [
78
78
  "tensorpack==0.11",
79
79
  # PyTorch related dependencies
80
80
  "timm>=0.9.16",
81
- "transformers>=4.36.0",
81
+ "transformers>=4.48.0",
82
82
  "accelerate>=0.29.1",
83
83
  # As maintenance of Detectron2 decreases, we will now use our own Fork the keep updating after rigorous testing.
84
84
  # This will hopefully prevent from issues like 233
@@ -90,7 +90,7 @@ _DEPS = [
90
90
  "tensorflow-addons>=0.17.1",
91
91
  "tf2onnx>=1.9.2",
92
92
  "python-doctr==0.8.1",
93
- "fasttext==0.9.2",
93
+ "fasttext-wheel",
94
94
  # dev dependencies
95
95
  "python-dotenv==1.0.0",
96
96
  "click", # version will not break black
@@ -147,7 +147,7 @@ dist_deps = deps_list(
147
147
  additional_deps = deps_list(
148
148
  "boto3",
149
149
  "pdfplumber",
150
- "fasttext",
150
+ "fasttext-wheel",
151
151
  "jdeskew",
152
152
  "apted",
153
153
  "distance",
File without changes
File without changes
@@ -101,7 +101,6 @@ class DocumentType(ObjectTypes):
101
101
  GOVERNMENT_TENDERS = "government_tenders"
102
102
  MANUALS = "manuals"
103
103
  PATENTS = "patents"
104
- MARK = "mark"
105
104
 
106
105
 
107
106
  @object_types_registry.register("LayoutType")
@@ -132,6 +131,7 @@ class LayoutType(ObjectTypes):
132
131
  PAGE_NUMBER = "page_number"
133
132
  KEY_VALUE_AREA = "key_value_area"
134
133
  LIST_ITEM = "list_item"
134
+ MARK = "mark"
135
135
 
136
136
 
137
137
  @object_types_registry.register("TableType")
File without changes