deepdoctection 0.39__tar.gz → 0.39.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (154) hide show
  1. {deepdoctection-0.39 → deepdoctection-0.39.1}/PKG-INFO +5 -5
  2. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/__init__.py +1 -1
  3. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/base.py +38 -5
  4. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/common.py +3 -3
  5. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/hf_detr_train.py +1 -1
  6. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/PKG-INFO +5 -5
  7. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/requires.txt +4 -4
  8. {deepdoctection-0.39 → deepdoctection-0.39.1}/setup.py +3 -2
  9. {deepdoctection-0.39 → deepdoctection-0.39.1}/LICENSE +0 -0
  10. {deepdoctection-0.39 → deepdoctection-0.39.1}/README.md +0 -0
  11. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/__init__.py +0 -0
  12. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/_config.py +0 -0
  13. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/dd.py +0 -0
  14. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/analyzer/factory.py +0 -0
  15. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/configs/__init__.py +0 -0
  16. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  17. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  18. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/__init__.py +0 -0
  19. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/base.py +0 -0
  20. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/common.py +0 -0
  21. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/custom.py +0 -0
  22. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/custom_serialize.py +0 -0
  23. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/parallel_map.py +0 -0
  24. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/serialize.py +0 -0
  25. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/dataflow/stats.py +0 -0
  26. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/__init__.py +0 -0
  27. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/annotation.py +0 -0
  28. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/box.py +0 -0
  29. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/convert.py +0 -0
  30. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/image.py +0 -0
  31. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datapoint/view.py +0 -0
  32. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/__init__.py +0 -0
  33. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/adapter.py +0 -0
  34. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/base.py +0 -0
  35. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/dataflow_builder.py +0 -0
  36. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/info.py +0 -0
  37. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/__init__.py +0 -0
  38. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  39. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  40. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/funsd.py +0 -0
  41. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  42. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/layouttest.py +0 -0
  43. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/publaynet.py +0 -0
  44. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  45. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  46. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  47. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xfund.py +0 -0
  48. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  49. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  50. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/registry.py +0 -0
  51. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/datasets/save.py +0 -0
  52. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/__init__.py +0 -0
  53. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/accmetric.py +0 -0
  54. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/base.py +0 -0
  55. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/cocometric.py +0 -0
  56. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/eval.py +0 -0
  57. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/registry.py +0 -0
  58. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/tedsmetric.py +0 -0
  59. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/eval/tp_eval_callback.py +0 -0
  60. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/__init__.py +0 -0
  61. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/base.py +0 -0
  62. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/d2detect.py +0 -0
  63. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/deskew.py +0 -0
  64. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/doctrocr.py +0 -0
  65. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/fastlang.py +0 -0
  66. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/hfdetr.py +0 -0
  67. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/hflayoutlm.py +0 -0
  68. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/hflm.py +0 -0
  69. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/model.py +0 -0
  70. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pdftext.py +0 -0
  71. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pt/__init__.py +0 -0
  72. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pt/nms.py +0 -0
  73. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/pt/ptutils.py +0 -0
  74. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tessocr.py +0 -0
  75. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/texocr.py +0 -0
  76. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/__init__.py +0 -0
  77. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tfutils.py +0 -0
  78. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpcompat.py +0 -0
  79. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  80. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  81. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  82. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  83. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  84. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  85. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  86. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  87. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  88. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  89. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  90. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  91. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  92. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  93. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  94. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  95. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  96. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  97. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/extern/tpdetect.py +0 -0
  98. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/__init__.py +0 -0
  99. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/cats.py +0 -0
  100. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/cocostruct.py +0 -0
  101. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/d2struct.py +0 -0
  102. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/hfstruct.py +0 -0
  103. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/laylmstruct.py +0 -0
  104. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/maputils.py +0 -0
  105. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/match.py +0 -0
  106. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/misc.py +0 -0
  107. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/pascalstruct.py +0 -0
  108. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/prodigystruct.py +0 -0
  109. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/pubstruct.py +0 -0
  110. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/tpstruct.py +0 -0
  111. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/mapper/xfundstruct.py +0 -0
  112. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/__init__.py +0 -0
  113. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/anngen.py +0 -0
  114. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/concurrency.py +0 -0
  115. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/doctectionpipe.py +0 -0
  116. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/language.py +0 -0
  117. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/layout.py +0 -0
  118. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/lm.py +0 -0
  119. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/order.py +0 -0
  120. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/refine.py +0 -0
  121. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/registry.py +0 -0
  122. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/segment.py +0 -0
  123. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/sub_layout.py +0 -0
  124. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/text.py +0 -0
  125. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/pipe/transform.py +0 -0
  126. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/py.typed +0 -0
  127. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/__init__.py +0 -0
  128. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/d2_frcnn_train.py +0 -0
  129. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  130. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/train/tp_frcnn_train.py +0 -0
  131. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/__init__.py +0 -0
  132. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/concurrency.py +0 -0
  133. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/context.py +0 -0
  134. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/develop.py +0 -0
  135. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/env_info.py +0 -0
  136. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/error.py +0 -0
  137. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/file_utils.py +0 -0
  138. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/fs.py +0 -0
  139. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/identifier.py +0 -0
  140. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/logger.py +0 -0
  141. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/metacfg.py +0 -0
  142. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/mocks.py +0 -0
  143. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/pdf_utils.py +0 -0
  144. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/settings.py +1 -1
  145. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/tqdm.py +0 -0
  146. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/transform.py +0 -0
  147. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/types.py +0 -0
  148. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/utils.py +0 -0
  149. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection/utils/viz.py +0 -0
  150. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/SOURCES.txt +0 -0
  151. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/dependency_links.txt +0 -0
  152. {deepdoctection-0.39 → deepdoctection-0.39.1}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.39 → deepdoctection-0.39.1}/setup.cfg +0 -0
  154. {deepdoctection-0.39 → deepdoctection-0.39.1}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39
3
+ Version: 0.39.1
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -61,7 +61,7 @@ Requires-Dist: python-doctr==0.8.1; extra == "tf"
61
61
  Requires-Dist: pycocotools>=2.0.2; extra == "tf"
62
62
  Requires-Dist: boto3==1.34.102; extra == "tf"
63
63
  Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
64
- Requires-Dist: fasttext==0.9.2; extra == "tf"
64
+ Requires-Dist: fasttext-wheel; extra == "tf"
65
65
  Requires-Dist: jdeskew>=0.2.2; extra == "tf"
66
66
  Requires-Dist: apted==1.0.3; extra == "tf"
67
67
  Requires-Dist: distance==0.1.3; extra == "tf"
@@ -86,12 +86,12 @@ Requires-Dist: termcolor>=1.1; extra == "pt"
86
86
  Requires-Dist: tabulate>=0.7.7; extra == "pt"
87
87
  Requires-Dist: tqdm==4.64.0; extra == "pt"
88
88
  Requires-Dist: timm>=0.9.16; extra == "pt"
89
- Requires-Dist: transformers>=4.36.0; extra == "pt"
89
+ Requires-Dist: transformers>=4.48.0; extra == "pt"
90
90
  Requires-Dist: accelerate>=0.29.1; extra == "pt"
91
91
  Requires-Dist: python-doctr==0.8.1; extra == "pt"
92
92
  Requires-Dist: boto3==1.34.102; extra == "pt"
93
93
  Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
94
- Requires-Dist: fasttext==0.9.2; extra == "pt"
94
+ Requires-Dist: fasttext-wheel; extra == "pt"
95
95
  Requires-Dist: jdeskew>=0.2.2; extra == "pt"
96
96
  Requires-Dist: apted==1.0.3; extra == "pt"
97
97
  Requires-Dist: distance==0.1.3; extra == "pt"
@@ -99,7 +99,7 @@ Requires-Dist: lxml>=4.9.1; extra == "pt"
99
99
  Provides-Extra: docs
100
100
  Requires-Dist: tensorpack==0.11; extra == "docs"
101
101
  Requires-Dist: boto3==1.34.102; extra == "docs"
102
- Requires-Dist: transformers>=4.36.0; extra == "docs"
102
+ Requires-Dist: transformers>=4.48.0; extra == "docs"
103
103
  Requires-Dist: accelerate>=0.29.1; extra == "docs"
104
104
  Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
105
105
  Requires-Dist: lxml>=4.9.1; extra == "docs"
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.39"
28
+ __version__ = "0.39.1"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -24,7 +24,7 @@ from __future__ import annotations
24
24
  from abc import ABC, abstractmethod
25
25
  from collections import defaultdict
26
26
  from dataclasses import dataclass, field
27
- from typing import Any, Mapping, Optional, Union
27
+ from typing import Any, Mapping, Optional, Union, Callable
28
28
  from uuid import uuid1
29
29
 
30
30
  from ..dataflow import DataFlow, MapData
@@ -33,6 +33,7 @@ from ..mapper.misc import curry
33
33
  from ..utils.context import timed_operation
34
34
  from ..utils.identifier import get_uuid_from_str
35
35
  from ..utils.settings import ObjectTypes
36
+ from ..utils.types import DP
36
37
  from .anngen import DatapointManager
37
38
 
38
39
 
@@ -76,6 +77,30 @@ class PipelineComponent(ABC):
76
77
  self.service_id = self.get_service_id()
77
78
  self.dp_manager = DatapointManager(self.service_id, model_id)
78
79
  self.timer_on = False
80
+ self.filter_func: Callable[[DP], bool] = lambda dp: False
81
+
82
+ def set_inbound_filter(self, filter_func: Callable[[DP], bool]) -> None:
83
+ """
84
+ Set a filter function to decide whether an image of the inbound dataflow should be passed to self.serve.
85
+ The filter function should return a boolean value. If the function returns True, the image will not be processed
86
+ by this pipeline component.
87
+
88
+ **Example:**
89
+
90
+ ```python
91
+ def do_not_process_tables(dp: Image) -> bool:
92
+ if "table" not in dp.get_categories_from_current_state():
93
+ return True
94
+ return False
95
+
96
+ layout_component = ImageLayoutService(...)
97
+ layout_component.set_inbound_filter(do_not_process_tables)
98
+ ```
99
+
100
+
101
+ :param filter_func: A function that takes an image datapoint and returns a boolean value
102
+ """
103
+ self.filter_func = filter_func # type: ignore
79
104
 
80
105
  @abstractmethod
81
106
  def serve(self, dp: Image) -> None:
@@ -92,6 +117,12 @@ class PipelineComponent(ABC):
92
117
  """
93
118
  raise NotImplementedError()
94
119
 
120
+ def _pass_datapoint(self, dp: Image) -> None:
121
+ self.dp_manager.datapoint = dp
122
+ if not self.filter_func(dp):
123
+ self.serve(dp)
124
+
125
+
95
126
  def pass_datapoint(self, dp: Image) -> Image:
96
127
  """
97
128
  Acceptance, handover to dp_manager, transformation and forwarding of dp. To measure the time, use
@@ -103,11 +134,9 @@ class PipelineComponent(ABC):
103
134
  """
104
135
  if self.timer_on:
105
136
  with timed_operation(self.__class__.__name__):
106
- self.dp_manager.datapoint = dp
107
- self.serve(dp)
137
+ self._pass_datapoint(dp)
108
138
  else:
109
- self.dp_manager.datapoint = dp
110
- self.serve(dp)
139
+ self._pass_datapoint(dp)
111
140
  return self.dp_manager.datapoint
112
141
 
113
142
  def predict_dataflow(self, df: DataFlow) -> DataFlow:
@@ -205,6 +234,7 @@ class Pipeline(ABC):
205
234
 
206
235
  **Example:**
207
236
 
237
+ ```python
208
238
  layout = LayoutPipeComponent(layout_detector ...)
209
239
  text = TextExtractPipeComponent(text_detector ...)
210
240
  simple_pipe = MyPipeline(pipeline_component = [layout, text])
@@ -212,6 +242,7 @@ class Pipeline(ABC):
212
242
 
213
243
  for page in doc_dataflow:
214
244
  print(page)
245
+ ```
215
246
 
216
247
  In doing so, page contains all document structures determined via the pipeline (either directly from the Image core
217
248
  model or already processed further).
@@ -225,10 +256,12 @@ class Pipeline(ABC):
225
256
 
226
257
  **Example:**
227
258
 
259
+ ```python
228
260
  pipe = MyPipeline(pipeline_component = [layout, text])
229
261
  pipe.set_session_id = True
230
262
 
231
263
  df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
264
+ ```
232
265
  """
233
266
 
234
267
  def __init__(self, pipeline_component_list: list[PipelineComponent]) -> None:
@@ -349,8 +349,8 @@ class AnnotationNmsService(PipelineComponent):
349
349
  def __init__(
350
350
  self,
351
351
  nms_pairs: Sequence[Sequence[TypeOrStr]],
352
- thresholds: Union[float, list[float]],
353
- priority: Optional[list[Union[Optional[TypeOrStr]]]] = None,
352
+ thresholds: Union[float, Sequence[float]],
353
+ priority: Optional[Sequence[Union[Optional[TypeOrStr]]]] = None,
354
354
  ):
355
355
  """
356
356
  :param nms_pairs: Groups of categories, either as string or by `ObjectType`.
@@ -362,7 +362,7 @@ class AnnotationNmsService(PipelineComponent):
362
362
  self.threshold = [thresholds for _ in self.nms_pairs]
363
363
  else:
364
364
  assert len(self.nms_pairs) == len(thresholds), "Sequences of nms_pairs and thresholds must have same length"
365
- self.threshold = thresholds
365
+ self.threshold = thresholds # type: ignore
366
366
  if priority:
367
367
  assert len(self.nms_pairs) == len(priority), "Sequences of nms_pairs and priority must have same length"
368
368
 
@@ -73,7 +73,7 @@ class DetrDerivedTrainer(Trainer):
73
73
  model: Union[PreTrainedModel, nn.Module],
74
74
  args: TrainingArguments,
75
75
  data_collator: DetrDataCollator,
76
- train_dataset: Dataset[Any],
76
+ train_dataset: DatasetAdapter,
77
77
  ):
78
78
  self.evaluator: Optional[Evaluator] = None
79
79
  self.build_eval_kwargs: Optional[dict[str, Any]] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deepdoctection
3
- Version: 0.39
3
+ Version: 0.39.1
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -61,7 +61,7 @@ Requires-Dist: python-doctr==0.8.1; extra == "tf"
61
61
  Requires-Dist: pycocotools>=2.0.2; extra == "tf"
62
62
  Requires-Dist: boto3==1.34.102; extra == "tf"
63
63
  Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
64
- Requires-Dist: fasttext==0.9.2; extra == "tf"
64
+ Requires-Dist: fasttext-wheel; extra == "tf"
65
65
  Requires-Dist: jdeskew>=0.2.2; extra == "tf"
66
66
  Requires-Dist: apted==1.0.3; extra == "tf"
67
67
  Requires-Dist: distance==0.1.3; extra == "tf"
@@ -86,12 +86,12 @@ Requires-Dist: termcolor>=1.1; extra == "pt"
86
86
  Requires-Dist: tabulate>=0.7.7; extra == "pt"
87
87
  Requires-Dist: tqdm==4.64.0; extra == "pt"
88
88
  Requires-Dist: timm>=0.9.16; extra == "pt"
89
- Requires-Dist: transformers>=4.36.0; extra == "pt"
89
+ Requires-Dist: transformers>=4.48.0; extra == "pt"
90
90
  Requires-Dist: accelerate>=0.29.1; extra == "pt"
91
91
  Requires-Dist: python-doctr==0.8.1; extra == "pt"
92
92
  Requires-Dist: boto3==1.34.102; extra == "pt"
93
93
  Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
94
- Requires-Dist: fasttext==0.9.2; extra == "pt"
94
+ Requires-Dist: fasttext-wheel; extra == "pt"
95
95
  Requires-Dist: jdeskew>=0.2.2; extra == "pt"
96
96
  Requires-Dist: apted==1.0.3; extra == "pt"
97
97
  Requires-Dist: distance==0.1.3; extra == "pt"
@@ -99,7 +99,7 @@ Requires-Dist: lxml>=4.9.1; extra == "pt"
99
99
  Provides-Extra: docs
100
100
  Requires-Dist: tensorpack==0.11; extra == "docs"
101
101
  Requires-Dist: boto3==1.34.102; extra == "docs"
102
- Requires-Dist: transformers>=4.36.0; extra == "docs"
102
+ Requires-Dist: transformers>=4.48.0; extra == "docs"
103
103
  Requires-Dist: accelerate>=0.29.1; extra == "docs"
104
104
  Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
105
105
  Requires-Dist: lxml>=4.9.1; extra == "docs"
@@ -36,7 +36,7 @@ types-urllib3>=1.26.25.14
36
36
  [docs]
37
37
  tensorpack==0.11
38
38
  boto3==1.34.102
39
- transformers>=4.36.0
39
+ transformers>=4.48.0
40
40
  accelerate>=0.29.1
41
41
  pdfplumber>=0.11.0
42
42
  lxml>=4.9.1
@@ -67,12 +67,12 @@ termcolor>=1.1
67
67
  tabulate>=0.7.7
68
68
  tqdm==4.64.0
69
69
  timm>=0.9.16
70
- transformers>=4.36.0
70
+ transformers>=4.48.0
71
71
  accelerate>=0.29.1
72
72
  python-doctr==0.8.1
73
73
  boto3==1.34.102
74
74
  pdfplumber>=0.11.0
75
- fasttext==0.9.2
75
+ fasttext-wheel
76
76
  jdeskew>=0.2.2
77
77
  apted==1.0.3
78
78
  distance==0.1.3
@@ -109,7 +109,7 @@ python-doctr==0.8.1
109
109
  pycocotools>=2.0.2
110
110
  boto3==1.34.102
111
111
  pdfplumber>=0.11.0
112
- fasttext==0.9.2
112
+ fasttext-wheel
113
113
  jdeskew>=0.2.2
114
114
  apted==1.0.3
115
115
  distance==0.1.3
@@ -78,7 +78,7 @@ _DEPS = [
78
78
  "tensorpack==0.11",
79
79
  # PyTorch related dependencies
80
80
  "timm>=0.9.16",
81
- "transformers>=4.36.0",
81
+ "transformers>=4.48.0",
82
82
  "accelerate>=0.29.1",
83
83
  # As maintenance of Detectron2 decreases, we will now use our own fork to keep updating after rigorous testing.
84
84
  # This will hopefully prevent from issues like 233
@@ -91,6 +91,7 @@ _DEPS = [
91
91
  "tf2onnx>=1.9.2",
92
92
  "python-doctr==0.8.1",
93
93
  "fasttext==0.9.2",
94
+ "fasttext-wheel",
94
95
  # dev dependencies
95
96
  "python-dotenv==1.0.0",
96
97
  "click", # version will not break black
@@ -147,7 +148,7 @@ dist_deps = deps_list(
147
148
  additional_deps = deps_list(
148
149
  "boto3",
149
150
  "pdfplumber",
150
- "fasttext",
151
+ "fasttext-wheel",
151
152
  "jdeskew",
152
153
  "apted",
153
154
  "distance",
File without changes
File without changes
@@ -101,7 +101,6 @@ class DocumentType(ObjectTypes):
101
101
  GOVERNMENT_TENDERS = "government_tenders"
102
102
  MANUALS = "manuals"
103
103
  PATENTS = "patents"
104
- MARK = "mark"
105
104
 
106
105
 
107
106
  @object_types_registry.register("LayoutType")
@@ -132,6 +131,7 @@ class LayoutType(ObjectTypes):
132
131
  PAGE_NUMBER = "page_number"
133
132
  KEY_VALUE_AREA = "key_value_area"
134
133
  LIST_ITEM = "list_item"
134
+ MARK = "mark"
135
135
 
136
136
 
137
137
  @object_types_registry.register("TableType")
File without changes