deepdoctection-0.30-py3-none-any.whl → deepdoctection-0.32-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Note: this version of deepdoctection has been flagged as potentially problematic.

Files changed (120):
  1. deepdoctection/__init__.py +38 -29
  2. deepdoctection/analyzer/dd.py +36 -29
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/base.py +0 -19
  5. deepdoctection/dataflow/custom.py +4 -3
  6. deepdoctection/dataflow/custom_serialize.py +14 -5
  7. deepdoctection/dataflow/parallel_map.py +12 -11
  8. deepdoctection/dataflow/serialize.py +5 -4
  9. deepdoctection/datapoint/annotation.py +35 -13
  10. deepdoctection/datapoint/box.py +3 -5
  11. deepdoctection/datapoint/convert.py +3 -1
  12. deepdoctection/datapoint/image.py +79 -36
  13. deepdoctection/datapoint/view.py +152 -49
  14. deepdoctection/datasets/__init__.py +1 -4
  15. deepdoctection/datasets/adapter.py +6 -3
  16. deepdoctection/datasets/base.py +86 -11
  17. deepdoctection/datasets/dataflow_builder.py +1 -1
  18. deepdoctection/datasets/info.py +4 -4
  19. deepdoctection/datasets/instances/doclaynet.py +3 -2
  20. deepdoctection/datasets/instances/fintabnet.py +2 -1
  21. deepdoctection/datasets/instances/funsd.py +2 -1
  22. deepdoctection/datasets/instances/iiitar13k.py +5 -2
  23. deepdoctection/datasets/instances/layouttest.py +4 -8
  24. deepdoctection/datasets/instances/publaynet.py +2 -2
  25. deepdoctection/datasets/instances/pubtables1m.py +6 -3
  26. deepdoctection/datasets/instances/pubtabnet.py +2 -1
  27. deepdoctection/datasets/instances/rvlcdip.py +2 -1
  28. deepdoctection/datasets/instances/xfund.py +2 -1
  29. deepdoctection/eval/__init__.py +1 -4
  30. deepdoctection/eval/accmetric.py +1 -1
  31. deepdoctection/eval/base.py +5 -4
  32. deepdoctection/eval/cocometric.py +2 -1
  33. deepdoctection/eval/eval.py +19 -15
  34. deepdoctection/eval/tedsmetric.py +14 -11
  35. deepdoctection/eval/tp_eval_callback.py +14 -7
  36. deepdoctection/extern/__init__.py +2 -7
  37. deepdoctection/extern/base.py +39 -13
  38. deepdoctection/extern/d2detect.py +182 -90
  39. deepdoctection/extern/deskew.py +36 -9
  40. deepdoctection/extern/doctrocr.py +265 -83
  41. deepdoctection/extern/fastlang.py +49 -9
  42. deepdoctection/extern/hfdetr.py +106 -55
  43. deepdoctection/extern/hflayoutlm.py +441 -122
  44. deepdoctection/extern/hflm.py +225 -0
  45. deepdoctection/extern/model.py +56 -47
  46. deepdoctection/extern/pdftext.py +10 -5
  47. deepdoctection/extern/pt/__init__.py +1 -3
  48. deepdoctection/extern/pt/nms.py +6 -2
  49. deepdoctection/extern/pt/ptutils.py +27 -18
  50. deepdoctection/extern/tessocr.py +134 -22
  51. deepdoctection/extern/texocr.py +6 -2
  52. deepdoctection/extern/tp/tfutils.py +43 -9
  53. deepdoctection/extern/tp/tpcompat.py +14 -11
  54. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  55. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  56. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
  58. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
  60. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  61. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
  62. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  67. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  68. deepdoctection/extern/tp/tpfrcnn/preproc.py +8 -9
  69. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  70. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  71. deepdoctection/extern/tpdetect.py +54 -30
  72. deepdoctection/mapper/__init__.py +3 -8
  73. deepdoctection/mapper/d2struct.py +9 -7
  74. deepdoctection/mapper/hfstruct.py +7 -2
  75. deepdoctection/mapper/laylmstruct.py +164 -21
  76. deepdoctection/mapper/maputils.py +16 -3
  77. deepdoctection/mapper/misc.py +6 -3
  78. deepdoctection/mapper/prodigystruct.py +1 -1
  79. deepdoctection/mapper/pubstruct.py +10 -10
  80. deepdoctection/mapper/tpstruct.py +3 -3
  81. deepdoctection/pipe/__init__.py +1 -1
  82. deepdoctection/pipe/anngen.py +35 -8
  83. deepdoctection/pipe/base.py +53 -19
  84. deepdoctection/pipe/common.py +23 -13
  85. deepdoctection/pipe/concurrency.py +2 -1
  86. deepdoctection/pipe/doctectionpipe.py +2 -2
  87. deepdoctection/pipe/language.py +3 -2
  88. deepdoctection/pipe/layout.py +6 -3
  89. deepdoctection/pipe/lm.py +34 -66
  90. deepdoctection/pipe/order.py +142 -35
  91. deepdoctection/pipe/refine.py +26 -24
  92. deepdoctection/pipe/segment.py +21 -16
  93. deepdoctection/pipe/{cell.py → sub_layout.py} +30 -9
  94. deepdoctection/pipe/text.py +14 -8
  95. deepdoctection/pipe/transform.py +16 -9
  96. deepdoctection/train/__init__.py +6 -12
  97. deepdoctection/train/d2_frcnn_train.py +36 -28
  98. deepdoctection/train/hf_detr_train.py +26 -17
  99. deepdoctection/train/hf_layoutlm_train.py +133 -111
  100. deepdoctection/train/tp_frcnn_train.py +21 -19
  101. deepdoctection/utils/__init__.py +3 -0
  102. deepdoctection/utils/concurrency.py +1 -1
  103. deepdoctection/utils/context.py +2 -2
  104. deepdoctection/utils/env_info.py +41 -84
  105. deepdoctection/utils/error.py +84 -0
  106. deepdoctection/utils/file_utils.py +4 -15
  107. deepdoctection/utils/fs.py +7 -7
  108. deepdoctection/utils/logger.py +1 -0
  109. deepdoctection/utils/mocks.py +93 -0
  110. deepdoctection/utils/pdf_utils.py +5 -4
  111. deepdoctection/utils/settings.py +6 -1
  112. deepdoctection/utils/transform.py +1 -1
  113. deepdoctection/utils/utils.py +0 -6
  114. deepdoctection/utils/viz.py +48 -5
  115. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/METADATA +57 -73
  116. deepdoctection-0.32.dist-info/RECORD +146 -0
  117. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/WHEEL +1 -1
  118. deepdoctection-0.30.dist-info/RECORD +0 -143
  119. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
  120. {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
deepdoctection/extern/doctrocr.py

@@ -18,60 +18,60 @@
 """
 Deepdoctection wrappers for DocTr OCR text line detection and text recognition models
 """
+from __future__ import annotations

+import os
+from abc import ABC
 from pathlib import Path
-from typing import Any, List, Literal, Mapping, Optional, Tuple
+from typing import Any, List, Literal, Mapping, Optional, Tuple, Union
 from zipfile import ZipFile

+from lazy_imports import try_import
+
 from ..utils.detection_types import ImageType, Requirement
+from ..utils.error import DependencyError
 from ..utils.file_utils import (
-    doctr_available,
     get_doctr_requirement,
     get_pytorch_requirement,
     get_tensorflow_requirement,
     get_tf_addons_requirements,
     pytorch_available,
-    tf_addons_available,
     tf_available,
 )
 from ..utils.fs import load_json
-from ..utils.settings import LayoutType, ObjectTypes, TypeOrStr
-from .base import DetectionResult, ObjectDetector, PredictorBase, TextRecognizer
-from .pt.ptutils import set_torch_auto_device
+from ..utils.settings import LayoutType, ObjectTypes, PageType, TypeOrStr
+from ..utils.viz import viz_handler
+from .base import DetectionResult, ImageTransformer, ObjectDetector, PredictorBase, TextRecognizer
+from .pt.ptutils import get_torch_device
+from .tp.tfutils import get_tf_device
+
+with try_import() as pt_import_guard:
+    import torch

-if doctr_available() and ((tf_addons_available() and tf_available()) or pytorch_available()):
+with try_import() as tf_import_guard:
+    import tensorflow as tf  # type: ignore # pylint: disable=E0401
+
+with try_import() as doctr_import_guard:
+    from doctr.models._utils import estimate_orientation
     from doctr.models.detection.predictor import DetectionPredictor  # pylint: disable=W0611
     from doctr.models.detection.zoo import detection_predictor
     from doctr.models.preprocessor import PreProcessor
     from doctr.models.recognition.predictor import RecognitionPredictor  # pylint: disable=W0611
     from doctr.models.recognition.zoo import ARCHS, recognition

-if pytorch_available():
-    import torch
-
-if tf_available():
-    import tensorflow as tf  # type: ignore # pylint: disable=E0401
-
-
-def _set_device_str(device: Optional[str] = None) -> str:
-    if device is not None:
-        if tf_available():
-            device = "/" + device.replace("cuda", "gpu") + ":0"
-    elif pytorch_available():
-        device = set_torch_auto_device()
-    else:
-        device = "/gpu:0"  # we impose to install tensorflow-gpu because of Tensorpack models
-    return device
-

-def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str) -> None:
-    if lib == "PT" and pytorch_available():
+def _load_model(
+    path_weights: str, doctr_predictor: Any, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+) -> None:
+    """Loading a model either in TF or PT. We only shift the model to the device when using PyTorch. The shift of
+    the model to the device in Tensorflow is done in the predict function."""
+    if lib == "PT":
         state_dict = torch.load(path_weights, map_location=device)
         for key in list(state_dict.keys()):
             state_dict["model." + key] = state_dict.pop(key)
         doctr_predictor.load_state_dict(state_dict)
         doctr_predictor.to(device)
-    elif lib == "TF" and tf_available():
+    elif lib == "TF":
         # Unzip the archive
         params_path = Path(path_weights).parent
         is_zip_path = path_weights.endswith(".zip")
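
The first hunk replaces the module-level `doctr_available()` / `tf_available()` gate with `lazy_imports.try_import` guards: the module now imports cleanly even when a backend is missing, and the captured `ImportError` only surfaces once a guarded name is actually needed. A minimal sketch of the pattern, assuming the guard object exposes `check()` as in the upstream lazy-imports package:

    from lazy_imports import try_import

    with try_import() as guard:
        import torch  # no ImportError escapes here, even if torch is absent

    # At the point of first use, the deferred error can be surfaced:
    # guard.check()  would re-raise the captured ImportError
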
@@ -83,20 +83,34 @@ def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str)
     doctr_predictor.model.load_weights(path_weights)


-def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor", device: str) -> List[DetectionResult]:
+def auto_select_lib_for_doctr() -> Literal["PT", "TF"]:
+    """Auto select the DL library from environment variables"""
+    if os.environ.get("USE_TORCH"):
+        return "PT"
+    if os.environ.get("USE_TF"):
+        return "TF"
+    raise DependencyError("At least one of the env variables USE_TORCH or USE_TF must be set.")
+
+
+def doctr_predict_text_lines(
+    np_img: ImageType, predictor: DetectionPredictor, device: Union[torch.device, tf.device], lib: Literal["TF", "PT"]
+) -> List[DetectionResult]:
     """
     Generating text line DetectionResult based on Doctr DetectionPredictor.

     :param np_img: Image in np.array.
     :param predictor: `doctr.models.detection.predictor.DetectionPredictor`
     :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
+    :param lib: "TF" or "PT"
     :return: A list of text line detection results (without text).
     """
-    if tf_available() and device is not None:
-        with tf.device(device):
+    if lib == "TF":
+        with device:
             raw_output = predictor([np_img])
-    else:
+    elif lib == "PT":
         raw_output = predictor([np_img])
+    else:
+        raise DependencyError("Tensorflow or PyTorch must be installed.")
     detection_results = [
         DetectionResult(
             box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.word
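
Because `lib` can now be `None`, the wrappers resolve the backend through `auto_select_lib_for_doctr`, which only consults the `USE_TORCH` / `USE_TF` environment variables and raises a `DependencyError` when neither is set. A usage sketch:

    import os

    # opt into the PyTorch backend before constructing any DocTR wrapper
    os.environ["USE_TORCH"] = "1"

    from deepdoctection.extern.doctrocr import auto_select_lib_for_doctr

    assert auto_select_lib_for_doctr() == "PT"
    # with neither USE_TORCH nor USE_TF set, a DependencyError is raised
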
@@ -107,7 +121,10 @@ def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor",


 def doctr_predict_text(
-    inputs: List[Tuple[str, ImageType]], predictor: "RecognitionPredictor", device: str
+    inputs: List[Tuple[str, ImageType]],
+    predictor: RecognitionPredictor,
+    device: Union[torch.device, tf.device],
+    lib: Literal["TF", "PT"],
 ) -> List[DetectionResult]:
     """
     Calls Doctr text recognition model on a batch of numpy arrays (text lines predicted from a text line detector) and
@@ -117,22 +134,46 @@ def doctr_predict_text(
     text line
     :param predictor: `doctr.models.detection.predictor.RecognitionPredictor`
     :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
+    :param lib: "TF" or "PT"
     :return: A list of DetectionResult containing recognized text.
     """

     uuids, images = list(zip(*inputs))
-    if tf_available() and device is not None:
-        with tf.device(device):
+    if lib == "TF":
+        with device:
             raw_output = predictor(list(images))
-    else:
+    elif lib == "PT":
         raw_output = predictor(list(images))
+    else:
+        raise DependencyError("Tensorflow or PyTorch must be installed.")
     detection_results = [
         DetectionResult(score=output[1], text=output[0], uuid=uuid) for uuid, output in zip(uuids, raw_output)
     ]
     return detection_results


-class DoctrTextlineDetector(ObjectDetector):
+class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
+    """Base class for Doctr textline detector. This class only implements the basic wrapper functions"""
+
+    def __init__(self, categories: Mapping[str, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
+        self.categories = categories  # type: ignore
+        self.lib = lib if lib is not None else self.auto_select_lib()
+
+    def possible_categories(self) -> List[ObjectTypes]:
+        return [LayoutType.word]
+
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
+
+    @staticmethod
+    def auto_select_lib() -> Literal["PT", "TF"]:
+        """Auto select the DL library from the installed and from environment variables"""
+        return auto_select_lib_for_doctr()
+
+
+class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
     """
     A deepdoctection wrapper of DocTr text line detector. We model text line detection as ObjectDetector
     and assume to use this detector in a ImageLayoutService.
@@ -165,8 +206,6 @@ class DoctrTextlineDetector(ObjectDetector):

     for dp in df:
         ...
-
-
     """

     def __init__(
@@ -174,21 +213,31 @@ class DoctrTextlineDetector(ObjectDetector):
         architecture: str,
         path_weights: str,
         categories: Mapping[str, TypeOrStr],
-        device: Optional[Literal["cpu", "cuda"]] = None,
-        lib: str = "TF",
+        device: Optional[Union[Literal["cpu", "cuda"], torch.device, tf.device]] = None,
+        lib: Optional[Literal["PT", "TF"]] = None,
     ) -> None:
-        self.lib = lib
-        self.name = "doctr_text_detector"
+        """
+        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                             "db_mobilenet_v3_large". The full list can be found here:
+                             https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
+        :param path_weights: Path to the weights of the model
+        :param categories: A dict with the model output label and value
+        :param device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the dll
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+        """
+        super().__init__(categories, lib)
         self.architecture = architecture
         self.path_weights = path_weights
-        self.doctr_predictor = detection_predictor(
-            arch=self.architecture, pretrained=False, pretrained_backbone=False
-        )  # we will be loading the model
-        # later because there is no easy way in doctr to load a model by giving only a path to its weights
-        self.categories = categories  # type: ignore
-        self.device_input = device
-        self.device = _set_device_str(device)
-        self.load_model()
+
+        self.name = self.get_name(self.path_weights, self.architecture)
+        self.model_id = self.get_model_id()
+
+        if self.lib == "TF":
+            self.device = get_tf_device(device)
+        if self.lib == "PT":
+            self.device = get_torch_device(device)
+
+        self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device, self.lib)

     def predict(self, np_img: ImageType) -> List[DetectionResult]:
         """
@@ -197,26 +246,49 @@ class DoctrTextlineDetector(ObjectDetector):
         :param np_img: image as numpy array
         :return: A list of DetectionResult
         """
-        detection_results = doctr_predict_text_lines(np_img, self.doctr_predictor, self.device)
+        detection_results = doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)
         return detection_results

     @classmethod
     def get_requirements(cls) -> List[Requirement]:
-        if tf_available():
+        if os.environ.get("DD_USE_TF"):
             return [get_tensorflow_requirement(), get_doctr_requirement(), get_tf_addons_requirements()]
-        if pytorch_available():
+        if os.environ.get("DD_USE_TORCH"):
             return [get_pytorch_requirement(), get_doctr_requirement()]
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")

     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input, self.lib)
-
-    def possible_categories(self) -> List[ObjectTypes]:
-        return [LayoutType.word]
+        return self.__class__(self.architecture, self.path_weights, self.categories, self.device, self.lib)

-    def load_model(self) -> None:
+    @staticmethod
+    def load_model(
+        path_weights: str, doctr_predictor: Any, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+    ) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
+        _load_model(path_weights, doctr_predictor, device, lib)
+
+    @staticmethod
+    def get_wrapped_model(
+        architecture: str, path_weights: str, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+    ) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                             "db_mobilenet_v3_large". The full list can be found here:
+                             https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
+        :param path_weights: Path to the weights of the model
+        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used. Make sure,
+                    these variables are set. If not, use
+
+                        deepdoctection.utils.env_info.auto_select_lib_and_device
+
+        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        """
+        doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
+        DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device, lib)
+        return doctr_predictor


 class DoctrTextRecognizer(TextRecognizer):
@@ -253,15 +325,14 @@ class DoctrTextRecognizer(TextRecognizer):

     for dp in df:
         ...
-
     """

     def __init__(
         self,
         architecture: str,
         path_weights: str,
-        device: Optional[Literal["cpu", "cuda"]] = None,
-        lib: str = "TF",
+        device: Optional[Union[Literal["cpu", "cuda"], torch.device, tf.device]] = None,
+        lib: Optional[Literal["PT", "TF"]] = None,
         path_config_json: Optional[str] = None,
     ) -> None:
         """
@@ -270,19 +341,30 @@ class DoctrTextRecognizer(TextRecognizer):
                              https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
         :param path_weights: Path to the weights of the model
         :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
-        :param lib: "TF" or "PT". Will default to "TF".
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
         :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
                                  a model trained on custom vocab.
         """
-        self.lib = lib
-        self.name = "doctr_text_recognizer"
+
+        self.lib = lib if lib is not None else self.auto_select_lib()
+
         self.architecture = architecture
         self.path_weights = path_weights
-        self.device_input = device
-        self.device = _set_device_str(device)
+
+        self.name = self.get_name(self.path_weights, self.architecture)
+        self.model_id = self.get_model_id()
+
+        if self.lib == "TF":
+            self.device = get_tf_device(device)
+        if self.lib == "PT":
+            self.device = get_torch_device(device)
+
         self.path_config_json = path_config_json
-        self.doctr_predictor = self.build_model()
-        self.load_model()
+        self.doctr_predictor = self.build_model(self.architecture, self.path_config_json)
+        self.load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
+        self.doctr_predictor = self.get_wrapped_model(
+            self.architecture, self.path_weights, self.device, self.lib, self.path_config_json
+        )

     def predict(self, images: List[Tuple[str, ImageType]]) -> List[DetectionResult]:
         """
@@ -292,7 +374,7 @@ class DoctrTextRecognizer(TextRecognizer):
         :return: A list of DetectionResult
         """
         if images:
-            return doctr_predict_text(images, self.doctr_predictor, self.device)
+            return doctr_predict_text(images, self.doctr_predictor, self.device, self.lib)
         return []

     @classmethod
@@ -304,21 +386,25 @@ class DoctrTextRecognizer(TextRecognizer):
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextRecognizer")

     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.device_input, self.lib)
+        return self.__class__(self.architecture, self.path_weights, self.device, self.lib)

-    def load_model(self) -> None:
+    @staticmethod
+    def load_model(
+        path_weights: str, doctr_predictor: Any, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+    ) -> None:
         """Loading model weights"""
-        _load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
+        _load_model(path_weights, doctr_predictor, device, lib)

-    def build_model(self) -> "RecognitionPredictor":
+    @staticmethod
+    def build_model(architecture: str, path_config_json: Optional[str] = None) -> "RecognitionPredictor":
         """Building the model"""

         # inspired and adapted from https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py
         custom_configs = {}
         batch_size = 32
         recognition_configs = {}
-        if self.path_config_json:
-            custom_configs = load_json(self.path_config_json)
+        if path_config_json:
+            custom_configs = load_json(path_config_json)
             custom_configs.pop("arch", None)
             custom_configs.pop("url", None)
             custom_configs.pop("task", None)
@@ -327,18 +413,114 @@ class DoctrTextRecognizer(TextRecognizer):
             batch_size = custom_configs.pop("batch_size")
             recognition_configs["batch_size"] = batch_size

-        if isinstance(self.architecture, str):
-            if self.architecture not in ARCHS:
-                raise ValueError(f"unknown architecture '{self.architecture}'")
+        if isinstance(architecture, str):
+            if architecture not in ARCHS:
+                raise ValueError(f"unknown architecture '{architecture}'")

-            model = recognition.__dict__[self.architecture](pretrained=True, pretrained_backbone=True, **custom_configs)
+            model = recognition.__dict__[architecture](pretrained=True, pretrained_backbone=True, **custom_configs)
         else:
             if not isinstance(
-                self.architecture,
+                architecture,
                 (recognition.CRNN, recognition.SAR, recognition.MASTER, recognition.ViTSTR, recognition.PARSeq),
             ):
-                raise ValueError(f"unknown architecture: {type(self.architecture)}")
-            model = self.architecture
+                raise ValueError(f"unknown architecture: {type(architecture)}")
+            model = architecture

         input_shape = model.cfg["input_shape"][:2] if tf_available() else model.cfg["input_shape"][-2:]
         return RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **recognition_configs), model)
+
+    @staticmethod
+    def get_wrapped_model(
+        architecture: str,
+        path_weights: str,
+        device: Union[torch.device, tf.device],
+        lib: Literal["PT", "TF"],
+        path_config_json: Optional[str] = None,
+    ) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                             "crnn_mobilenet_v3_small". The full list can be found here:
+                             https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
+        :param path_weights: Path to the weights of the model
+        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+        :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
+                                 a model trained on custom vocab.
+        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        """
+        doctr_predictor = DoctrTextRecognizer.build_model(architecture, path_config_json)
+        DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device, lib)
+        return doctr_predictor
+
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
+
+    @staticmethod
+    def auto_select_lib() -> Literal["PT", "TF"]:
+        """Auto select the DL library from the installed and from environment variables"""
+        return auto_select_lib_for_doctr()
+
+
+class DocTrRotationTransformer(ImageTransformer):
+    """
+    The `DocTrRotationTransformer` class is a specialized image transformer that is designed to handle image rotation
+    in the context of Optical Character Recognition (OCR) tasks. It inherits from the `ImageTransformer` base class and
+    implements methods for predicting and applying rotation transformations to images.
+
+    The `predict` method determines the angle of the rotated image using the `estimate_orientation` function from the
+    `doctr.models._utils` module. The `n_ct` and `ratio_threshold_for_lines` parameters for this function can be
+    configured when instantiating the class.
+
+    The `transform` method applies the predicted rotation to the image, effectively rotating the image backwards.
+    This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
+
+    This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
+    The class also provides methods for cloning itself and for getting the requirements of the OCR system.
+
+    **Example:**
+            transformer = DocTrRotationTransformer()
+            detection_result = transformer.predict(np_img)
+            rotated_image = transformer.transform(np_img, detection_result)
+    """
+
+    def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
+        """
+        :param number_contours: the number of contours used for the orientation estimation
+        :param ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
+        """
+        self.number_contours = number_contours
+        self.ratio_threshold_for_lines = ratio_threshold_for_lines
+        self.name = "doctr_rotation_transformer"
+
+    def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
+        """
+        Applies the predicted rotation to the image, effectively rotating the image backwards.
+        This method uses either the Pillow library or OpenCV for the rotation operation, depending on the
+        configuration.
+
+        :param np_img: The input image as a numpy array.
+        :param specification: A `DetectionResult` object containing the predicted rotation angle.
+        :return: The rotated image as a numpy array.
+        """
+        return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
+
+    def predict(self, np_img: ImageType) -> DetectionResult:
+        angle = estimate_orientation(np_img, self.number_contours, self.ratio_threshold_for_lines)
+        if angle < 0:
+            angle += 360
+        return DetectionResult(angle=round(angle, 2))
+
+    @classmethod
+    def get_requirements(cls) -> List[Requirement]:
+        return [get_doctr_requirement()]
+
+    def clone(self) -> PredictorBase:
+        return self.__class__(self.number_contours, self.ratio_threshold_for_lines)
+
+    @staticmethod
+    def possible_category() -> PageType:
+        return PageType.angle
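
A self-contained version of the docstring example for the new `DocTrRotationTransformer`, with a stand-in image (a real call would pass a scanned page):

    import numpy as np

    from deepdoctection.extern.doctrocr import DocTrRotationTransformer

    transformer = DocTrRotationTransformer()
    np_img = np.zeros((1024, 768, 3), dtype=np.uint8)  # stand-in page image
    detection_result = transformer.predict(np_img)  # angle normalized to [0, 360)
    rotated_image = transformer.transform(np_img, detection_result)
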
deepdoctection/extern/fastlang.py

@@ -18,18 +18,47 @@
 """
 Deepdoctection wrappers for fasttext language detection models
 """
+from abc import ABC
 from copy import copy
-from typing import List, Mapping
+from pathlib import Path
+from typing import Any, List, Mapping, Tuple, Union

-from ..utils.file_utils import Requirement, fasttext_available, get_fasttext_requirement
-from ..utils.settings import TypeOrStr
+from lazy_imports import try_import
+
+from ..utils.file_utils import Requirement, get_fasttext_requirement
+from ..utils.settings import TypeOrStr, get_type
 from .base import DetectionResult, LanguageDetector, PredictorBase

-if fasttext_available():
+with try_import() as import_guard:
     from fasttext import load_model  # type: ignore


-class FasttextLangDetector(LanguageDetector):
+class FasttextLangDetectorMixin(LanguageDetector, ABC):
+    """
+    Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
+    """
+
+    def __init__(self, categories: Mapping[str, TypeOrStr]) -> None:
+        """
+        :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
+        """
+        self.categories = copy({idx: get_type(cat) for idx, cat in categories.items()})
+
+    def output_to_detection_result(self, output: Union[Tuple[Any, Any]]) -> DetectionResult:
+        """
+        Generating `DetectionResult` from model output
+        :param output: FastText model output
+        :return: `DetectionResult` filled with `text` and `score`
+        """
+        return DetectionResult(text=self.categories[output[0][0]], score=output[1][0])
+
+    @staticmethod
+    def get_name(path_weights: str) -> str:
+        """Returns the name of the model"""
+        return "fasttext_" + "_".join(Path(path_weights).parts[-2:])
+
+
+class FasttextLangDetector(FasttextLangDetectorMixin):
     """
     Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
     The background to the models can be found in the works:
@@ -57,15 +86,18 @@ class FasttextLangDetector(LanguageDetector):
         :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
                            code.
         """
+        super().__init__(categories)

-        self.name = "fasttest_lang_detector"
         self.path_weights = path_weights
-        self.model = load_model(self.path_weights)
-        self.categories = copy(categories)  # type: ignore
+
+        self.name = self.get_name(self.path_weights)
+        self.model_id = self.get_model_id()
+
+        self.model = self.get_wrapped_model(self.path_weights)

     def predict(self, text_string: str) -> DetectionResult:
         output = self.model.predict(text_string)
-        return DetectionResult(text=self.categories[output[0][0]], score=output[1][0])
+        return self.output_to_detection_result(output)

     @classmethod
     def get_requirements(cls) -> List[Requirement]:
@@ -73,3 +105,11 @@ class FasttextLangDetector(LanguageDetector):

     def clone(self) -> PredictorBase:
         return self.__class__(self.path_weights, self.categories)
+
+    @staticmethod
+    def get_wrapped_model(path_weights: str) -> Any:
+        """
+        Get the wrapped model
+        :param path_weights: path to model weights
+        """
+        return load_model(path_weights)
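
In the refactored fastlang wrapper, `predict` maps the raw fasttext label through `categories`, so the mapping's keys are fasttext labels such as `__label__en`. A sketch with a placeholder weights path and a two-entry excerpt of the mapping (the full table ships with deepdoctection's model catalog):

    from deepdoctection.extern.fastlang import FasttextLangDetector

    detector = FasttextLangDetector(
        path_weights="/path/to/lid.176.bin",  # placeholder
        categories={"__label__en": "eng", "__label__de": "deu"},  # excerpt
    )
    result = detector.predict("This is an English sentence.")
    print(result.text, result.score)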