deepdoctection 0.30-py3-none-any.whl → 0.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +38 -29
- deepdoctection/analyzer/dd.py +36 -29
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/base.py +0 -19
- deepdoctection/dataflow/custom.py +4 -3
- deepdoctection/dataflow/custom_serialize.py +14 -5
- deepdoctection/dataflow/parallel_map.py +12 -11
- deepdoctection/dataflow/serialize.py +5 -4
- deepdoctection/datapoint/annotation.py +35 -13
- deepdoctection/datapoint/box.py +3 -5
- deepdoctection/datapoint/convert.py +3 -1
- deepdoctection/datapoint/image.py +79 -36
- deepdoctection/datapoint/view.py +152 -49
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +6 -3
- deepdoctection/datasets/base.py +86 -11
- deepdoctection/datasets/dataflow_builder.py +1 -1
- deepdoctection/datasets/info.py +4 -4
- deepdoctection/datasets/instances/doclaynet.py +3 -2
- deepdoctection/datasets/instances/fintabnet.py +2 -1
- deepdoctection/datasets/instances/funsd.py +2 -1
- deepdoctection/datasets/instances/iiitar13k.py +5 -2
- deepdoctection/datasets/instances/layouttest.py +4 -8
- deepdoctection/datasets/instances/publaynet.py +2 -2
- deepdoctection/datasets/instances/pubtables1m.py +6 -3
- deepdoctection/datasets/instances/pubtabnet.py +2 -1
- deepdoctection/datasets/instances/rvlcdip.py +2 -1
- deepdoctection/datasets/instances/xfund.py +2 -1
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +1 -1
- deepdoctection/eval/base.py +5 -4
- deepdoctection/eval/cocometric.py +2 -1
- deepdoctection/eval/eval.py +19 -15
- deepdoctection/eval/tedsmetric.py +14 -11
- deepdoctection/eval/tp_eval_callback.py +14 -7
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +39 -13
- deepdoctection/extern/d2detect.py +182 -90
- deepdoctection/extern/deskew.py +36 -9
- deepdoctection/extern/doctrocr.py +265 -83
- deepdoctection/extern/fastlang.py +49 -9
- deepdoctection/extern/hfdetr.py +106 -55
- deepdoctection/extern/hflayoutlm.py +441 -122
- deepdoctection/extern/hflm.py +225 -0
- deepdoctection/extern/model.py +56 -47
- deepdoctection/extern/pdftext.py +10 -5
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +27 -18
- deepdoctection/extern/tessocr.py +134 -22
- deepdoctection/extern/texocr.py +6 -2
- deepdoctection/extern/tp/tfutils.py +43 -9
- deepdoctection/extern/tp/tpcompat.py +14 -11
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +9 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +17 -7
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +16 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +17 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +14 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +8 -9
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +54 -30
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/d2struct.py +9 -7
- deepdoctection/mapper/hfstruct.py +7 -2
- deepdoctection/mapper/laylmstruct.py +164 -21
- deepdoctection/mapper/maputils.py +16 -3
- deepdoctection/mapper/misc.py +6 -3
- deepdoctection/mapper/prodigystruct.py +1 -1
- deepdoctection/mapper/pubstruct.py +10 -10
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +35 -8
- deepdoctection/pipe/base.py +53 -19
- deepdoctection/pipe/common.py +23 -13
- deepdoctection/pipe/concurrency.py +2 -1
- deepdoctection/pipe/doctectionpipe.py +2 -2
- deepdoctection/pipe/language.py +3 -2
- deepdoctection/pipe/layout.py +6 -3
- deepdoctection/pipe/lm.py +34 -66
- deepdoctection/pipe/order.py +142 -35
- deepdoctection/pipe/refine.py +26 -24
- deepdoctection/pipe/segment.py +21 -16
- deepdoctection/pipe/{cell.py → sub_layout.py} +30 -9
- deepdoctection/pipe/text.py +14 -8
- deepdoctection/pipe/transform.py +16 -9
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +36 -28
- deepdoctection/train/hf_detr_train.py +26 -17
- deepdoctection/train/hf_layoutlm_train.py +133 -111
- deepdoctection/train/tp_frcnn_train.py +21 -19
- deepdoctection/utils/__init__.py +3 -0
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +2 -2
- deepdoctection/utils/env_info.py +41 -84
- deepdoctection/utils/error.py +84 -0
- deepdoctection/utils/file_utils.py +4 -15
- deepdoctection/utils/fs.py +7 -7
- deepdoctection/utils/logger.py +1 -0
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +5 -4
- deepdoctection/utils/settings.py +6 -1
- deepdoctection/utils/transform.py +1 -1
- deepdoctection/utils/utils.py +0 -6
- deepdoctection/utils/viz.py +48 -5
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/METADATA +57 -73
- deepdoctection-0.32.dist-info/RECORD +146 -0
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/WHEEL +1 -1
- deepdoctection-0.30.dist-info/RECORD +0 -143
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/LICENSE +0 -0
- {deepdoctection-0.30.dist-info → deepdoctection-0.32.dist-info}/top_level.txt +0 -0
deepdoctection/extern/doctrocr.py

@@ -18,60 +18,60 @@
 """
 Deepdoctection wrappers for DocTr OCR text line detection and text recognition models
 """
+from __future__ import annotations
 
+import os
+from abc import ABC
 from pathlib import Path
-from typing import Any, List, Literal, Mapping, Optional, Tuple
+from typing import Any, List, Literal, Mapping, Optional, Tuple, Union
 from zipfile import ZipFile
 
+from lazy_imports import try_import
+
 from ..utils.detection_types import ImageType, Requirement
+from ..utils.error import DependencyError
 from ..utils.file_utils import (
-    doctr_available,
     get_doctr_requirement,
     get_pytorch_requirement,
     get_tensorflow_requirement,
     get_tf_addons_requirements,
     pytorch_available,
-    tf_addons_available,
     tf_available,
 )
 from ..utils.fs import load_json
-from ..utils.settings import LayoutType, ObjectTypes, TypeOrStr
-from .
-from .
+from ..utils.settings import LayoutType, ObjectTypes, PageType, TypeOrStr
+from ..utils.viz import viz_handler
+from .base import DetectionResult, ImageTransformer, ObjectDetector, PredictorBase, TextRecognizer
+from .pt.ptutils import get_torch_device
+from .tp.tfutils import get_tf_device
+
+with try_import() as pt_import_guard:
+    import torch
 
-
+with try_import() as tf_import_guard:
+    import tensorflow as tf  # type: ignore # pylint: disable=E0401
+
+with try_import() as doctr_import_guard:
+    from doctr.models._utils import estimate_orientation
 from doctr.models.detection.predictor import DetectionPredictor  # pylint: disable=W0611
 from doctr.models.detection.zoo import detection_predictor
 from doctr.models.preprocessor import PreProcessor
 from doctr.models.recognition.predictor import RecognitionPredictor  # pylint: disable=W0611
 from doctr.models.recognition.zoo import ARCHS, recognition
 
-if pytorch_available():
-    import torch
-
-if tf_available():
-    import tensorflow as tf  # type: ignore # pylint: disable=E0401
-
-
-def _set_device_str(device: Optional[str] = None) -> str:
-    if device is not None:
-        if tf_available():
-            device = "/" + device.replace("cuda", "gpu") + ":0"
-    elif pytorch_available():
-        device = set_torch_auto_device()
-    else:
-        device = "/gpu:0"  # we impose to install tensorflow-gpu because of Tensorpack models
-    return device
-
 
-def _load_model(
-
+def _load_model(
+    path_weights: str, doctr_predictor: Any, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+) -> None:
+    """Loading a model either in TF or PT. We only shift the model to the device when using PyTorch. The shift of
+    the model to the device in Tensorflow is done in the predict function."""
+    if lib == "PT":
         state_dict = torch.load(path_weights, map_location=device)
         for key in list(state_dict.keys()):
            state_dict["model." + key] = state_dict.pop(key)
         doctr_predictor.load_state_dict(state_dict)
         doctr_predictor.to(device)
-    elif lib == "TF"
+    elif lib == "TF":
         # Unzip the archive
         params_path = Path(path_weights).parent
         is_zip_path = path_weights.endswith(".zip")
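The import block above replaces the old `if pytorch_available(): import torch` guards with `lazy_imports.try_import`, which swallows a failed optional import at module load time instead of raising immediately. A minimal sketch of the pattern, assuming the `lazy-imports` package's documented `check()` method on the guard object:

```python
from lazy_imports import try_import

# The context manager catches an ImportError for the optional dependency
# instead of letting the module import fail.
with try_import() as torch_import_guard:
    import torch  # optional; may be missing in a TF-only environment


def describe_torch() -> str:
    # check() re-raises the deferred ImportError if the import above failed
    torch_import_guard.check()
    return torch.__version__
```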
@@ -83,20 +83,34 @@ def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str)
         doctr_predictor.model.load_weights(path_weights)
 
 
-def
+def auto_select_lib_for_doctr() -> Literal["PT", "TF"]:
+    """Auto select the DL library from environment variables"""
+    if os.environ.get("USE_TORCH"):
+        return "PT"
+    if os.environ.get("USE_TF"):
+        return "TF"
+    raise DependencyError("At least one of the env variables USE_TORCH or USE_TF must be set.")
+
+
+def doctr_predict_text_lines(
+    np_img: ImageType, predictor: DetectionPredictor, device: Union[torch.device, tf.device], lib: Literal["TF", "PT"]
+) -> List[DetectionResult]:
     """
     Generating text line DetectionResult based on Doctr DetectionPredictor.
 
     :param np_img: Image in np.array.
     :param predictor: `doctr.models.detection.predictor.DetectionPredictor`
     :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
+    :param lib: "TF" or "PT"
     :return: A list of text line detection results (without text).
     """
-    if
-    with
+    if lib == "TF":
+        with device:
             raw_output = predictor([np_img])
-
+    elif lib == "PT":
         raw_output = predictor([np_img])
+    else:
+        raise DependencyError("Tensorflow or PyTorch must be installed.")
     detection_results = [
         DetectionResult(
             box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.word

@@ -107,7 +121,10 @@ def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor",
 
 
 def doctr_predict_text(
-    inputs: List[Tuple[str, ImageType]],
+    inputs: List[Tuple[str, ImageType]],
+    predictor: RecognitionPredictor,
+    device: Union[torch.device, tf.device],
+    lib: Literal["TF", "PT"],
 ) -> List[DetectionResult]:
     """
     Calls Doctr text recognition model on a batch of numpy arrays (text lines predicted from a text line detector) and

@@ -117,22 +134,46 @@ def doctr_predict_text(
     text line
     :param predictor: `doctr.models.detection.predictor.RecognitionPredictor`
     :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
+    :param lib: "TF" or "PT"
     :return: A list of DetectionResult containing recognized text.
     """
 
     uuids, images = list(zip(*inputs))
-    if
-    with
+    if lib == "TF":
+        with device:
             raw_output = predictor(list(images))
-
+    elif lib == "PT":
         raw_output = predictor(list(images))
+    else:
+        raise DependencyError("Tensorflow or PyTorch must be installed.")
     detection_results = [
         DetectionResult(score=output[1], text=output[0], uuid=uuid) for uuid, output in zip(uuids, raw_output)
     ]
     return detection_results
 
 
-class
+class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
+    """Base class for Doctr textline detector. This class only implements the basic wrapper functions"""
+
+    def __init__(self, categories: Mapping[str, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
+        self.categories = categories  # type: ignore
+        self.lib = lib if lib is not None else self.auto_select_lib()
+
+    def possible_categories(self) -> List[ObjectTypes]:
+        return [LayoutType.word]
+
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
+
+    @staticmethod
+    def auto_select_lib() -> Literal["PT", "TF"]:
+        """Auto select the DL library from the installed and from environment variables"""
+        return auto_select_lib_for_doctr()
+
+
+class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
     """
     A deepdoctection wrapper of DocTr text line detector. We model text line detection as ObjectDetector
     and assume to use this detector in a ImageLayoutService.
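`auto_select_lib_for_doctr` resolves the deep-learning backend purely from the `USE_TORCH`/`USE_TF` environment variables, so one of them has to be set before a wrapper is instantiated without an explicit `lib` argument. A small sketch of that contract:

```python
import os

# With neither USE_TORCH nor USE_TF set, auto_select_lib_for_doctr() raises
# DependencyError("At least one of the env variables USE_TORCH or USE_TF must be set.")

# Opt in to the PyTorch code path before building any DocTr wrapper
os.environ["USE_TORCH"] = "1"
```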
@@ -165,8 +206,6 @@ class DoctrTextlineDetector(ObjectDetector):
 
         for dp in df:
             ...
-
-
     """
 
     def __init__(

@@ -174,21 +213,31 @@ class DoctrTextlineDetector(ObjectDetector):
         architecture: str,
         path_weights: str,
         categories: Mapping[str, TypeOrStr],
-        device: Optional[Literal["cpu", "cuda"]] = None,
-        lib:
+        device: Optional[Union[Literal["cpu", "cuda"], torch.device, tf.device]] = None,
+        lib: Optional[Literal["PT", "TF"]] = None,
     ) -> None:
-
-
+        """
+        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                             "db_mobilenet_v3_large". The full list can be found here:
+                             https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
+        :param path_weights: Path to the weights of the model
+        :param categories: A dict with the model output label and value
+        :param device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the dll
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+        """
+        super().__init__(categories, lib)
         self.architecture = architecture
         self.path_weights = path_weights
-
-
-
-
-        self.
-
-        self.
-
+
+        self.name = self.get_name(self.path_weights, self.architecture)
+        self.model_id = self.get_model_id()
+
+        if self.lib == "TF":
+            self.device = get_tf_device(device)
+        if self.lib == "PT":
+            self.device = get_torch_device(device)
+
+        self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device, self.lib)
 
     def predict(self, np_img: ImageType) -> List[DetectionResult]:
         """

@@ -197,26 +246,49 @@ class DoctrTextlineDetector(ObjectDetector):
         :param np_img: image as numpy array
         :return: A list of DetectionResult
         """
-        detection_results = doctr_predict_text_lines(np_img, self.doctr_predictor, self.device)
+        detection_results = doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)
         return detection_results
 
     @classmethod
     def get_requirements(cls) -> List[Requirement]:
-        if
+        if os.environ.get("DD_USE_TF"):
             return [get_tensorflow_requirement(), get_doctr_requirement(), get_tf_addons_requirements()]
-        if
+        if os.environ.get("DD_USE_TORCH"):
             return [get_pytorch_requirement(), get_doctr_requirement()]
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")
 
     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.categories, self.
-
-    def possible_categories(self) -> List[ObjectTypes]:
-        return [LayoutType.word]
+        return self.__class__(self.architecture, self.path_weights, self.categories, self.device, self.lib)
 
-
+    @staticmethod
+    def load_model(
+        path_weights: str, doctr_predictor: Any, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+    ) -> None:
         """Loading model weights"""
-        _load_model(
+        _load_model(path_weights, doctr_predictor, device, lib)
+
+    @staticmethod
+    def get_wrapped_model(
+        architecture: str, path_weights: str, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+    ) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                             "db_mobilenet_v3_large". The full list can be found here:
+                             https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
+        :param path_weights: Path to the weights of the model
+        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used. Make sure,
+                    these variables are set. If not, use
+
+                    deepdoctection.utils.env_info.auto_select_lib_and_device
+
+        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        """
+        doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
+        DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device, lib)
+        return doctr_predictor
 
 
 class DoctrTextRecognizer(TextRecognizer):
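With the new `__init__`, `device` and `lib` are optional: the backend falls back to `auto_select_lib()` and the device is resolved through `get_torch_device`/`get_tf_device`. A construction sketch, assuming the class is re-exported at package level as the other extern wrappers are, and that suitable `db_resnet50` weights exist locally (the path and category mapping are placeholders, not catalog values):

```python
import numpy as np
import deepdoctection as dd

np_img = np.zeros((1024, 768, 3), dtype=np.uint8)  # stand-in for a page image

detector = dd.DoctrTextlineDetector(
    architecture="db_resnet50",
    path_weights="/path/to/db_resnet50.pt",  # placeholder path
    categories={"1": dd.LayoutType.word},    # model output label -> category
    device="cpu",
    lib="PT",
)
word_boxes = detector.predict(np_img)  # List[DetectionResult] with relative coords
```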
@@ -253,15 +325,14 @@ class DoctrTextRecognizer(TextRecognizer):
 
         for dp in df:
             ...
-
     """
 
     def __init__(
         self,
         architecture: str,
         path_weights: str,
-        device: Optional[Literal["cpu", "cuda"]] = None,
-        lib:
+        device: Optional[Union[Literal["cpu", "cuda"], torch.device, tf.device]] = None,
+        lib: Optional[Literal["PT", "TF"]] = None,
         path_config_json: Optional[str] = None,
     ) -> None:
         """

@@ -270,19 +341,30 @@ class DoctrTextRecognizer(TextRecognizer):
                              https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
         :param path_weights: Path to the weights of the model
         :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
-        :param lib: "TF" or "PT".
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
         :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
                                  a model trained on custom vocab.
         """
-
-        self.
+
+        self.lib = lib if lib is not None else self.auto_select_lib()
+
         self.architecture = architecture
         self.path_weights = path_weights
-
-        self.
+
+        self.name = self.get_name(self.path_weights, self.architecture)
+        self.model_id = self.get_model_id()
+
+        if self.lib == "TF":
+            self.device = get_tf_device(device)
+        if self.lib == "PT":
+            self.device = get_torch_device(device)
+
         self.path_config_json = path_config_json
-        self.doctr_predictor = self.build_model()
-        self.load_model()
+        self.doctr_predictor = self.build_model(self.architecture, self.path_config_json)
+        self.load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
+        self.doctr_predictor = self.get_wrapped_model(
+            self.architecture, self.path_weights, self.device, self.lib, self.path_config_json
+        )
 
     def predict(self, images: List[Tuple[str, ImageType]]) -> List[DetectionResult]:
         """

@@ -292,7 +374,7 @@ class DoctrTextRecognizer(TextRecognizer):
         :return: A list of DetectionResult
         """
         if images:
-            return doctr_predict_text(images, self.doctr_predictor, self.device)
+            return doctr_predict_text(images, self.doctr_predictor, self.device, self.lib)
         return []
 
     @classmethod

@@ -304,21 +386,25 @@ class DoctrTextRecognizer(TextRecognizer):
         raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextRecognizer")
 
     def clone(self) -> PredictorBase:
-        return self.__class__(self.architecture, self.path_weights, self.
+        return self.__class__(self.architecture, self.path_weights, self.device, self.lib)
 
-
+    @staticmethod
+    def load_model(
+        path_weights: str, doctr_predictor: Any, device: Union[torch.device, tf.device], lib: Literal["PT", "TF"]
+    ) -> None:
         """Loading model weights"""
-        _load_model(
+        _load_model(path_weights, doctr_predictor, device, lib)
 
-
+    @staticmethod
+    def build_model(architecture: str, path_config_json: Optional[str] = None) -> "RecognitionPredictor":
         """Building the model"""
 
         # inspired and adapted from https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py
         custom_configs = {}
         batch_size = 32
         recognition_configs = {}
-        if
-            custom_configs = load_json(
+        if path_config_json:
+            custom_configs = load_json(path_config_json)
             custom_configs.pop("arch", None)
             custom_configs.pop("url", None)
             custom_configs.pop("task", None)

@@ -327,18 +413,114 @@ class DoctrTextRecognizer(TextRecognizer):
             batch_size = custom_configs.pop("batch_size")
             recognition_configs["batch_size"] = batch_size
 
-        if isinstance(
-            if
-                raise ValueError(f"unknown architecture '{
+        if isinstance(architecture, str):
+            if architecture not in ARCHS:
+                raise ValueError(f"unknown architecture '{architecture}'")
 
-            model = recognition.__dict__[
+            model = recognition.__dict__[architecture](pretrained=True, pretrained_backbone=True, **custom_configs)
         else:
             if not isinstance(
-
+                architecture,
                 (recognition.CRNN, recognition.SAR, recognition.MASTER, recognition.ViTSTR, recognition.PARSeq),
             ):
-                raise ValueError(f"unknown architecture: {type(
-            model =
+                raise ValueError(f"unknown architecture: {type(architecture)}")
+            model = architecture
 
         input_shape = model.cfg["input_shape"][:2] if tf_available() else model.cfg["input_shape"][-2:]
         return RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **recognition_configs), model)
+
+    @staticmethod
+    def get_wrapped_model(
+        architecture: str,
+        path_weights: str,
+        device: Union[torch.device, tf.device],
+        lib: Literal["PT", "TF"],
+        path_config_json: Optional[str] = None,
+    ) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                             "crnn_mobilenet_v3_small". The full list can be found here:
+                             https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
+        :param path_weights: Path to the weights of the model
+        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+        :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
+                                 a model trained on custom vocab.
+        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        """
+        doctr_predictor = DoctrTextRecognizer.build_model(architecture, path_config_json)
+        DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device, lib)
+        return doctr_predictor
+
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
+
+    @staticmethod
+    def auto_select_lib() -> Literal["PT", "TF"]:
+        """Auto select the DL library from the installed and from environment variables"""
+        return auto_select_lib_for_doctr()
+
+
+class DocTrRotationTransformer(ImageTransformer):
+    """
+    The `DocTrRotationTransformer` class is a specialized image transformer that is designed to handle image rotation
+    in the context of Optical Character Recognition (OCR) tasks. It inherits from the `ImageTransformer` base class and
+    implements methods for predicting and applying rotation transformations to images.
+
+    The `predict` method determines the angle of the rotated image using the `estimate_orientation` function from the
+    `doctr.models._utils` module. The `n_ct` and `ratio_threshold_for_lines` parameters for this function can be
+    configured when instantiating the class.
+
+    The `transform` method applies the predicted rotation to the image, effectively rotating the image backwards.
+    This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
+
+    This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
+    The class also provides methods for cloning itself and for getting the requirements of the OCR system.
+
+    **Example:**
+        transformer = DocTrRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+    """
+
+    def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
+        """
+
+        :param number_contours: the number of contours used for the orientation estimation
+        :param ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
+        """
+        self.number_contours = number_contours
+        self.ratio_threshold_for_lines = ratio_threshold_for_lines
+        self.name = "doctr_rotation_transformer"
+
+    def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
+        """
+        Applies the predicted rotation to the image, effectively rotating the image backwards.
+        This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
+
+        :param np_img: The input image as a numpy array.
+        :param specification: A `DetectionResult` object containing the predicted rotation angle.
+        :return: The rotated image as a numpy array.
+        """
+        return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
+
+    def predict(self, np_img: ImageType) -> DetectionResult:
+        angle = estimate_orientation(np_img, self.number_contours, self.ratio_threshold_for_lines)
+        if angle < 0:
+            angle += 360
+        return DetectionResult(angle=round(angle, 2))
+
+    @classmethod
+    def get_requirements(cls) -> List[Requirement]:
+        return [get_doctr_requirement()]
+
+    def clone(self) -> PredictorBase:
+        return self.__class__(self.number_contours, self.ratio_threshold_for_lines)
+
+    @staticmethod
+    def possible_category() -> PageType:
+        return PageType.angle
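The new `DocTrRotationTransformer` pairs naturally with the recognizer: `predict` estimates the page angle and `transform` rotates the image back before any detection or recognition runs. A combined sketch under the same assumptions as above (placeholder weights, package-level re-exports, `USE_TORCH` set):

```python
import numpy as np
import deepdoctection as dd

np_img = np.zeros((1024, 768, 3), dtype=np.uint8)  # stand-in for a page image

# Deskew first: predict() returns a DetectionResult carrying only the angle
transformer = dd.DocTrRotationTransformer()
rotation = transformer.predict(np_img)
np_img = transformer.transform(np_img, rotation)

recognizer = dd.DoctrTextRecognizer(
    architecture="crnn_vgg16_bn",
    path_weights="/path/to/crnn_vgg16_bn.pt",  # placeholder path
    device="cpu",
    lib="PT",
)
# predict() takes (uuid, numpy crop) pairs, one per detected text line
results = recognizer.predict([("line-0", np_img[0:40, 0:200])])
```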
deepdoctection/extern/fastlang.py

@@ -18,18 +18,47 @@
 """
 Deepdoctection wrappers for fasttext language detection models
 """
+from abc import ABC
 from copy import copy
-from
+from pathlib import Path
+from typing import Any, List, Mapping, Tuple, Union
 
-from
-
+from lazy_imports import try_import
+
+from ..utils.file_utils import Requirement, get_fasttext_requirement
+from ..utils.settings import TypeOrStr, get_type
 from .base import DetectionResult, LanguageDetector, PredictorBase
 
-
+with try_import() as import_guard:
     from fasttext import load_model  # type: ignore
 
 
-class
+class FasttextLangDetectorMixin(LanguageDetector, ABC):
+    """
+    Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
+    """
+
+    def __init__(self, categories: Mapping[str, TypeOrStr]) -> None:
+        """
+        :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
+        """
+        self.categories = copy({idx: get_type(cat) for idx, cat in categories.items()})
+
+    def output_to_detection_result(self, output: Union[Tuple[Any, Any]]) -> DetectionResult:
+        """
+        Generating `DetectionResult` from model output
+        :param output: FastText model output
+        :return: `DetectionResult` filled with `text` and `score`
+        """
+        return DetectionResult(text=self.categories[output[0][0]], score=output[1][0])
+
+    @staticmethod
+    def get_name(path_weights: str) -> str:
+        """Returns the name of the model"""
+        return "fasttext_" + "_".join(Path(path_weights).parts[-2:])
+
+
+class FasttextLangDetector(FasttextLangDetectorMixin):
     """
     Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
     The background to the models can be found in the works:

@@ -57,15 +86,18 @@ class FasttextLangDetector(LanguageDetector):
         :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
                            code.
         """
+        super().__init__(categories)
 
-        self.name = "fasttest_lang_detector"
         self.path_weights = path_weights
-
-        self.
+
+        self.name = self.get_name(self.path_weights)
+        self.model_id = self.get_model_id()
+
+        self.model = self.get_wrapped_model(self.path_weights)
 
     def predict(self, text_string: str) -> DetectionResult:
         output = self.model.predict(text_string)
-        return
+        return self.output_to_detection_result(output)
 
     @classmethod
     def get_requirements(cls) -> List[Requirement]:

@@ -73,3 +105,11 @@ class FasttextLangDetector(LanguageDetector):
 
     def clone(self) -> PredictorBase:
         return self.__class__(self.path_weights, self.categories)
+
+    @staticmethod
+    def get_wrapped_model(path_weights: str) -> Any:
+        """
+        Get the wrapped model
+        :param path_weights: path to model weights
+        """
+        return load_model(path_weights)