deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
|
@@ -16,8 +16,9 @@
|
|
|
16
16
|
# limitations under the License.
|
|
17
17
|
|
|
18
18
|
"""
|
|
19
|
-
|
|
19
|
+
Wrappers for DocTr text line detection and text recognition models
|
|
20
20
|
"""
|
|
21
|
+
|
|
21
22
|
from __future__ import annotations
|
|
22
23
|
|
|
23
24
|
import os
|
|
@@ -106,13 +107,16 @@ def doctr_predict_text_lines(
|
|
|
106
107
|
np_img: PixelValues, predictor: DetectionPredictor, device: Union[torch.device, tf.device], lib: Literal["TF", "PT"]
|
|
107
108
|
) -> list[DetectionResult]:
|
|
108
109
|
"""
|
|
109
|
-
Generating text line DetectionResult based on
|
|
110
|
+
Generating text line `DetectionResult` based on DocTr `DetectionPredictor`.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
np_img: Image in `np.array`
|
|
114
|
+
predictor: `doctr.models.detection.predictor.DetectionPredictor`
|
|
115
|
+
device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
|
|
116
|
+
lib: "TF" or "PT"
|
|
110
117
|
|
|
111
|
-
:
|
|
112
|
-
|
|
113
|
-
:param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
|
|
114
|
-
:param lib: "TF" or "PT"
|
|
115
|
-
:return: A list of text line detection results (without text).
|
|
118
|
+
Returns:
|
|
119
|
+
A list of text line `DetectionResult` (without text)
|
|
116
120
|
"""
|
|
117
121
|
if lib == "TF":
|
|
118
122
|
with device:
|
|
@@ -137,15 +141,18 @@ def doctr_predict_text(
|
|
|
137
141
|
lib: Literal["TF", "PT"],
|
|
138
142
|
) -> list[DetectionResult]:
|
|
139
143
|
"""
|
|
140
|
-
Calls
|
|
141
|
-
returns the recognized text as DetectionResult
|
|
142
|
-
|
|
143
|
-
:
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
144
|
+
Calls DocTr text recognition model on a batch of `np.array`s (text lines predicted from a text line detector) and
|
|
145
|
+
returns the recognized text as `DetectionResult`
|
|
146
|
+
|
|
147
|
+
Args:
|
|
148
|
+
inputs: list of tuples containing the `annotation_id` of the input image and the `np.array` of the cropped
|
|
149
|
+
text line
|
|
150
|
+
predictor: `doctr.models.recognition.predictor.RecognitionPredictor`
|
|
151
|
+
device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
|
|
152
|
+
lib: "TF" or "PT"
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
A list of `DetectionResult` containing recognized text
|
|
149
156
|
"""
|
|
150
157
|
|
|
151
158
|
uuids, images = list(zip(*inputs))
|
|
@@ -163,7 +170,7 @@ def doctr_predict_text(
|
|
|
163
170
|
|
|
164
171
|
|
|
165
172
|
class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
|
|
166
|
-
"""Base class for
|
|
173
|
+
"""Base class for DocTr text line detector. This class only implements the basic wrapper functions"""
|
|
167
174
|
|
|
168
175
|
def __init__(self, categories: Mapping[int, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
|
|
169
176
|
self.categories = ModelCategories(init_categories=categories)
|
|
@@ -174,12 +181,26 @@ class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
|
|
|
174
181
|
|
|
175
182
|
@staticmethod
|
|
176
183
|
def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
|
|
177
|
-
"""
|
|
184
|
+
"""
|
|
185
|
+
Returns the name of the model
|
|
186
|
+
|
|
187
|
+
Args:
|
|
188
|
+
path_weights: Path to the model weights
|
|
189
|
+
architecture: Architecture name
|
|
190
|
+
|
|
191
|
+
Returns:
|
|
192
|
+
The name of the model as string
|
|
193
|
+
"""
|
|
178
194
|
return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
|
|
179
195
|
|
|
180
196
|
@staticmethod
|
|
181
197
|
def auto_select_lib() -> Literal["PT", "TF"]:
|
|
182
|
-
"""
|
|
198
|
+
"""
|
|
199
|
+
Auto select the DL library based on the installed libraries and on environment variables
|
|
200
|
+
|
|
201
|
+
Returns:
|
|
202
|
+
Either "PT" or "TF" based on environment variables
|
|
203
|
+
"""
|
|
183
204
|
return auto_select_lib_for_doctr()
|
|
184
205
|
|
|
185
206
|
|
|
@@ -194,28 +215,28 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
|
|
|
194
215
|
Some other pre-trained models exist that have not been registered in `ModelCatalog`. Please check the DocTr library
|
|
195
216
|
and organize the download of the pre-trained model by yourself.
|
|
196
217
|
|
|
197
|
-
|
|
218
|
+
Example:
|
|
219
|
+
```python
|
|
220
|
+
path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
|
|
221
|
+
/db_resnet50-ac60cadc.pt")
|
|
222
|
+
# Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
|
|
198
223
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
224
|
+
categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
|
|
225
|
+
det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
|
|
226
|
+
layout = ImageLayoutService(det,to_image=True, crop_image=True)
|
|
202
227
|
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
228
|
+
path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
|
|
229
|
+
/pt/crnn_vgg16_bn-9762b0b0.pt")
|
|
230
|
+
rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
|
|
231
|
+
text = TextExtractionService(rec, extract_from_roi="word")
|
|
206
232
|
|
|
207
|
-
|
|
208
|
-
/pt/crnn_vgg16_bn-9762b0b0.pt")
|
|
209
|
-
rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
|
|
210
|
-
text = TextExtractionService(rec, extract_from_roi="word")
|
|
233
|
+
analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
|
|
211
234
|
|
|
212
|
-
|
|
235
|
+
path = "/path/to/image_dir"
|
|
236
|
+
df = analyzer.analyze(path = path)
|
|
213
237
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
for dp in df:
|
|
218
|
-
...
|
|
238
|
+
for dp in df:
|
|
239
|
+
...
|
|
219
240
|
"""
|
|
220
241
|
|
|
221
242
|
def __init__(
|
|
@@ -227,13 +248,14 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
|
|
|
227
248
|
lib: Optional[Literal["PT", "TF"]] = None,
|
|
228
249
|
) -> None:
|
|
229
250
|
"""
|
|
230
|
-
:
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
251
|
+
Args:
|
|
252
|
+
architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
|
|
253
|
+
"db_mobilenet_v3_large". The full list can be found here:
|
|
254
|
+
<https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
|
|
255
|
+
path_weights: Path to the weights of the model
|
|
256
|
+
categories: A dict with the model output label and value
|
|
257
|
+
device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the DL library
|
|
258
|
+
lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
|
|
237
259
|
"""
|
|
238
260
|
super().__init__(categories, lib)
|
|
239
261
|
self.architecture = architecture
|
|
@@ -253,8 +275,11 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
|
|
|
253
275
|
"""
|
|
254
276
|
Prediction per image.
|
|
255
277
|
|
|
256
|
-
:
|
|
257
|
-
|
|
278
|
+
Args:
|
|
279
|
+
np_img: image as `np.array`
|
|
280
|
+
|
|
281
|
+
Returns:
|
|
282
|
+
A list of `DetectionResult`
|
|
258
283
|
"""
|
|
259
284
|
return doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)
|
|
260
285
|
|
|
@@ -284,17 +309,17 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
|
|
|
284
309
|
"""
|
|
285
310
|
Get the inner (wrapped) model.
|
|
286
311
|
|
|
287
|
-
:
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
312
|
+
Args:
|
|
313
|
+
architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
|
|
314
|
+
"db_mobilenet_v3_large". The full list can be found here:
|
|
315
|
+
<https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
|
|
316
|
+
path_weights: Path to the weights of the model
|
|
317
|
+
device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
|
|
318
|
+
lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used. Make
|
|
319
|
+
sure these variables are set. If not, use `deepdoctection.utils.env_info.auto_select_lib_and_device`
|
|
320
|
+
|
|
321
|
+
Returns:
|
|
322
|
+
Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
|
|
298
323
|
"""
|
|
299
324
|
doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
|
|
300
325
|
DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device, lib)
|
|
@@ -306,7 +331,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
|
|
|
306
331
|
|
|
307
332
|
class DoctrTextRecognizer(TextRecognizer):
|
|
308
333
|
"""
|
|
309
|
-
A deepdoctection wrapper of DocTr text recognition predictor. The base class is a TextRecognizer that takes
|
|
334
|
+
A deepdoctection wrapper of DocTr text recognition predictor. The base class is a `TextRecognizer` that takes
|
|
310
335
|
a batch of sub images (e.g. text lines from a text detector) and returns a list with text spotted in the sub images.
|
|
311
336
|
DocTr supports several text recognition models but provides only a subset of pre-trained models.
|
|
312
337
|
|
|
@@ -314,30 +339,30 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
314
339
|
described in “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to
|
|
315
340
|
Scene Text Recognition”. It can be used in either Tensorflow or PyTorch.
|
|
316
341
|
|
|
317
|
-
For more details please check the official DocTr documentation by Mindee: https://mindee.github.io/doctr
|
|
318
|
-
|
|
319
|
-
**Example:**
|
|
342
|
+
For more details please check the official DocTr documentation by Mindee: <https://mindee.github.io/doctr/>
|
|
320
343
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
344
|
+
Example:
|
|
345
|
+
```python
|
|
346
|
+
path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
|
|
347
|
+
/db_resnet50-ac60cadc.pt")
|
|
348
|
+
# Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
|
|
324
349
|
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
350
|
+
categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
|
|
351
|
+
det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
|
|
352
|
+
layout = ImageLayoutService(det,to_image=True, crop_image=True)
|
|
328
353
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
354
|
+
path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
|
|
355
|
+
/pt/crnn_vgg16_bn-9762b0b0.pt")
|
|
356
|
+
rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
|
|
357
|
+
text = TextExtractionService(rec, extract_from_roi="word")
|
|
333
358
|
|
|
334
|
-
|
|
359
|
+
analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
|
|
335
360
|
|
|
336
|
-
|
|
337
|
-
|
|
361
|
+
path = "/path/to/image_dir"
|
|
362
|
+
df = analyzer.analyze(path = path)
|
|
338
363
|
|
|
339
|
-
|
|
340
|
-
|
|
364
|
+
for dp in df:
|
|
365
|
+
...
|
|
341
366
|
"""
|
|
342
367
|
|
|
343
368
|
def __init__(
|
|
@@ -349,14 +374,15 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
349
374
|
path_config_json: Optional[PathLikeOrStr] = None,
|
|
350
375
|
) -> None:
|
|
351
376
|
"""
|
|
352
|
-
:
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
377
|
+
Args:
|
|
378
|
+
architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
|
|
379
|
+
"crnn_mobilenet_v3_small". The full list can be found here:
|
|
380
|
+
<https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
|
|
381
|
+
path_weights: Path to the weights of the model
|
|
382
|
+
device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
|
|
383
|
+
lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used.
|
|
384
|
+
path_config_json: Path to a `JSON` file containing the configuration of the model. Useful if you have
|
|
385
|
+
a model trained on custom vocab.
|
|
360
386
|
"""
|
|
361
387
|
|
|
362
388
|
self.lib = lib if lib is not None else self.auto_select_lib()
|
|
@@ -383,8 +409,11 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
383
409
|
"""
|
|
384
410
|
Prediction on a batch of text lines
|
|
385
411
|
|
|
386
|
-
:
|
|
387
|
-
|
|
412
|
+
Args:
|
|
413
|
+
images: list of tuples with the `annotation_id` of the sub image and a `np.array`
|
|
414
|
+
|
|
415
|
+
Returns:
|
|
416
|
+
A list of `DetectionResult`
|
|
388
417
|
"""
|
|
389
418
|
if images:
|
|
390
419
|
return doctr_predict_text(images, self.doctr_predictor, self.device, self.lib)
|
|
@@ -456,15 +485,18 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
456
485
|
"""
|
|
457
486
|
Get the inner (wrapped) model.
|
|
458
487
|
|
|
459
|
-
:
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
488
|
+
Args:
|
|
489
|
+
architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
|
|
490
|
+
"crnn_mobilenet_v3_small". The full list can be found here:
|
|
491
|
+
<https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
|
|
492
|
+
path_weights: Path to the weights of the model
|
|
493
|
+
device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
|
|
494
|
+
lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
|
|
495
|
+
path_config_json: Path to a `JSON` file containing the configuration of the model. Useful if you have
|
|
496
|
+
a model trained on custom vocab.
|
|
497
|
+
|
|
498
|
+
Returns:
|
|
499
|
+
Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
|
|
468
500
|
"""
|
|
469
501
|
doctr_predictor = DoctrTextRecognizer.build_model(architecture, lib, path_config_json)
|
|
470
502
|
DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device, lib)
|
|
@@ -472,12 +504,26 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
472
504
|
|
|
473
505
|
@staticmethod
|
|
474
506
|
def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
|
|
475
|
-
"""
|
|
507
|
+
"""
|
|
508
|
+
Returns the name of the model
|
|
509
|
+
|
|
510
|
+
Args:
|
|
511
|
+
path_weights: Path to the model weights
|
|
512
|
+
architecture: Architecture name
|
|
513
|
+
|
|
514
|
+
Returns:
|
|
515
|
+
The name of the model as string
|
|
516
|
+
"""
|
|
476
517
|
return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
|
|
477
518
|
|
|
478
519
|
@staticmethod
|
|
479
520
|
def auto_select_lib() -> Literal["PT", "TF"]:
|
|
480
|
-
"""
|
|
521
|
+
"""
|
|
522
|
+
Auto select the DL library based on the installed libraries and on environment variables
|
|
523
|
+
|
|
524
|
+
Returns:
|
|
525
|
+
Either "PT" or "TF" based on environment variables
|
|
526
|
+
"""
|
|
481
527
|
return auto_select_lib_for_doctr()
|
|
482
528
|
|
|
483
529
|
def clear_model(self) -> None:
|
|
@@ -500,17 +546,19 @@ class DocTrRotationTransformer(ImageTransformer):
|
|
|
500
546
|
This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
|
|
501
547
|
The class also provides methods for cloning itself and for getting the requirements of the OCR system.
|
|
502
548
|
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
549
|
+
Example:
|
|
550
|
+
```python
|
|
551
|
+
transformer = DocTrRotationTransformer()
|
|
552
|
+
detection_result = transformer.predict(np_img)
|
|
553
|
+
rotated_image = transformer.transform(np_img, detection_result)
|
|
554
|
+
```
|
|
507
555
|
"""
|
|
508
556
|
|
|
509
557
|
def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
|
|
510
558
|
"""
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
559
|
+
Args:
|
|
560
|
+
number_contours: the number of contours used for the orientation estimation
|
|
561
|
+
ratio_threshold_for_lines: this is the ratio w/h used to discriminate lines
|
|
514
562
|
"""
|
|
515
563
|
self.number_contours = number_contours
|
|
516
564
|
self.ratio_threshold_for_lines = ratio_threshold_for_lines
|
|
@@ -522,9 +570,12 @@ class DocTrRotationTransformer(ImageTransformer):
|
|
|
522
570
|
Applies the predicted rotation to the image, effectively rotating the image backwards.
|
|
523
571
|
This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
|
|
524
572
|
|
|
525
|
-
:
|
|
526
|
-
|
|
527
|
-
|
|
573
|
+
Args:
|
|
574
|
+
np_img: The input image as a `np.array`
|
|
575
|
+
specification: A `DetectionResult` object containing the predicted rotation angle
|
|
576
|
+
|
|
577
|
+
Returns:
|
|
578
|
+
The rotated image as a `np.array`
|
|
528
579
|
"""
|
|
529
580
|
return viz_handler.rotate_image(np_img, specification.angle) # type: ignore
|
|
530
581
|
|
|
@@ -16,8 +16,9 @@
|
|
|
16
16
|
# limitations under the License.
|
|
17
17
|
|
|
18
18
|
"""
|
|
19
|
-
|
|
19
|
+
Wrappers for fasttext language detection models
|
|
20
20
|
"""
|
|
21
|
+
|
|
21
22
|
from __future__ import annotations
|
|
22
23
|
|
|
23
24
|
import os
|
|
@@ -39,12 +40,13 @@ with try_import() as import_guard:
|
|
|
39
40
|
|
|
40
41
|
class FasttextLangDetectorMixin(LanguageDetector, ABC):
|
|
41
42
|
"""
|
|
42
|
-
Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
|
|
43
|
+
Base class for `Fasttext` language detection implementation. This class only implements the basic wrapper functions.
|
|
43
44
|
"""
|
|
44
45
|
|
|
45
46
|
def __init__(self, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]) -> None:
|
|
46
47
|
"""
|
|
47
|
-
:
|
|
48
|
+
Args:
|
|
49
|
+
categories: A `dict` with the model output label and value. We use as convention the `ISO 639-2` language code.
|
|
48
50
|
"""
|
|
49
51
|
self.categories = ModelCategories(init_categories=categories)
|
|
50
52
|
self.categories_orig = MappingProxyType({cat_orig: get_type(cat) for cat_orig, cat in categories_orig.items()})
|
|
@@ -52,8 +54,12 @@ class FasttextLangDetectorMixin(LanguageDetector, ABC):
|
|
|
52
54
|
def output_to_detection_result(self, output: Union[tuple[Any, Any]]) -> DetectionResult:
|
|
53
55
|
"""
|
|
54
56
|
Generating `DetectionResult` from model output
|
|
55
|
-
|
|
56
|
-
:
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
output: `FastText` model output
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
`DetectionResult` filled with `text` and `score`
|
|
57
63
|
"""
|
|
58
64
|
return DetectionResult(text=self.categories_orig[output[0][0]], score=output[1][0])
|
|
59
65
|
|
|
@@ -68,30 +74,30 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
|
|
|
68
74
|
Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
|
|
69
75
|
The background to the models can be found in the works:
|
|
70
76
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
77
|
+
Info:
|
|
78
|
+
[1] Joulin A, Grave E, Bojanowski P, Mikolov T, Bag of Tricks for Efficient Text Classification
|
|
79
|
+
[2] Joulin A, Grave E, Bojanowski P, Douze M, Jégou H, Mikolov T, FastText.zip: Compressing text classification
|
|
74
80
|
models
|
|
75
81
|
|
|
76
|
-
|
|
77
|
-
(<https://creativecommons.org/licenses/by-sa/3.0/>)
|
|
78
|
-
|
|
79
|
-
When loading the models via the ModelCatalog, the original and unmodified models are used.
|
|
82
|
+
When loading the models via the `ModelCatalog`, the original and unmodified models are used.
|
|
80
83
|
|
|
84
|
+
Example:
|
|
85
|
+
```python
|
|
81
86
|
path_weights = ModelCatalog.get_full_path_weights("fasttext/lid.176.bin")
|
|
82
87
|
profile = ModelCatalog.get_profile("fasttext/lid.176.bin")
|
|
83
88
|
lang_detector = FasttextLangDetector(path_weights,profile.categories)
|
|
84
89
|
detection_result = lang_detector.predict("some text in some language")
|
|
85
|
-
|
|
90
|
+
```
|
|
86
91
|
"""
|
|
87
92
|
|
|
88
93
|
def __init__(
|
|
89
94
|
self, path_weights: PathLikeOrStr, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]
|
|
90
95
|
):
|
|
91
96
|
"""
|
|
92
|
-
:
|
|
93
|
-
|
|
94
|
-
|
|
97
|
+
Args:
|
|
98
|
+
path_weights: path to model weights
|
|
99
|
+
categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
|
|
100
|
+
code.
|
|
95
101
|
"""
|
|
96
102
|
super().__init__(categories, categories_orig)
|
|
97
103
|
|
|
@@ -117,6 +123,8 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
|
|
|
117
123
|
def get_wrapped_model(path_weights: PathLikeOrStr) -> Any:
|
|
118
124
|
"""
|
|
119
125
|
Get the wrapped model
|
|
120
|
-
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
path_weights: path to model weights
|
|
121
129
|
"""
|
|
122
130
|
return load_model(os.fspath(path_weights))
|