deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +4 -2
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +919 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +162 -108
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +205 -119
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +26 -17
- deepdoctection/utils/env_info.py +86 -37
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -71
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.1.dist-info/METADATA +376 -0
- deepdoctection-0.43.1.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
deepdoctection/extern/doctrocr.py (+162 -108)

@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
+Wrappers for DocTr text line detection and text recognition models
 """
+
 from __future__ import annotations
 
 import os
@@ -106,13 +107,16 @@ def doctr_predict_text_lines(
     np_img: PixelValues, predictor: DetectionPredictor, device: Union[torch.device, tf.device], lib: Literal["TF", "PT"]
 ) -> list[DetectionResult]:
     """
-    Generating text line DetectionResult based on
+    Generating text line `DetectionResult` based on DocTr `DetectionPredictor`.
+
+    Args:
+        np_img: Image in `np.array`
+        predictor: `doctr.models.detection.predictor.DetectionPredictor`
+        device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
+        lib: "TF" or "PT"
 
-    :
-
-    :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
-    :param lib: "TF" or "PT"
-    :return: A list of text line detection results (without text).
+    Returns:
+        A list of text line `DetectionResult` (without text)
     """
     if lib == "TF":
         with device:
@@ -137,15 +141,18 @@ def doctr_predict_text(
     lib: Literal["TF", "PT"],
 ) -> list[DetectionResult]:
     """
-    Calls
-    returns the recognized text as DetectionResult
-
-    :
-
-
-
-
-
+    Calls DocTr text recognition model on a batch of `np.array`s (text lines predicted from a text line detector) and
+    returns the recognized text as `DetectionResult`
+
+    Args:
+        inputs: list of tuples containing the `annotation_id` of the input image and the `np.array` of the cropped
+            text line
+        predictor: `doctr.models.detection.predictor.RecognitionPredictor`
+        device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
+        lib: "TF" or "PT"
+
+    Returns:
+        A list of `DetectionResult` containing recognized text
     """
 
     uuids, images = list(zip(*inputs))
@@ -163,7 +170,7 @@ def doctr_predict_text(
 
 
 class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
-    """Base class for
+    """Base class for DocTr text line detector. This class only implements the basic wrapper functions"""
 
     def __init__(self, categories: Mapping[int, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
         self.categories = ModelCategories(init_categories=categories)
@@ -174,12 +181,26 @@ class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
 
     @staticmethod
    def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
-        """
+        """
+        Returns the name of the model
+
+        Args:
+            path_weights: Path to the model weights
+            architecture: Architecture name
+
+        Returns:
+            The name of the model as string
+        """
         return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
 
     @staticmethod
     def auto_select_lib() -> Literal["PT", "TF"]:
-        """
+        """
+        Auto select the DL library from the installed and from environment variables
+
+        Returns:
+            Either "PT" or "TF" based on environment variables
+        """
         return auto_select_lib_for_doctr()
 
 
@@ -194,28 +215,28 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
     Some other pre-trained models exist that have not been registered in `ModelCatalog`. Please check the DocTr library
     and organize the download of the pre-trained model by yourself.
 
-
+    Example:
+        ```python
+        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
+                                                                                  /db_resnet50-ac60cadc.pt")
+        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
 
-
-
-
+        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
+        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
+        layout = ImageLayoutService(det,to_image=True, crop_image=True)
 
-
-
-
+        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
+                                                                                     /pt/crnn_vgg16_bn-9762b0b0.pt")
+        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
+        text = TextExtractionService(rec, extract_from_roi="word")
 
-
-        /pt/crnn_vgg16_bn-9762b0b0.pt")
-        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
-        text = TextExtractionService(rec, extract_from_roi="word")
+        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
 
-
+        path = "/path/to/image_dir"
+        df = analyzer.analyze(path = path)
 
-
-
-
-        for dp in df:
-            ...
+        for dp in df:
+            ...
     """
 
     def __init__(
@@ -227,13 +248,14 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         lib: Optional[Literal["PT", "TF"]] = None,
     ) -> None:
         """
-        :
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                "db_mobilenet_v3_large". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
+            path_weights: Path to the weights of the model
+            categories: A dict with the model output label and value
+            device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the dll
+            lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
         """
         super().__init__(categories, lib)
         self.architecture = architecture
@@ -247,14 +269,20 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         if self.lib == "PT":
             self.device = get_torch_device(device)
 
-        self.doctr_predictor = self.get_wrapped_model(self.architecture,
+        self.doctr_predictor = self.get_wrapped_model(self.architecture,
+                                                      self.path_weights,
+                                                      self.device,
+                                                      self.lib)
 
     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
         Prediction per image.
 
-        :
-
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
         return doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)
 
@@ -284,17 +312,17 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         """
         Get the inner (wrapped) model.
 
-        :
-
-
-
-
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                "db_mobilenet_v3_large". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used. Make
+                sure, these variables are set. If not, use `deepdoctection.utils.env_info.auto_select_lib_and_device`
+
+        Returns:
+            Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
         """
         doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
         DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device, lib)
@@ -306,7 +334,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
 
 class DoctrTextRecognizer(TextRecognizer):
     """
-    A deepdoctection wrapper of DocTr text recognition predictor. The base class is a TextRecognizer that takes
+    A deepdoctection wrapper of DocTr text recognition predictor. The base class is a `TextRecognizer` that takes
     a batch of sub images (e.g. text lines from a text detector) and returns a list with text spotted in the sub images.
     DocTr supports several text recognition models but provides only a subset of pre-trained models.
 
@@ -314,30 +342,30 @@ class DoctrTextRecognizer(TextRecognizer):
     described in “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to
     Scene Text Recognition”. It can be used in either Tensorflow or PyTorch.
 
-    For more details please check the official DocTr documentation by Mindee: https://mindee.github.io/doctr
-
-    **Example:**
+    For more details please check the official DocTr documentation by Mindee: <https://mindee.github.io/doctr/>
 
-
-
-
+    Example:
+        ```python
+        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
+                                                                                  /db_resnet50-ac60cadc.pt")
+        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
 
-
-
-
+        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
+        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
+        layout = ImageLayoutService(det,to_image=True, crop_image=True)
 
-
-
-
-
+        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
+                                                                                     /pt/crnn_vgg16_bn-9762b0b0.pt")
+        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
+        text = TextExtractionService(rec, extract_from_roi="word")
 
-
+        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
 
-
-
+        path = "/path/to/image_dir"
+        df = analyzer.analyze(path = path)
 
-
-
+        for dp in df:
+            ...
     """
 
     def __init__(
@@ -349,14 +377,15 @@ class DoctrTextRecognizer(TextRecognizer):
         path_config_json: Optional[PathLikeOrStr] = None,
     ) -> None:
         """
-        :
-
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                "crnn_mobilenet_v3_small". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used.
+            path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
+                a model trained on custom vocab.
         """
 
         self.lib = lib if lib is not None else self.auto_select_lib()
@@ -383,8 +412,11 @@ class DoctrTextRecognizer(TextRecognizer):
         """
         Prediction on a batch of text lines
 
-        :
-
+        Args:
+            images: list of tuples with the `annotation_id` of the sub image and a `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
         if images:
             return doctr_predict_text(images, self.doctr_predictor, self.device, self.lib)
@@ -395,7 +427,7 @@ class DoctrTextRecognizer(TextRecognizer):
         return _get_doctr_requirements()
 
     def clone(self) -> DoctrTextRecognizer:
-        return self.__class__(self.architecture, self.path_weights, self.device, self.lib)
+        return self.__class__(self.architecture, self.path_weights, self.device, self.lib, self.path_config_json)
 
     @staticmethod
     def load_model(
@@ -456,15 +488,18 @@ class DoctrTextRecognizer(TextRecognizer):
         """
         Get the inner (wrapped) model.
 
-        :
-
-
-
-
-
-
-
-
+        Args:
+            architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                "crnn_mobilenet_v3_small". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+            path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
+                a model trained on custom vocab.
+
+        Returns:
+            Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
         """
         doctr_predictor = DoctrTextRecognizer.build_model(architecture, lib, path_config_json)
         DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device, lib)
@@ -472,12 +507,26 @@ class DoctrTextRecognizer(TextRecognizer):
 
     @staticmethod
     def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
-        """
+        """
+        Returns the name of the model
+
+        Args:
+            path_weights: Path to the model weights
+            architecture: Architecture name
+
+        Returns:
+            The name of the model as string
+        """
         return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
 
     @staticmethod
     def auto_select_lib() -> Literal["PT", "TF"]:
-        """
+        """
+        Auto select the DL library from the installed and from environment variables
+
+        Returns:
+            Either "PT" or "TF" based on environment variables
+        """
         return auto_select_lib_for_doctr()
 
     def clear_model(self) -> None:
@@ -500,17 +549,19 @@ class DocTrRotationTransformer(ImageTransformer):
     This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
     The class also provides methods for cloning itself and for getting the requirements of the OCR system.
 
-
-
-
-
+    Example:
+        ```python
+        transformer = DocTrRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+        ```
     """
 
     def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
         """
-
-
-
+        Args:
+            number_contours: the number of contours used for the orientation estimation
+            ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
         """
         self.number_contours = number_contours
         self.ratio_threshold_for_lines = ratio_threshold_for_lines
@@ -522,9 +573,12 @@ class DocTrRotationTransformer(ImageTransformer):
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
 
-        :
-
-
+        Args:
+            np_img: The input image as a `np.array`
+            specification: A `DetectionResult` object containing the predicted rotation angle
+
+        Returns:
+            The rotated image as a `np.array`
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
 
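The doctrocr.py changes above are almost entirely a docstring migration from reST `:param:` fields to Google style, plus one behavioural fix: `clone()` now propagates `path_config_json`. For orientation, here is the pipeline that the new docstring examples describe, as a minimal runnable sketch; the `dd.`-prefixed top-level imports and the `reset_state()` call before iteration are assumptions based on deepdoctection's usual public API, not part of this diff.

```python
import deepdoctection as dd

# Text line detection (PyTorch weights; the docstring names
# "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow)
path_weights_tl = dd.ModelDownloadManager.maybe_download_weights_and_configs(
    "doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt"
)
categories = dd.ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
det = dd.DoctrTextlineDetector("db_resnet50", path_weights_tl, categories, "cpu")
layout = dd.ImageLayoutService(det, to_image=True, crop_image=True)

# Text recognition on the cropped text lines
path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs(
    "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt"
)
rec = dd.DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
text = dd.TextExtractionService(rec, extract_from_roi="word")

# Wire both components into a pipeline and run it over a directory of images
analyzer = dd.DoctectionPipe(pipeline_component_list=[layout, text])
df = analyzer.analyze(path="/path/to/image_dir")
df.reset_state()  # assumption: dataflows must be reset before iteration
for dp in df:
    ...  # dp is a parsed page view
```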
deepdoctection/extern/fastlang.py (+25 -17)

@@ -16,8 +16,9 @@
 # limitations under the License.
 
 """
-
+Wrappers for fasttext language detection models
 """
+
 from __future__ import annotations
 
 import os
@@ -39,12 +40,13 @@ with try_import() as import_guard:
 
 class FasttextLangDetectorMixin(LanguageDetector, ABC):
     """
-    Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
+    Base class for `Fasttext` language detection implementation. This class only implements the basic wrapper functions.
     """
 
     def __init__(self, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]) -> None:
         """
-        :
+        Args:
+            categories: A `dict` with the model output label and value. We use as convention the `ISO 639-2` language
         """
         self.categories = ModelCategories(init_categories=categories)
         self.categories_orig = MappingProxyType({cat_orig: get_type(cat) for cat_orig, cat in categories_orig.items()})
@@ -52,8 +54,12 @@ class FasttextLangDetectorMixin(LanguageDetector, ABC):
     def output_to_detection_result(self, output: Union[tuple[Any, Any]]) -> DetectionResult:
         """
         Generating `DetectionResult` from model output
-
-        :
+
+        Args:
+            output: `FastText` model output
+
+        Returns:
+            `DetectionResult` filled with `text` and `score`
         """
         return DetectionResult(text=self.categories_orig[output[0][0]], score=output[1][0])
 
@@ -68,30 +74,30 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
     Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
     The background to the models can be found in the works:
 
-
-
-
+    Info:
+        [1] Joulin A, Grave E, Bojanowski P, Mikolov T, Bag of Tricks for Efficient Text Classification
+        [2] Joulin A, Grave E, Bojanowski P, Douze M, Jégou H, Mikolov T, FastText.zip: Compressing text classification
     models
 
-
-    (<https://creativecommons.org/licenses/by-sa/3.0/>)
-
-    When loading the models via the ModelCatalog, the original and unmodified models are used.
+    When loading the models via the `ModelCatalog`, the original and unmodified models are used.
 
+    Example:
+        ```python
     path_weights = ModelCatalog.get_full_path_weights("fasttext/lid.176.bin")
     profile = ModelCatalog.get_profile("fasttext/lid.176.bin")
     lang_detector = FasttextLangDetector(path_weights,profile.categories)
     detection_result = lang_detector.predict("some text in some language")
-
+        ```
     """
 
     def __init__(
         self, path_weights: PathLikeOrStr, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]
     ):
         """
-        :
-
-
+        Args:
+            path_weights: path to model weights
+            categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
+                code.
         """
         super().__init__(categories, categories_orig)
 
@@ -117,6 +123,8 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
     def get_wrapped_model(path_weights: PathLikeOrStr) -> Any:
         """
         Get the wrapped model
-
+
+        Args:
+            path_weights: path to model weights
         """
         return load_model(os.fspath(path_weights))