deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124) hide show
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -16,8 +16,9 @@
16
16
  # limitations under the License.
17
17
 
18
18
  """
19
- Deepdoctection wrappers for DocTr OCR text line detection and text recognition models
19
+ Wrappers for DocTr text line detection and text recognition models
20
20
  """
21
+
21
22
  from __future__ import annotations
22
23
 
23
24
  import os
@@ -106,13 +107,16 @@ def doctr_predict_text_lines(
106
107
  np_img: PixelValues, predictor: DetectionPredictor, device: Union[torch.device, tf.device], lib: Literal["TF", "PT"]
107
108
  ) -> list[DetectionResult]:
108
109
  """
109
- Generating text line DetectionResult based on Doctr DetectionPredictor.
110
+ Generating text line `DetectionResult` based on DocTr `DetectionPredictor`.
111
+
112
+ Args:
113
+ np_img: Image in `np.array`
114
+ predictor: `doctr.models.detection.predictor.DetectionPredictor`
115
+ device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
116
+ lib: "TF" or "PT"
110
117
 
111
- :param np_img: Image in np.array.
112
- :param predictor: `doctr.models.detection.predictor.DetectionPredictor`
113
- :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
114
- :param lib: "TF" or "PT"
115
- :return: A list of text line detection results (without text).
118
+ Returns:
119
+ A list of text line `DetectionResult` (without text)
116
120
  """
117
121
  if lib == "TF":
118
122
  with device:
@@ -137,15 +141,18 @@ def doctr_predict_text(
137
141
  lib: Literal["TF", "PT"],
138
142
  ) -> list[DetectionResult]:
139
143
  """
140
- Calls Doctr text recognition model on a batch of numpy arrays (text lines predicted from a text line detector) and
141
- returns the recognized text as DetectionResult
142
-
143
- :param inputs: list of tuples containing the annotation_id of the input image and the numpy array of the cropped
144
- text line
145
- :param predictor: `doctr.models.detection.predictor.RecognitionPredictor`
146
- :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
147
- :param lib: "TF" or "PT"
148
- :return: A list of DetectionResult containing recognized text.
144
+ Calls DocTr text recognition model on a batch of `np.array`s (text lines predicted from a text line detector) and
145
+ returns the recognized text as `DetectionResult`
146
+
147
+ Args:
148
+ inputs: list of tuples containing the `annotation_id` of the input image and the `np.array` of the cropped
149
+ text line
150
+ predictor: `doctr.models.recognition.predictor.RecognitionPredictor`
151
+ device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
152
+ lib: "TF" or "PT"
153
+
154
+ Returns:
155
+ A list of `DetectionResult` containing recognized text
149
156
  """
150
157
 
151
158
  uuids, images = list(zip(*inputs))
@@ -163,7 +170,7 @@ def doctr_predict_text(
163
170
 
164
171
 
165
172
  class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
166
- """Base class for Doctr textline detector. This class only implements the basic wrapper functions"""
173
+ """Base class for DocTr text line detector. This class only implements the basic wrapper functions"""
167
174
 
168
175
  def __init__(self, categories: Mapping[int, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
169
176
  self.categories = ModelCategories(init_categories=categories)
@@ -174,12 +181,26 @@ class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
174
181
 
175
182
  @staticmethod
176
183
  def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
177
- """Returns the name of the model"""
184
+ """
185
+ Returns the name of the model
186
+
187
+ Args:
188
+ path_weights: Path to the model weights
189
+ architecture: Architecture name
190
+
191
+ Returns:
192
+ The name of the model as string
193
+ """
178
194
  return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
179
195
 
180
196
  @staticmethod
181
197
  def auto_select_lib() -> Literal["PT", "TF"]:
182
- """Auto select the DL library from the installed and from environment variables"""
198
+ """
199
+ Auto-select the DL library based on the installed libraries and on environment variables
200
+
201
+ Returns:
202
+ Either "PT" or "TF" based on environment variables
203
+ """
183
204
  return auto_select_lib_for_doctr()
184
205
 
185
206
 
@@ -194,28 +215,28 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
194
215
  Some other pre-trained models exist that have not been registered in `ModelCatalog`. Please check the DocTr library
195
216
  and organize the download of the pre-trained model by yourself.
196
217
 
197
- **Example:**
218
+ Example:
219
+ ```python
220
+ path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
221
+ /db_resnet50-ac60cadc.pt")
222
+ # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
198
223
 
199
- path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
200
- /db_resnet50-ac60cadc.pt")
201
- # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
224
+ categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
225
+ det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
226
+ layout = ImageLayoutService(det,to_image=True, crop_image=True)
202
227
 
203
- categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
204
- det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
205
- layout = ImageLayoutService(det,to_image=True, crop_image=True)
228
+ path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
229
+ /pt/crnn_vgg16_bn-9762b0b0.pt")
230
+ rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
231
+ text = TextExtractionService(rec, extract_from_roi="word")
206
232
 
207
- path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
208
- /pt/crnn_vgg16_bn-9762b0b0.pt")
209
- rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
210
- text = TextExtractionService(rec, extract_from_roi="word")
233
+ analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
211
234
 
212
- analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
235
+ path = "/path/to/image_dir"
236
+ df = analyzer.analyze(path = path)
213
237
 
214
- path = "/path/to/image_dir"
215
- df = analyzer.analyze(path = path)
216
-
217
- for dp in df:
218
- ...
238
+ for dp in df:
239
+ ...
219
240
  """
220
241
 
221
242
  def __init__(
@@ -227,13 +248,14 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
227
248
  lib: Optional[Literal["PT", "TF"]] = None,
228
249
  ) -> None:
229
250
  """
230
- :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
231
- "db_mobilenet_v3_large". The full list can be found here:
232
- https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
233
- :param path_weights: Path to the weights of the model
234
- :param categories: A dict with the model output label and value
235
- :param device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the dll
236
- :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
251
+ Args:
252
+ architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
253
+ "db_mobilenet_v3_large". The full list can be found here:
254
+ <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
255
+ path_weights: Path to the weights of the model
256
+ categories: A dict with the model output label and value
257
+ device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the selected deep learning library (see `lib`)
258
+ lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
237
259
  """
238
260
  super().__init__(categories, lib)
239
261
  self.architecture = architecture
@@ -253,8 +275,11 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
253
275
  """
254
276
  Prediction per image.
255
277
 
256
- :param np_img: image as numpy array
257
- :return: A list of DetectionResult
278
+ Args:
279
+ np_img: image as `np.array`
280
+
281
+ Returns:
282
+ A list of `DetectionResult`
258
283
  """
259
284
  return doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)
260
285
 
@@ -284,17 +309,17 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
284
309
  """
285
310
  Get the inner (wrapped) model.
286
311
 
287
- :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
288
- "db_mobilenet_v3_large". The full list can be found here:
289
- https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
290
- :param path_weights: Path to the weights of the model
291
- :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
292
- :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used. Make sure,
293
- these variables are set. If not, use
294
-
295
- deepdoctection.utils.env_info.auto_select_lib_and_device
296
-
297
- :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
312
+ Args:
313
+ architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
314
+ "db_mobilenet_v3_large". The full list can be found here:
315
+ <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
316
+ path_weights: Path to the weights of the model
317
+ device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
318
+ lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used. Make
319
+ sure these variables are set. If not, use `deepdoctection.utils.env_info.auto_select_lib_and_device`
320
+
321
+ Returns:
322
+ Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
298
323
  """
299
324
  doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
300
325
  DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device, lib)
@@ -306,7 +331,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
306
331
 
307
332
  class DoctrTextRecognizer(TextRecognizer):
308
333
  """
309
- A deepdoctection wrapper of DocTr text recognition predictor. The base class is a TextRecognizer that takes
334
+ A deepdoctection wrapper of DocTr text recognition predictor. The base class is a `TextRecognizer` that takes
310
335
  a batch of sub images (e.g. text lines from a text detector) and returns a list with text spotted in the sub images.
311
336
  DocTr supports several text recognition models but provides only a subset of pre-trained models.
312
337
 
@@ -314,30 +339,30 @@ class DoctrTextRecognizer(TextRecognizer):
314
339
  described in “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to
315
340
  Scene Text Recognition”. It can be used in either Tensorflow or PyTorch.
316
341
 
317
- For more details please check the official DocTr documentation by Mindee: https://mindee.github.io/doctr/
318
-
319
- **Example:**
342
+ For more details please check the official DocTr documentation by Mindee: <https://mindee.github.io/doctr/>
320
343
 
321
- path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
322
- /db_resnet50-ac60cadc.pt")
323
- # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
344
+ Example:
345
+ ```python
346
+ path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
347
+ /db_resnet50-ac60cadc.pt")
348
+ # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
324
349
 
325
- categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
326
- det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
327
- layout = ImageLayoutService(det,to_image=True, crop_image=True)
350
+ categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
351
+ det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
352
+ layout = ImageLayoutService(det,to_image=True, crop_image=True)
328
353
 
329
- path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
330
- /pt/crnn_vgg16_bn-9762b0b0.pt")
331
- rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
332
- text = TextExtractionService(rec, extract_from_roi="word")
354
+ path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
355
+ /pt/crnn_vgg16_bn-9762b0b0.pt")
356
+ rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
357
+ text = TextExtractionService(rec, extract_from_roi="word")
333
358
 
334
- analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
359
+ analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
335
360
 
336
- path = "/path/to/image_dir"
337
- df = analyzer.analyze(path = path)
361
+ path = "/path/to/image_dir"
362
+ df = analyzer.analyze(path = path)
338
363
 
339
- for dp in df:
340
- ...
364
+ for dp in df:
365
+ ...
341
366
  """
342
367
 
343
368
  def __init__(
@@ -349,14 +374,15 @@ class DoctrTextRecognizer(TextRecognizer):
349
374
  path_config_json: Optional[PathLikeOrStr] = None,
350
375
  ) -> None:
351
376
  """
352
- :param architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
353
- "crnn_mobilenet_v3_small". The full list can be found here:
354
- https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
355
- :param path_weights: Path to the weights of the model
356
- :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
357
- :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
358
- :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
359
- a model trained on custom vocab.
377
+ Args:
378
+ architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
379
+ "crnn_mobilenet_v3_small". The full list can be found here:
380
+ <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
381
+ path_weights: Path to the weights of the model
382
+ device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
383
+ lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used.
384
+ path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
385
+ a model trained on custom vocab.
360
386
  """
361
387
 
362
388
  self.lib = lib if lib is not None else self.auto_select_lib()
@@ -383,8 +409,11 @@ class DoctrTextRecognizer(TextRecognizer):
383
409
  """
384
410
  Prediction on a batch of text lines
385
411
 
386
- :param images: list of tuples with the annotation_id of the sub image and a numpy array
387
- :return: A list of DetectionResult
412
+ Args:
413
+ images: list of tuples with the `annotation_id` of the sub image and a `np.array`
414
+
415
+ Returns:
416
+ A list of `DetectionResult`
388
417
  """
389
418
  if images:
390
419
  return doctr_predict_text(images, self.doctr_predictor, self.device, self.lib)
@@ -456,15 +485,18 @@ class DoctrTextRecognizer(TextRecognizer):
456
485
  """
457
486
  Get the inner (wrapped) model.
458
487
 
459
- :param architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
460
- "crnn_mobilenet_v3_small". The full list can be found here:
461
- https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
462
- :param path_weights: Path to the weights of the model
463
- :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
464
- :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
465
- :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
466
- a model trained on custom vocab.
467
- :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
488
+ Args:
489
+ architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
490
+ "crnn_mobilenet_v3_small". The full list can be found here:
491
+ <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
492
+ path_weights: Path to the weights of the model
493
+ device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
494
+ lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
495
+ path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
496
+ a model trained on custom vocab.
497
+
498
+ Returns:
499
+ Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
468
500
  """
469
501
  doctr_predictor = DoctrTextRecognizer.build_model(architecture, lib, path_config_json)
470
502
  DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device, lib)
@@ -472,12 +504,26 @@ class DoctrTextRecognizer(TextRecognizer):
472
504
 
473
505
  @staticmethod
474
506
  def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
475
- """Returns the name of the model"""
507
+ """
508
+ Returns the name of the model
509
+
510
+ Args:
511
+ path_weights: Path to the model weights
512
+ architecture: Architecture name
513
+
514
+ Returns:
515
+ The name of the model as string
516
+ """
476
517
  return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])
477
518
 
478
519
  @staticmethod
479
520
  def auto_select_lib() -> Literal["PT", "TF"]:
480
- """Auto select the DL library from the installed and from environment variables"""
521
+ """
522
+ Auto-select the DL library based on the installed libraries and on environment variables
523
+
524
+ Returns:
525
+ Either "PT" or "TF" based on environment variables
526
+ """
481
527
  return auto_select_lib_for_doctr()
482
528
 
483
529
  def clear_model(self) -> None:
@@ -500,17 +546,19 @@ class DocTrRotationTransformer(ImageTransformer):
500
546
  This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
501
547
  The class also provides methods for cloning itself and for getting the requirements of the OCR system.
502
548
 
503
- **Example:**
504
- transformer = DocTrRotationTransformer()
505
- detection_result = transformer.predict(np_img)
506
- rotated_image = transformer.transform(np_img, detection_result)
549
+ Example:
550
+ ```python
551
+ transformer = DocTrRotationTransformer()
552
+ detection_result = transformer.predict(np_img)
553
+ rotated_image = transformer.transform(np_img, detection_result)
554
+ ```
507
555
  """
508
556
 
509
557
  def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
510
558
  """
511
-
512
- :param number_contours: the number of contours used for the orientation estimation
513
- :param ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
559
+ Args:
560
+ number_contours: the number of contours used for the orientation estimation
561
+ ratio_threshold_for_lines: this is the ratio w/h used to discriminate lines
514
562
  """
515
563
  self.number_contours = number_contours
516
564
  self.ratio_threshold_for_lines = ratio_threshold_for_lines
@@ -522,9 +570,12 @@ class DocTrRotationTransformer(ImageTransformer):
522
570
  Applies the predicted rotation to the image, effectively rotating the image backwards.
523
571
  This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
524
572
 
525
- :param np_img: The input image as a numpy array.
526
- :param specification: A `DetectionResult` object containing the predicted rotation angle.
527
- :return: The rotated image as a numpy array.
573
+ Args:
574
+ np_img: The input image as a `np.array`
575
+ specification: A `DetectionResult` object containing the predicted rotation angle
576
+
577
+ Returns:
578
+ The rotated image as a `np.array`
528
579
  """
529
580
  return viz_handler.rotate_image(np_img, specification.angle) # type: ignore
530
581
 
@@ -16,8 +16,9 @@
16
16
  # limitations under the License.
17
17
 
18
18
  """
19
- Deepdoctection wrappers for fasttext language detection models
19
+ Wrappers for fasttext language detection models
20
20
  """
21
+
21
22
  from __future__ import annotations
22
23
 
23
24
  import os
@@ -39,12 +40,13 @@ with try_import() as import_guard:
39
40
 
40
41
  class FasttextLangDetectorMixin(LanguageDetector, ABC):
41
42
  """
42
- Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
43
+ Base class for `Fasttext` language detection implementation. This class only implements the basic wrapper functions.
43
44
  """
44
45
 
45
46
  def __init__(self, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]) -> None:
46
47
  """
47
- :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
48
+ Args:
49
+ categories: A `dict` with the model output label and value. We use as convention the `ISO 639-2` language code
48
50
  """
49
51
  self.categories = ModelCategories(init_categories=categories)
50
52
  self.categories_orig = MappingProxyType({cat_orig: get_type(cat) for cat_orig, cat in categories_orig.items()})
@@ -52,8 +54,12 @@ class FasttextLangDetectorMixin(LanguageDetector, ABC):
52
54
  def output_to_detection_result(self, output: Union[tuple[Any, Any]]) -> DetectionResult:
53
55
  """
54
56
  Generating `DetectionResult` from model output
55
- :param output: FastText model output
56
- :return: `DetectionResult` filled with `text` and `score`
57
+
58
+ Args:
59
+ output: `FastText` model output
60
+
61
+ Returns:
62
+ `DetectionResult` filled with `text` and `score`
57
63
  """
58
64
  return DetectionResult(text=self.categories_orig[output[0][0]], score=output[1][0])
59
65
 
@@ -68,30 +74,30 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
68
74
  Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
69
75
  The background to the models can be found in the works:
70
76
 
71
- [1] Joulin A, Grave E, Bojanowski P, Mikolov T, Bag of Tricks for Efficient Text Classification
72
-
73
- [2] Joulin A, Grave E, Bojanowski P, Douze M, Jégou H, Mikolov T, FastText.zip: Compressing text classification
77
+ Info:
78
+ [1] Joulin A, Grave E, Bojanowski P, Mikolov T, Bag of Tricks for Efficient Text Classification
79
+ [2] Joulin A, Grave E, Bojanowski P, Douze M, Jégou H, Mikolov T, FastText.zip: Compressing text classification
74
80
  models
75
81
 
76
- The models are distributed under the Creative Commons Attribution-Share-Alike License 3.0.
77
- (<https://creativecommons.org/licenses/by-sa/3.0/>)
78
-
79
- When loading the models via the ModelCatalog, the original and unmodified models are used.
82
+ When loading the models via the `ModelCatalog`, the original and unmodified models are used.
80
83
 
84
+ Example:
85
+ ```python
81
86
  path_weights = ModelCatalog.get_full_path_weights("fasttext/lid.176.bin")
82
87
  profile = ModelCatalog.get_profile("fasttext/lid.176.bin")
83
88
  lang_detector = FasttextLangDetector(path_weights,profile.categories)
84
89
  detection_result = lang_detector.predict("some text in some language")
85
-
90
+ ```
86
91
  """
87
92
 
88
93
  def __init__(
89
94
  self, path_weights: PathLikeOrStr, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]
90
95
  ):
91
96
  """
92
- :param path_weights: path to model weights
93
- :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
94
- code.
97
+ Args:
98
+ path_weights: path to model weights
99
+ categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
100
+ code.
95
101
  """
96
102
  super().__init__(categories, categories_orig)
97
103
 
@@ -117,6 +123,8 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
117
123
  def get_wrapped_model(path_weights: PathLikeOrStr) -> Any:
118
124
  """
119
125
  Get the wrapped model
120
- :param path_weights: path to model weights
126
+
127
+ Args:
128
+ path_weights: path to model weights
121
129
  """
122
130
  return load_model(os.fspath(path_weights))