deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deepdoctection might be problematic.

Files changed (124)
  1. deepdoctection/__init__.py +4 -2
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +919 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +162 -108
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +205 -119
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +26 -17
  104. deepdoctection/utils/env_info.py +86 -37
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -71
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.1.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.1.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
deepdoctection/extern/doctrocr.py

@@ -16,8 +16,9 @@
 # limitations under the License.

 """
-Deepdoctection wrappers for DocTr OCR text line detection and text recognition models
+Wrappers for DocTr text line detection and text recognition models
 """
+
 from __future__ import annotations

 import os
@@ -106,13 +107,16 @@ def doctr_predict_text_lines(
     np_img: PixelValues, predictor: DetectionPredictor, device: Union[torch.device, tf.device], lib: Literal["TF", "PT"]
 ) -> list[DetectionResult]:
     """
-    Generating text line DetectionResult based on Doctr DetectionPredictor.
+    Generating text line `DetectionResult` based on DocTr `DetectionPredictor`.
+
+    Args:
+        np_img: Image in `np.array`
+        predictor: `doctr.models.detection.predictor.DetectionPredictor`
+        device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
+        lib: "TF" or "PT"

-    :param np_img: Image in np.array.
-    :param predictor: `doctr.models.detection.predictor.DetectionPredictor`
-    :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
-    :param lib: "TF" or "PT"
-    :return: A list of text line detection results (without text).
+    Returns:
+        A list of text line `DetectionResult` (without text)
     """
     if lib == "TF":
         with device:
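A minimal sketch of how this helper can be driven in the PyTorch branch, assuming doctr and deepdoctection are installed. The `pretrained=True` shortcut and the blank image are illustrative stand-ins for the catalog weights that `DoctrTextlineDetector.get_wrapped_model` loads further down:

```python
import numpy as np
import torch
from doctr.models import detection_predictor

from deepdoctection.extern.doctrocr import doctr_predict_text_lines

# Build a raw DocTr predictor the same way get_wrapped_model does below,
# except that weights come from doctr's hub instead of the ModelCatalog.
predictor = detection_predictor(arch="db_resnet50", pretrained=True)

np_img = np.zeros((1024, 768, 3), dtype=np.uint8)  # placeholder page image
results = doctr_predict_text_lines(np_img, predictor, torch.device("cpu"), lib="PT")
for res in results:
    print(res.box, res.score)  # text line boxes only, no text yet
```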
@@ -137,15 +141,18 @@ def doctr_predict_text(
     lib: Literal["TF", "PT"],
 ) -> list[DetectionResult]:
     """
-    Calls Doctr text recognition model on a batch of numpy arrays (text lines predicted from a text line detector) and
-    returns the recognized text as DetectionResult
-
-    :param inputs: list of tuples containing the annotation_id of the input image and the numpy array of the cropped
-                   text line
-    :param predictor: `doctr.models.detection.predictor.RecognitionPredictor`
-    :param device: Will only be used in tensorflow settings. Either /gpu:0 or /cpu:0
-    :param lib: "TF" or "PT"
-    :return: A list of DetectionResult containing recognized text.
+    Calls DocTr text recognition model on a batch of `np.array`s (text lines predicted from a text line detector) and
+    returns the recognized text as `DetectionResult`
+
+    Args:
+        inputs: list of tuples containing the `annotation_id` of the input image and the `np.array` of the cropped
+            text line
+        predictor: `doctr.models.detection.predictor.RecognitionPredictor`
+        device: Will only be used in Tensorflow settings. Either `/gpu:0` or `/cpu:0`
+        lib: "TF" or "PT"
+
+    Returns:
+        A list of `DetectionResult` containing recognized text
     """

     uuids, images = list(zip(*inputs))
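The `inputs` batch format is worth spelling out; a small sketch with placeholder ids and blank crops:

```python
import numpy as np

# Each entry pairs the annotation_id of a cropped text line with its pixel
# values; the function unzips the batch via list(zip(*inputs)).
inputs = [
    ("uuid-of-line-1", np.zeros((32, 128, 3), dtype=np.uint8)),
    ("uuid-of-line-2", np.zeros((32, 256, 3), dtype=np.uint8)),
]
```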
@@ -163,7 +170,7 @@ def doctr_predict_text(


 class DoctrTextlineDetectorMixin(ObjectDetector, ABC):
-    """Base class for Doctr textline detector. This class only implements the basic wrapper functions"""
+    """Base class for DocTr text line detector. This class only implements the basic wrapper functions"""

     def __init__(self, categories: Mapping[int, TypeOrStr], lib: Optional[Literal["PT", "TF"]] = None):
         self.categories = ModelCategories(init_categories=categories)
@@ -174,12 +181,26 @@ class DoctrTextlineDetectorMixin(ObjectDetector, ABC):

     @staticmethod
     def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
-        """Returns the name of the model"""
+        """
+        Returns the name of the model
+
+        Args:
+            path_weights: Path to the model weights
+            architecture: Architecture name
+
+        Returns:
+            The name of the model as string
+        """
         return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])

     @staticmethod
     def auto_select_lib() -> Literal["PT", "TF"]:
-        """Auto select the DL library from the installed and from environment variables"""
+        """
+        Auto select the DL library from the installed and from environment variables
+
+        Returns:
+            Either "PT" or "TF" based on environment variables
+        """
         return auto_select_lib_for_doctr()


@@ -194,28 +215,28 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
     Some other pre-trained models exist that have not been registered in `ModelCatalog`. Please check the DocTr library
     and organize the download of the pre-trained model by yourself.

-    **Example:**
+    Example:
+        ```python
+        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
+        /db_resnet50-ac60cadc.pt")
+        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow

-        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
-        /db_resnet50-ac60cadc.pt")
-        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
+        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
+        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
+        layout = ImageLayoutService(det,to_image=True, crop_image=True)

-        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
-        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
-        layout = ImageLayoutService(det,to_image=True, crop_image=True)
+        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
+        /pt/crnn_vgg16_bn-9762b0b0.pt")
+        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
+        text = TextExtractionService(rec, extract_from_roi="word")

-        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
-        /pt/crnn_vgg16_bn-9762b0b0.pt")
-        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
-        text = TextExtractionService(rec, extract_from_roi="word")
+        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])

-        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
+        path = "/path/to/image_dir"
+        df = analyzer.analyze(path = path)

-        path = "/path/to/image_dir"
-        df = analyzer.analyze(path = path)
-
-        for dp in df:
-            ...
+        for dp in df:
+            ...
     """

     def __init__(
@@ -227,13 +248,14 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         lib: Optional[Literal["PT", "TF"]] = None,
     ) -> None:
         """
-        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
-                             "db_mobilenet_v3_large". The full list can be found here:
-                             https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
-        :param path_weights: Path to the weights of the model
-        :param categories: A dict with the model output label and value
-        :param device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the dll
-        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+        Args:
+            architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                "db_mobilenet_v3_large". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
+            path_weights: Path to the weights of the model
+            categories: A dict with the model output label and value
+            device: "cpu" or "cuda" or any tf.device or torch.device. The device must be compatible with the dll
+            lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
         """
         super().__init__(categories, lib)
         self.architecture = architecture
@@ -247,14 +269,20 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         if self.lib == "PT":
             self.device = get_torch_device(device)

-        self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device, self.lib)
+        self.doctr_predictor = self.get_wrapped_model(self.architecture,
+                                                      self.path_weights,
+                                                      self.device,
+                                                      self.lib)

     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
         Prediction per image.

-        :param np_img: image as numpy array
-        :return: A list of DetectionResult
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
         return doctr_predict_text_lines(np_img, self.doctr_predictor, self.device, self.lib)

@@ -284,17 +312,17 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
         """
         Get the inner (wrapped) model.

-        :param architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
-                             "db_mobilenet_v3_large". The full list can be found here:
-                             https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20
-        :param path_weights: Path to the weights of the model
-        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
-        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used. Make sure,
-                    these variables are set. If not, use
-
-                    deepdoctection.utils.env_info.auto_select_lib_and_device
-
-        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        Args:
+            architecture: DocTR supports various text line detection models, e.g. "db_resnet50",
+                "db_mobilenet_v3_large". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/detection/zoo.py#L20>
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used. Make
+                sure, these variables are set. If not, use `deepdoctection.utils.env_info.auto_select_lib_and_device`
+
+        Returns:
+            Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
         """
         doctr_predictor = detection_predictor(arch=architecture, pretrained=False, pretrained_backbone=False)
         DoctrTextlineDetector.load_model(path_weights, doctr_predictor, device, lib)
@@ -306,7 +334,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):


 class DoctrTextRecognizer(TextRecognizer):
-    A deepdoctection wrapper of DocTr text recognition predictor. The base class is a TextRecognizer that takes
+    A deepdoctection wrapper of DocTr text recognition predictor. The base class is a `TextRecognizer` that takes
     a batch of sub images (e.g. text lines from a text detector) and returns a list with text spotted in the sub images.
     DocTr supports several text recognition models but provides only a subset of pre-trained models.

@@ -314,30 +342,30 @@ class DoctrTextRecognizer(TextRecognizer):
     described in “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to
     Scene Text Recognition”. It can be used in either Tensorflow or PyTorch.

-    For more details please check the official DocTr documentation by Mindee: https://mindee.github.io/doctr/
-
-    **Example:**
+    For more details please check the official DocTr documentation by Mindee: <https://mindee.github.io/doctr/>

-        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
-        /db_resnet50-ac60cadc.pt")
-        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow
+    Example:
+        ```python
+        path_weights_tl = ModelDownloadManager.maybe_download_weights_and_configs("doctr/db_resnet50/pt
+        /db_resnet50-ac60cadc.pt")
+        # Use "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip" for Tensorflow

-        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
-        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
-        layout = ImageLayoutService(det,to_image=True, crop_image=True)
+        categories = ModelCatalog.get_profile("doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt").categories
+        det = DoctrTextlineDetector("db_resnet50",path_weights_tl,categories,"cpu")
+        layout = ImageLayoutService(det,to_image=True, crop_image=True)

-        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
-        /pt/crnn_vgg16_bn-9762b0b0.pt")
-        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
-        text = TextExtractionService(rec, extract_from_roi="word")
+        path_weights_tr = dd.ModelDownloadManager.maybe_download_weights_and_configs("doctr/crnn_vgg16_bn
+        /pt/crnn_vgg16_bn-9762b0b0.pt")
+        rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu")
+        text = TextExtractionService(rec, extract_from_roi="word")

-        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])
+        analyzer = DoctectionPipe(pipeline_component_list=[layout,text])

-        path = "/path/to/image_dir"
-        df = analyzer.analyze(path = path)
+        path = "/path/to/image_dir"
+        df = analyzer.analyze(path = path)

-        for dp in df:
-            ...
+        for dp in df:
+            ...
     """

     def __init__(
@@ -349,14 +377,15 @@ class DoctrTextRecognizer(TextRecognizer):
         path_config_json: Optional[PathLikeOrStr] = None,
     ) -> None:
         """
-        :param architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
-                             "crnn_mobilenet_v3_small". The full list can be found here:
-                             https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
-        :param path_weights: Path to the weights of the model
-        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
-        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
-        :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
-                                 a model trained on custom vocab.
+        Args:
+            architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                "crnn_mobilenet_v3_small". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or `None`. If `None`, env variables `USE_TENSORFLOW`, `USE_PYTORCH` will be used.
+            path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
+                a model trained on custom vocab.
         """

         self.lib = lib if lib is not None else self.auto_select_lib()
@@ -383,8 +412,11 @@ class DoctrTextRecognizer(TextRecognizer):
         """
         Prediction on a batch of text lines

-        :param images: list of tuples with the annotation_id of the sub image and a numpy array
-        :return: A list of DetectionResult
+        Args:
+            images: list of tuples with the `annotation_id` of the sub image and a `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
         if images:
             return doctr_predict_text(images, self.doctr_predictor, self.device, self.lib)
@@ -395,7 +427,7 @@ class DoctrTextRecognizer(TextRecognizer):
         return _get_doctr_requirements()

     def clone(self) -> DoctrTextRecognizer:
-        return self.__class__(self.architecture, self.path_weights, self.device, self.lib)
+        return self.__class__(self.architecture, self.path_weights, self.device, self.lib, self.path_config_json)

     @staticmethod
     def load_model(
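The `clone` change above is the one behavioral fix in this file: in 0.42.1, cloning a recognizer built with a custom `path_config_json` silently dropped that argument. A hypothetical check of the now-preserved invariant (`rec` stands for a `DoctrTextRecognizer` built with a custom vocab config, as in the `__init__` docstring above):

```python
# rec = DoctrTextRecognizer("crnn_vgg16_bn", path_weights_tr, "cpu",
#                           path_config_json="custom_vocab.json")
cloned = rec.clone()
# 0.42.1: the clone was constructed without path_config_json, losing the custom vocab.
# 0.43.1: the clone receives all five constructor arguments.
assert cloned.path_config_json == rec.path_config_json
```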
@@ -456,15 +488,18 @@ class DoctrTextRecognizer(TextRecognizer):
         """
         Get the inner (wrapped) model.

-        :param architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
-                             "crnn_mobilenet_v3_small". The full list can be found here:
-                             https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16.
-        :param path_weights: Path to the weights of the model
-        :param device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
-        :param lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
-        :param path_config_json: Path to a json file containing the configuration of the model. Useful, if you have
-                                 a model trained on custom vocab.
-        :return: Inner model which is a "nn.Module" in PyTorch or a "tf.keras.Model" in Tensorflow
+        Args:
+            architecture: DocTR supports various text recognition models, e.g. "crnn_vgg16_bn",
+                "crnn_mobilenet_v3_small". The full list can be found here:
+                <https://github.com/mindee/doctr/blob/main/doctr/models/recognition/zoo.py#L16>.
+            path_weights: Path to the weights of the model
+            device: "cpu" or "cuda". Will default to "cuda" if the required hardware is available.
+            lib: "TF" or "PT" or None. If None, env variables USE_TENSORFLOW, USE_PYTORCH will be used.
+            path_config_json: Path to a `JSON` file containing the configuration of the model. Useful, if you have
+                a model trained on custom vocab.
+
+        Returns:
+            Inner model which is a `nn.Module` in PyTorch or a `tf.keras.Model` in Tensorflow
         """
         doctr_predictor = DoctrTextRecognizer.build_model(architecture, lib, path_config_json)
         DoctrTextRecognizer.load_model(path_weights, doctr_predictor, device, lib)
@@ -472,12 +507,26 @@ class DoctrTextRecognizer(TextRecognizer):

     @staticmethod
     def get_name(path_weights: PathLikeOrStr, architecture: str) -> str:
-        """Returns the name of the model"""
+        """
+        Returns the name of the model
+
+        Args:
+            path_weights: Path to the model weights
+            architecture: Architecture name
+
+        Returns:
+            The name of the model as string
+        """
         return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])

     @staticmethod
     def auto_select_lib() -> Literal["PT", "TF"]:
-        """Auto select the DL library from the installed and from environment variables"""
+        """
+        Auto select the DL library from the installed and from environment variables
+
+        Returns:
+            Either "PT" or "TF" based on environment variables
+        """
         return auto_select_lib_for_doctr()

     def clear_model(self) -> None:
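`get_name` itself is unchanged in behavior; for reference, a standalone sketch of what it returns for a catalog-style weights path (the path is illustrative). Note that the f-string and the joined path parts are concatenated without a separator:

```python
from pathlib import Path

def get_name(path_weights: str, architecture: str) -> str:
    # mirrors the static method body shown above
    return f"doctr_{architecture}" + "_".join(Path(path_weights).parts[-2:])

print(get_name("weights/doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt", "crnn_vgg16_bn"))
# doctr_crnn_vgg16_bnpt_crnn_vgg16_bn-9762b0b0.pt
```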
@@ -500,17 +549,19 @@ class DocTrRotationTransformer(ImageTransformer):
     This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
     The class also provides methods for cloning itself and for getting the requirements of the OCR system.

-    **Example:**
-        transformer = DocTrRotationTransformer()
-        detection_result = transformer.predict(np_img)
-        rotated_image = transformer.transform(np_img, detection_result)
+    Example:
+        ```python
+        transformer = DocTrRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+        ```
     """

     def __init__(self, number_contours: int = 50, ratio_threshold_for_lines: float = 5):
         """
-
-        :param number_contours: the number of contours used for the orientation estimation
-        :param ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
+        Args:
+            number_contours: the number of contours used for the orientation estimation
+            ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines
         """
         self.number_contours = number_contours
         self.ratio_threshold_for_lines = ratio_threshold_for_lines
@@ -522,9 +573,12 @@ class DocTrRotationTransformer(ImageTransformer):
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.

-        :param np_img: The input image as a numpy array.
-        :param specification: A `DetectionResult` object containing the predicted rotation angle.
-        :return: The rotated image as a numpy array.
+        Args:
+            np_img: The input image as a `np.array`
+            specification: A `DetectionResult` object containing the predicted rotation angle
+
+        Returns:
+            The rotated image as a `np.array`
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore

deepdoctection/extern/fastlang.py

@@ -16,8 +16,9 @@
 # limitations under the License.

 """
-Deepdoctection wrappers for fasttext language detection models
+Wrappers for fasttext language detection models
 """
+
 from __future__ import annotations

 import os
@@ -39,12 +40,13 @@ with try_import() as import_guard:

 class FasttextLangDetectorMixin(LanguageDetector, ABC):
     """
-    Base class for Fasttext language detection implementation. This class only implements the basic wrapper functions.
+    Base class for `Fasttext` language detection implementation. This class only implements the basic wrapper functions.
     """

     def __init__(self, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]) -> None:
         """
-        :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
+        Args:
+            categories: A `dict` with the model output label and value. We use as convention the `ISO 639-2` language
         """
         self.categories = ModelCategories(init_categories=categories)
         self.categories_orig = MappingProxyType({cat_orig: get_type(cat) for cat_orig, cat in categories_orig.items()})
@@ -52,8 +54,12 @@ class FasttextLangDetectorMixin(LanguageDetector, ABC):
     def output_to_detection_result(self, output: Union[tuple[Any, Any]]) -> DetectionResult:
         """
         Generating `DetectionResult` from model output
-        :param output: FastText model output
-        :return: `DetectionResult` filled with `text` and `score`
+
+        Args:
+            output: `FastText` model output
+
+        Returns:
+            `DetectionResult` filled with `text` and `score`
         """
         return DetectionResult(text=self.categories_orig[output[0][0]], score=output[1][0])

@@ -68,30 +74,30 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
     Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
     The background to the models can be found in the works:

-    [1] Joulin A, Grave E, Bojanowski P, Mikolov T, Bag of Tricks for Efficient Text Classification
-
-    [2] Joulin A, Grave E, Bojanowski P, Douze M, Jégou H, Mikolov T, FastText.zip: Compressing text classification
+    Info:
+        [1] Joulin A, Grave E, Bojanowski P, Mikolov T, Bag of Tricks for Efficient Text Classification
+        [2] Joulin A, Grave E, Bojanowski P, Douze M, Jégou H, Mikolov T, FastText.zip: Compressing text classification
     models

-    The models are distributed under the Creative Commons Attribution-Share-Alike License 3.0.
-    (<https://creativecommons.org/licenses/by-sa/3.0/>)
-
-    When loading the models via the ModelCatalog, the original and unmodified models are used.
+    When loading the models via the `ModelCatalog`, the original and unmodified models are used.

+    Example:
+        ```python
         path_weights = ModelCatalog.get_full_path_weights("fasttext/lid.176.bin")
         profile = ModelCatalog.get_profile("fasttext/lid.176.bin")
         lang_detector = FasttextLangDetector(path_weights,profile.categories)
         detection_result = lang_detector.predict("some text in some language")
-
+        ```

     """

     def __init__(
         self, path_weights: PathLikeOrStr, categories: Mapping[int, TypeOrStr], categories_orig: Mapping[str, TypeOrStr]
     ):
         """
-        :param path_weights: path to model weights
-        :param categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
-               code.
+        Args:
+            path_weights: path to model weights
+            categories: A dict with the model output label and value. We use as convention the ISO 639-2 language
+                code.
         """
         super().__init__(categories, categories_orig)

@@ -117,6 +123,8 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
     def get_wrapped_model(path_weights: PathLikeOrStr) -> Any:
         """
         Get the wrapped model
-        :param path_weights: path to model weights
+
+        Args:
+            path_weights: path to model weights
         """
         return load_model(os.fspath(path_weights))
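The wrapped model here is fasttext's own `load_model`, and the `(labels, probabilities)` tuple that its `predict` returns is what `output_to_detection_result` above indexes with `output[0][0]` and `output[1][0]`. A minimal sketch, with illustrative label and score values:

```python
import fasttext

model = fasttext.load_model("lid.176.bin")  # weights as in the class example above
output = model.predict("some text in some language")
# output looks like (('__label__en',), array([0.96...])), so
label, score = output[0][0], output[1][0]  # '__label__en', 0.96...
```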