deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124) hide show
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
  120. deepdoctection/analyzer/_config.py +0 -146
  121. deepdoctection-0.42.0.dist-info/METADATA +0 -431
  122. deepdoctection-0.42.0.dist-info/RECORD +0 -148
  123. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -41,14 +41,20 @@ _TF_ERR_MSG = f"Tensorflow must be installed. {_GENERIC_ERR_MSG}"
41
41
 
42
42
  def tf_available() -> bool:
43
43
  """
44
- Returns True if TF is installed
44
+ Returns whether TensorFlow is installed.
45
+
46
+ Returns:
47
+ bool: True if TensorFlow is installed, False otherwise.
45
48
  """
46
49
  return bool(_TF_AVAILABLE)
47
50
 
48
51
 
49
52
  def get_tf_version() -> str:
50
53
  """
51
- Determine the TF version which is installed
54
+ Determines the installed TensorFlow version.
55
+
56
+ Returns:
57
+ str: The installed TensorFlow version, or `0.0` if not installed.
52
58
  """
53
59
  tf_version = "0.0"
54
60
  if tf_available():
@@ -76,7 +82,10 @@ def get_tf_version() -> str:
76
82
 
77
83
  def get_tensorflow_requirement() -> Requirement:
78
84
  """
79
- Returns Tensorflow requirement
85
+ Returns the TensorFlow requirement.
86
+
87
+ Returns:
88
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
80
89
  """
81
90
 
82
91
  tf_requirement_satisfied = False
@@ -100,14 +109,20 @@ _TF_ADDONS_ERR_MSG = (
100
109
 
101
110
  def tf_addons_available() -> bool:
102
111
  """
103
- Returns True if tensorflow addons is installed
112
+ Returns whether `tensorflow_addons` is installed.
113
+
114
+ Returns:
115
+ bool: `True` if `tensorflow_addons` is installed, `False` otherwise.
104
116
  """
105
117
  return bool(_TF_ADDONS_AVAILABLE)
106
118
 
107
119
 
108
120
  def get_tf_addons_requirements() -> Requirement:
109
121
  """
110
- Returns Tensorflow Addons requirement
122
+ Returns the `tensorflow_addons` requirement.
123
+
124
+ Returns:
125
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
111
126
  """
112
127
  return "tensorflow-addons", tf_addons_available(), _TF_ADDONS_ERR_MSG
113
128
 
@@ -118,14 +133,20 @@ _TP_ERR_MSG = f"Tensorpack must be installed. {_GENERIC_ERR_MSG}"
118
133
 
119
134
  def tensorpack_available() -> bool:
120
135
  """
121
- Returns True if Tensorpack is installed
136
+ Returns whether `tensorpack` is installed.
137
+
138
+ Returns:
139
+ bool: `True` if `tensorpack` is installed, `False` otherwise.
122
140
  """
123
141
  return bool(_TP_AVAILABLE)
124
142
 
125
143
 
126
144
  def get_tensorpack_requirement() -> Requirement:
127
145
  """
128
- Returns Tensorpack requirement
146
+ Returns the `tensorpack` requirement.
147
+
148
+ Returns:
149
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
129
150
  """
130
151
  return "tensorpack", tensorpack_available(), _TP_ERR_MSG
131
152
 
@@ -137,14 +158,20 @@ _PYTORCH_ERR_MSG = f"Pytorch must be installed. {_GENERIC_ERR_MSG}"
137
158
 
138
159
  def pytorch_available() -> bool:
139
160
  """
140
- Returns True if Pytorch is installed
161
+ Returns whether PyTorch is installed.
162
+
163
+ Returns:
164
+ bool: True if PyTorch is installed, False otherwise.
141
165
  """
142
166
  return bool(_PYTORCH_AVAILABLE)
143
167
 
144
168
 
145
169
  def get_pytorch_requirement() -> Requirement:
146
170
  """
147
- Returns HF Pytorch requirement
171
+ Returns the PyTorch requirement.
172
+
173
+ Returns:
174
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
148
175
  """
149
176
  return "torch", pytorch_available(), _PYTORCH_ERR_MSG
150
177
 
@@ -156,14 +183,20 @@ _LXML_ERR_MSG = f"lxml must be installed. {_GENERIC_ERR_MSG}"
156
183
 
157
184
  def lxml_available() -> bool:
158
185
  """
159
- Returns True if lxml is installed
186
+ Returns whether `lxml` is installed.
187
+
188
+ Returns:
189
+ bool: `True` if `lxml` is installed, `False` otherwise.
160
190
  """
161
191
  return bool(_LXML_AVAILABLE)
162
192
 
163
193
 
164
194
  def get_lxml_requirement() -> Requirement:
165
195
  """
166
- Returns lxml requirement
196
+ Returns the `lxml` requirement.
197
+
198
+ Returns:
199
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
167
200
  """
168
201
  return "lxml", lxml_available(), _LXML_ERR_MSG
169
202
 
@@ -175,14 +208,20 @@ _APTED_ERR_MSG = f"apted must be installed. {_GENERIC_ERR_MSG}"
175
208
 
176
209
  def apted_available() -> bool:
177
210
  """
178
- Returns True if apted available
211
+ Returns whether `apted` is available.
212
+
213
+ Returns:
214
+ bool: `True` if `apted` is available, `False` otherwise.
179
215
  """
180
216
  return bool(_APTED_AVAILABLE)
181
217
 
182
218
 
183
219
  def get_apted_requirement() -> Requirement:
184
220
  """
185
- Returns APTED requirement
221
+ Returns the `apted` requirement.
222
+
223
+ Returns:
224
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
186
225
  """
187
226
  return "apted", apted_available(), _TRANSFORMERS_ERR_MSG
188
227
 
@@ -194,14 +233,20 @@ _DISTANCE_ERR_MSG = f"distance must be installed. {_GENERIC_ERR_MSG}"
194
233
 
195
234
  def distance_available() -> bool:
196
235
  """
197
- Returns True if apted available
236
+ Returns whether `distance` is available.
237
+
238
+ Returns:
239
+ bool: `True` if `distance` is available, `False` otherwise.
198
240
  """
199
241
  return bool(_DISTANCE_AVAILABLE)
200
242
 
201
243
 
202
244
  def get_distance_requirement() -> Requirement:
203
245
  """
204
- Returns distance requirement
246
+ Returns the `distance` requirement.
247
+
248
+ Returns:
249
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
205
250
  """
206
251
  return "distance", distance_available(), _DISTANCE_ERR_MSG
207
252
 
@@ -213,14 +258,20 @@ _TRANSFORMERS_ERR_MSG = f"transformers must be installed. {_GENERIC_ERR_MSG}"
213
258
 
214
259
  def transformers_available() -> bool:
215
260
  """
216
- Returns True if HF Transformers is installed
261
+ Returns whether HuggingFace Transformers is installed.
262
+
263
+ Returns:
264
+ bool: `True` if Transformers is installed, `False` otherwise.
217
265
  """
218
266
  return bool(_TRANSFORMERS_AVAILABLE)
219
267
 
220
268
 
221
269
  def get_transformers_requirement() -> Requirement:
222
270
  """
223
- Returns HF Transformers requirement
271
+ Returns the HuggingFace Transformers requirement.
272
+
273
+ Returns:
274
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
224
275
  """
225
276
  return "transformers", transformers_available(), _TRANSFORMERS_ERR_MSG
226
277
 
@@ -235,14 +286,20 @@ _DETECTRON2_ERR_MSG = (
235
286
 
236
287
  def detectron2_available() -> bool:
237
288
  """
238
- Returns True if Detectron2 is installed
289
+ Returns whether `detectron2` is installed.
290
+
291
+ Returns:
292
+ bool: True if `detectron2` is installed, False otherwise.
239
293
  """
240
294
  return bool(_DETECTRON2_AVAILABLE)
241
295
 
242
296
 
243
297
  def get_detectron2_requirement() -> Requirement:
244
298
  """
245
- Returns Detectron2 requirement
299
+ Returns the `detectron2` requirement.
300
+
301
+ Returns:
302
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
246
303
  """
247
304
  return "detectron2", detectron2_available(), _DETECTRON2_ERR_MSG
248
305
 
@@ -258,10 +315,14 @@ _TESS_ERR_MSG = (
258
315
 
259
316
 
260
317
  def set_tesseract_path(tesseract_path: PathLikeOrStr) -> None:
261
- """Set the Tesseract path. If you have tesseract installed in Anaconda,
262
- you can use this function to set tesseract path.
318
+ """
319
+ Sets the Tesseract path.
263
320
 
264
- :param tesseract_path: Tesseract installation path.
321
+ Note:
322
+ If you have Tesseract installed in Anaconda, you can use this function to set the Tesseract path.
323
+
324
+ Args:
325
+ tesseract_path: The Tesseract installation path.
265
326
  """
266
327
 
267
328
  global _TESS_AVAILABLE # pylint: disable=W0603
@@ -286,7 +347,13 @@ def tesseract_available() -> bool:
286
347
 
287
348
  def get_tesseract_version() -> Union[int, version.Version]:
288
349
  """
289
- Returns Version object of the Tesseract version. We need at least Tesseract 3.05
350
+ Returns the version of the installed Tesseract.
351
+
352
+ Returns:
353
+ int or packaging.version.Version: The Tesseract version if installed and `>= 4.0`, otherwise `0`.
354
+
355
+ Note:
356
+ The minimum required version is `3.05`.
290
357
  """
291
358
  try:
292
359
  output = subprocess.check_output(
@@ -311,7 +378,13 @@ def get_tesseract_version() -> Union[int, version.Version]:
311
378
 
312
379
  def get_tesseract_requirement() -> Requirement:
313
380
  """
314
- Returns Tesseract requirement. The minimum version must be 3.05
381
+ Returns the Tesseract requirement.
382
+
383
+ Returns:
384
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
385
+
386
+ Note:
387
+ The minimum required version is `3.05`.
315
388
  """
316
389
  if get_tesseract_version():
317
390
  return "tesseract", True, _TESS_ERR_MSG
@@ -326,21 +399,30 @@ _POPPLER_ERR_MSG = "Poppler cannot be found. Please check that Poppler is instal
326
399
 
327
400
  def pdf_to_ppm_available() -> bool:
328
401
  """
329
- Returns True if pdftoppm is installed
402
+ Returns whether `pdftoppm` is installed.
403
+
404
+ Returns:
405
+ bool: True if `pdftoppm` is installed, False otherwise.
330
406
  """
331
407
  return bool(_PDF_TO_PPM_AVAILABLE)
332
408
 
333
409
 
334
410
  def pdf_to_cairo_available() -> bool:
335
411
  """
336
- Returns True if pdftocairo is installed
412
+ Returns whether `pdftocairo` is installed.
413
+
414
+ Returns:
415
+ bool: `True` if `pdftocairo` is installed, `False` otherwise.
337
416
  """
338
417
  return bool(_PDF_TO_CAIRO_AVAILABLE)
339
418
 
340
419
 
341
420
  def get_poppler_version() -> Union[int, version.Version]:
342
421
  """
343
- Returns Version object of the Poppler version. We need at least Tesseract 3.05
422
+ Returns the version of the installed Poppler utility.
423
+
424
+ Returns:
425
+ int or packaging.version.Version: The Poppler version if installed, otherwise 0.
344
426
  """
345
427
 
346
428
  if pdf_to_ppm_available():
@@ -367,7 +449,10 @@ def get_poppler_version() -> Union[int, version.Version]:
367
449
 
368
450
  def get_poppler_requirement() -> Requirement:
369
451
  """
370
- Returns Poppler requirement. The minimum version is not required in our setting
452
+ Returns the Poppler requirement.
453
+
454
+ Returns:
455
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
371
456
  """
372
457
  if get_poppler_version():
373
458
  return "poppler", True, _POPPLER_ERR_MSG
@@ -381,14 +466,20 @@ _PDFPLUMBER_ERR_MSG = f"pdfplumber must be installed. {_GENERIC_ERR_MSG}"
381
466
 
382
467
  def pdfplumber_available() -> bool:
383
468
  """
384
- Returns True if pdfplumber is installed
469
+ Returns whether `pdfplumber` is installed.
470
+
471
+ Returns:
472
+ bool: `True` if `pdfplumber` is installed, `False` otherwise.
385
473
  """
386
474
  return bool(_PDFPLUMBER_AVAILABLE)
387
475
 
388
476
 
389
477
  def get_pdfplumber_requirement() -> Requirement:
390
478
  """
391
- Returns pdfplumber requirement.
479
+ Returns the `pdfplumber` requirement.
480
+
481
+ Returns:
482
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
392
483
  """
393
484
  return "pdfplumber", pdfplumber_available(), _PDFPLUMBER_ERR_MSG
394
485
 
@@ -400,14 +491,20 @@ _COCOTOOLS_ERR_MSG = f"pycocotools must be installed. {_GENERIC_ERR_MSG}"
400
491
 
401
492
  def cocotools_available() -> bool:
402
493
  """
403
- Returns True if pycocotools is installed
494
+ Returns whether `pycocotools` is installed.
495
+
496
+ Returns:
497
+ bool: `True` if `pycocotools` is installed, `False` otherwise.
404
498
  """
405
499
  return bool(_COCOTOOLS_AVAILABLE)
406
500
 
407
501
 
408
502
  def get_cocotools_requirement() -> Requirement:
409
503
  """
410
- Returns cocotools requirement.
504
+ Returns the `pycocotools` requirement.
505
+
506
+ Returns:
507
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
411
508
  """
412
509
  return "pycocotools", cocotools_available(), _COCOTOOLS_ERR_MSG
413
510
 
@@ -418,7 +515,10 @@ _SCIPY_AVAILABLE = importlib.util.find_spec("scipy") is not None
418
515
 
419
516
  def scipy_available() -> bool:
420
517
  """
421
- Returns True if scipy is installed
518
+ Returns whether `scipy` is installed.
519
+
520
+ Returns:
521
+ bool: `True` if `scipy` is installed, `False` otherwise.
422
522
  """
423
523
  return bool(_SCIPY_AVAILABLE)
424
524
 
@@ -430,14 +530,20 @@ _JDESKEW_ERR_MSG = f"jdeskew must be installed. {_GENERIC_ERR_MSG}"
430
530
 
431
531
  def jdeskew_available() -> bool:
432
532
  """
433
- Returns True if jdeskew is installed
533
+ Returns whether `jdeskew` is installed.
534
+
535
+ Returns:
536
+ bool: `True` if `jdeskew` is installed, `False` otherwise.
434
537
  """
435
538
  return bool(_JDESKEW_AVAILABLE)
436
539
 
437
540
 
438
541
  def get_jdeskew_requirement() -> Requirement:
439
542
  """
440
- Returns jdeskew requirement.
543
+ Returns the `jdeskew` requirement.
544
+
545
+ Returns:
546
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
441
547
  """
442
548
  return "jdeskew", jdeskew_available(), _JDESKEW_ERR_MSG
443
549
 
@@ -449,14 +555,20 @@ _SKLEARN_ERR_MSG = f"scikit-learn must be installed. {_GENERIC_ERR_MSG}"
449
555
 
450
556
  def sklearn_available() -> bool:
451
557
  """
452
- Returns True if sklearn is installed
558
+ Returns whether `sklearn` is installed.
559
+
560
+ Returns:
561
+ bool: `True` if `sklearn` is installed, `False` otherwise.
453
562
  """
454
563
  return bool(_SKLEARN_AVAILABLE)
455
564
 
456
565
 
457
566
  def get_sklearn_requirement() -> Requirement:
458
567
  """
459
- Returns sklearn requirement.
568
+ Returns the `sklearn` requirement.
569
+
570
+ Returns:
571
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
460
572
  """
461
573
  return "sklearn", sklearn_available(), _SKLEARN_ERR_MSG
462
574
 
@@ -467,7 +579,10 @@ _QPDF_AVAILABLE = which("qpdf") is not None
467
579
 
468
580
  def qpdf_available() -> bool:
469
581
  """
470
- Returns True if qpdf is installed
582
+ Returns whether `qpdf` is installed.
583
+
584
+ Returns:
585
+ bool: `True` if `qpdf` is installed, `False` otherwise.
471
586
  """
472
587
  return bool(_QPDF_AVAILABLE)
473
588
 
@@ -482,7 +597,10 @@ _AWS_ERR_MSG = "AWS CLI must be installed https://docs.aws.amazon.com/cli/latest
482
597
 
483
598
  def boto3_available() -> bool:
484
599
  """
485
- Returns True if Boto3 is installed
600
+ Returns whether `boto3` is installed.
601
+
602
+ Returns:
603
+ bool: `True` if `boto3` is installed, `False` otherwise.
486
604
  """
487
605
 
488
606
  return bool(_BOTO3_AVAILABLE)
@@ -490,21 +608,30 @@ def boto3_available() -> bool:
490
608
 
491
609
  def get_boto3_requirement() -> Requirement:
492
610
  """
493
- Return Boto3 requirement
611
+ Returns the `boto3` requirement.
612
+
613
+ Returns:
614
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
494
615
  """
495
616
  return "boto3", boto3_available(), _BOTO3_ERR_MSG
496
617
 
497
618
 
498
619
  def aws_available() -> bool:
499
620
  """
500
- Returns True if AWS CLI is installed
621
+ Returns whether AWS CLI is installed.
622
+
623
+ Returns:
624
+ bool: `True` if AWS CLI is installed, `False` otherwise.
501
625
  """
502
626
  return bool(_AWS_CLI_AVAILABLE)
503
627
 
504
628
 
505
629
  def get_aws_requirement() -> Requirement:
506
630
  """
507
- Return AWS CLI requirement
631
+ Returns the AWS CLI requirement.
632
+
633
+ Returns:
634
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
508
635
  """
509
636
  return "aws", aws_available(), _AWS_ERR_MSG
510
637
 
@@ -516,14 +643,24 @@ _DOCTR_ERR_MSG = f"DocTr must be installed. {_GENERIC_ERR_MSG}"
516
643
 
517
644
  def doctr_available() -> bool:
518
645
  """
519
- Returns True if doctr is installed
646
+ Returns whether `doctr` is installed.
647
+
648
+ Returns:
649
+ bool: `True` if `doctr` is installed, `False` otherwise.
520
650
  """
521
651
  return bool(_DOCTR_AVAILABLE)
522
652
 
523
653
 
524
654
  def get_doctr_requirement() -> Requirement:
525
655
  """
526
- Return Doctr requirement
656
+ Returns the `doctr` requirement.
657
+
658
+ Returns:
659
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
660
+
661
+ Note:
662
+ On macOS, if `poppler` is not available, this function will recursively check the requirement.
663
+ It is not yet known how to check whether `pango`, `gdk-pixbuf`, and `libffi` are installed.
527
664
  """
528
665
  if sys.platform == "darwin":
529
666
  if not get_poppler_version():
@@ -542,14 +679,20 @@ _FASTTEXT_ERR_MSG = f"fasttext must be installed. {_GENERIC_ERR_MSG}"
542
679
 
543
680
  def fasttext_available() -> bool:
544
681
  """
545
- Returns True if fasttext is installed
682
+ Returns whether `fasttext` is installed.
683
+
684
+ Returns:
685
+ bool: `True` if `fasttext` is installed, `False` otherwise.
546
686
  """
547
687
  return bool(_FASTTEXT_AVAILABLE)
548
688
 
549
689
 
550
690
  def get_fasttext_requirement() -> Requirement:
551
691
  """
552
- Return Fasttext requirement
692
+ Returns the `fasttext` requirement.
693
+
694
+ Returns:
695
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
553
696
  """
554
697
  return "fasttext", fasttext_available(), _FASTTEXT_ERR_MSG
555
698
 
@@ -561,14 +704,20 @@ _WANDB_ERR_MSG = f"WandB must be installed. {_GENERIC_ERR_MSG}"
561
704
 
562
705
  def wandb_available() -> bool:
563
706
  """
564
- Returns True if W&B package wandb is installed
707
+ Returns whether the W&B package `wandb` is installed.
708
+
709
+ Returns:
710
+ bool: `True` if `wandb` is installed, `False` otherwise.
565
711
  """
566
712
  return bool(_WANDB_AVAILABLE)
567
713
 
568
714
 
569
715
  def get_wandb_requirement() -> Requirement:
570
716
  """
571
- Return WandB requirement
717
+ Returns the W&B requirement.
718
+
719
+ Returns:
720
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
572
721
  """
573
722
  return "wandb", wandb_available(), _WANDB_ERR_MSG
574
723
 
@@ -585,14 +734,20 @@ _CV2_ERR_MSG = f"OpenCV must be installed. {_GENERIC_ERR_MSG}"
585
734
 
586
735
  def opencv_available() -> bool:
587
736
  """
588
- Returns True if OpenCV is installed
737
+ Returns whether OpenCV is installed.
738
+
739
+ Returns:
740
+ bool: `True` if OpenCV is installed, `False` otherwise.
589
741
  """
590
742
  return bool(_CV2_AVAILABLE)
591
743
 
592
744
 
593
745
  def get_opencv_requirement() -> Requirement:
594
746
  """
595
- Return OpenCV requirement
747
+ Returns the OpenCV requirement.
748
+
749
+ Returns:
750
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
596
751
  """
597
752
  return "opencv", opencv_available(), _CV2_ERR_MSG
598
753
 
@@ -604,14 +759,20 @@ _PILLOW_ERR_MSG = f"pillow must be installed. {_GENERIC_ERR_MSG}"
604
759
 
605
760
  def pillow_available() -> bool:
606
761
  """
607
- Returns True if Pillow is installed
762
+ Returns whether Pillow is installed.
763
+
764
+ Returns:
765
+ bool: `True` if Pillow is installed, `False` otherwise.
608
766
  """
609
767
  return bool(_PILLOW_AVAILABLE)
610
768
 
611
769
 
612
770
  def get_pillow_requirement() -> Requirement:
613
771
  """
614
- Return OpenCV requirement
772
+ Returns the Pillow requirement.
773
+
774
+ Returns:
775
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
615
776
  """
616
777
  return "pillow", pillow_available(), _PILLOW_ERR_MSG
617
778
 
@@ -623,14 +784,20 @@ _PYPDFIUM2_ERR_MSG = f"pypdfium2 must be installed. {_GENERIC_ERR_MSG}"
623
784
 
624
785
  def pypdfium2_available() -> bool:
625
786
  """
626
- Returns True if pypdfium2 is installed
787
+ Returns whether `pypdfium2` is installed.
788
+
789
+ Returns:
790
+ bool: `True` if `pypdfium2` is installed, `False` otherwise.
627
791
  """
628
792
  return bool(_PYPDFIUM2_AVAILABLE)
629
793
 
630
794
 
631
795
  def get_pypdfium2_requirement() -> Requirement:
632
796
  """
633
- Return pypdfium2 requirement
797
+ Returns the `pypdfium2` requirement.
798
+
799
+ Returns:
800
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
634
801
  """
635
802
  return "pypdfium2", pypdfium2_available(), _PYPDFIUM2_ERR_MSG
636
803
 
@@ -642,7 +809,10 @@ _SPACY_ERR_MSG = f"SpaCy must be installed. {_GENERIC_ERR_MSG}"
642
809
 
643
810
  def spacy_available() -> bool:
644
811
  """
645
- Returns True if SpaCy is installed
812
+ Returns whether SpaCy is installed.
813
+
814
+ Returns:
815
+ bool: True if SpaCy is installed, False otherwise.
646
816
  """
647
817
 
648
818
  return bool(_SPACY_AVAILABLE)
@@ -650,20 +820,21 @@ def spacy_available() -> bool:
650
820
 
651
821
  def get_spacy_requirement() -> Requirement:
652
822
  """
653
- Return SpaCy requirement
823
+ Returns the SpaCy requirement.
824
+
825
+ Returns:
826
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
654
827
  """
655
828
  return "spacy", spacy_available(), _SPACY_ERR_MSG
656
829
 
657
830
 
658
831
  def set_mp_spawn() -> None:
659
832
  """
660
- Sets multiprocessing method to "spawn".
833
+ Sets the multiprocessing method to "spawn".
661
834
 
662
- from https://github.com/tensorpack/tensorpack/blob/master/examples/FasterRCNN/train.py:
663
-
664
- "spawn/forkserver" is safer than the default "fork" method and
665
- produce more deterministic behavior & memory saving
666
- However its limitation is you cannot pass a lambda function to subprocesses.
835
+ Note:
836
+ "spawn/forkserver" is safer than the default "fork" method and produces more deterministic behavior and memory
837
+ saving. However, its limitation is that you cannot pass a lambda function to subprocesses.
667
838
  """
668
839
 
669
840
  if not _S.mp_context_set:
@@ -679,10 +850,23 @@ def set_mp_spawn() -> None:
679
850
  class _LazyModule(ModuleType):
680
851
  """
681
852
  Module class that surfaces all objects but only performs associated imports when the objects are requested.
853
+
854
+
855
+ Note:
856
+ This class is needed for autocompletion in an IDE.
682
857
  """
683
858
 
684
859
  @no_type_check
685
860
  def __init__(self, name, module_file, import_structure, module_spec=None, extra_objects=None):
861
+ """
862
+ Args:
863
+ name: The name of the module.
864
+ module_file: The file path of the module.
865
+ import_structure: The import structure dictionary.
866
+ module_spec: The module specification.
867
+ extra_objects: Additional objects to include.
868
+
869
+ """
686
870
  super().__init__(name)
687
871
  self._modules = set(import_structure.keys())
688
872
  self._class_to_module = {}