deepdoctection-0.42.1-py3-none-any.whl → deepdoctection-0.43.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deepdoctection might be problematic. More details are linked from the original registry diff page.

Files changed (124):
  1. deepdoctection/__init__.py +4 -2
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +919 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +162 -108
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +205 -119
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +26 -17
  104. deepdoctection/utils/env_info.py +86 -37
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -71
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.1.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.1.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,6 @@ import importlib.util
12
12
  import multiprocessing as mp
13
13
  import string
14
14
  import subprocess
15
- import sys
16
15
  from os import environ, path
17
16
  from shutil import which
18
17
  from types import ModuleType
@@ -22,7 +21,6 @@ import importlib_metadata
22
21
  from packaging import version
23
22
 
24
23
  from .error import DependencyError
25
- from .logger import LoggingRecord, logger
26
24
  from .metacfg import AttrDict
27
25
  from .types import PathLikeOrStr, Requirement
28
26
 
@@ -41,14 +39,20 @@ _TF_ERR_MSG = f"Tensorflow must be installed. {_GENERIC_ERR_MSG}"
41
39
 
42
40
  def tf_available() -> bool:
43
41
  """
44
- Returns True if TF is installed
42
+ Returns whether TensorFlow is installed.
43
+
44
+ Returns:
45
+ bool: True if TensorFlow is installed, False otherwise.
45
46
  """
46
47
  return bool(_TF_AVAILABLE)
47
48
 
48
49
 
49
50
  def get_tf_version() -> str:
50
51
  """
51
- Determine the TF version which is installed
52
+ Determines the installed TensorFlow version.
53
+
54
+ Returns:
55
+ str: The installed TensorFlow version, or `0.0` if not installed.
52
56
  """
53
57
  tf_version = "0.0"
54
58
  if tf_available():
@@ -76,7 +80,10 @@ def get_tf_version() -> str:
76
80
 
77
81
  def get_tensorflow_requirement() -> Requirement:
78
82
  """
79
- Returns Tensorflow requirement
83
+ Returns the TensorFlow requirement.
84
+
85
+ Returns:
86
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
80
87
  """
81
88
 
82
89
  tf_requirement_satisfied = False
@@ -100,14 +107,20 @@ _TF_ADDONS_ERR_MSG = (
100
107
 
101
108
  def tf_addons_available() -> bool:
102
109
  """
103
- Returns True if tensorflow addons is installed
110
+ Returns whether `tensorflow_addons` is installed.
111
+
112
+ Returns:
113
+ bool: `True` if `tensorflow_addons` is installed, False otherwise.
104
114
  """
105
115
  return bool(_TF_ADDONS_AVAILABLE)
106
116
 
107
117
 
108
118
  def get_tf_addons_requirements() -> Requirement:
109
119
  """
110
- Returns Tensorflow Addons requirement
120
+ Returns the `tensorflow_addons` requirement.
121
+
122
+ Returns:
123
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
111
124
  """
112
125
  return "tensorflow-addons", tf_addons_available(), _TF_ADDONS_ERR_MSG
113
126
 
@@ -118,14 +131,20 @@ _TP_ERR_MSG = f"Tensorpack must be installed. {_GENERIC_ERR_MSG}"
118
131
 
119
132
  def tensorpack_available() -> bool:
120
133
  """
121
- Returns True if Tensorpack is installed
134
+ Returns whether `tensorpack` is installed.
135
+
136
+ Returns:
137
+ bool: `True` if `tensorpack` is installed, False otherwise.
122
138
  """
123
139
  return bool(_TP_AVAILABLE)
124
140
 
125
141
 
126
142
  def get_tensorpack_requirement() -> Requirement:
127
143
  """
128
- Returns Tensorpack requirement
144
+ Returns the `tensorpack` requirement.
145
+
146
+ Returns:
147
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
129
148
  """
130
149
  return "tensorpack", tensorpack_available(), _TP_ERR_MSG
131
150
 
@@ -137,14 +156,20 @@ _PYTORCH_ERR_MSG = f"Pytorch must be installed. {_GENERIC_ERR_MSG}"
137
156
 
138
157
  def pytorch_available() -> bool:
139
158
  """
140
- Returns True if Pytorch is installed
159
+ Returns whether PyTorch is installed.
160
+
161
+ Returns:
162
+ bool: True if PyTorch is installed, False otherwise.
141
163
  """
142
164
  return bool(_PYTORCH_AVAILABLE)
143
165
 
144
166
 
145
167
  def get_pytorch_requirement() -> Requirement:
146
168
  """
147
- Returns HF Pytorch requirement
169
+ Returns the PyTorch requirement.
170
+
171
+ Returns:
172
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
148
173
  """
149
174
  return "torch", pytorch_available(), _PYTORCH_ERR_MSG
150
175
 
@@ -156,14 +181,20 @@ _LXML_ERR_MSG = f"lxml must be installed. {_GENERIC_ERR_MSG}"
156
181
 
157
182
  def lxml_available() -> bool:
158
183
  """
159
- Returns True if lxml is installed
184
+ Returns whether `lxml` is installed.
185
+
186
+ Returns:
187
+ bool: True if `lxml` is installed, `False` otherwise.
160
188
  """
161
189
  return bool(_LXML_AVAILABLE)
162
190
 
163
191
 
164
192
  def get_lxml_requirement() -> Requirement:
165
193
  """
166
- Returns lxml requirement
194
+ Returns the `lxml` requirement.
195
+
196
+ Returns:
197
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
167
198
  """
168
199
  return "lxml", lxml_available(), _LXML_ERR_MSG
169
200
 
@@ -175,14 +206,20 @@ _APTED_ERR_MSG = f"apted must be installed. {_GENERIC_ERR_MSG}"
175
206
 
176
207
  def apted_available() -> bool:
177
208
  """
178
- Returns True if apted available
209
+ Returns whether `apted` is available.
210
+
211
+ Returns:
212
+ bool: `True` if `apted` is available, False otherwise.
179
213
  """
180
214
  return bool(_APTED_AVAILABLE)
181
215
 
182
216
 
183
217
  def get_apted_requirement() -> Requirement:
184
218
  """
185
- Returns APTED requirement
219
+ Returns the `apted` requirement.
220
+
221
+ Returns:
222
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
186
223
  """
187
224
  return "apted", apted_available(), _TRANSFORMERS_ERR_MSG
188
225
 
@@ -194,14 +231,20 @@ _DISTANCE_ERR_MSG = f"distance must be installed. {_GENERIC_ERR_MSG}"
194
231
 
195
232
  def distance_available() -> bool:
196
233
  """
197
- Returns True if apted available
234
+ Returns whether `distance` is available.
235
+
236
+ Returns:
237
+ bool: `True` if `distance` is available, False otherwise.
198
238
  """
199
239
  return bool(_DISTANCE_AVAILABLE)
200
240
 
201
241
 
202
242
  def get_distance_requirement() -> Requirement:
203
243
  """
204
- Returns distance requirement
244
+ Returns the `distance` requirement.
245
+
246
+ Returns:
247
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
205
248
  """
206
249
  return "distance", distance_available(), _DISTANCE_ERR_MSG
207
250
 
@@ -213,14 +256,20 @@ _TRANSFORMERS_ERR_MSG = f"transformers must be installed. {_GENERIC_ERR_MSG}"
213
256
 
214
257
  def transformers_available() -> bool:
215
258
  """
216
- Returns True if HF Transformers is installed
259
+ Returns whether HuggingFace Transformers is installed.
260
+
261
+ Returns:
262
+ bool: `True` if Transformers is installed, False otherwise.
217
263
  """
218
264
  return bool(_TRANSFORMERS_AVAILABLE)
219
265
 
220
266
 
221
267
  def get_transformers_requirement() -> Requirement:
222
268
  """
223
- Returns HF Transformers requirement
269
+ Returns the HuggingFace Transformers requirement.
270
+
271
+ Returns:
272
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
224
273
  """
225
274
  return "transformers", transformers_available(), _TRANSFORMERS_ERR_MSG
226
275
 
@@ -235,14 +284,20 @@ _DETECTRON2_ERR_MSG = (
235
284
 
236
285
  def detectron2_available() -> bool:
237
286
  """
238
- Returns True if Detectron2 is installed
287
+ Returns whether `detectron2` is installed.
288
+
289
+ Returns:
290
+ bool: True if `detectron2` is installed, False otherwise.
239
291
  """
240
292
  return bool(_DETECTRON2_AVAILABLE)
241
293
 
242
294
 
243
295
  def get_detectron2_requirement() -> Requirement:
244
296
  """
245
- Returns Detectron2 requirement
297
+ Returns the `detectron2` requirement.
298
+
299
+ Returns:
300
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
246
301
  """
247
302
  return "detectron2", detectron2_available(), _DETECTRON2_ERR_MSG
248
303
 
@@ -258,10 +313,14 @@ _TESS_ERR_MSG = (
258
313
 
259
314
 
260
315
  def set_tesseract_path(tesseract_path: PathLikeOrStr) -> None:
261
- """Set the Tesseract path. If you have tesseract installed in Anaconda,
262
- you can use this function to set tesseract path.
316
+ """
317
+ Sets the Tesseract path.
263
318
 
264
- :param tesseract_path: Tesseract installation path.
319
+ Note:
320
+ If you have Tesseract installed in Anaconda, you can use this function to set the Tesseract path.
321
+
322
+ Args:
323
+ tesseract_path: The Tesseract installation path.
265
324
  """
266
325
 
267
326
  global _TESS_AVAILABLE # pylint: disable=W0603
@@ -286,7 +345,13 @@ def tesseract_available() -> bool:
286
345
 
287
346
  def get_tesseract_version() -> Union[int, version.Version]:
288
347
  """
289
- Returns Version object of the Tesseract version. We need at least Tesseract 3.05
348
+ Returns the version of the installed Tesseract.
349
+
350
+ Returns:
351
+ int or packaging.version.Version: The Tesseract version if installed and `>= 4.0`, otherwise `0`.
352
+
353
+ Note:
354
+ The minimum required version is `3.05`.
290
355
  """
291
356
  try:
292
357
  output = subprocess.check_output(
@@ -311,7 +376,13 @@ def get_tesseract_version() -> Union[int, version.Version]:
311
376
 
312
377
  def get_tesseract_requirement() -> Requirement:
313
378
  """
314
- Returns Tesseract requirement. The minimum version must be 3.05
379
+ Returns the Tesseract requirement.
380
+
381
+ Returns:
382
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
383
+
384
+ Note:
385
+ The minimum required version is `3.05`.
315
386
  """
316
387
  if get_tesseract_version():
317
388
  return "tesseract", True, _TESS_ERR_MSG
@@ -326,21 +397,30 @@ _POPPLER_ERR_MSG = "Poppler cannot be found. Please check that Poppler is instal
326
397
 
327
398
  def pdf_to_ppm_available() -> bool:
328
399
  """
329
- Returns True if pdftoppm is installed
400
+ Returns whether `pdftoppm` is installed.
401
+
402
+ Returns:
403
+ bool: True if `pdftoppm` is installed, False otherwise.
330
404
  """
331
405
  return bool(_PDF_TO_PPM_AVAILABLE)
332
406
 
333
407
 
334
408
  def pdf_to_cairo_available() -> bool:
335
409
  """
336
- Returns True if pdftocairo is installed
410
+ Returns whether `pdftocairo` is installed.
411
+
412
+ Returns:
413
+ bool: `True` if `pdftocairo` is installed, `False` otherwise.
337
414
  """
338
415
  return bool(_PDF_TO_CAIRO_AVAILABLE)
339
416
 
340
417
 
341
418
  def get_poppler_version() -> Union[int, version.Version]:
342
419
  """
343
- Returns Version object of the Poppler version. We need at least Tesseract 3.05
420
+ Returns the version of the installed Poppler utility.
421
+
422
+ Returns:
423
+ int or packaging.version.Version: The Poppler version if installed, otherwise 0.
344
424
  """
345
425
 
346
426
  if pdf_to_ppm_available():
@@ -367,7 +447,10 @@ def get_poppler_version() -> Union[int, version.Version]:
367
447
 
368
448
  def get_poppler_requirement() -> Requirement:
369
449
  """
370
- Returns Poppler requirement. The minimum version is not required in our setting
450
+ Returns the Poppler requirement.
451
+
452
+ Returns:
453
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
371
454
  """
372
455
  if get_poppler_version():
373
456
  return "poppler", True, _POPPLER_ERR_MSG
@@ -381,14 +464,20 @@ _PDFPLUMBER_ERR_MSG = f"pdfplumber must be installed. {_GENERIC_ERR_MSG}"
381
464
 
382
465
  def pdfplumber_available() -> bool:
383
466
  """
384
- Returns True if pdfplumber is installed
467
+ Returns whether `pdfplumber` is installed.
468
+
469
+ Returns:
470
+ bool: `True` if `pdfplumber` is installed, False otherwise.
385
471
  """
386
472
  return bool(_PDFPLUMBER_AVAILABLE)
387
473
 
388
474
 
389
475
  def get_pdfplumber_requirement() -> Requirement:
390
476
  """
391
- Returns pdfplumber requirement.
477
+ Returns the `pdfplumber` requirement.
478
+
479
+ Returns:
480
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
392
481
  """
393
482
  return "pdfplumber", pdfplumber_available(), _PDFPLUMBER_ERR_MSG
394
483
 
@@ -400,14 +489,20 @@ _COCOTOOLS_ERR_MSG = f"pycocotools must be installed. {_GENERIC_ERR_MSG}"
400
489
 
401
490
  def cocotools_available() -> bool:
402
491
  """
403
- Returns True if pycocotools is installed
492
+ Returns whether `pycocotools` is installed.
493
+
494
+ Returns:
495
+ bool: `True` if `pycocotools` is installed, `False` otherwise.
404
496
  """
405
497
  return bool(_COCOTOOLS_AVAILABLE)
406
498
 
407
499
 
408
500
  def get_cocotools_requirement() -> Requirement:
409
501
  """
410
- Returns cocotools requirement.
502
+ Returns the `pycocotools` requirement.
503
+
504
+ Returns:
505
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
411
506
  """
412
507
  return "pycocotools", cocotools_available(), _COCOTOOLS_ERR_MSG
413
508
 
@@ -418,7 +513,10 @@ _SCIPY_AVAILABLE = importlib.util.find_spec("scipy") is not None
418
513
 
419
514
  def scipy_available() -> bool:
420
515
  """
421
- Returns True if scipy is installed
516
+ Returns whether `scipy` is installed.
517
+
518
+ Returns:
519
+ bool: `True` if `scipy` is installed, `False` otherwise.
422
520
  """
423
521
  return bool(_SCIPY_AVAILABLE)
424
522
 
@@ -430,14 +528,20 @@ _JDESKEW_ERR_MSG = f"jdeskew must be installed. {_GENERIC_ERR_MSG}"
430
528
 
431
529
  def jdeskew_available() -> bool:
432
530
  """
433
- Returns True if jdeskew is installed
531
+ Returns whether `jdeskew` is installed.
532
+
533
+ Returns:
534
+ bool: `True` if `jdeskew` is installed, `False` otherwise.
434
535
  """
435
536
  return bool(_JDESKEW_AVAILABLE)
436
537
 
437
538
 
438
539
  def get_jdeskew_requirement() -> Requirement:
439
540
  """
440
- Returns jdeskew requirement.
541
+ Returns the `jdeskew` requirement.
542
+
543
+ Returns:
544
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
441
545
  """
442
546
  return "jdeskew", jdeskew_available(), _JDESKEW_ERR_MSG
443
547
 
@@ -449,14 +553,20 @@ _SKLEARN_ERR_MSG = f"scikit-learn must be installed. {_GENERIC_ERR_MSG}"
449
553
 
450
554
  def sklearn_available() -> bool:
451
555
  """
452
- Returns True if sklearn is installed
556
+ Returns whether `sklearn` is installed.
557
+
558
+ Returns:
559
+ bool: `True` if `sklearn` is installed, `False` otherwise.
453
560
  """
454
561
  return bool(_SKLEARN_AVAILABLE)
455
562
 
456
563
 
457
564
  def get_sklearn_requirement() -> Requirement:
458
565
  """
459
- Returns sklearn requirement.
566
+ Returns the `sklearn` requirement.
567
+
568
+ Returns:
569
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
460
570
  """
461
571
  return "sklearn", sklearn_available(), _SKLEARN_ERR_MSG
462
572
 
@@ -467,7 +577,10 @@ _QPDF_AVAILABLE = which("qpdf") is not None
467
577
 
468
578
  def qpdf_available() -> bool:
469
579
  """
470
- Returns True if qpdf is installed
580
+ Returns whether `qpdf` is installed.
581
+
582
+ Returns:
583
+ bool: `True` if `qpdf` is installed, `False` otherwise.
471
584
  """
472
585
  return bool(_QPDF_AVAILABLE)
473
586
 
@@ -482,7 +595,10 @@ _AWS_ERR_MSG = "AWS CLI must be installed https://docs.aws.amazon.com/cli/latest
482
595
 
483
596
  def boto3_available() -> bool:
484
597
  """
485
- Returns True if Boto3 is installed
598
+ Returns whether `boto3` is installed.
599
+
600
+ Returns:
601
+ bool: `True` if `boto3` is installed, `False` otherwise.
486
602
  """
487
603
 
488
604
  return bool(_BOTO3_AVAILABLE)
@@ -490,21 +606,30 @@ def boto3_available() -> bool:
490
606
 
491
607
  def get_boto3_requirement() -> Requirement:
492
608
  """
493
- Return Boto3 requirement
609
+ Returns the `boto3` requirement.
610
+
611
+ Returns:
612
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
494
613
  """
495
614
  return "boto3", boto3_available(), _BOTO3_ERR_MSG
496
615
 
497
616
 
498
617
  def aws_available() -> bool:
499
618
  """
500
- Returns True if AWS CLI is installed
619
+ Returns whether AWS CLI is installed.
620
+
621
+ Returns:
622
+ bool: `True` if AWS CLI is installed, `False` otherwise.
501
623
  """
502
624
  return bool(_AWS_CLI_AVAILABLE)
503
625
 
504
626
 
505
627
  def get_aws_requirement() -> Requirement:
506
628
  """
507
- Return AWS CLI requirement
629
+ Returns the AWS CLI requirement.
630
+
631
+ Returns:
632
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
508
633
  """
509
634
  return "aws", aws_available(), _AWS_ERR_MSG
510
635
 
@@ -516,22 +641,25 @@ _DOCTR_ERR_MSG = f"DocTr must be installed. {_GENERIC_ERR_MSG}"
516
641
 
517
642
  def doctr_available() -> bool:
518
643
  """
519
- Returns True if doctr is installed
644
+ Returns whether `doctr` is installed.
645
+
646
+ Returns:
647
+ bool: `True` if `doctr` is installed, `False` otherwise.
520
648
  """
521
649
  return bool(_DOCTR_AVAILABLE)
522
650
 
523
651
 
524
652
  def get_doctr_requirement() -> Requirement:
525
653
  """
526
- Return Doctr requirement
654
+ Returns the `doctr` requirement.
655
+
656
+ Returns:
657
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
658
+
659
+ Note:
660
+ On macOS, if `poppler` is not available, this function will recursively check the requirement.
661
+ It is not yet known how to check whether `pango`, `gdk-pixbuf`, and `libffi` are installed.
527
662
  """
528
- if sys.platform == "darwin":
529
- if not get_poppler_version():
530
- return get_doctr_requirement()
531
- # don't know yet how to check whether pango gdk-pixbuf libffi are installed
532
- logger.info(
533
- LoggingRecord("package requires weasyprint. Check that poppler pango gdk-pixbuf libffi are installed")
534
- )
535
663
  return "doctr", doctr_available(), _DOCTR_ERR_MSG
536
664
 
537
665
 
@@ -542,14 +670,20 @@ _FASTTEXT_ERR_MSG = f"fasttext must be installed. {_GENERIC_ERR_MSG}"
542
670
 
543
671
  def fasttext_available() -> bool:
544
672
  """
545
- Returns True if fasttext is installed
673
+ Returns whether `fasttext` is installed.
674
+
675
+ Returns:
676
+ bool: `True` if `fasttext` is installed, False otherwise.
546
677
  """
547
678
  return bool(_FASTTEXT_AVAILABLE)
548
679
 
549
680
 
550
681
  def get_fasttext_requirement() -> Requirement:
551
682
  """
552
- Return Fasttext requirement
683
+ Returns the `fasttext` requirement.
684
+
685
+ Returns:
686
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
553
687
  """
554
688
  return "fasttext", fasttext_available(), _FASTTEXT_ERR_MSG
555
689
 
@@ -561,14 +695,20 @@ _WANDB_ERR_MSG = f"WandB must be installed. {_GENERIC_ERR_MSG}"
561
695
 
562
696
  def wandb_available() -> bool:
563
697
  """
564
- Returns True if W&B package wandb is installed
698
+ Returns whether the W&B package `wandb` is installed.
699
+
700
+ Returns:
701
+ bool: `True` if `wandb` is installed, `False` otherwise.
565
702
  """
566
703
  return bool(_WANDB_AVAILABLE)
567
704
 
568
705
 
569
706
  def get_wandb_requirement() -> Requirement:
570
707
  """
571
- Return WandB requirement
708
+ Returns the W&B requirement.
709
+
710
+ Returns:
711
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
572
712
  """
573
713
  return "wandb", wandb_available(), _WANDB_ERR_MSG
574
714
 
@@ -585,14 +725,20 @@ _CV2_ERR_MSG = f"OpenCV must be installed. {_GENERIC_ERR_MSG}"
585
725
 
586
726
  def opencv_available() -> bool:
587
727
  """
588
- Returns True if OpenCV is installed
728
+ Returns whether OpenCV is installed.
729
+
730
+ Returns:
731
+ bool: `True` if OpenCV is installed, `False` otherwise.
589
732
  """
590
733
  return bool(_CV2_AVAILABLE)
591
734
 
592
735
 
593
736
  def get_opencv_requirement() -> Requirement:
594
737
  """
595
- Return OpenCV requirement
738
+ Returns the OpenCV requirement.
739
+
740
+ Returns:
741
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
596
742
  """
597
743
  return "opencv", opencv_available(), _CV2_ERR_MSG
598
744
 
@@ -604,14 +750,20 @@ _PILLOW_ERR_MSG = f"pillow must be installed. {_GENERIC_ERR_MSG}"
604
750
 
605
751
  def pillow_available() -> bool:
606
752
  """
607
- Returns True if Pillow is installed
753
+ Returns whether Pillow is installed.
754
+
755
+ Returns:
756
+ bool: `True` if Pillow is installed, False otherwise.
608
757
  """
609
758
  return bool(_PILLOW_AVAILABLE)
610
759
 
611
760
 
612
761
  def get_pillow_requirement() -> Requirement:
613
762
  """
614
- Return OpenCV requirement
763
+ Returns the Pillow requirement.
764
+
765
+ Returns:
766
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
615
767
  """
616
768
  return "pillow", pillow_available(), _PILLOW_ERR_MSG
617
769
 
@@ -623,14 +775,20 @@ _PYPDFIUM2_ERR_MSG = f"pypdfium2 must be installed. {_GENERIC_ERR_MSG}"
623
775
 
624
776
  def pypdfium2_available() -> bool:
625
777
  """
626
- Returns True if pypdfium2 is installed
778
+ Returns whether `pypdfium2` is installed.
779
+
780
+ Returns:
781
+ bool: `True` if `pypdfium2` is installed, `False` otherwise.
627
782
  """
628
783
  return bool(_PYPDFIUM2_AVAILABLE)
629
784
 
630
785
 
631
786
  def get_pypdfium2_requirement() -> Requirement:
632
787
  """
633
- Return pypdfium2 requirement
788
+ Returns the `pypdfium2` requirement.
789
+
790
+ Returns:
791
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
634
792
  """
635
793
  return "pypdfium2", pypdfium2_available(), _PYPDFIUM2_ERR_MSG
636
794
 
@@ -642,7 +800,10 @@ _SPACY_ERR_MSG = f"SpaCy must be installed. {_GENERIC_ERR_MSG}"
642
800
 
643
801
  def spacy_available() -> bool:
644
802
  """
645
- Returns True if SpaCy is installed
803
+ Returns whether SpaCy is installed.
804
+
805
+ Returns:
806
+ bool: True if SpaCy is installed, False otherwise.
646
807
  """
647
808
 
648
809
  return bool(_SPACY_AVAILABLE)
@@ -650,20 +811,21 @@ def spacy_available() -> bool:
650
811
 
651
812
  def get_spacy_requirement() -> Requirement:
652
813
  """
653
- Return SpaCy requirement
814
+ Returns the SpaCy requirement.
815
+
816
+ Returns:
817
+ tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
654
818
  """
655
819
  return "spacy", spacy_available(), _SPACY_ERR_MSG
656
820
 
657
821
 
658
822
  def set_mp_spawn() -> None:
659
823
  """
660
- Sets multiprocessing method to "spawn".
824
+ Sets the multiprocessing method to "spawn".
661
825
 
662
- from https://github.com/tensorpack/tensorpack/blob/master/examples/FasterRCNN/train.py:
663
-
664
- "spawn/forkserver" is safer than the default "fork" method and
665
- produce more deterministic behavior & memory saving
666
- However its limitation is you cannot pass a lambda function to subprocesses.
826
+ Note:
827
+ "spawn/forkserver" is safer than the default "fork" method and produces more deterministic behavior and memory
828
+ saving. However, its limitation is that you cannot pass a lambda function to subprocesses.
667
829
  """
668
830
 
669
831
  if not _S.mp_context_set:
@@ -679,10 +841,23 @@ def set_mp_spawn() -> None:
679
841
  class _LazyModule(ModuleType):
680
842
  """
681
843
  Module class that surfaces all objects but only performs associated imports when the objects are requested.
844
+
845
+
846
+ Note:
847
+ This class is needed for autocompletion in an IDE.
682
848
  """
683
849
 
684
850
  @no_type_check
685
851
  def __init__(self, name, module_file, import_structure, module_spec=None, extra_objects=None):
852
+ """
853
+ Args:
854
+ name: The name of the module.
855
+ module_file: The file path of the module.
856
+ import_structure: The import structure dictionary.
857
+ module_spec: The module specification.
858
+ extra_objects: Additional objects to include.
859
+
860
+ """
686
861
  super().__init__(name)
687
862
  self._modules = set(import_structure.keys())
688
863
  self._class_to_module = {}