deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
|
@@ -41,14 +41,20 @@ _TF_ERR_MSG = f"Tensorflow must be installed. {_GENERIC_ERR_MSG}"
|
|
|
41
41
|
|
|
42
42
|
def tf_available() -> bool:
|
|
43
43
|
"""
|
|
44
|
-
Returns
|
|
44
|
+
Returns whether TensorFlow is installed.
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
bool: True if TensorFlow is installed, False otherwise.
|
|
45
48
|
"""
|
|
46
49
|
return bool(_TF_AVAILABLE)
|
|
47
50
|
|
|
48
51
|
|
|
49
52
|
def get_tf_version() -> str:
|
|
50
53
|
"""
|
|
51
|
-
|
|
54
|
+
Determines the installed TensorFlow version.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
str: The installed TensorFlow version, or `0.0` if not installed.
|
|
52
58
|
"""
|
|
53
59
|
tf_version = "0.0"
|
|
54
60
|
if tf_available():
|
|
@@ -76,7 +82,10 @@ def get_tf_version() -> str:
|
|
|
76
82
|
|
|
77
83
|
def get_tensorflow_requirement() -> Requirement:
|
|
78
84
|
"""
|
|
79
|
-
Returns
|
|
85
|
+
Returns the TensorFlow requirement.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
80
89
|
"""
|
|
81
90
|
|
|
82
91
|
tf_requirement_satisfied = False
|
|
@@ -100,14 +109,20 @@ _TF_ADDONS_ERR_MSG = (
|
|
|
100
109
|
|
|
101
110
|
def tf_addons_available() -> bool:
|
|
102
111
|
"""
|
|
103
|
-
Returns
|
|
112
|
+
Returns whether `tensorflow_addons` is installed.
|
|
113
|
+
|
|
114
|
+
Returns:
|
|
115
|
+
bool: `True` if `tensorflow_addons` is installed, False otherwise.
|
|
104
116
|
"""
|
|
105
117
|
return bool(_TF_ADDONS_AVAILABLE)
|
|
106
118
|
|
|
107
119
|
|
|
108
120
|
def get_tf_addons_requirements() -> Requirement:
|
|
109
121
|
"""
|
|
110
|
-
Returns
|
|
122
|
+
Returns the `tensorflow_addons` requirement.
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
111
126
|
"""
|
|
112
127
|
return "tensorflow-addons", tf_addons_available(), _TF_ADDONS_ERR_MSG
|
|
113
128
|
|
|
@@ -118,14 +133,20 @@ _TP_ERR_MSG = f"Tensorpack must be installed. {_GENERIC_ERR_MSG}"
|
|
|
118
133
|
|
|
119
134
|
def tensorpack_available() -> bool:
|
|
120
135
|
"""
|
|
121
|
-
Returns
|
|
136
|
+
Returns whether `tensorpack` is installed.
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
bool: `True` if `tensorpack` is installed, False otherwise.
|
|
122
140
|
"""
|
|
123
141
|
return bool(_TP_AVAILABLE)
|
|
124
142
|
|
|
125
143
|
|
|
126
144
|
def get_tensorpack_requirement() -> Requirement:
|
|
127
145
|
"""
|
|
128
|
-
Returns
|
|
146
|
+
Returns the `tensorpack` requirement.
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
129
150
|
"""
|
|
130
151
|
return "tensorpack", tensorpack_available(), _TP_ERR_MSG
|
|
131
152
|
|
|
@@ -137,14 +158,20 @@ _PYTORCH_ERR_MSG = f"Pytorch must be installed. {_GENERIC_ERR_MSG}"
|
|
|
137
158
|
|
|
138
159
|
def pytorch_available() -> bool:
|
|
139
160
|
"""
|
|
140
|
-
Returns
|
|
161
|
+
Returns whether PyTorch is installed.
|
|
162
|
+
|
|
163
|
+
Returns:
|
|
164
|
+
bool: True if PyTorch is installed, False otherwise.
|
|
141
165
|
"""
|
|
142
166
|
return bool(_PYTORCH_AVAILABLE)
|
|
143
167
|
|
|
144
168
|
|
|
145
169
|
def get_pytorch_requirement() -> Requirement:
|
|
146
170
|
"""
|
|
147
|
-
Returns
|
|
171
|
+
Returns the PyTorch requirement.
|
|
172
|
+
|
|
173
|
+
Returns:
|
|
174
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
148
175
|
"""
|
|
149
176
|
return "torch", pytorch_available(), _PYTORCH_ERR_MSG
|
|
150
177
|
|
|
@@ -156,14 +183,20 @@ _LXML_ERR_MSG = f"lxml must be installed. {_GENERIC_ERR_MSG}"
|
|
|
156
183
|
|
|
157
184
|
def lxml_available() -> bool:
|
|
158
185
|
"""
|
|
159
|
-
Returns
|
|
186
|
+
Returns whether `lxml` is installed.
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
bool: True if `lxml` is installed, `False` otherwise.
|
|
160
190
|
"""
|
|
161
191
|
return bool(_LXML_AVAILABLE)
|
|
162
192
|
|
|
163
193
|
|
|
164
194
|
def get_lxml_requirement() -> Requirement:
|
|
165
195
|
"""
|
|
166
|
-
Returns lxml requirement
|
|
196
|
+
Returns the `lxml` requirement.
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
167
200
|
"""
|
|
168
201
|
return "lxml", lxml_available(), _LXML_ERR_MSG
|
|
169
202
|
|
|
@@ -175,14 +208,20 @@ _APTED_ERR_MSG = f"apted must be installed. {_GENERIC_ERR_MSG}"
|
|
|
175
208
|
|
|
176
209
|
def apted_available() -> bool:
|
|
177
210
|
"""
|
|
178
|
-
Returns
|
|
211
|
+
Returns whether `apted` is available.
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
bool: `True` if `apted` is available, False otherwise.
|
|
179
215
|
"""
|
|
180
216
|
return bool(_APTED_AVAILABLE)
|
|
181
217
|
|
|
182
218
|
|
|
183
219
|
def get_apted_requirement() -> Requirement:
|
|
184
220
|
"""
|
|
185
|
-
Returns
|
|
221
|
+
Returns the `apted` requirement.
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
186
225
|
"""
|
|
187
226
|
return "apted", apted_available(), _TRANSFORMERS_ERR_MSG
|
|
188
227
|
|
|
@@ -194,14 +233,20 @@ _DISTANCE_ERR_MSG = f"distance must be installed. {_GENERIC_ERR_MSG}"
|
|
|
194
233
|
|
|
195
234
|
def distance_available() -> bool:
|
|
196
235
|
"""
|
|
197
|
-
Returns
|
|
236
|
+
Returns whether `distance` is available.
|
|
237
|
+
|
|
238
|
+
Returns:
|
|
239
|
+
bool: `True` if `distance` is available, False otherwise.
|
|
198
240
|
"""
|
|
199
241
|
return bool(_DISTANCE_AVAILABLE)
|
|
200
242
|
|
|
201
243
|
|
|
202
244
|
def get_distance_requirement() -> Requirement:
|
|
203
245
|
"""
|
|
204
|
-
Returns distance requirement
|
|
246
|
+
Returns the `distance` requirement.
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
205
250
|
"""
|
|
206
251
|
return "distance", distance_available(), _DISTANCE_ERR_MSG
|
|
207
252
|
|
|
@@ -213,14 +258,20 @@ _TRANSFORMERS_ERR_MSG = f"transformers must be installed. {_GENERIC_ERR_MSG}"
|
|
|
213
258
|
|
|
214
259
|
def transformers_available() -> bool:
|
|
215
260
|
"""
|
|
216
|
-
Returns
|
|
261
|
+
Returns whether HuggingFace Transformers is installed.
|
|
262
|
+
|
|
263
|
+
Returns:
|
|
264
|
+
bool: `True` if Transformers is installed, False otherwise.
|
|
217
265
|
"""
|
|
218
266
|
return bool(_TRANSFORMERS_AVAILABLE)
|
|
219
267
|
|
|
220
268
|
|
|
221
269
|
def get_transformers_requirement() -> Requirement:
|
|
222
270
|
"""
|
|
223
|
-
Returns
|
|
271
|
+
Returns the HuggingFace Transformers requirement.
|
|
272
|
+
|
|
273
|
+
Returns:
|
|
274
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
224
275
|
"""
|
|
225
276
|
return "transformers", transformers_available(), _TRANSFORMERS_ERR_MSG
|
|
226
277
|
|
|
@@ -235,14 +286,20 @@ _DETECTRON2_ERR_MSG = (
|
|
|
235
286
|
|
|
236
287
|
def detectron2_available() -> bool:
|
|
237
288
|
"""
|
|
238
|
-
Returns
|
|
289
|
+
Returns whether `detectron2` is installed.
|
|
290
|
+
|
|
291
|
+
Returns:
|
|
292
|
+
bool: True if `detectron2` is installed, False otherwise.
|
|
239
293
|
"""
|
|
240
294
|
return bool(_DETECTRON2_AVAILABLE)
|
|
241
295
|
|
|
242
296
|
|
|
243
297
|
def get_detectron2_requirement() -> Requirement:
|
|
244
298
|
"""
|
|
245
|
-
Returns
|
|
299
|
+
Returns the `detectron2` requirement.
|
|
300
|
+
|
|
301
|
+
Returns:
|
|
302
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
246
303
|
"""
|
|
247
304
|
return "detectron2", detectron2_available(), _DETECTRON2_ERR_MSG
|
|
248
305
|
|
|
@@ -258,10 +315,14 @@ _TESS_ERR_MSG = (
|
|
|
258
315
|
|
|
259
316
|
|
|
260
317
|
def set_tesseract_path(tesseract_path: PathLikeOrStr) -> None:
|
|
261
|
-
"""
|
|
262
|
-
|
|
318
|
+
"""
|
|
319
|
+
Sets the Tesseract path.
|
|
263
320
|
|
|
264
|
-
|
|
321
|
+
Note:
|
|
322
|
+
If you have Tesseract installed in Anaconda, you can use this function to set the Tesseract path.
|
|
323
|
+
|
|
324
|
+
Args:
|
|
325
|
+
tesseract_path: The Tesseract installation path.
|
|
265
326
|
"""
|
|
266
327
|
|
|
267
328
|
global _TESS_AVAILABLE # pylint: disable=W0603
|
|
@@ -286,7 +347,13 @@ def tesseract_available() -> bool:
|
|
|
286
347
|
|
|
287
348
|
def get_tesseract_version() -> Union[int, version.Version]:
|
|
288
349
|
"""
|
|
289
|
-
Returns
|
|
350
|
+
Returns the version of the installed Tesseract.
|
|
351
|
+
|
|
352
|
+
Returns:
|
|
353
|
+
int or packaging.version.Version: The Tesseract version if installed and `>= 4.0`, otherwise `0`.
|
|
354
|
+
|
|
355
|
+
Note:
|
|
356
|
+
The minimum required version is `3.05`.
|
|
290
357
|
"""
|
|
291
358
|
try:
|
|
292
359
|
output = subprocess.check_output(
|
|
@@ -311,7 +378,13 @@ def get_tesseract_version() -> Union[int, version.Version]:
|
|
|
311
378
|
|
|
312
379
|
def get_tesseract_requirement() -> Requirement:
|
|
313
380
|
"""
|
|
314
|
-
Returns Tesseract requirement.
|
|
381
|
+
Returns the Tesseract requirement.
|
|
382
|
+
|
|
383
|
+
Returns:
|
|
384
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
385
|
+
|
|
386
|
+
Note:
|
|
387
|
+
The minimum required version is `3.05`.
|
|
315
388
|
"""
|
|
316
389
|
if get_tesseract_version():
|
|
317
390
|
return "tesseract", True, _TESS_ERR_MSG
|
|
@@ -326,21 +399,30 @@ _POPPLER_ERR_MSG = "Poppler cannot be found. Please check that Poppler is instal
|
|
|
326
399
|
|
|
327
400
|
def pdf_to_ppm_available() -> bool:
|
|
328
401
|
"""
|
|
329
|
-
Returns
|
|
402
|
+
Returns whether `pdftoppm` is installed.
|
|
403
|
+
|
|
404
|
+
Returns:
|
|
405
|
+
bool: True if `pdftoppm` is installed, False otherwise.
|
|
330
406
|
"""
|
|
331
407
|
return bool(_PDF_TO_PPM_AVAILABLE)
|
|
332
408
|
|
|
333
409
|
|
|
334
410
|
def pdf_to_cairo_available() -> bool:
|
|
335
411
|
"""
|
|
336
|
-
Returns
|
|
412
|
+
Returns whether `pdftocairo` is installed.
|
|
413
|
+
|
|
414
|
+
Returns:
|
|
415
|
+
bool: `True` if `pdftocairo` is installed, `False` otherwise.
|
|
337
416
|
"""
|
|
338
417
|
return bool(_PDF_TO_CAIRO_AVAILABLE)
|
|
339
418
|
|
|
340
419
|
|
|
341
420
|
def get_poppler_version() -> Union[int, version.Version]:
|
|
342
421
|
"""
|
|
343
|
-
Returns
|
|
422
|
+
Returns the version of the installed Poppler utility.
|
|
423
|
+
|
|
424
|
+
Returns:
|
|
425
|
+
int or packaging.version.Version: The Poppler version if installed, otherwise 0.
|
|
344
426
|
"""
|
|
345
427
|
|
|
346
428
|
if pdf_to_ppm_available():
|
|
@@ -367,7 +449,10 @@ def get_poppler_version() -> Union[int, version.Version]:
|
|
|
367
449
|
|
|
368
450
|
def get_poppler_requirement() -> Requirement:
|
|
369
451
|
"""
|
|
370
|
-
Returns Poppler requirement.
|
|
452
|
+
Returns the Poppler requirement.
|
|
453
|
+
|
|
454
|
+
Returns:
|
|
455
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
371
456
|
"""
|
|
372
457
|
if get_poppler_version():
|
|
373
458
|
return "poppler", True, _POPPLER_ERR_MSG
|
|
@@ -381,14 +466,20 @@ _PDFPLUMBER_ERR_MSG = f"pdfplumber must be installed. {_GENERIC_ERR_MSG}"
|
|
|
381
466
|
|
|
382
467
|
def pdfplumber_available() -> bool:
|
|
383
468
|
"""
|
|
384
|
-
Returns
|
|
469
|
+
Returns whether `pdfplumber` is installed.
|
|
470
|
+
|
|
471
|
+
Returns:
|
|
472
|
+
bool: `True` if `pdfplumber` is installed, False otherwise.
|
|
385
473
|
"""
|
|
386
474
|
return bool(_PDFPLUMBER_AVAILABLE)
|
|
387
475
|
|
|
388
476
|
|
|
389
477
|
def get_pdfplumber_requirement() -> Requirement:
|
|
390
478
|
"""
|
|
391
|
-
Returns pdfplumber requirement.
|
|
479
|
+
Returns the `pdfplumber` requirement.
|
|
480
|
+
|
|
481
|
+
Returns:
|
|
482
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
392
483
|
"""
|
|
393
484
|
return "pdfplumber", pdfplumber_available(), _PDFPLUMBER_ERR_MSG
|
|
394
485
|
|
|
@@ -400,14 +491,20 @@ _COCOTOOLS_ERR_MSG = f"pycocotools must be installed. {_GENERIC_ERR_MSG}"
|
|
|
400
491
|
|
|
401
492
|
def cocotools_available() -> bool:
|
|
402
493
|
"""
|
|
403
|
-
Returns
|
|
494
|
+
Returns whether `pycocotools` is installed.
|
|
495
|
+
|
|
496
|
+
Returns:
|
|
497
|
+
bool: `True` if `pycocotools` is installed, `False` otherwise.
|
|
404
498
|
"""
|
|
405
499
|
return bool(_COCOTOOLS_AVAILABLE)
|
|
406
500
|
|
|
407
501
|
|
|
408
502
|
def get_cocotools_requirement() -> Requirement:
|
|
409
503
|
"""
|
|
410
|
-
Returns
|
|
504
|
+
Returns the `pycocotools` requirement.
|
|
505
|
+
|
|
506
|
+
Returns:
|
|
507
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
411
508
|
"""
|
|
412
509
|
return "pycocotools", cocotools_available(), _COCOTOOLS_ERR_MSG
|
|
413
510
|
|
|
@@ -418,7 +515,10 @@ _SCIPY_AVAILABLE = importlib.util.find_spec("scipy") is not None
|
|
|
418
515
|
|
|
419
516
|
def scipy_available() -> bool:
|
|
420
517
|
"""
|
|
421
|
-
Returns
|
|
518
|
+
Returns whether `scipy` is installed.
|
|
519
|
+
|
|
520
|
+
Returns:
|
|
521
|
+
bool: `True` if `scipy` is installed, `False` otherwise.
|
|
422
522
|
"""
|
|
423
523
|
return bool(_SCIPY_AVAILABLE)
|
|
424
524
|
|
|
@@ -430,14 +530,20 @@ _JDESKEW_ERR_MSG = f"jdeskew must be installed. {_GENERIC_ERR_MSG}"
|
|
|
430
530
|
|
|
431
531
|
def jdeskew_available() -> bool:
|
|
432
532
|
"""
|
|
433
|
-
Returns
|
|
533
|
+
Returns whether `jdeskew` is installed.
|
|
534
|
+
|
|
535
|
+
Returns:
|
|
536
|
+
bool: `True` if `jdeskew` is installed, `False` otherwise.
|
|
434
537
|
"""
|
|
435
538
|
return bool(_JDESKEW_AVAILABLE)
|
|
436
539
|
|
|
437
540
|
|
|
438
541
|
def get_jdeskew_requirement() -> Requirement:
|
|
439
542
|
"""
|
|
440
|
-
Returns jdeskew requirement.
|
|
543
|
+
Returns the `jdeskew` requirement.
|
|
544
|
+
|
|
545
|
+
Returns:
|
|
546
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
441
547
|
"""
|
|
442
548
|
return "jdeskew", jdeskew_available(), _JDESKEW_ERR_MSG
|
|
443
549
|
|
|
@@ -449,14 +555,20 @@ _SKLEARN_ERR_MSG = f"scikit-learn must be installed. {_GENERIC_ERR_MSG}"
|
|
|
449
555
|
|
|
450
556
|
def sklearn_available() -> bool:
|
|
451
557
|
"""
|
|
452
|
-
Returns
|
|
558
|
+
Returns whether `sklearn` is installed.
|
|
559
|
+
|
|
560
|
+
Returns:
|
|
561
|
+
bool: `True` if `sklearn` is installed, `False` otherwise.
|
|
453
562
|
"""
|
|
454
563
|
return bool(_SKLEARN_AVAILABLE)
|
|
455
564
|
|
|
456
565
|
|
|
457
566
|
def get_sklearn_requirement() -> Requirement:
|
|
458
567
|
"""
|
|
459
|
-
Returns sklearn requirement.
|
|
568
|
+
Returns the `sklearn` requirement.
|
|
569
|
+
|
|
570
|
+
Returns:
|
|
571
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
460
572
|
"""
|
|
461
573
|
return "sklearn", sklearn_available(), _SKLEARN_ERR_MSG
|
|
462
574
|
|
|
@@ -467,7 +579,10 @@ _QPDF_AVAILABLE = which("qpdf") is not None
|
|
|
467
579
|
|
|
468
580
|
def qpdf_available() -> bool:
|
|
469
581
|
"""
|
|
470
|
-
Returns
|
|
582
|
+
Returns whether `qpdf` is installed.
|
|
583
|
+
|
|
584
|
+
Returns:
|
|
585
|
+
bool: `True` if `qpdf` is installed, `False` otherwise.
|
|
471
586
|
"""
|
|
472
587
|
return bool(_QPDF_AVAILABLE)
|
|
473
588
|
|
|
@@ -482,7 +597,10 @@ _AWS_ERR_MSG = "AWS CLI must be installed https://docs.aws.amazon.com/cli/latest
|
|
|
482
597
|
|
|
483
598
|
def boto3_available() -> bool:
|
|
484
599
|
"""
|
|
485
|
-
Returns
|
|
600
|
+
Returns whether `boto3` is installed.
|
|
601
|
+
|
|
602
|
+
Returns:
|
|
603
|
+
bool: `True` if `boto3` is installed, `False` otherwise.
|
|
486
604
|
"""
|
|
487
605
|
|
|
488
606
|
return bool(_BOTO3_AVAILABLE)
|
|
@@ -490,21 +608,30 @@ def boto3_available() -> bool:
|
|
|
490
608
|
|
|
491
609
|
def get_boto3_requirement() -> Requirement:
|
|
492
610
|
"""
|
|
493
|
-
|
|
611
|
+
Returns the `boto3` requirement.
|
|
612
|
+
|
|
613
|
+
Returns:
|
|
614
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
494
615
|
"""
|
|
495
616
|
return "boto3", boto3_available(), _BOTO3_ERR_MSG
|
|
496
617
|
|
|
497
618
|
|
|
498
619
|
def aws_available() -> bool:
|
|
499
620
|
"""
|
|
500
|
-
Returns
|
|
621
|
+
Returns whether AWS CLI is installed.
|
|
622
|
+
|
|
623
|
+
Returns:
|
|
624
|
+
bool: `True` if AWS CLI is installed, `False` otherwise.
|
|
501
625
|
"""
|
|
502
626
|
return bool(_AWS_CLI_AVAILABLE)
|
|
503
627
|
|
|
504
628
|
|
|
505
629
|
def get_aws_requirement() -> Requirement:
|
|
506
630
|
"""
|
|
507
|
-
|
|
631
|
+
Returns the AWS CLI requirement.
|
|
632
|
+
|
|
633
|
+
Returns:
|
|
634
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
508
635
|
"""
|
|
509
636
|
return "aws", aws_available(), _AWS_ERR_MSG
|
|
510
637
|
|
|
@@ -516,14 +643,24 @@ _DOCTR_ERR_MSG = f"DocTr must be installed. {_GENERIC_ERR_MSG}"
|
|
|
516
643
|
|
|
517
644
|
def doctr_available() -> bool:
|
|
518
645
|
"""
|
|
519
|
-
Returns
|
|
646
|
+
Returns whether `doctr` is installed.
|
|
647
|
+
|
|
648
|
+
Returns:
|
|
649
|
+
bool: `True` if `doctr` is installed, `False` otherwise.
|
|
520
650
|
"""
|
|
521
651
|
return bool(_DOCTR_AVAILABLE)
|
|
522
652
|
|
|
523
653
|
|
|
524
654
|
def get_doctr_requirement() -> Requirement:
|
|
525
655
|
"""
|
|
526
|
-
|
|
656
|
+
Returns the `doctr` requirement.
|
|
657
|
+
|
|
658
|
+
Returns:
|
|
659
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
660
|
+
|
|
661
|
+
Note:
|
|
662
|
+
On macOS, if `poppler` is not available, this function will recursively check the requirement.
|
|
663
|
+
It is not yet known how to check whether `pango`, `gdk-pixbuf`, and `libffi` are installed.
|
|
527
664
|
"""
|
|
528
665
|
if sys.platform == "darwin":
|
|
529
666
|
if not get_poppler_version():
|
|
@@ -542,14 +679,20 @@ _FASTTEXT_ERR_MSG = f"fasttext must be installed. {_GENERIC_ERR_MSG}"
|
|
|
542
679
|
|
|
543
680
|
def fasttext_available() -> bool:
|
|
544
681
|
"""
|
|
545
|
-
Returns
|
|
682
|
+
Returns whether `fasttext` is installed.
|
|
683
|
+
|
|
684
|
+
Returns:
|
|
685
|
+
bool: `True` if `fasttext` is installed, False otherwise.
|
|
546
686
|
"""
|
|
547
687
|
return bool(_FASTTEXT_AVAILABLE)
|
|
548
688
|
|
|
549
689
|
|
|
550
690
|
def get_fasttext_requirement() -> Requirement:
|
|
551
691
|
"""
|
|
552
|
-
|
|
692
|
+
Returns the `fasttext` requirement.
|
|
693
|
+
|
|
694
|
+
Returns:
|
|
695
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
553
696
|
"""
|
|
554
697
|
return "fasttext", fasttext_available(), _FASTTEXT_ERR_MSG
|
|
555
698
|
|
|
@@ -561,14 +704,20 @@ _WANDB_ERR_MSG = f"WandB must be installed. {_GENERIC_ERR_MSG}"
|
|
|
561
704
|
|
|
562
705
|
def wandb_available() -> bool:
|
|
563
706
|
"""
|
|
564
|
-
Returns
|
|
707
|
+
Returns whether the W&B package `wandb` is installed.
|
|
708
|
+
|
|
709
|
+
Returns:
|
|
710
|
+
bool: `True` if `wandb` is installed, `False` otherwise.
|
|
565
711
|
"""
|
|
566
712
|
return bool(_WANDB_AVAILABLE)
|
|
567
713
|
|
|
568
714
|
|
|
569
715
|
def get_wandb_requirement() -> Requirement:
|
|
570
716
|
"""
|
|
571
|
-
|
|
717
|
+
Returns the W&B requirement.
|
|
718
|
+
|
|
719
|
+
Returns:
|
|
720
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
572
721
|
"""
|
|
573
722
|
return "wandb", wandb_available(), _WANDB_ERR_MSG
|
|
574
723
|
|
|
@@ -585,14 +734,20 @@ _CV2_ERR_MSG = f"OpenCV must be installed. {_GENERIC_ERR_MSG}"
|
|
|
585
734
|
|
|
586
735
|
def opencv_available() -> bool:
|
|
587
736
|
"""
|
|
588
|
-
Returns
|
|
737
|
+
Returns whether OpenCV is installed.
|
|
738
|
+
|
|
739
|
+
Returns:
|
|
740
|
+
bool: `True` if OpenCV is installed, `False` otherwise.
|
|
589
741
|
"""
|
|
590
742
|
return bool(_CV2_AVAILABLE)
|
|
591
743
|
|
|
592
744
|
|
|
593
745
|
def get_opencv_requirement() -> Requirement:
|
|
594
746
|
"""
|
|
595
|
-
|
|
747
|
+
Returns the OpenCV requirement.
|
|
748
|
+
|
|
749
|
+
Returns:
|
|
750
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
596
751
|
"""
|
|
597
752
|
return "opencv", opencv_available(), _CV2_ERR_MSG
|
|
598
753
|
|
|
@@ -604,14 +759,20 @@ _PILLOW_ERR_MSG = f"pillow must be installed. {_GENERIC_ERR_MSG}"
|
|
|
604
759
|
|
|
605
760
|
def pillow_available() -> bool:
|
|
606
761
|
"""
|
|
607
|
-
Returns
|
|
762
|
+
Returns whether Pillow is installed.
|
|
763
|
+
|
|
764
|
+
Returns:
|
|
765
|
+
bool: `True` if Pillow is installed, False otherwise.
|
|
608
766
|
"""
|
|
609
767
|
return bool(_PILLOW_AVAILABLE)
|
|
610
768
|
|
|
611
769
|
|
|
612
770
|
def get_pillow_requirement() -> Requirement:
|
|
613
771
|
"""
|
|
614
|
-
|
|
772
|
+
Returns the Pillow requirement.
|
|
773
|
+
|
|
774
|
+
Returns:
|
|
775
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
615
776
|
"""
|
|
616
777
|
return "pillow", pillow_available(), _PILLOW_ERR_MSG
|
|
617
778
|
|
|
@@ -623,14 +784,20 @@ _PYPDFIUM2_ERR_MSG = f"pypdfium2 must be installed. {_GENERIC_ERR_MSG}"
|
|
|
623
784
|
|
|
624
785
|
def pypdfium2_available() -> bool:
|
|
625
786
|
"""
|
|
626
|
-
Returns
|
|
787
|
+
Returns whether `pypdfium2` is installed.
|
|
788
|
+
|
|
789
|
+
Returns:
|
|
790
|
+
bool: `True` if `pypdfium2` is installed, `False` otherwise.
|
|
627
791
|
"""
|
|
628
792
|
return bool(_PYPDFIUM2_AVAILABLE)
|
|
629
793
|
|
|
630
794
|
|
|
631
795
|
def get_pypdfium2_requirement() -> Requirement:
|
|
632
796
|
"""
|
|
633
|
-
|
|
797
|
+
Returns the `pypdfium2` requirement.
|
|
798
|
+
|
|
799
|
+
Returns:
|
|
800
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
634
801
|
"""
|
|
635
802
|
return "pypdfium2", pypdfium2_available(), _PYPDFIUM2_ERR_MSG
|
|
636
803
|
|
|
@@ -642,7 +809,10 @@ _SPACY_ERR_MSG = f"SpaCy must be installed. {_GENERIC_ERR_MSG}"
|
|
|
642
809
|
|
|
643
810
|
def spacy_available() -> bool:
|
|
644
811
|
"""
|
|
645
|
-
Returns
|
|
812
|
+
Returns whether SpaCy is installed.
|
|
813
|
+
|
|
814
|
+
Returns:
|
|
815
|
+
bool: True if SpaCy is installed, False otherwise.
|
|
646
816
|
"""
|
|
647
817
|
|
|
648
818
|
return bool(_SPACY_AVAILABLE)
|
|
@@ -650,20 +820,21 @@ def spacy_available() -> bool:
|
|
|
650
820
|
|
|
651
821
|
def get_spacy_requirement() -> Requirement:
|
|
652
822
|
"""
|
|
653
|
-
|
|
823
|
+
Returns the SpaCy requirement.
|
|
824
|
+
|
|
825
|
+
Returns:
|
|
826
|
+
tuple: A tuple containing the package name, whether the requirement is satisfied, and an error message.
|
|
654
827
|
"""
|
|
655
828
|
return "spacy", spacy_available(), _SPACY_ERR_MSG
|
|
656
829
|
|
|
657
830
|
|
|
658
831
|
def set_mp_spawn() -> None:
|
|
659
832
|
"""
|
|
660
|
-
Sets multiprocessing method to "spawn".
|
|
833
|
+
Sets the multiprocessing method to "spawn".
|
|
661
834
|
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
produce more deterministic behavior & memory saving
|
|
666
|
-
However its limitation is you cannot pass a lambda function to subprocesses.
|
|
835
|
+
Note:
|
|
836
|
+
"spawn/forkserver" is safer than the default "fork" method and produces more deterministic behavior and memory
|
|
837
|
+
saving. However, its limitation is that you cannot pass a lambda function to subprocesses.
|
|
667
838
|
"""
|
|
668
839
|
|
|
669
840
|
if not _S.mp_context_set:
|
|
@@ -679,10 +850,23 @@ def set_mp_spawn() -> None:
|
|
|
679
850
|
class _LazyModule(ModuleType):
|
|
680
851
|
"""
|
|
681
852
|
Module class that surfaces all objects but only performs associated imports when the objects are requested.
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
Note:
|
|
856
|
+
This class is needed for autocompletion in an IDE.
|
|
682
857
|
"""
|
|
683
858
|
|
|
684
859
|
@no_type_check
|
|
685
860
|
def __init__(self, name, module_file, import_structure, module_spec=None, extra_objects=None):
|
|
861
|
+
"""
|
|
862
|
+
Args:
|
|
863
|
+
name: The name of the module.
|
|
864
|
+
module_file: The file path of the module.
|
|
865
|
+
import_structure: The import structure dictionary.
|
|
866
|
+
module_spec: The module specification.
|
|
867
|
+
extra_objects: Additional objects to include.
|
|
868
|
+
|
|
869
|
+
"""
|
|
686
870
|
super().__init__(name)
|
|
687
871
|
self._modules = set(import_structure.keys())
|
|
688
872
|
self._class_to_module = {}
|