deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +16 -29
- deepdoctection/analyzer/dd.py +70 -59
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +41 -56
- deepdoctection/datapoint/box.py +9 -8
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +56 -44
- deepdoctection/datapoint/view.py +245 -150
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +35 -26
- deepdoctection/datasets/base.py +14 -12
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +24 -26
- deepdoctection/datasets/instances/doclaynet.py +51 -51
- deepdoctection/datasets/instances/fintabnet.py +46 -46
- deepdoctection/datasets/instances/funsd.py +25 -24
- deepdoctection/datasets/instances/iiitar13k.py +13 -10
- deepdoctection/datasets/instances/layouttest.py +4 -3
- deepdoctection/datasets/instances/publaynet.py +5 -5
- deepdoctection/datasets/instances/pubtables1m.py +24 -21
- deepdoctection/datasets/instances/pubtabnet.py +32 -30
- deepdoctection/datasets/instances/rvlcdip.py +30 -30
- deepdoctection/datasets/instances/xfund.py +26 -26
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +15 -13
- deepdoctection/eval/eval.py +41 -37
- deepdoctection/eval/tedsmetric.py +30 -23
- deepdoctection/eval/tp_eval_callback.py +16 -19
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +85 -113
- deepdoctection/extern/deskew.py +14 -11
- deepdoctection/extern/doctrocr.py +141 -130
- deepdoctection/extern/fastlang.py +27 -18
- deepdoctection/extern/hfdetr.py +71 -62
- deepdoctection/extern/hflayoutlm.py +504 -211
- deepdoctection/extern/hflm.py +230 -0
- deepdoctection/extern/model.py +488 -302
- deepdoctection/extern/pdftext.py +23 -19
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +29 -19
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +18 -18
- deepdoctection/extern/tp/tfutils.py +57 -9
- deepdoctection/extern/tp/tpcompat.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +45 -53
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/cats.py +27 -29
- deepdoctection/mapper/cocostruct.py +10 -10
- deepdoctection/mapper/d2struct.py +27 -26
- deepdoctection/mapper/hfstruct.py +13 -8
- deepdoctection/mapper/laylmstruct.py +178 -37
- deepdoctection/mapper/maputils.py +12 -11
- deepdoctection/mapper/match.py +2 -2
- deepdoctection/mapper/misc.py +11 -9
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +5 -5
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +5 -5
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +12 -14
- deepdoctection/pipe/base.py +52 -106
- deepdoctection/pipe/common.py +72 -59
- deepdoctection/pipe/concurrency.py +16 -11
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +20 -16
- deepdoctection/pipe/lm.py +75 -105
- deepdoctection/pipe/order.py +194 -89
- deepdoctection/pipe/refine.py +111 -124
- deepdoctection/pipe/segment.py +156 -161
- deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +48 -41
- deepdoctection/train/hf_detr_train.py +41 -30
- deepdoctection/train/hf_layoutlm_train.py +153 -135
- deepdoctection/train/tp_frcnn_train.py +32 -31
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +87 -125
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +22 -18
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +16 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +11 -11
- deepdoctection/utils/settings.py +185 -181
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +74 -72
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
- deepdoctection-0.33.dist-info/RECORD +146 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
deepdoctection/utils/env_info.py
CHANGED
@@ -46,16 +46,16 @@ can store an (absolute) path to a `.jsonl` file.
 
 """
 
-import ast
 import importlib
 import os
 import re
 import subprocess
 import sys
 from collections import defaultdict
-from typing import
+from typing import Optional
 
 import numpy as np
+from packaging import version
 from tabulate import tabulate
 
 from .file_utils import (
@@ -68,6 +68,7 @@ from .file_utils import (
     fasttext_available,
     get_poppler_version,
     get_tesseract_version,
+    get_tf_version,
     jdeskew_available,
     lxml_available,
     opencv_available,
@@ -85,17 +86,14 @@ from .file_utils import (
     wandb_available,
 )
 from .logger import LoggingRecord, logger
+from .types import KeyValEnvInfos, PathLikeOrStr
 
-__all__ = [
-    "collect_torch_env",
-    "collect_env_info",
-    "get_device",
-    "auto_select_lib_and_device",
-    "auto_select_viz_library",
-]
+__all__ = ["collect_env_info", "auto_select_viz_library", "ENV_VARS_TRUE"]
 
 # pylint: disable=import-outside-toplevel
 
+ENV_VARS_TRUE: set[str] = {"1", "True", "TRUE", "true", "yes"}
+
 
 def collect_torch_env() -> str:
     """Wrapper for torch.utils.collect_env.get_pretty_env_info"""
@@ -110,7 +108,7 @@ def collect_torch_env() -> str:
     return get_pretty_env_info()
 
 
-def collect_installed_dependencies(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+def collect_installed_dependencies(data: KeyValEnvInfos) -> KeyValEnvInfos:
     """Collect installed dependencies for all third party libraries.
 
     :param data: A list of tuples to dump all collected package information such as the name and the version
@@ -235,7 +233,7 @@ def collect_installed_dependencies(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     return data
 
 
-def detect_compute_compatibility(cuda_home: Optional[str], so_file: Optional[str]) -> str:
+def detect_compute_compatibility(cuda_home: Optional[PathLikeOrStr], so_file: Optional[PathLikeOrStr]) -> str:
     """
     Detect the compute compatibility of a CUDA library.
 
@@ -261,7 +259,7 @@ def detect_compute_compatibility(cuda_home: Optional[str], so_file: Optional[str]) -> str:
 
 
 # Copied from https://github.com/tensorpack/tensorpack/blob/master/tensorpack/tfutils/collect_env.py
-def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+def tf_info(data: KeyValEnvInfos) -> KeyValEnvInfos:
     """Returns a list of (key, value) pairs containing tensorflow information.
 
     :param data: A list of tuples to dump all collected package information such as the name and the version
@@ -270,21 +268,42 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     if tf_available():
         import tensorflow as tf  # type: ignore # pylint: disable=E0401
 
+        os.environ["TENSORFLOW_AVAILABLE"] = "1"
+
         data.append(("Tensorflow", tf.__version__))
+        if version.parse(get_tf_version()) > version.parse("2.4.1"):
+            os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+            try:
+                import tensorflow.python.util.deprecation as deprecation  # type: ignore # pylint: disable=E0401,R0402,E0611
+
+                deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+            except Exception:  # pylint: disable=W0703
+                try:
+                    from tensorflow.python.util import deprecation  # type: ignore # pylint: disable=E0401,E0611
+
+                    deprecation._PRINT_DEPRECATION_WARNINGS = False  # pylint: disable=W0212
+                except Exception:  # pylint: disable=W0703
+                    pass
     else:
         data.append(("Tensorflow", "None"))
         return data
 
-    from tensorflow.python.platform import build_info  # type: ignore # pylint: disable=E0401
+    from tensorflow.python.platform import build_info  # type: ignore # pylint: disable=E0401,E0611
 
     try:
         for key, value in list(build_info.build_info.items()):
-            if key == "cuda_version":
+            if key == "is_cuda_build":
+                data.append(("TF compiled with CUDA", value))
+                if value and len(tf.config.list_physical_devices("GPU")):
+                    os.environ["USE_CUDA"] = "1"
+            elif key == "cuda_version":
                 data.append(("TF built with CUDA", value))
             elif key == "cudnn_version":
                 data.append(("TF built with CUDNN", value))
             elif key == "cuda_compute_capabilities":
                 data.append(("TF compute capabilities", ",".join([k.replace("compute_", "") for k in value])))
+            elif key == "is_rocm_build":
+                data.append(("TF compiled with ROCM", value))
         return data
     except AttributeError:
         pass
@@ -297,7 +316,7 @@ def tf_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
 
 # Heavily inspired by https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/collect_env.py
-def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+def pt_info(data: KeyValEnvInfos) -> KeyValEnvInfos:
    """Returns a list of (key, value) pairs containing Pytorch information.
 
     :param data: A list of tuples to dump all collected package information such as the name and the version
@@ -306,6 +325,13 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
 
     if pytorch_available():
         import torch
+
+        os.environ["PYTORCH_AVAILABLE"] = "1"
+
+    else:
+        data.append(("PyTorch", "None"))
+        return []
+
     has_gpu = torch.cuda.is_available()  # true for both CUDA & ROCM
     has_mps = torch.backends.mps.is_available()
 
@@ -331,12 +357,9 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     data.append(("PyTorch", torch_version + " @" + os.path.dirname(torch.__file__)))
     data.append(("PyTorch debug build", str(torch.version.debug)))
 
-    if not has_gpu:
-        has_gpu_text = "No: torch.cuda.is_available() == False"
-    else:
-        has_gpu_text = "Yes"
-    data.append(("GPU available", has_gpu_text))
     if has_gpu:
+        os.environ["USE_CUDA"] = "1"
+        has_gpu_text = "Yes"
         devices = defaultdict(list)
         for k in range(torch.cuda.device_count()):
             cap = ".".join((str(x) for x in torch.cuda.get_device_capability(k)))
@@ -362,6 +385,10 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
         cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None)
         if cuda_arch_list:
             data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list))
+    else:
+        has_gpu_text = "No: torch.cuda.is_available() == False"
+
+    data.append(("GPU available", has_gpu_text))
 
     mps_build = "No: torch.backends.mps.is_built() == False"
     if not has_mps:
@@ -369,9 +396,11 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     else:
         has_mps_text = "Yes"
         mps_build = str(torch.backends.mps.is_built())
+        if mps_build == "True":
+            os.environ["USE_MPS"] = "1"
 
     data.append(("MPS available", has_mps_text))
-    data.append(("MPS
+    data.append(("MPS built", mps_build))
 
     try:
         import torchvision  # type: ignore
@@ -395,6 +424,42 @@ def pt_info(data: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
     return data
 
 
+def set_dl_env_vars() -> None:
+    """Set the environment variables that steer the selection of the DL framework.
+    If both PyTorch and TensorFlow are available, PyTorch will be selected by default.
+    It is possible that for testing purposes, e.g. on Colab you can find yourself with a pre-installed Tensorflow
+    version. If you want to enforce PyTorch you must set:
+
+        os.environ["DD_USE_TORCH"] = "1"
+        os.environ["USE_TORCH"] = "1"  # necessary if you make use of DocTr's OCR engine
+        os.environ["DD_USE_TF"] = "0"
+        os.environ["USE_TF"] = "0"  # it's better to explcitly disable Tensorflow
+
+
+    """
+
+    if os.environ.get("PYTORCH_AVAILABLE") and os.environ.get("DD_USE_TORCH") is None:
+        os.environ["DD_USE_TORCH"] = "1"
+        os.environ["USE_TORCH"] = "1"
+    if os.environ.get("TENSORFLOW_AVAILABLE") and os.environ.get("DD_USE_TF") is None:
+        os.environ["DD_USE_TF"] = "1"
+        os.environ["USE_TF"] = "1"
+
+    if os.environ.get("DD_USE_TORCH", "0") in ENV_VARS_TRUE and os.environ.get("DD_USE_TF", "0") in ENV_VARS_TRUE:
+        logger.warning(
+            "Both DD_USE_TORCH and DD_USE_TF are set. Defaulting to PyTorch. If you want a different "
+            "behaviour, set DD_USE_TORCH to None before importing deepdoctection."
+        )
+        os.environ["DD_USE_TF"] = "0"
+        os.environ["USE_TF"] = "0"
+
+    if (
+        os.environ.get("PYTORCH_AVAILABLE") not in ENV_VARS_TRUE
+        and os.environ.get("TENSORFLOW_AVAILABLE") not in ENV_VARS_TRUE
+    ):
+        logger.warning(LoggingRecord(msg="Neither Tensorflow or Pytorch are available."))
+
+
 def collect_env_info() -> str:
     """
 
@@ -441,6 +506,7 @@ def collect_env_info() -> str:
 
     data = pt_info(data)
     data = tf_info(data)
+    set_dl_env_vars()
 
     data = collect_installed_dependencies(data)
 
@@ -452,110 +518,6 @@ def collect_env_info() -> str:
     return env_str
 
 
-def set_env(name: str, value: str) -> None:
-    """
-    Set an environment variable if it is not already set.
-
-    :param name: The name of the environment variable
-    :param value: The value of the environment variable
-    """
-
-    if os.environ.get(name):
-        return
-    os.environ[name] = value
-    return
-
-
-def auto_select_lib_and_device() -> None:
-    """
-    Select the DL library and subsequently the device.
-    This will set environment variable `USE_TENSORFLOW`, `USE_PYTORCH` and `USE_CUDA`
-
-    If TF is available, use TF unless a GPU is not available, in which case choose PT. If CUDA is not available and PT
-    is not installed raise ImportError.
-    """
-
-    # USE_TF and USE_TORCH are env variables that steer DL library selection for Doctr.
-    if tf_available() and tensorpack_available():
-        from tensorpack.utils.gpu import get_num_gpu  # pylint: disable=E0401
-
-        if get_num_gpu() >= 1:
-            set_env("USE_TENSORFLOW", "True")
-            set_env("USE_PYTORCH", "False")
-            set_env("USE_CUDA", "True")
-            set_env("USE_MPS", "False")
-            set_env("USE_TF", "TRUE")
-            set_env("USE_TORCH", "False")
-            return
-        if pytorch_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "False")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        logger.warning(
-            LoggingRecord("You have Tensorflow installed but no GPU is available. All Tensorflow models require a GPU.")
-        )
-        if tf_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "False")
-            set_env("USE_CUDA", "False")
-            set_env("USE_TF", "AUTO")
-            set_env("USE_TORCH", "AUTO")
-            return
-
-    if pytorch_available():
-        import torch
-
-        if torch.cuda.is_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "True")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        if torch.backends.mps.is_available():
-            set_env("USE_TENSORFLOW", "False")
-            set_env("USE_PYTORCH", "True")
-            set_env("USE_CUDA", "False")
-            set_env("USE_MPS", "True")
-            set_env("USE_TF", "False")
-            set_env("USE_TORCH", "TRUE")
-            return
-        set_env("USE_TENSORFLOW", "False")
-        set_env("USE_PYTORCH", "True")
-        set_env("USE_CUDA", "False")
-        set_env("USE_MPS", "False")
-        set_env("USE_TF", "AUTO")
-        set_env("USE_TORCH", "AUTO")
-        return
-    logger.warning(
-        LoggingRecord(
-            "Neither Tensorflow or Pytorch are available. You will not be able to use any Deep Learning "
-            "model from the library."
-        )
-    )
-
-
-def get_device(ignore_cpu: bool = True) -> Literal["cuda", "mps", "cpu"]:
-    """
-    Device checks for running PyTorch with CUDA, MPS or optionall CPU.
-    If nothing can be found and if `disable_cpu` is deactivated it will raise a `ValueError`
-
-    :param ignore_cpu: Will not consider `cpu` as valid return value
-    :return: Either cuda or mps
-    """
-
-    if ast.literal_eval(os.environ.get("USE_CUDA", "True")):
-        return "cuda"
-    if ast.literal_eval(os.environ.get("USE_MPS", "True")):
-        return "mps"
-    if not ignore_cpu:
-        return "cpu"
-    raise RuntimeWarning("Could not find either GPU nor MPS")
-
-
 def auto_select_viz_library() -> None:
     """Setting PIL as default image library if cv2 is not installed"""
 
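The removed `auto_select_lib_and_device`/`get_device` helpers are replaced by `set_dl_env_vars`, which drives framework selection through the `DD_USE_TORCH`/`DD_USE_TF` (and DocTr's `USE_TORCH`/`USE_TF`) environment variables. A minimal sketch of forcing PyTorch on a machine that also has TensorFlow installed, following the docstring above; the top-level import line is only illustrative:

    import os

    # Set the switches before deepdoctection is imported; any value listed in
    # ENV_VARS_TRUE ("1", "True", "true", "yes") counts as enabled.
    os.environ["DD_USE_TORCH"] = "1"
    os.environ["USE_TORCH"] = "1"   # also needed for DocTr's OCR engine
    os.environ["DD_USE_TF"] = "0"
    os.environ["USE_TF"] = "0"      # explicitly disable TensorFlow

    import deepdoctection as dd  # noqa: F401  # selection is picked up at import time
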
deepdoctection/utils/file_utils.py
CHANGED

@@ -16,15 +16,15 @@ import sys
 from os import environ, path
 from shutil import which
 from types import ModuleType
-from typing import Any,
+from typing import Any, Union, no_type_check
 
 import importlib_metadata
 from packaging import version
 
-from .detection_types import Requirement
 from .error import DependencyError
 from .logger import LoggingRecord, logger
 from .metacfg import AttrDict
+from .types import PathLikeOrStr, Requirement
 
 _GENERIC_ERR_MSG = "Please check the required version either in the docs or in the setup file"
 
@@ -52,7 +52,7 @@ def get_tf_version() -> str:
     """
     tf_version = "0.0"
     if tf_available():
-        candidates:
+        candidates: tuple[str, ...] = (
            "tensorflow",
            "tensorflow-cpu",
            "tensorflow-gpu",
@@ -250,31 +250,26 @@ def get_detectron2_requirement() -> Requirement:
 # Tesseract related dependencies
 _TESS_AVAILABLE = which("tesseract") is not None
 # Tesseract installation path
-_TESS_PATH = "tesseract"
+_TESS_PATH: PathLikeOrStr = "tesseract"
 _TESS_ERR_MSG = (
     "Tesseract >=4.0 must be installed. Please follow the official installation instructions. "
     "https://tesseract-ocr.github.io/tessdoc/Installation.html"
 )
 
 
-def set_tesseract_path(tesseract_path:
+def set_tesseract_path(tesseract_path: PathLikeOrStr) -> None:
     """Set the Tesseract path. If you have tesseract installed in Anaconda,
     you can use this function to set tesseract path.
 
     :param tesseract_path: Tesseract installation path.
     """
-    if tesseract_path is None:
-        raise TypeError("tesseract_path cannot be None")
 
     global _TESS_AVAILABLE  # pylint: disable=W0603
     global _TESS_PATH  # pylint: disable=W0603
 
     tesseract_flag = which(tesseract_path)
 
-    if tesseract_flag is None:
-        _TESS_AVAILABLE = False
-    else:
-        _TESS_AVAILABLE = True
+    _TESS_AVAILABLE = False if tesseract_flag is not None else True  # pylint: disable=W0603,R1719
 
     _TESS_PATH = tesseract_path
 
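`set_tesseract_path` now accepts any `PathLikeOrStr` and the explicit `None` check is gone. A minimal usage sketch; the conda path below is hypothetical:

    from pathlib import Path

    from deepdoctection.utils.file_utils import set_tesseract_path

    # Point deepdoctection to a Tesseract binary that is not on PATH,
    # e.g. one living inside a conda environment (hypothetical location).
    set_tesseract_path(Path.home() / "miniconda3" / "envs" / "dd" / "bin" / "tesseract")
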
deepdoctection/utils/fs.py
CHANGED
@@ -28,12 +28,12 @@ from pathlib import Path
 from typing import Callable, Literal, Optional, Protocol, Union, overload
 from urllib.request import urlretrieve
 
-from .detection_types import ImageType, JsonDict, Pathlike
 from .develop import deprecated
 from .logger import LoggingRecord, logger
 from .pdf_utils import get_pdf_file_reader, get_pdf_file_writer
 from .settings import CONFIGS, DATASET_DIR, MODEL_DIR, PATH
 from .tqdm import get_tqdm
+from .types import B64, B64Str, JsonDict, PathLikeOrStr, PixelValues
 from .utils import is_file_extension
 from .viz import viz_handler
 
@@ -66,7 +66,7 @@ def sizeof_fmt(num: float, suffix: str = "B") -> str:
 
 # Copyright (c) Tensorpack Contributors
 # Licensed under the Apache License, Version 2.0 (the "License")
-def mkdir_p(dir_name: Pathlike) -> None:
+def mkdir_p(dir_name: PathLikeOrStr) -> None:
     """
     Like "mkdir -p", make a dir recursively, but do nothing if the dir exists
 
@@ -84,7 +84,9 @@ def mkdir_p(dir_name: Pathlike) -> None:
 
 # Copyright (c) Tensorpack Contributors
 # Licensed under the Apache License, Version 2.0 (the "License")
-def download(url: str, directory: Pathlike, file_name: Optional[str] = None, expect_size: Optional[int] = None) -> str:
+def download(
+    url: str, directory: PathLikeOrStr, file_name: Optional[str] = None, expect_size: Optional[int] = None
+) -> str:
     """
     Download URL to a directory. Will figure out the filename automatically from URL, if not given.
     """
@@ -133,16 +135,18 @@ def download(url: str, directory: Pathlike, file_name: Optional[str] = None, expect_size: Optional[int] = None) -> str:
 
 
 @overload
-def load_image_from_file(path:
+def load_image_from_file(path: PathLikeOrStr, type_id: Literal["np"] = "np") -> Optional[PixelValues]:
     ...
 
 
 @overload
-def load_image_from_file(path:
+def load_image_from_file(path: PathLikeOrStr, type_id: Literal["b64"]) -> Optional[B64Str]:
     ...
 
 
-def load_image_from_file(
+def load_image_from_file(
+    path: PathLikeOrStr, type_id: Literal["np", "b64"] = "np"
+) -> Optional[Union[B64Str, PixelValues]]:
     """
     Loads an image from path and passes back an encoded base64 string, a numpy array or None if file is not found
     or a conversion error occurs.
@@ -151,7 +155,7 @@ def load_image_from_file(path: Pathlike, type_id: Literal["np", "b64"] = "np") -
     :param type_id: "np" or "b64".
     :return: image of desired representation
     """
-    image: Optional[Union[str,
+    image: Optional[Union[str, PixelValues]] = None
     path = path.as_posix() if isinstance(path, Path) else path
 
     assert is_file_extension(path, [".png", ".jpeg", ".jpg", ".tif"]), f"image type not allowed: {path}"
@@ -169,7 +173,7 @@ def load_image_from_file(path: Pathlike, type_id: Literal["np", "b64"] = "np") -
     return image
 
 
-def load_bytes_from_pdf_file(path:
+def load_bytes_from_pdf_file(path: PathLikeOrStr, page_number: int = 0) -> B64:
     """
     Loads a pdf file with one single page and passes back a bytes' representation of this file. Can be converted into
     a numpy or directly passed to the attr: image of Image.
@@ -194,13 +198,13 @@ class LoadImageFunc(Protocol):
     Protocol for typing load_image_from_file
     """
 
-    def __call__(self, path:
+    def __call__(self, path: PathLikeOrStr) -> Optional[PixelValues]:
         ...
 
 
 def get_load_image_func(
-    path:
-) -> Union[LoadImageFunc, Callable[[
+    path: PathLikeOrStr,
+) -> Union[LoadImageFunc, Callable[[PathLikeOrStr], B64]]:
     """
     Return the loading function according to its file extension.
 
@@ -219,7 +223,7 @@ def get_load_image_func(
     )
 
 
-def maybe_path_or_pdf(path: Pathlike) -> int:
+def maybe_path_or_pdf(path: PathLikeOrStr) -> int:
     """
     Checks if the path points to a directory or a pdf document. Returns 1 if the path points to a directory, 2
     if the path points to a pdf doc or 0, if none of the previous is true.
@@ -238,7 +242,7 @@ def maybe_path_or_pdf(path: Pathlike) -> int:
     return 0
 
 
-def load_json(path_ann: Pathlike) -> JsonDict:
+def load_json(path_ann: PathLikeOrStr) -> JsonDict:
     """
     Loading json file
 
@@ -250,28 +254,28 @@ def load_json(path_ann: Pathlike) -> JsonDict:
     return json_dict
 
 
-def get_package_path() ->
+def get_package_path() -> PathLikeOrStr:
     """
     :return: full base path of this package
     """
     return PATH
 
 
-def get_weights_dir_path() ->
+def get_weights_dir_path() -> PathLikeOrStr:
     """
     :return: full base path to the model dir
     """
     return MODEL_DIR
 
 
-def get_configs_dir_path() ->
+def get_configs_dir_path() -> PathLikeOrStr:
     """
     :return: full base path to the configs dir
     """
     return CONFIGS
 
 
-def get_dataset_dir_path() -> Path:
+def get_dataset_dir_path() -> PathLikeOrStr:
     """
     :return: full base path to the dataset dir
     """
@@ -279,7 +283,7 @@ def get_dataset_dir_path() -> Path:
 
 
 @deprecated("Use pathlib operations instead", "2022-06-08")
-def sub_path(anchor_dir:
+def sub_path(anchor_dir: PathLikeOrStr, *paths: PathLikeOrStr) -> PathLikeOrStr:
     """
     Generate a path from the anchor directory and various paths args.
 
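The `fs` helpers now type images as `PixelValues`/`B64Str` instead of the removed `detection_types` aliases, while the call signatures stay the same. A short usage sketch of the two `load_image_from_file` overloads; the file name is hypothetical:

    from deepdoctection.utils.fs import load_image_from_file

    # "np" (the default) returns the decoded pixel array, "b64" a base64-encoded string;
    # both return None if the file cannot be read or converted.
    pixels = load_image_from_file("page_1.png")                  # Optional[PixelValues]
    encoded = load_image_from_file("page_1.png", type_id="b64")  # Optional[B64Str]
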
deepdoctection/utils/identifier.py
CHANGED

@@ -21,7 +21,7 @@ Methods for generating and checking uuids
 import hashlib
 import uuid
 
-from .
+from .types import PathLikeOrStr
 
 __all__ = ["is_uuid_like", "get_uuid_from_str", "get_uuid"]
 
@@ -65,7 +65,7 @@ def get_uuid(*inputs: str) -> str:
     return get_uuid_from_str(str_input)
 
 
-def get_md5_hash(path:
+def get_md5_hash(path: PathLikeOrStr, buffer_size: int = 65536) -> str:
     """
     Calculate a md5 hash for a given file
 
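Only the `path` annotation of `get_md5_hash` changed; behaviour is unchanged. A small sketch, with a hypothetical file name:

    from deepdoctection.utils.identifier import get_md5_hash, get_uuid

    checksum = get_md5_hash("model_weights.pt")         # md5 of a local file, read in 64 KiB chunks
    sample_id = get_uuid("model_weights.pt", checksum)  # deterministic uuid derived from the inputs
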
deepdoctection/utils/logger.py
CHANGED
@@ -25,7 +25,6 @@ Log levels can be set via the environment variable `LOG_LEVEL` (default: INFO).
 `STD_OUT_VERBOSE` will print a verbose message to the terminal (default: False).
 """
 
-import ast
 import errno
 import functools
 import json
@@ -37,21 +36,23 @@ import sys
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import Any,
+from typing import Any, Optional, Union, no_type_check
 
 from termcolor import colored
 
-from .
+from .types import PathLikeOrStr
 
 __all__ = ["logger", "set_logger_dir", "auto_set_dir", "get_logger_dir"]
 
+ENV_VARS_TRUE: set[str] = {"1", "True", "TRUE", "true", "yes"}
+
 
 @dataclass
 class LoggingRecord:
     """LoggingRecord to pass to the logger in order to distinguish from third party libraries."""
 
     msg: str
-    log_dict: Optional[
+    log_dict: Optional[dict[Union[int, str], Any]] = field(default=None)
 
     def __post_init__(self) -> None:
         """log_dict will be added to the log record as a dict."""
@@ -66,7 +67,7 @@ class LoggingRecord:
 class CustomFilter(logging.Filter):
     """A custom filter"""
 
-    filter_third_party_lib =
+    filter_third_party_lib = os.environ.get("FILTER_THIRD_PARTY_LIB", "False") in ENV_VARS_TRUE
 
     def filter(self, record: logging.LogRecord) -> bool:
         if self.filter_third_party_lib:
@@ -79,7 +80,7 @@ class CustomFilter(logging.Filter):
 class StreamFormatter(logging.Formatter):
     """A custom formatter to produce unified LogRecords"""
 
-    std_out_verbose =
+    std_out_verbose = os.environ.get("STD_OUT_VERBOSE", "False") in ENV_VARS_TRUE
 
     @no_type_check
     def format(self, record: logging.LogRecord) -> str:
@@ -109,7 +110,7 @@ class StreamFormatter(logging.Formatter):
 class FileFormatter(logging.Formatter):
     """A custom formatter to produce a loggings in json format"""
 
-    filter_third_party_lib =
+    filter_third_party_lib = os.environ.get("FILTER_THIRD_PARTY_LIB", "False") in ENV_VARS_TRUE
 
     @no_type_check
     def format(self, record: logging.LogRecord) -> str:
@@ -132,8 +133,9 @@ class FileFormatter(logging.Formatter):
 
 
 _LOG_DIR = None
-_CONFIG_DICT: Dict[str, Any] = {
+_CONFIG_DICT: dict[str, Any] = {
     "version": 1,
+    "disable_existing_loggers": False,
     "filters": {"customfilter": {"()": lambda: CustomFilter()}},  # pylint: disable=W0108
     "formatters": {
         "streamformatter": {"()": lambda: StreamFormatter(datefmt="%m%d %H:%M.%S")},
@@ -144,7 +146,7 @@ _CONFIG_DICT: Dict[str, Any] = {
     "root": {
         "handlers": ["streamhandler"],
         "level": os.environ.get("LOG_LEVEL", "INFO"),
-        "propagate":
+        "propagate": os.environ.get("LOG_PROPAGATE", "False") in ENV_VARS_TRUE,
     },
 }
 
@@ -170,9 +172,8 @@ def _get_time_str() -> str:
     return datetime.now().strftime("%m%d-%H%M%S")
 
 
-def _set_file(path: Pathlike) -> None:
-
-    path = path.as_posix()
+def _set_file(path: PathLikeOrStr) -> None:
+    path = os.fspath(path)
     global _FILE_HANDLER  # pylint: disable=W0603
     if os.path.isfile(path):
         backup_name = path + "." + _get_time_str()
@@ -187,7 +188,7 @@ def _set_file(path: Pathlike) -> None:
     logger.info("Argv: %s ", sys.argv)
 
 
-def set_logger_dir(dir_name: Pathlike, action: Optional[str] = None) -> None:
+def set_logger_dir(dir_name: PathLikeOrStr, action: Optional[str] = None) -> None:
     """
     Set the directory for global logging.
 
@@ -212,7 +213,7 @@ def set_logger_dir(dir_name: Pathlike, action: Optional[str] = None) -> None:
         logger.removeHandler(_FILE_HANDLER)
         del _FILE_HANDLER
 
-    def dir_nonempty(directory:
+    def dir_nonempty(directory: PathLikeOrStr) -> int:
         return os.path.isdir(directory) and len([x for x in os.listdir(directory) if x[0] != "."])
 
     if dir_nonempty(dir_name):
@@ -266,7 +267,7 @@ def auto_set_dir(action: Optional[str] = None, name: Optional[str] = None) -> None:
     set_logger_dir(auto_dir_name, action=action)
 
 
-def get_logger_dir() -> Optional[
+def get_logger_dir() -> Optional[PathLikeOrStr]:
     """
     The logger directory, or None if not set.
     The directory is used for general logging, tensorboard events, checkpoints, etc.