deepdoctection 0.30-py3-none-any.whl → 0.31-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepdoctection/__init__.py +4 -2
- deepdoctection/analyzer/dd.py +6 -5
- deepdoctection/dataflow/base.py +0 -19
- deepdoctection/dataflow/custom.py +4 -3
- deepdoctection/dataflow/custom_serialize.py +14 -5
- deepdoctection/dataflow/parallel_map.py +12 -11
- deepdoctection/dataflow/serialize.py +5 -4
- deepdoctection/datapoint/annotation.py +33 -12
- deepdoctection/datapoint/box.py +1 -4
- deepdoctection/datapoint/convert.py +3 -1
- deepdoctection/datapoint/image.py +66 -29
- deepdoctection/datapoint/view.py +57 -25
- deepdoctection/datasets/adapter.py +1 -1
- deepdoctection/datasets/base.py +83 -10
- deepdoctection/datasets/dataflow_builder.py +1 -1
- deepdoctection/datasets/info.py +2 -2
- deepdoctection/datasets/instances/layouttest.py +2 -7
- deepdoctection/eval/accmetric.py +1 -1
- deepdoctection/eval/base.py +5 -4
- deepdoctection/eval/eval.py +2 -2
- deepdoctection/eval/tp_eval_callback.py +5 -4
- deepdoctection/extern/base.py +39 -13
- deepdoctection/extern/d2detect.py +164 -64
- deepdoctection/extern/deskew.py +32 -7
- deepdoctection/extern/doctrocr.py +227 -39
- deepdoctection/extern/fastlang.py +45 -7
- deepdoctection/extern/hfdetr.py +90 -33
- deepdoctection/extern/hflayoutlm.py +109 -22
- deepdoctection/extern/pdftext.py +2 -1
- deepdoctection/extern/pt/ptutils.py +3 -2
- deepdoctection/extern/tessocr.py +134 -22
- deepdoctection/extern/texocr.py +2 -0
- deepdoctection/extern/tp/tpcompat.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
- deepdoctection/extern/tpdetect.py +50 -23
- deepdoctection/mapper/d2struct.py +1 -1
- deepdoctection/mapper/hfstruct.py +1 -1
- deepdoctection/mapper/laylmstruct.py +1 -1
- deepdoctection/mapper/maputils.py +13 -2
- deepdoctection/mapper/prodigystruct.py +1 -1
- deepdoctection/mapper/pubstruct.py +10 -10
- deepdoctection/mapper/tpstruct.py +1 -1
- deepdoctection/pipe/anngen.py +35 -8
- deepdoctection/pipe/base.py +53 -19
- deepdoctection/pipe/cell.py +29 -8
- deepdoctection/pipe/common.py +12 -4
- deepdoctection/pipe/doctectionpipe.py +2 -2
- deepdoctection/pipe/language.py +3 -2
- deepdoctection/pipe/layout.py +3 -2
- deepdoctection/pipe/lm.py +2 -2
- deepdoctection/pipe/refine.py +18 -10
- deepdoctection/pipe/segment.py +21 -16
- deepdoctection/pipe/text.py +14 -8
- deepdoctection/pipe/transform.py +16 -9
- deepdoctection/train/d2_frcnn_train.py +15 -12
- deepdoctection/train/hf_detr_train.py +8 -6
- deepdoctection/train/hf_layoutlm_train.py +16 -11
- deepdoctection/utils/__init__.py +3 -0
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +2 -2
- deepdoctection/utils/env_info.py +55 -22
- deepdoctection/utils/error.py +84 -0
- deepdoctection/utils/file_utils.py +4 -15
- deepdoctection/utils/fs.py +7 -7
- deepdoctection/utils/pdf_utils.py +5 -4
- deepdoctection/utils/settings.py +5 -1
- deepdoctection/utils/transform.py +1 -1
- deepdoctection/utils/utils.py +0 -6
- deepdoctection/utils/viz.py +44 -2
- {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/METADATA +33 -58
- {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/RECORD +74 -73
- {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/WHEEL +1 -1
- {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/LICENSE +0 -0
- {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/top_level.txt +0 -0
deepdoctection/extern/hflayoutlm.py
CHANGED

@@ -200,7 +200,6 @@ class HFLayoutLmTokenClassifierBase(LMTokenClassifier, ABC):
         :param device: The device (cpu,"cuda"), where to place the model.
         """

-        self.name = "_".join(Path(path_weights).parts[-3:])
         if categories is None:
             if categories_semantics is None:
                 raise ValueError("If categories is None then categories_semantics cannot be None")
@@ -302,6 +301,11 @@ class HFLayoutLmTokenClassifierBase(LMTokenClassifier, ABC):
             self.device,
         )

+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"Transformers_{architecture}_" + "_".join(Path(path_weights).parts[-2:])
+

 class HFLayoutLmTokenClassifier(HFLayoutLmTokenClassifierBase):
     """
@@ -357,10 +361,9 @@ class HFLayoutLmTokenClassifier(HFLayoutLmTokenClassifierBase):
         :param categories: If you have a pre-trained model you can pass a complete dict of NER categories
         :param device: The device (cpu,"cuda"), where to place the model.
         """
-
-        self.
-
-        )
+        self.name = self.get_name(path_weights, "LayoutLM")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories_semantics, categories_bio, categories, device)

     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> List[TokenClassResult]:
@@ -388,6 +391,18 @@ class HFLayoutLmTokenClassifier(HFLayoutLmTokenClassifierBase):

         return self._map_category_names(results)

+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMForTokenClassification.from_pretrained(pretrained_model_name_or_path=path_weights, config=config)
+

 class HFLayoutLmv2TokenClassifier(HFLayoutLmTokenClassifierBase):
     """
@@ -445,10 +460,9 @@ class HFLayoutLmv2TokenClassifier(HFLayoutLmTokenClassifierBase):
         :param categories: If you have a pre-trained model you can pass a complete dict of NER categories
         :param device: The device (cpu,"cuda"), where to place the model.
         """
-
-        self.
-
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv2")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories_semantics, categories_bio, categories, device)

     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> List[TokenClassResult]:
@@ -489,6 +503,20 @@ class HFLayoutLmv2TokenClassifier(HFLayoutLmTokenClassifierBase):
         """
         return {"image_width": 224, "image_height": 224}

+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv2Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv2ForTokenClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+

 class HFLayoutLmv3TokenClassifier(HFLayoutLmTokenClassifierBase):
     """
@@ -546,10 +574,9 @@ class HFLayoutLmv3TokenClassifier(HFLayoutLmTokenClassifierBase):
         :param categories: If you have a pre-trained model you can pass a complete dict of NER categories
         :param device: The device (cpu,"cuda"), where to place the model.
         """
-
-        self.
-
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv3")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories_semantics, categories_bio, categories, device)

     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> List[TokenClassResult]:
@@ -592,6 +619,20 @@ class HFLayoutLmv3TokenClassifier(HFLayoutLmTokenClassifierBase):
             "pixel_std": np.array(IMAGENET_DEFAULT_STD, dtype=np.float32),
         }

+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv3Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv3ForTokenClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+

 class HFLayoutLmSequenceClassifierBase(LMSequenceClassifier, ABC):
     """
@@ -607,7 +648,6 @@ class HFLayoutLmSequenceClassifierBase(LMSequenceClassifier, ABC):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
-        self.name = "_".join(Path(path_weights).parts[-3:])
         self.path_config = path_config_json
         self.path_weights = path_weights
         self.categories = copy(categories) # type: ignore
@@ -691,6 +731,11 @@
             boxes = boxes.to(self.device)
         return input_ids, attention_mask, token_type_ids, boxes

+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"Transformers_{architecture}_" + "_".join(Path(path_weights).parts[-2:])
+

 class HFLayoutLmSequenceClassifier(HFLayoutLmSequenceClassifierBase):
     """
@@ -730,6 +775,8 @@ class HFLayoutLmSequenceClassifier(HFLayoutLmSequenceClassifierBase):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
+        self.name = self.get_name(path_weights, "LayoutLM")
+        self.model_id = self.get_model_id()
         config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
         self.model = LayoutLMForSequenceClassification.from_pretrained(
             pretrained_model_name_or_path=path_weights, config=config
@@ -751,6 +798,20 @@ class HFLayoutLmSequenceClassifier(HFLayoutLmSequenceClassifierBase):
             result.class_name = self.categories[str(result.class_id)]
         return result

+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMForSequenceClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+

 class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
     """
@@ -790,10 +851,9 @@ class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
-
-        self.
-
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv2")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories, device)

     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> SequenceClassResult:
@@ -818,6 +878,20 @@ class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         """
         return {"image_width": 224, "image_height": 224}

+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv2Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv2ForSequenceClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+

 class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
     """
@@ -857,10 +931,9 @@ class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
-
-        self.
-
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv3")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories, device)

     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> SequenceClassResult:
@@ -890,3 +963,17 @@ class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
             "pixel_mean": np.array(IMAGENET_DEFAULT_MEAN, dtype=np.float32),
             "pixel_std": np.array(IMAGENET_DEFAULT_STD, dtype=np.float32),
         }
+
+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv3Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv3ForSequenceClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
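Taken together, the hunks above replace the old `"_".join(Path(path_weights).parts[-3:])` naming with a shared `get_name` helper and move model construction out of each `__init__` into `get_wrapped_model`. A minimal sketch of what the new naming scheme produces, using a hypothetical weights path:

    from pathlib import Path

    def get_name(path_weights: str, architecture: str) -> str:
        # same expression as the helper added in 0.31
        return f"Transformers_{architecture}_" + "_".join(Path(path_weights).parts[-2:])

    # hypothetical local checkpoint location
    print(get_name("/models/layoutlm-base-uncased/pytorch_model.bin", "LayoutLM"))
    # -> Transformers_LayoutLM_layoutlm-base-uncased_pytorch_model.bin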
deepdoctection/extern/pdftext.py
CHANGED
@@ -65,7 +65,8 @@ class PdfPlumberTextDetector(PdfMiner):
     """

     def __init__(self) -> None:
-        self.name = "
+        self.name = "Pdfplumber"
+        self.model_id = self.get_model_id()
         self.categories = {"1": LayoutType.word}

     def predict(self, pdf_bytes: bytes) -> List[DetectionResult]:
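A hedged sketch of the visible effect: the detector now reports a fixed name and a model id (`get_model_id` is inherited and not shown in this diff, so the printed id is unspecified here):

    from deepdoctection.extern.pdftext import PdfPlumberTextDetector

    detector = PdfPlumberTextDetector()
    print(detector.name)      # "Pdfplumber" as of 0.31
    print(detector.model_id)  # whatever get_model_id() derives; not shown in this diff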
deepdoctection/extern/pt/ptutils.py
CHANGED

@@ -20,6 +20,7 @@ Torch related utils
 """


+from ...utils.error import DependencyError
 from ...utils.file_utils import pytorch_available


@@ -31,7 +32,7 @@ def set_torch_auto_device() -> "torch.device": # type: ignore
         from torch import cuda, device # pylint: disable=C0415

         return device("cuda" if cuda.is_available() else "cpu")
-    raise
+    raise DependencyError("Pytorch must be installed")


 def get_num_gpu() -> int:
@@ -45,4 +46,4 @@ def get_num_gpu() -> int:
         from torch import cuda # pylint: disable=C0415

         return cuda.device_count()
-    raise
+    raise DependencyError("Pytorch must be installed")
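Both helpers now fail loudly with the new `DependencyError` from `deepdoctection/utils/error.py` instead of a bare `raise`. A minimal caller-side sketch, assuming an environment without PyTorch installed:

    from deepdoctection.extern.pt.ptutils import set_torch_auto_device
    from deepdoctection.utils.error import DependencyError

    try:
        device = set_torch_auto_device()
    except DependencyError as err:
        print(err)  # Pytorch must be installed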
deepdoctection/extern/tessocr.py
CHANGED
@@ -19,21 +19,24 @@
 Tesseract OCR engine for text extraction
 """
 import shlex
+import string
 import subprocess
 import sys
 from errno import ENOENT
 from itertools import groupby
 from os import environ
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Mapping, Optional, Union

-import
+from packaging.version import InvalidVersion, Version, parse

 from ..utils.context import save_tmp_file, timeout_manager
 from ..utils.detection_types import ImageType, Requirement
-from ..utils.
+from ..utils.error import DependencyError, TesseractError
+from ..utils.file_utils import _TESS_PATH, get_tesseract_requirement
 from ..utils.metacfg import config_to_cli_str, set_config_by_yaml
-from ..utils.settings import LayoutType, ObjectTypes
-from .
+from ..utils.settings import LayoutType, ObjectTypes, PageType
+from ..utils.viz import viz_handler
+from .base import DetectionResult, ImageTransformer, ObjectDetector, PredictorBase

 # copy and paste with some light modifications from https://github.com/madmaze/pytesseract/tree/master/pytesseract

@@ -57,18 +60,6 @@ _LANG_CODE_TO_TESS_LANG_CODE = {
 }


-class TesseractError(RuntimeError):
-    """
-    Tesseract Error
-    """
-
-    def __init__(self, status: int, message: str) -> None:
-        super().__init__()
-        self.status = status
-        self.message = message
-        self.args = (status, message)
-
-
 def _subprocess_args() -> Dict[str, Any]:
     # See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
     # for reference and comments.
@@ -109,7 +100,7 @@ def _run_tesseract(tesseract_args: List[str]) -> None:
     except OSError as error:
         if error.errno != ENOENT:
             raise error from error
-        raise
+        raise DependencyError("Tesseract not found. Please install or add to your PATH.") from error

     with timeout_manager(proc, 0) as error_string:
         if proc.returncode:
@@ -119,6 +110,50 @@
         )


+def get_tesseract_version() -> Version:
+    """
+    Returns Version object of the Tesseract version
+    """
+    try:
+        output = subprocess.check_output(
+            ["tesseract", "--version"],
+            stderr=subprocess.STDOUT,
+            env=environ,
+            stdin=subprocess.DEVNULL,
+        )
+    except OSError as error:
+        raise DependencyError("Tesseract not found. Please install or add to your PATH.") from error
+
+    raw_version = output.decode("utf-8")
+    str_version, *_ = raw_version.lstrip(string.printable[10:]).partition(" ")
+    str_version, *_ = str_version.partition("-")
+
+    try:
+        version = parse(str_version)
+        assert version >= Version("3.05")
+    except (AssertionError, InvalidVersion) as error:
+        raise SystemExit(f'Invalid tesseract version: "{raw_version}"') from error
+
+    return version
+
+
+def image_to_angle(image: ImageType) -> Mapping[str, str]:
+    """
+    Generating a tmp file and running tesseract to get the orientation of the image.
+
+    :param image: Image in np.array.
+    :return: A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'.
+    """
+    with save_tmp_file(image, "tess_") as (tmp_name, input_file_name):
+        _run_tesseract(_input_to_cli_str("osd", "--psm 0", 0, input_file_name, tmp_name))
+        with open(tmp_name + ".osd", "rb") as output_file:
+            output = output_file.read().decode("utf-8")
+
+    return {
+        key_value[0]: key_value[1] for key_value in (line.split(": ") for line in output.split("\n") if len(line) >= 2)
+    }
+
+
 def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Union[str, int, float]]]:
     """
     This is more or less pytesseract.image_to_data with a dict as returned value.
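`get_tesseract_version` strips every leading non-digit before parsing: `string.printable[10:]` is `string.printable` without `0123456789`, so `lstrip` consumes the `tesseract ` prefix. A small sketch of that logic on a hypothetical version banner:

    import string
    from packaging.version import parse

    raw_version = "tesseract 5.3.0-gd3a4"  # hypothetical output of `tesseract --version`
    str_version, *_ = raw_version.lstrip(string.printable[10:]).partition(" ")
    str_version, *_ = str_version.partition("-")
    print(parse(str_version))  # 5.3.0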
@@ -220,7 +255,6 @@ def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool,
     :return: A list of tesseract extractions wrapped in DetectionResult
     """

-    np_img = np_img.astype(np.uint8)
     results = image_to_dict(np_img, supported_languages, config)
     all_results = []

@@ -249,6 +283,16 @@
     return all_results


+def predict_rotation(np_img: ImageType) -> Mapping[str, str]:
+    """
+    Predicts the rotation of an image using the Tesseract OCR engine.
+
+    :param np_img: numpy array of the image
+    :return: A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'
+    """
+    return image_to_angle(np_img)
+
+
 class TesseractOcrDetector(ObjectDetector):
     """
     Text object detector based on Tesseracts OCR engine. Note that tesseract has to be installed separately.
@@ -292,7 +336,9 @@ class TesseractOcrDetector(ObjectDetector):
         :param config_overwrite: Overwrite config parameters defined by the yaml file with new values.
                                  E.g. ["oem=14"]
         """
-        self.name =
+        self.name = self.get_name()
+        self.model_id = self.get_model_id()
+
         if config_overwrite is None:
             config_overwrite = []

@@ -316,13 +362,13 @@
         :param np_img: image as numpy array
         :return: A list of DetectionResult
         """
-        detection_results = predict_text(
+
+        return predict_text(
             np_img,
             supported_languages=self.config.LANGUAGES,
             text_lines=self.config.LINES,
             config=config_to_cli_str(self.config, "LANGUAGES", "LINES"),
         )
-        return detection_results

     @classmethod
     def get_requirements(cls) -> List[Requirement]:
@@ -342,3 +388,69 @@ class TesseractOcrDetector(ObjectDetector):
         :param language: `Languages`
         """
         self.config.LANGUAGES = _LANG_CODE_TO_TESS_LANG_CODE.get(language, language.value)
+
+    @staticmethod
+    def get_name() -> str:
+        """Returns the name of the model"""
+        return f"Tesseract_{get_tesseract_version()}"
+
+
+class TesseractRotationTransformer(ImageTransformer):
+    """
+    The `TesseractRotationTransformer` class is a specialized image transformer that is designed to handle image
+    rotation in the context of Optical Character Recognition (OCR) tasks. It inherits from the `ImageTransformer`
+    base class and implements methods for predicting and applying rotation transformations to images.
+
+    The `predict` method determines the angle of the rotated image. It can only handle angles that are multiples of 90
+    degrees.
+    This method uses the Tesseract OCR engine to predict the rotation angle of an image.
+
+    The `transform` method applies the predicted rotation to the image, effectively rotating the image backwards.
+    This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
+
+    This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
+    The class also provides methods for cloning itself and for getting the requirements of the Tesseract OCR system.
+
+    **Example:**
+
+        transformer = TesseractRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+    """
+
+    def __init__(self) -> None:
+        self.name = _TESS_PATH + "-rotation"
+
+    def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
+        """
+        Applies the predicted rotation to the image, effectively rotating the image backwards.
+        This method uses either the Pillow library or OpenCV for the rotation operation, depending on the
+        configuration.
+
+        :param np_img: The input image as a numpy array.
+        :param specification: A `DetectionResult` object containing the predicted rotation angle.
+        :return: The rotated image as a numpy array.
+        """
+        return viz_handler.rotate_image(np_img, specification.angle) # type: ignore
+
+    def predict(self, np_img: ImageType) -> DetectionResult:
+        """
+        Determines the angle of the rotated image. It can only handle angles that are multiples of 90 degrees.
+        This method uses the Tesseract OCR engine to predict the rotation angle of an image.
+
+        :param np_img: The input image as a numpy array.
+        :return: A `DetectionResult` object containing the predicted rotation angle and confidence.
+        """
+        output_dict = predict_rotation(np_img)
+        return DetectionResult(
+            angle=float(output_dict["Orientation in degrees"]), score=float(output_dict["Orientation confidence"])
+        )

+    @classmethod
+    def get_requirements(cls) -> List[Requirement]:
+        return [get_tesseract_requirement()]
+
+    def clone(self) -> PredictorBase:
+        return self.__class__()
+
+    @staticmethod
+    def possible_category() -> PageType:
+        return PageType.angle
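A hedged end-to-end sketch of the new transformer, expanding the Example from the docstring above (requires a local Tesseract install; "scan.png" is a hypothetical input file):

    import numpy as np
    from PIL import Image

    from deepdoctection.extern.tessocr import TesseractRotationTransformer

    np_img = np.array(Image.open("scan.png").convert("RGB"))  # any page scan

    transformer = TesseractRotationTransformer()
    detection_result = transformer.predict(np_img)   # angle is a multiple of 90 degrees
    rotated = transformer.transform(np_img, detection_result)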
deepdoctection/extern/texocr.py
CHANGED
@@ -120,6 +120,8 @@ class TextractOcrDetector(ObjectDetector):
         :param credentials_kwargs: `aws_access_key_id`, `aws_secret_access_key` or `aws_session_token`
         """
         self.name = "textract"
+        self.model_id = self.get_model_id()
+
         self.text_lines = text_lines
         self.client = boto3.client("textract", **credentials_kwargs)
         if self.text_lines:
deepdoctection/extern/tp/tpcompat.py
CHANGED

@@ -55,7 +55,7 @@ class ModelDescWithConfig(ModelDesc, ABC): # type: ignore

         :return: Tuple of list input and list output names. The names must coincide with tensor within the model.
         """
-        raise NotImplementedError
+        raise NotImplementedError()


 class TensorpackPredictor(ABC):
@@ -106,14 +106,14 @@ class TensorpackPredictor(ABC):

     @staticmethod
     @abstractmethod
-    def
+    def get_wrapped_model(
         path_yaml: str, categories: Mapping[str, ObjectTypes], config_overwrite: Union[List[str], None]
     ) -> ModelDescWithConfig:
         """
         Implement the config generation, its modification and instantiate a version of the model. See
         `pipe.tpfrcnn.TPFrcnnDetector` for an example
         """
-        raise NotImplementedError
+        raise NotImplementedError()

     @abstractmethod
     def predict(self, np_img: Any) -> Any:
@@ -121,7 +121,7 @@ class TensorpackPredictor(ABC):
         Implement, how `self.tp_predictor` is invoked and raw prediction results are generated. Do use only raw
         objects and nothing, which is related to the DD API.
         """
-        raise NotImplementedError
+        raise NotImplementedError()

     @property
     def model(self) -> ModelDescWithConfig:
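With the abstract method renamed to `get_wrapped_model`, a concrete `TensorpackPredictor` now has to implement it under that name. A schematic subclass; only the signatures come from the hunks above, the bodies are placeholders:

    class MyTpDetector(TensorpackPredictor):
        @staticmethod
        def get_wrapped_model(
            path_yaml: str, categories: Mapping[str, ObjectTypes], config_overwrite: Union[List[str], None]
        ) -> ModelDescWithConfig:
            # build the config from the yaml file, apply config_overwrite and
            # return the instantiated ModelDescWithConfig (see TPFrcnnDetector)
            ...

        def predict(self, np_img: Any) -> Any:
            # invoke self.tp_predictor on the raw image and return raw results
            ...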
deepdoctection/extern/tp/tpfrcnn/preproc.py
CHANGED

@@ -18,6 +18,7 @@ from tensorpack.dataflow.imgaug import AugmentorList, ImageAugmentor

 from ....datapoint.convert import box_to_point4, point4_to_box
 from ....utils.detection_types import ImageType, JsonDict
+from ....utils.error import MalformedData
 from ....utils.logger import log_once
 from .common import filter_boxes_inside_shape, np_iou
 from .modeling.model_fpn import get_all_anchors_fpn
@@ -27,12 +28,6 @@ from .utils.np_box_ops import ioa as np_ioa
 # pylint: enable=import-error


-class MalformedData(BaseException):
-    """
-    Exception class for malformed data
-    """
-
-
 def augment(dp: JsonDict, imgaug_list: List[ImageAugmentor], add_mask: bool) -> JsonDict:
     """
     Augment an image according to a list of augmentors.
@@ -62,7 +57,7 @@ def augment(dp: JsonDict, imgaug_list: List[ImageAugmentor], add_mask: bool) ->
     assert np.min(np_area(gt_boxes)) > 0, "some boxes have zero area"

     if add_mask:
-        raise NotImplementedError
+        raise NotImplementedError()

     return dp
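Seen across the whole diff, the locally defined exceptions (`TesseractError` in tessocr.py, `MalformedData` in preproc.py) and the bare `raise` statements are consolidated into the new `deepdoctection/utils/error.py` (+84 lines in the file list). A minimal sketch of the imports the hunks above rely on, assuming 0.31 keeps the `(status, message)` signature of the removed `TesseractError`:

    from deepdoctection.utils.error import DependencyError, MalformedData, TesseractError

    err = TesseractError(1, "tesseract runtime error")  # signature as removed from tessocr.py
    print(err.status, err.message)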