deepdoctection: 0.30-py3-none-any.whl → 0.31-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of deepdoctection has been flagged as potentially problematic.

Files changed (74)
  1. deepdoctection/__init__.py +4 -2
  2. deepdoctection/analyzer/dd.py +6 -5
  3. deepdoctection/dataflow/base.py +0 -19
  4. deepdoctection/dataflow/custom.py +4 -3
  5. deepdoctection/dataflow/custom_serialize.py +14 -5
  6. deepdoctection/dataflow/parallel_map.py +12 -11
  7. deepdoctection/dataflow/serialize.py +5 -4
  8. deepdoctection/datapoint/annotation.py +33 -12
  9. deepdoctection/datapoint/box.py +1 -4
  10. deepdoctection/datapoint/convert.py +3 -1
  11. deepdoctection/datapoint/image.py +66 -29
  12. deepdoctection/datapoint/view.py +57 -25
  13. deepdoctection/datasets/adapter.py +1 -1
  14. deepdoctection/datasets/base.py +83 -10
  15. deepdoctection/datasets/dataflow_builder.py +1 -1
  16. deepdoctection/datasets/info.py +2 -2
  17. deepdoctection/datasets/instances/layouttest.py +2 -7
  18. deepdoctection/eval/accmetric.py +1 -1
  19. deepdoctection/eval/base.py +5 -4
  20. deepdoctection/eval/eval.py +2 -2
  21. deepdoctection/eval/tp_eval_callback.py +5 -4
  22. deepdoctection/extern/base.py +39 -13
  23. deepdoctection/extern/d2detect.py +164 -64
  24. deepdoctection/extern/deskew.py +32 -7
  25. deepdoctection/extern/doctrocr.py +227 -39
  26. deepdoctection/extern/fastlang.py +45 -7
  27. deepdoctection/extern/hfdetr.py +90 -33
  28. deepdoctection/extern/hflayoutlm.py +109 -22
  29. deepdoctection/extern/pdftext.py +2 -1
  30. deepdoctection/extern/pt/ptutils.py +3 -2
  31. deepdoctection/extern/tessocr.py +134 -22
  32. deepdoctection/extern/texocr.py +2 -0
  33. deepdoctection/extern/tp/tpcompat.py +4 -4
  34. deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
  35. deepdoctection/extern/tpdetect.py +50 -23
  36. deepdoctection/mapper/d2struct.py +1 -1
  37. deepdoctection/mapper/hfstruct.py +1 -1
  38. deepdoctection/mapper/laylmstruct.py +1 -1
  39. deepdoctection/mapper/maputils.py +13 -2
  40. deepdoctection/mapper/prodigystruct.py +1 -1
  41. deepdoctection/mapper/pubstruct.py +10 -10
  42. deepdoctection/mapper/tpstruct.py +1 -1
  43. deepdoctection/pipe/anngen.py +35 -8
  44. deepdoctection/pipe/base.py +53 -19
  45. deepdoctection/pipe/cell.py +29 -8
  46. deepdoctection/pipe/common.py +12 -4
  47. deepdoctection/pipe/doctectionpipe.py +2 -2
  48. deepdoctection/pipe/language.py +3 -2
  49. deepdoctection/pipe/layout.py +3 -2
  50. deepdoctection/pipe/lm.py +2 -2
  51. deepdoctection/pipe/refine.py +18 -10
  52. deepdoctection/pipe/segment.py +21 -16
  53. deepdoctection/pipe/text.py +14 -8
  54. deepdoctection/pipe/transform.py +16 -9
  55. deepdoctection/train/d2_frcnn_train.py +15 -12
  56. deepdoctection/train/hf_detr_train.py +8 -6
  57. deepdoctection/train/hf_layoutlm_train.py +16 -11
  58. deepdoctection/utils/__init__.py +3 -0
  59. deepdoctection/utils/concurrency.py +1 -1
  60. deepdoctection/utils/context.py +2 -2
  61. deepdoctection/utils/env_info.py +55 -22
  62. deepdoctection/utils/error.py +84 -0
  63. deepdoctection/utils/file_utils.py +4 -15
  64. deepdoctection/utils/fs.py +7 -7
  65. deepdoctection/utils/pdf_utils.py +5 -4
  66. deepdoctection/utils/settings.py +5 -1
  67. deepdoctection/utils/transform.py +1 -1
  68. deepdoctection/utils/utils.py +0 -6
  69. deepdoctection/utils/viz.py +44 -2
  70. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/METADATA +33 -58
  71. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/RECORD +74 -73
  72. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/WHEEL +1 -1
  73. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/LICENSE +0 -0
  74. {deepdoctection-0.30.dist-info → deepdoctection-0.31.dist-info}/top_level.txt +0 -0
deepdoctection/extern/hflayoutlm.py

@@ -200,7 +200,6 @@ class HFLayoutLmTokenClassifierBase(LMTokenClassifier, ABC):
         :param device: The device (cpu,"cuda"), where to place the model.
         """
 
-        self.name = "_".join(Path(path_weights).parts[-3:])
         if categories is None:
             if categories_semantics is None:
                 raise ValueError("If categories is None then categories_semantics cannot be None")
@@ -302,6 +301,11 @@ class HFLayoutLmTokenClassifierBase(LMTokenClassifier, ABC):
             self.device,
         )
 
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"Transformers_{architecture}_" + "_".join(Path(path_weights).parts[-2:])
+
 
 class HFLayoutLmTokenClassifier(HFLayoutLmTokenClassifierBase):
     """
@@ -357,10 +361,9 @@ class HFLayoutLmTokenClassifier(HFLayoutLmTokenClassifierBase):
         :param categories: If you have a pre-trained model you can pass a complete dict of NER categories
         :param device: The device (cpu,"cuda"), where to place the model.
         """
-        config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
-        self.model = LayoutLMForTokenClassification.from_pretrained(
-            pretrained_model_name_or_path=path_weights, config=config
-        )
+        self.name = self.get_name(path_weights, "LayoutLM")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories_semantics, categories_bio, categories, device)
 
     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> List[TokenClassResult]:
@@ -388,6 +391,18 @@ class HFLayoutLmTokenClassifier(HFLayoutLmTokenClassifierBase):
 
         return self._map_category_names(results)
 
+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMForTokenClassification.from_pretrained(pretrained_model_name_or_path=path_weights, config=config)
+
 
 class HFLayoutLmv2TokenClassifier(HFLayoutLmTokenClassifierBase):
     """
@@ -445,10 +460,9 @@ class HFLayoutLmv2TokenClassifier(HFLayoutLmTokenClassifierBase):
         :param categories: If you have a pre-trained model you can pass a complete dict of NER categories
         :param device: The device (cpu,"cuda"), where to place the model.
         """
-        config = LayoutLMv2Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
-        self.model = LayoutLMv2ForTokenClassification.from_pretrained(
-            pretrained_model_name_or_path=path_weights, config=config
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv2")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories_semantics, categories_bio, categories, device)
 
     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> List[TokenClassResult]:
@@ -489,6 +503,20 @@ class HFLayoutLmv2TokenClassifier(HFLayoutLmTokenClassifierBase):
         """
         return {"image_width": 224, "image_height": 224}
 
+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv2Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv2ForTokenClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+
 
 class HFLayoutLmv3TokenClassifier(HFLayoutLmTokenClassifierBase):
     """
@@ -546,10 +574,9 @@ class HFLayoutLmv3TokenClassifier(HFLayoutLmTokenClassifierBase):
         :param categories: If you have a pre-trained model you can pass a complete dict of NER categories
         :param device: The device (cpu,"cuda"), where to place the model.
         """
-        config = LayoutLMv3Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
-        self.model = LayoutLMv3ForTokenClassification.from_pretrained(
-            pretrained_model_name_or_path=path_weights, config=config
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv3")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories_semantics, categories_bio, categories, device)
 
     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> List[TokenClassResult]:
@@ -592,6 +619,20 @@ class HFLayoutLmv3TokenClassifier(HFLayoutLmTokenClassifierBase):
             "pixel_std": np.array(IMAGENET_DEFAULT_STD, dtype=np.float32),
         }
 
+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv3Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv3ForTokenClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+
 
 class HFLayoutLmSequenceClassifierBase(LMSequenceClassifier, ABC):
     """
@@ -607,7 +648,6 @@ class HFLayoutLmSequenceClassifierBase(LMSequenceClassifier, ABC):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
-        self.name = "_".join(Path(path_weights).parts[-3:])
         self.path_config = path_config_json
         self.path_weights = path_weights
         self.categories = copy(categories)  # type: ignore
@@ -691,6 +731,11 @@ class HFLayoutLmSequenceClassifierBase(LMSequenceClassifier, ABC):
             boxes = boxes.to(self.device)
         return input_ids, attention_mask, token_type_ids, boxes
 
+    @staticmethod
+    def get_name(path_weights: str, architecture: str) -> str:
+        """Returns the name of the model"""
+        return f"Transformers_{architecture}_" + "_".join(Path(path_weights).parts[-2:])
+
 
 class HFLayoutLmSequenceClassifier(HFLayoutLmSequenceClassifierBase):
     """
@@ -730,6 +775,8 @@ class HFLayoutLmSequenceClassifier(HFLayoutLmSequenceClassifierBase):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
+        self.name = self.get_name(path_weights, "LayoutLM")
+        self.model_id = self.get_model_id()
         config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
         self.model = LayoutLMForSequenceClassification.from_pretrained(
             pretrained_model_name_or_path=path_weights, config=config
@@ -751,6 +798,20 @@ class HFLayoutLmSequenceClassifier(HFLayoutLmSequenceClassifierBase):
         result.class_name = self.categories[str(result.class_id)]
         return result
 
+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = PretrainedConfig.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMForSequenceClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+
 
 class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
     """
@@ -790,10 +851,9 @@ class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
-        config = LayoutLMv2Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
-        self.model = LayoutLMv2ForSequenceClassification.from_pretrained(
-            pretrained_model_name_or_path=path_weights, config=config
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv2")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories, device)
 
     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> SequenceClassResult:
@@ -818,6 +878,20 @@ class HFLayoutLmv2SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         """
         return {"image_width": 224, "image_height": 224}
 
+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv2Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv2ForSequenceClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
+
 
 class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
     """
@@ -857,10 +931,9 @@ class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
         categories: Mapping[str, TypeOrStr],
         device: Optional[Literal["cpu", "cuda"]] = None,
     ):
-        config = LayoutLMv3Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
-        self.model = LayoutLMv3ForSequenceClassification.from_pretrained(
-            pretrained_model_name_or_path=path_weights, config=config
-        )
+        self.name = self.get_name(path_weights, "LayoutLMv3")
+        self.model_id = self.get_model_id()
+        self.model = self.get_wrapped_model(path_config_json, path_weights)
         super().__init__(path_config_json, path_weights, categories, device)
 
     def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> SequenceClassResult:
@@ -890,3 +963,17 @@ class HFLayoutLmv3SequenceClassifier(HFLayoutLmSequenceClassifierBase):
             "pixel_mean": np.array(IMAGENET_DEFAULT_MEAN, dtype=np.float32),
             "pixel_std": np.array(IMAGENET_DEFAULT_STD, dtype=np.float32),
         }
+
+    @staticmethod
+    def get_wrapped_model(path_config_json: str, path_weights: str) -> Any:
+        """
+        Get the inner (wrapped) model.
+
+        :param path_config_json: path to .json config file
+        :param path_weights: path to model artifact
+        :return: 'nn.Module'
+        """
+        config = LayoutLMv3Config.from_pretrained(pretrained_model_name_or_path=path_config_json)
+        return LayoutLMv3ForSequenceClassification.from_pretrained(
+            pretrained_model_name_or_path=path_weights, config=config
+        )
deepdoctection/extern/pdftext.py

@@ -65,7 +65,8 @@ class PdfPlumberTextDetector(PdfMiner):
     """
 
     def __init__(self) -> None:
-        self.name = "pdfplumber"
+        self.name = "Pdfplumber"
+        self.model_id = self.get_model_id()
         self.categories = {"1": LayoutType.word}
 
     def predict(self, pdf_bytes: bytes) -> List[DetectionResult]:
deepdoctection/extern/pt/ptutils.py

@@ -20,6 +20,7 @@ Torch related utils
 """
 
 
+from ...utils.error import DependencyError
 from ...utils.file_utils import pytorch_available
 
 
@@ -31,7 +32,7 @@ def set_torch_auto_device() -> "torch.device":  # type: ignore
         from torch import cuda, device  # pylint: disable=C0415
 
         return device("cuda" if cuda.is_available() else "cpu")
-    raise ModuleNotFoundError("Pytorch must be installed")
+    raise DependencyError("Pytorch must be installed")
 
 
 def get_num_gpu() -> int:
@@ -45,4 +46,4 @@ def get_num_gpu() -> int:
         from torch import cuda  # pylint: disable=C0415
 
         return cuda.device_count()
-    raise ModuleNotFoundError("Pytorch must be installed")
+    raise DependencyError("Pytorch must be installed")
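
Both torch helpers now raise the library's own `DependencyError` instead of the builtin `ModuleNotFoundError`. If `DependencyError` does not subclass `ModuleNotFoundError` (the new `utils/error.py` is not shown in this diff), existing `except ModuleNotFoundError` guards will stop matching. A small sketch of the updated call site:

    from deepdoctection.utils.error import DependencyError
    from deepdoctection.extern.pt.ptutils import set_torch_auto_device

    try:
        device = set_torch_auto_device()
    except DependencyError:
        device = None  # Pytorch missing: fall back or skip torch-based predictors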
deepdoctection/extern/tessocr.py

@@ -19,21 +19,24 @@
 Tesseract OCR engine for text extraction
 """
 import shlex
+import string
 import subprocess
 import sys
 from errno import ENOENT
 from itertools import groupby
 from os import environ
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Mapping, Optional, Union
 
-import numpy as np
+from packaging.version import InvalidVersion, Version, parse
 
 from ..utils.context import save_tmp_file, timeout_manager
 from ..utils.detection_types import ImageType, Requirement
-from ..utils.file_utils import _TESS_PATH, TesseractNotFound, get_tesseract_requirement
+from ..utils.error import DependencyError, TesseractError
+from ..utils.file_utils import _TESS_PATH, get_tesseract_requirement
 from ..utils.metacfg import config_to_cli_str, set_config_by_yaml
-from ..utils.settings import LayoutType, ObjectTypes
-from .base import DetectionResult, ObjectDetector, PredictorBase
+from ..utils.settings import LayoutType, ObjectTypes, PageType
+from ..utils.viz import viz_handler
+from .base import DetectionResult, ImageTransformer, ObjectDetector, PredictorBase
 
 # copy and paste with some light modifications from https://github.com/madmaze/pytesseract/tree/master/pytesseract
 
@@ -57,18 +60,6 @@ _LANG_CODE_TO_TESS_LANG_CODE = {
 }
 
 
-class TesseractError(RuntimeError):
-    """
-    Tesseract Error
-    """
-
-    def __init__(self, status: int, message: str) -> None:
-        super().__init__()
-        self.status = status
-        self.message = message
-        self.args = (status, message)
-
-
 def _subprocess_args() -> Dict[str, Any]:
     # See https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess
     # for reference and comments.
@@ -109,7 +100,7 @@ def _run_tesseract(tesseract_args: List[str]) -> None:
     except OSError as error:
         if error.errno != ENOENT:
             raise error from error
-        raise TesseractNotFound("Tesseract not found. Please install or add to your PATH.") from error
+        raise DependencyError("Tesseract not found. Please install or add to your PATH.") from error
 
     with timeout_manager(proc, 0) as error_string:
         if proc.returncode:
@@ -119,6 +110,50 @@ def _run_tesseract(tesseract_args: List[str]) -> None:
         )
 
 
+def get_tesseract_version() -> Version:
+    """
+    Returns Version object of the Tesseract version
+    """
+    try:
+        output = subprocess.check_output(
+            ["tesseract", "--version"],
+            stderr=subprocess.STDOUT,
+            env=environ,
+            stdin=subprocess.DEVNULL,
+        )
+    except OSError as error:
+        raise DependencyError("Tesseract not found. Please install or add to your PATH.") from error
+
+    raw_version = output.decode("utf-8")
+    str_version, *_ = raw_version.lstrip(string.printable[10:]).partition(" ")
+    str_version, *_ = str_version.partition("-")
+
+    try:
+        version = parse(str_version)
+        assert version >= Version("3.05")
+    except (AssertionError, InvalidVersion) as error:
+        raise SystemExit(f'Invalid tesseract version: "{raw_version}"') from error
+
+    return version
+
+
+def image_to_angle(image: ImageType) -> Mapping[str, str]:
+    """
+    Generating a tmp file and running tesseract to get the orientation of the image.
+
+    :param image: Image in np.array.
+    :return: A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'.
+    """
+    with save_tmp_file(image, "tess_") as (tmp_name, input_file_name):
+        _run_tesseract(_input_to_cli_str("osd", "--psm 0", 0, input_file_name, tmp_name))
+        with open(tmp_name + ".osd", "rb") as output_file:
+            output = output_file.read().decode("utf-8")
+
+    return {
+        key_value[0]: key_value[1] for key_value in (line.split(": ") for line in output.split("\n") if len(line) >= 2)
+    }
+
+
 def image_to_dict(image: ImageType, lang: str, config: str) -> Dict[str, List[Union[str, int, float]]]:
     """
     This is more or less pytesseract.image_to_data with a dict as returned value.
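
The string handling in `get_tesseract_version` is easiest to follow on a sample banner. `string.printable[10:]` is every printable character except the digits, so the `lstrip` call eats the leading `tesseract ` prefix up to the first digit; the two `partition` calls then drop everything after the first space and the first hyphen. A worked example (the banner text is an assumed sample):

    import string

    raw_version = "tesseract 5.3.0-12-g1a2b"   # assumed sample of `tesseract --version`
    str_version, *_ = raw_version.lstrip(string.printable[10:]).partition(" ")
    str_version, *_ = str_version.partition("-")
    print(str_version)  # 5.3.0 -> parse() accepts it, and the >= 3.05 floor passes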
@@ -220,7 +255,6 @@ def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool,
    :return: A list of tesseract extractions wrapped in DetectionResult
    """
 
-    np_img = np_img.astype(np.uint8)
    results = image_to_dict(np_img, supported_languages, config)
    all_results = []
 
@@ -249,6 +283,16 @@ def predict_text(np_img: ImageType, supported_languages: str, text_lines: bool,
    return all_results
 
 
+def predict_rotation(np_img: ImageType) -> Mapping[str, str]:
+    """
+    Predicts the rotation of an image using the Tesseract OCR engine.
+
+    :param np_img: numpy array of the image
+    :return: A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'
+    """
+    return image_to_angle(np_img)
+
+
 class TesseractOcrDetector(ObjectDetector):
     """
     Text object detector based on Tesseracts OCR engine. Note that tesseract has to be installed separately.
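
`image_to_angle` and its thin wrapper `predict_rotation` just reshape Tesseract's `--psm 0` OSD report into a flat dict. A sketch of the parsing step on an assumed `.osd` payload (real reports may carry more keys):

    # Assumed shape of a tesseract OSD report.
    output = (
        "Page number: 0\n"
        "Orientation in degrees: 270\n"
        "Rotate: 90\n"
        "Orientation confidence: 12.34\n"
    )
    parsed = {
        key_value[0]: key_value[1]
        for key_value in (line.split(": ") for line in output.split("\n") if len(line) >= 2)
    }
    print(parsed["Orientation in degrees"], parsed["Orientation confidence"])  # 270 12.34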
@@ -292,7 +336,9 @@ class TesseractOcrDetector(ObjectDetector):
        :param config_overwrite: Overwrite config parameters defined by the yaml file with new values.
                                 E.g. ["oem=14"]
        """
-        self.name = _TESS_PATH
+        self.name = self.get_name()
+        self.model_id = self.get_model_id()
+
        if config_overwrite is None:
            config_overwrite = []
 
@@ -316,13 +362,13 @@ class TesseractOcrDetector(ObjectDetector):
        :param np_img: image as numpy array
        :return: A list of DetectionResult
        """
-        detection_results = predict_text(
+
+        return predict_text(
            np_img,
            supported_languages=self.config.LANGUAGES,
            text_lines=self.config.LINES,
            config=config_to_cli_str(self.config, "LANGUAGES", "LINES"),
        )
-        return detection_results
 
    @classmethod
    def get_requirements(cls) -> List[Requirement]:
@@ -342,3 +388,69 @@ class TesseractOcrDetector(ObjectDetector):
        :param language: `Languages`
        """
        self.config.LANGUAGES = _LANG_CODE_TO_TESS_LANG_CODE.get(language, language.value)
+
+    @staticmethod
+    def get_name() -> str:
+        """Returns the name of the model"""
+        return f"Tesseract_{get_tesseract_version()}"
+
+
+class TesseractRotationTransformer(ImageTransformer):
+    """
+    The `TesseractRotationTransformer` class is a specialized image transformer that is designed to handle image
+    rotation in the context of Optical Character Recognition (OCR) tasks. It inherits from the `ImageTransformer`
+    base class and implements methods for predicting and applying rotation transformations to images.
+
+    The `predict` method determines the angle of the rotated image. It can only handle angles that are multiples
+    of 90 degrees. This method uses the Tesseract OCR engine to predict the rotation angle of an image.
+
+    The `transform` method applies the predicted rotation to the image, effectively rotating the image backwards.
+    This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
+
+    This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
+    The class also provides methods for cloning itself and for getting the requirements of the Tesseract OCR system.
+
+    **Example:**
+        transformer = TesseractRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+    """
+
+    def __init__(self) -> None:
+        self.name = _TESS_PATH + "-rotation"
+
+    def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
+        """
+        Applies the predicted rotation to the image, effectively rotating the image backwards.
+        This method uses either the Pillow library or OpenCV for the rotation operation, depending on the
+        configuration.
+
+        :param np_img: The input image as a numpy array.
+        :param specification: A `DetectionResult` object containing the predicted rotation angle.
+        :return: The rotated image as a numpy array.
+        """
+        return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
+
+    def predict(self, np_img: ImageType) -> DetectionResult:
+        """
+        Determines the angle of the rotated image. It can only handle angles that are multiples of 90 degrees.
+        This method uses the Tesseract OCR engine to predict the rotation angle of an image.
+
+        :param np_img: The input image as a numpy array.
+        :return: A `DetectionResult` object containing the predicted rotation angle and confidence.
+        """
+        output_dict = predict_rotation(np_img)
+        return DetectionResult(
+            angle=float(output_dict["Orientation in degrees"]), score=float(output_dict["Orientation confidence"])
+        )
+
+    @classmethod
+    def get_requirements(cls) -> List[Requirement]:
+        return [get_tesseract_requirement()]
+
+    def clone(self) -> PredictorBase:
+        return self.__class__()
+
+    @staticmethod
+    def possible_category() -> PageType:
+        return PageType.angle
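
Mirroring the docstring's example, an end-to-end sketch of the new transformer (requires a local Tesseract install; the blank numpy page is only a placeholder, a real scan is needed for a meaningful angle):

    import numpy as np
    from deepdoctection.extern.tessocr import TesseractRotationTransformer

    transformer = TesseractRotationTransformer()
    np_img = np.zeros((1200, 800, 3), dtype=np.uint8)   # placeholder for a scanned page
    detection_result = transformer.predict(np_img)      # DetectionResult with .angle and .score
    upright = transformer.transform(np_img, detection_result)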
deepdoctection/extern/texocr.py

@@ -120,6 +120,8 @@ class TextractOcrDetector(ObjectDetector):
        :param credentials_kwargs: `aws_access_key_id`, `aws_secret_access_key` or `aws_session_token`
        """
        self.name = "textract"
+        self.model_id = self.get_model_id()
+
        self.text_lines = text_lines
        self.client = boto3.client("textract", **credentials_kwargs)
        if self.text_lines:
deepdoctection/extern/tp/tpcompat.py

@@ -55,7 +55,7 @@ class ModelDescWithConfig(ModelDesc, ABC):  # type: ignore
 
        :return: Tuple of list input and list output names. The names must coincide with tensor within the model.
        """
-        raise NotImplementedError
+        raise NotImplementedError()
 
 
 class TensorpackPredictor(ABC):
@@ -106,14 +106,14 @@ class TensorpackPredictor(ABC):
 
    @staticmethod
    @abstractmethod
-    def set_model(
+    def get_wrapped_model(
        path_yaml: str, categories: Mapping[str, ObjectTypes], config_overwrite: Union[List[str], None]
    ) -> ModelDescWithConfig:
        """
        Implement the config generation, its modification and instantiate a version of the model. See
        `pipe.tpfrcnn.TPFrcnnDetector` for an example
        """
-        raise NotImplementedError
+        raise NotImplementedError()
 
    @abstractmethod
    def predict(self, np_img: Any) -> Any:
@@ -121,7 +121,7 @@ class TensorpackPredictor(ABC):
        Implement, how `self.tp_predictor` is invoked and raw prediction results are generated. Do use only raw
        objects and nothing, which is related to the DD API.
        """
-        raise NotImplementedError
+        raise NotImplementedError()
 
    @property
    def model(self) -> ModelDescWithConfig:
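
Because the abstract static method `set_model` is renamed to `get_wrapped_model`, any custom `TensorpackPredictor` subclass outside the library must rename its override, otherwise instantiation fails against the ABC. A minimal (hypothetical) stub of the new signature:

    from typing import Any, List, Mapping, Union

    class MyTpDetector(TensorpackPredictor):  # hypothetical user subclass
        @staticmethod
        def get_wrapped_model(
            path_yaml: str, categories: Mapping[str, ObjectTypes], config_overwrite: Union[List[str], None]
        ) -> ModelDescWithConfig:
            ...  # build the config and tensorpack model, as TPFrcnnDetector does

        def predict(self, np_img: Any) -> Any:
            ...  # invoke self.tp_predictor on the raw image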
deepdoctection/extern/tp/tpfrcnn/preproc.py

@@ -18,6 +18,7 @@ from tensorpack.dataflow.imgaug import AugmentorList, ImageAugmentor
 
 from ....datapoint.convert import box_to_point4, point4_to_box
 from ....utils.detection_types import ImageType, JsonDict
+from ....utils.error import MalformedData
 from ....utils.logger import log_once
 from .common import filter_boxes_inside_shape, np_iou
 from .modeling.model_fpn import get_all_anchors_fpn
@@ -27,12 +28,6 @@ from .utils.np_box_ops import ioa as np_ioa
 # pylint: enable=import-error
 
 
-class MalformedData(BaseException):
-    """
-    Exception class for malformed data
-    """
-
-
 def augment(dp: JsonDict, imgaug_list: List[ImageAugmentor], add_mask: bool) -> JsonDict:
     """
     Augment an image according to a list of augmentors.
@@ -62,7 +57,7 @@ def augment(dp: JsonDict, imgaug_list: List[ImageAugmentor], add_mask: bool) ->
    assert np.min(np_area(gt_boxes)) > 0, "some boxes have zero area"
 
    if add_mask:
-        raise NotImplementedError
+        raise NotImplementedError()
 
    return dp