deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of deepdoctection might be problematic.

Files changed (124)
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
  120. deepdoctection/analyzer/_config.py +0 -146
  121. deepdoctection-0.42.0.dist-info/METADATA +0 -431
  122. deepdoctection-0.42.0.dist-info/RECORD +0 -148
  123. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/extern/pdftext.py
@@ -48,18 +48,23 @@ def _to_detect_result(word: dict[str, str], class_name: ObjectTypes) -> Detectio
 
 class PdfPlumberTextDetector(PdfMiner):
     """
-    Text miner based on the pdfminer.six engine. To convert pdfminers result, especially group character to get word
-    level results we use pdfplumber.
+    Text miner based on the `pdfminer.six` engine. To convert `pdfminer`'s results, in particular to group characters
+    into word-level results, we use `pdfplumber`.
 
+    Example:
+        ```python
         pdf_plumber = PdfPlumberTextDetector()
         df = SerializerPdfDoc.load("path/to/document.pdf")
         df.reset_state()
 
         for dp in df:
            detection_results = pdf_plumber.predict(dp["pdf_bytes"])
+        ```
 
     To use it in a more integrated way:
 
+    Example:
+        ```python
         pdf_plumber = PdfPlumberTextDetector()
         text_extract = TextExtractionService(pdf_plumber)
 
@@ -70,7 +75,7 @@ class PdfPlumberTextDetector(PdfMiner):
 
         for dp in df:
             ...
-
+        ```
     """
 
     def __init__(self, x_tolerance: int = 3, y_tolerance: int = 3) -> None:
@@ -83,10 +88,13 @@ class PdfPlumberTextDetector(PdfMiner):
 
     def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
         """
-        Call pdfminer.six and returns detected text as detection results
+        Calls `pdfminer.six` and returns detected text as `DetectionResult`s
+
+        Args:
+            pdf_bytes: bytes of a single pdf page
 
-        :param pdf_bytes: bytes of a single pdf page
-        :return: A list of DetectionResult
+        Returns:
+            A list of `DetectionResult`
         """
 
         with save_tmp_file(pdf_bytes, "pdf_") as (tmp_name, _):
@@ -104,8 +112,12 @@ class PdfPlumberTextDetector(PdfMiner):
     def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
         """
         Get the width and height of the full page
-        :param pdf_bytes: pdf_bytes generating the pdf
-        :return: width and height
+
+        Args:
+            pdf_bytes: `pdf_bytes` generating the pdf
+
+        Returns:
+            `(width, height)`
         """
 
         if self._pdf_bytes == pdf_bytes and self._page is not None:
@@ -126,15 +138,20 @@ class Pdfmium2TextDetector(PdfMiner):
     """
     Text miner based on the pypdfium2 engine. It will return text on text line level and not on word level
 
+    Example:
+        ```python
         pdfmium2 = Pdfmium2TextDetector()
         df = SerializerPdfDoc.load("path/to/document.pdf")
         df.reset_state()
 
         for dp in df:
            detection_results = pdfmium2.predict(dp["pdf_bytes"])
+        ```
 
     To use it in a more integrated way:
 
+    Example:
+        ```python
         pdfmium2 = Pdfmium2TextDetector()
         text_extract = TextExtractionService(pdfmium2)
 
@@ -144,6 +161,7 @@ class Pdfmium2TextDetector(PdfMiner):
         df.reset_state()
         for dp in df:
             ...
+        ```
 
     """
 
@@ -157,8 +175,11 @@ class Pdfmium2TextDetector(PdfMiner):
         """
         Call pypdfium2 and returns detected text as detection results
 
-        :param pdf_bytes: bytes of a single pdf page
-        :return: A list of DetectionResult
+        Args:
+            pdf_bytes: bytes of a single pdf page
+
+        Returns:
+            A list of `DetectionResult`
         """
 
         pdf = PdfDocument(pdf_bytes)
@@ -188,8 +209,12 @@ class Pdfmium2TextDetector(PdfMiner):
     def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
         """
         Get the width and height of the full page
-        :param pdf_bytes: pdf_bytes generating the pdf
-        :return: width and height
+
+        Args:
+            pdf_bytes: `pdf_bytes` generating the pdf
+
+        Returns:
+            `(width, height)`
         """
 
         if self._pdf_bytes == pdf_bytes and self._page is not None:
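Both classes implement the same `PdfMiner` interface, so the documented calls compose identically. A minimal sketch of the usage described in the docstrings above (the document path is a placeholder; module paths follow the file list at the top of this diff):

```python
from deepdoctection.dataflow.custom_serialize import SerializerPdfDoc
from deepdoctection.extern.pdftext import PdfPlumberTextDetector, Pdfmium2TextDetector

miner = PdfPlumberTextDetector(x_tolerance=3, y_tolerance=3)  # word-level results
# miner = Pdfmium2TextDetector()                              # text-line-level results

df = SerializerPdfDoc.load("path/to/document.pdf")
df.reset_state()

for dp in df:
    width, height = miner.get_width_height(dp["pdf_bytes"])  # full page size
    detection_results = miner.predict(dp["pdf_bytes"])       # list of DetectionResult
    print(f"page {width}x{height}: {len(detection_results)} detections")
```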
deepdoctection/extern/pt/nms.py
@@ -30,7 +30,21 @@ with try_import() as import_guard:
 # Copy & paste from https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/nms.py
 def batched_nms(boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float) -> torch.Tensor:
     """
-    Same as torchvision.ops.boxes.batched_nms, but with float().
+    Same as `torchvision.ops.boxes.batched_nms`, but with `float()`.
+
+    Args:
+        boxes: A `torch.Tensor` of shape (N, 4) containing bounding boxes.
+        scores: A `torch.Tensor` of shape (N,) containing scores for each box.
+        idxs: A `torch.Tensor` of shape (N,) containing the class indices for each box.
+        iou_threshold: A float representing the IoU threshold for suppression.
+
+    Returns:
+        A `torch.Tensor` containing the indices of the boxes to keep.
+
+    Note:
+        `Fp16` does not have enough range for batched NMS, so `float()` is used.
+        Torchvision already has a strategy to decide whether to use the coordinate trick or a for loop to implement
+        `batched_nms`.
     """
     assert boxes.shape[-1] == 4
     # Note: Torchvision already has a strategy (https://github.com/pytorch/vision/issues/1311)
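The class-aware suppression that the new `Args`/`Returns` block documents can be sketched against torchvision directly; the wrapper above only adds the `float()` cast for `fp16` inputs (toy values):

```python
import torch
from torchvision.ops.boxes import batched_nms

# Three xyxy boxes: the first two overlap heavily and share class 0;
# the third matches the first exactly but carries class 1.
boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [1.0, 1.0, 11.0, 11.0],
                      [0.0, 0.0, 10.0, 10.0]])
scores = torch.tensor([0.90, 0.80, 0.75])
idxs = torch.tensor([0, 0, 1])

keep = batched_nms(boxes.float(), scores.float(), idxs, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): box 1 is suppressed within class 0, box 2 survives
```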
deepdoctection/extern/pt/ptutils.py
@@ -33,19 +33,23 @@ with try_import() as import_guard:
 
 def get_torch_device(device: Optional[Union[str, torch.device]] = None) -> torch.device:
     """
-    Selecting a device on which to load a model. The selection follows a cascade of priorities:
+    Select a device on which to load a model. The selection follows a cascade of priorities:
 
-    - If a device string is provided, it is used.
-    - If the environment variable "USE_CUDA" is set, a GPU is used. If more GPUs are available, it will use all of them
-      unless something else is specified by CUDA_VISIBLE_DEVICES:
+    If a device string is provided, it is used. If the environment variable `USE_CUDA` is set, a GPU is used.
+    If more GPUs are available, it will use all of them unless something else is specified by `CUDA_VISIBLE_DEVICES`.
 
-    https://stackoverflow.com/questions/54216920/how-to-use-multiple-gpus-in-pytorch
+    See: <https://stackoverflow.com/questions/54216920/how-to-use-multiple-gpus-in-pytorch>
 
-    - If an MPS device is available, it is used.
-    - Otherwise, the CPU is used.
+    If an MPS device is available, it is used. Otherwise, the CPU is used.
 
-    :param device: Device either as string or torch.device
-    :return: Tensorflow device
+    Args:
+        device: Device either as string or torch.device.
+
+    Returns:
+        torch.device: The selected device.
+
+    Note:
+        The function checks the environment variables `USE_CUDA` and `USE_MPS` to determine device preference.
     """
     if device is not None:
         if isinstance(device, torch.device):
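A short sketch of the documented priority cascade; the value assigned to `USE_CUDA` is illustrative, as the docstring only requires the variable to be set:

```python
import os
from deepdoctection.extern.pt.ptutils import get_torch_device

print(get_torch_device("cpu"))  # an explicit argument always wins: device(type='cpu')

os.environ["USE_CUDA"] = "1"    # otherwise USE_CUDA / USE_MPS steer the selection
print(get_torch_device())       # cuda if available, else mps, else cpu
```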
deepdoctection/extern/tessocr.py
@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-Tesseract OCR engine for text extraction
+Tesseract OCR engine
 """
 from __future__ import annotations
 
@@ -115,7 +115,8 @@ def _run_tesseract(tesseract_args: list[str]) -> None:
 
 def get_tesseract_version() -> Version:
     """
-    Returns Version object of the Tesseract version
+    Returns:
+        Version of the installed tesseract engine.
     """
     try:
         output = subprocess.check_output(
@@ -142,10 +143,12 @@ def get_tesseract_version() -> Version:
 
 def image_to_angle(image: PixelValues) -> Mapping[str, str]:
     """
-    Generating a tmp file and running tesseract to get the orientation of the image.
+    Generating a tmp file and running Tesseract to get the orientation of the image.
 
-    :param image: Image in np.array.
-    :return: A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'.
+    Args:
+        image: Image as an `np.array`
+    Returns:
+        A dict with keys 'Orientation in degrees' and 'Orientation confidence'.
     """
     with save_tmp_file(image, "tess_") as (tmp_name, input_file_name):
         _run_tesseract(_input_to_cli_str("osd", "--psm 0", 0, input_file_name, tmp_name))
@@ -159,7 +162,7 @@ def image_to_angle(image: PixelValues) -> Mapping[str, str]:
 
 def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[Union[str, int, float]]]:
     """
-    This is more or less pytesseract.image_to_data with a dict as returned value.
+    This is more or less `pytesseract.image_to_data` with a dict as returned value.
     What happens under the hood is:
 
     - saving an image file
@@ -167,13 +170,17 @@ def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[
     - saving a temp .tsv file with predicted results
     - reading the .tsv file and returning the results as dict.
 
-    Requires Tesseract 3.05+
+    Note:
+        Requires Tesseract 3.05 or higher
 
-    :param image: Image in np.array.
-    :param lang: String of language
-    :param config: string of configs
-    :return: Dictionary with keys 'left', 'top', 'width', 'height' (bounding box coords), 'conf' (confidence), 'text'
-             (captured text), 'block_num' (block number) and 'lin_num' (line number).
+    Args:
+        image: Image in np.array.
+        lang: String of language
+        config: string of configs
+
+    Returns:
+        Dictionary with keys `left`, `top`, `width`, `height` (bounding box coords), `conf` (confidence), `text`
+        (captured text), `block_num` (block number) and `lin_num` (line number).
     """
 
     with save_tmp_file(image, "tess_") as (tmp_name, input_file_name):
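The returned dictionary is columnar: every key holds one list, and entries align by index across keys. A hedged sketch of consuming it (the blank test image stands in for a real page):

```python
import numpy as np
from deepdoctection.extern.tessocr import image_to_dict

np_img = np.full((600, 400, 3), 255, dtype=np.uint8)  # placeholder page image
result = image_to_dict(np_img, "eng", "")

for text, conf, left, top in zip(result["text"], result["conf"], result["left"], result["top"]):
    if text and float(conf) >= 0:  # Tesseract marks non-word rows with conf == -1
        print(f"{text!r} at ({left}, {top}), confidence {conf}")
```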
@@ -213,10 +220,14 @@ def image_to_dict(image: PixelValues, lang: str, config: str) -> dict[str, list[
 
 def tesseract_line_to_detectresult(detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
     """
-    Generating text line DetectionResult based on Tesseract word grouping. It generates line bounding boxes from
+    Generating text line `DetectionResult`s based on Tesseract word grouping. It generates line bounding boxes from
     word bounding boxes.
-    :param detect_result_list: A list of detection result
-    :return: An extended list of detection result
+
+    Args:
+        detect_result_list: A list of `DetectionResult`s
+
+    Returns:
+        An extended list of `DetectionResult`s
     """
 
     line_detect_result: list[DetectionResult] = []
@@ -247,15 +258,18 @@
 
 def predict_text(np_img: PixelValues, supported_languages: str, text_lines: bool, config: str) -> list[DetectionResult]:
     """
-    Calls tesseract directly with some given configs. Requires Tesseract to be installed.
-
-    :param np_img: Image in np.array.
-    :param supported_languages: To improve ocr extraction quality it is helpful to pre-select the language of the
-                                detected text, if this in known in advance. Combinations are possible, e.g. "deu",
-                                "fr+eng".
-    :param text_lines: If True, it will return DetectionResults of Text lines as well.
-    :param config: The config parameter passing to Tesseract. Consult also https://guides.nyu.edu/tesseract/usage
-    :return: A list of tesseract extractions wrapped in DetectionResult
+    Calls Tesseract directly with some given configs. Requires Tesseract to be installed.
+
+    Args:
+        np_img: Image in `np.array`.
+        supported_languages: To improve OCR extraction quality it is helpful to pre-select the language of the
+                             detected text, if this is known in advance. Combinations are possible, e.g. `deu`,
+                             `fr+eng`.
+        text_lines: If `True`, it will return `DetectionResult`s of text lines as well.
+        config: The config parameter passed to Tesseract. Consult also <https://guides.nyu.edu/tesseract/usage>
+
+    Returns:
+        A list of Tesseract extractions wrapped in `DetectionResult`
     """
 
     results = image_to_dict(np_img, supported_languages, config)
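With `text_lines=True`, the word results are extended by the synthetic line boxes built in `tesseract_line_to_detectresult` above; a sketch (the `DetectionResult` field names are assumptions based on the surrounding docstrings):

```python
from deepdoctection.extern.tessocr import predict_text

# np_img as in the previous sketch; languages and config mirror the docstring.
detection_results = predict_text(np_img, supported_languages="deu+eng", text_lines=True, config="--psm 3")
for result in detection_results:
    print(result.class_name, result.text, result.box)  # words first, then derived lines
```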
@@ -290,31 +304,37 @@ def predict_rotation(np_img: PixelValues) -> Mapping[str, str]:
     """
     Predicts the rotation of an image using the Tesseract OCR engine.
 
-    :param np_img: numpy array of the image
-    :return: A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'
+    Args:
+        np_img: numpy array of the image
+
+    Returns:
+        A dictionary with keys 'Orientation in degrees' and 'Orientation confidence'
     """
     return image_to_angle(np_img)
 
 
 class TesseractOcrDetector(ObjectDetector):
     """
-    Text object detector based on Tesseracts OCR engine. Note that tesseract has to be installed separately.
+    Text object detector based on Tesseract's OCR engine.
 
-    The current Tesseract release is 4.1.1. A version 5.xx can be integrated via direct installation at
-    https://github.com/tesseract-ocr/tesseract. Building from source is necessary here.
+    Note:
+        Tesseract has to be installed separately. <https://tesseract-ocr.github.io/>
 
-    Documentation can be found here: https://tesseract-ocr.github.io/
-
-    All configuration options that are available via pytesseract can be given via the configuration. The best overview
-    can be found at https://pypi.org/project/pytesseract/.
+    All configuration options that are available via pytesseract can be added to the configuration file:
+    <https://pypi.org/project/pytesseract/>
 
+    Example:
+        ```python
         tesseract_config_path = ModelCatalog.get_full_path_configs("dd/conf_tesseract.yaml")
         ocr_detector = TesseractOcrDetector(tesseract_config_path)
 
         detection_result = ocr_detector.predict(bgr_image_as_np_array)
+        ```
 
     To use it within a pipeline
 
+    Example:
+        ```python
         tesseract_config_path = ModelCatalog.get_full_path_configs("dd/conf_tesseract.yaml")
         ocr_detector = TesseractOcrDetector(tesseract_config_path)
 
@@ -325,6 +345,7 @@ class TesseractOcrDetector(ObjectDetector):
 
         for dp in df:
             ...
+        ```
     """
 
     def __init__(
@@ -333,11 +354,12 @@ class TesseractOcrDetector(ObjectDetector):
         config_overwrite: Optional[list[str]] = None,
     ):
         """
-        Set up the configuration which is stored in a yaml-file, that need to be passed through.
+        Set up the configuration, which is stored in a `.yaml` file that needs to be passed through.
 
-        :param path_yaml: The path to the yaml config
-        :param config_overwrite: Overwrite config parameters defined by the yaml file with new values.
-                                 E.g. ["oem=14"]
+        Args:
+            path_yaml: The path to the yaml config
+            config_overwrite: Overwrite config parameters defined by the yaml file with new values.
+                              E.g. `["oem=14"]`
         """
         self.name = self.get_name()
         self.model_id = self.get_model_id()
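A hedged sketch of the `config_overwrite` mechanism: each entry is a `KEY=value` string that overrides the value parsed from the yaml file (the `LANGUAGES` key is grounded in `set_language` below; other keys depend on the shipped config):

```python
from deepdoctection.extern.model import ModelCatalog
from deepdoctection.extern.tessocr import TesseractOcrDetector

path_yaml = ModelCatalog.get_full_path_configs("dd/conf_tesseract.yaml")
ocr_detector = TesseractOcrDetector(path_yaml, config_overwrite=["LANGUAGES=deu"])
```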
@@ -362,8 +384,11 @@ class TesseractOcrDetector(ObjectDetector):
         """
         Transfer of a numpy array and call of pytesseract. Return of the detection results.
 
-        :param np_img: image as numpy array
-        :return: A list of DetectionResult
+        Args:
+            np_img: image as `np.array`
+
+        Returns:
+            A list of `DetectionResult`
         """
 
         return predict_text(
@@ -386,7 +411,10 @@ class TesseractOcrDetector(ObjectDetector):
     def set_language(self, language: ObjectTypes) -> None:
         """
         Pass a language to change the model selection. For runtime language selection.
-        :param language: `Languages`
+
+        Args:
+            language: One of the following: `fre`, `dut`, `chi`, `cze`, `per`, `gre`, `mac`, `rum`, `arm`,
+                      `geo`, `war`, `glg`, `slv`, `alb`, `nn`.
         """
         self.config.LANGUAGES = _LANG_CODE_TO_TESS_LANG_CODE.get(language, language.value)
 
@@ -398,13 +426,11 @@
 
 class TesseractRotationTransformer(ImageTransformer):
     """
-    The `TesseractRotationTransformer` class is a specialized image transformer that is designed to handle image
-    rotation in the context of Optical Character Recognition (OCR) tasks. It inherits from the `ImageTransformer`
-    base class and implements methods for predicting and applying rotation transformations to images.
+    The `TesseractRotationTransformer` is designed to handle image rotations. It inherits from the `ImageTransformer`
+    base class and implements methods for predicting and applying rotation transformations.
 
     The `predict` method determines the angle of the rotated image. It can only handle angles that are multiples of 90
-    degrees.
-    This method uses the Tesseract OCR engine to predict the rotation angle of an image.
+    degrees. This method uses the Tesseract OCR engine to predict the rotation angle of an image.
 
     The `transform` method applies the predicted rotation to the image, effectively rotating the image backwards.
     This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
@@ -412,10 +438,12 @@ class TesseractRotationTransformer(ImageTransformer):
     This class can be particularly useful in OCR tasks where the orientation of the text in the image matters.
     The class also provides methods for cloning itself and for getting the requirements of the Tesseract OCR system.
 
-    **Example:**
-        transformer = TesseractRotationTransformer()
-        detection_result = transformer.predict(np_img)
-        rotated_image = transformer.transform(np_img, detection_result)
+    Example:
+        ```python
+        transformer = TesseractRotationTransformer()
+        detection_result = transformer.predict(np_img)
+        rotated_image = transformer.transform(np_img, detection_result)
+        ```
     """
 
     def __init__(self) -> None:
@@ -428,9 +456,12 @@ class TesseractRotationTransformer(ImageTransformer):
         Applies the predicted rotation to the image, effectively rotating the image backwards.
         This method uses either the Pillow library or OpenCV for the rotation operation, depending on the configuration.
 
-        :param np_img: The input image as a numpy array.
-        :param specification: A `DetectionResult` object containing the predicted rotation angle.
-        :return: The rotated image as a numpy array.
+        Args:
+            np_img: The input image as a numpy array.
+            specification: A `DetectionResult` object containing the predicted rotation angle.
+
+        Returns:
+            The rotated image as a numpy array.
         """
         return viz_handler.rotate_image(np_img, specification.angle)  # type: ignore
 
@@ -439,8 +470,10 @@ class TesseractRotationTransformer(ImageTransformer):
         Determines the angle of the rotated image. It can only handle angles that are multiples of 90 degrees.
         This method uses the Tesseract OCR engine to predict the rotation angle of an image.
 
-        :param np_img: The input image as a numpy array.
-        :return: A `DetectionResult` object containing the predicted rotation angle and confidence.
+        Args:
+            np_img: The input image as a numpy array.
+        Returns:
+            A `DetectionResult` object containing the predicted rotation angle and confidence.
         """
         output_dict = predict_rotation(np_img)
         return DetectionResult(
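End to end, the two methods compose as in the docstring's example; a slightly expanded sketch with the intermediate `DetectionResult` made explicit (the reported angle is one of the documented multiples of 90):

```python
from deepdoctection.extern.tessocr import TesseractRotationTransformer

transformer = TesseractRotationTransformer()
specification = transformer.predict(np_img)                   # carries .angle and confidence
rotated_image = transformer.transform(np_img, specification)  # rotate back to upright
```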
deepdoctection/extern/texocr.py
@@ -60,15 +60,18 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
     return all_results
 
 
-def predict_text(np_img: PixelValues, client, text_lines: bool) -> list[DetectionResult]:  # type: ignore
+def predict_text(np_img: PixelValues, client: boto3.client, text_lines: bool) -> list[DetectionResult]:  # type: ignore
     """
     Calls AWS Textract client (`detect_document_text`) and returns plain OCR results.
     AWS account required.
 
-    :param client: botocore textract client
-    :param np_img: Image in np.array.
-    :param text_lines: If True, it will return DetectionResults of Text lines as well.
-    :return: A list of textract extractions wrapped in DetectionResult
+    Args:
+        np_img: Image in `np.array`.
+        client: botocore textract client
+        text_lines: If `True`, it will return `DetectionResult`s of text lines as well.
+
+    Returns:
+        A list of `DetectionResult`
     """
 
     width, height = np_img.shape[1], np_img.shape[0]
@@ -95,16 +98,23 @@ def predict_text(np_img: PixelValues, client, text_lines: bool) -> list[Detectio
 class TextractOcrDetector(ObjectDetector):
     """
     Text object detector based on AWS Textract OCR engine. Note that an AWS account as well as some additional
-    installations are required, i.e AWS CLI and boto3. Note further, that the service is not free of charge. Additional
-    information can be found at: <https://docs.aws.amazon.com/textract/?id=docs_gateway> .
+    installations are required, i.e. `AWS CLI` and `boto3`.
+
+    Note:
+        The service is not free of charge. Additional information can be found at:
+        <https://docs.aws.amazon.com/textract/?id=docs_gateway>.
+
+    The detector only calls the base `OCR` engine and does not return additional Textract document analysis features.
 
-    The detector only calls the base OCR engine and does not return additional Textract document analysis features.
+    Example:
 
+        ```python
         textract_predictor = TextractOcrDetector()
         detection_result = textract_predictor.predict(bgr_image_as_np_array)
+        ```
 
-    or
+    or
 
+        ```python
         textract_predictor = TextractOcrDetector()
         text_extract = TextExtractionService(textract_predictor)
 
@@ -113,13 +123,15 @@ class TextractOcrDetector(ObjectDetector):
 
         for dp in df:
             ...
+        ```
 
     """
 
     def __init__(self, text_lines: bool = False, **credentials_kwargs: str) -> None:
         """
-        :param text_lines: If True, it will return DetectionResults of Text lines as well.
-        :param credentials_kwargs: `aws_access_key_id`, `aws_secret_access_key` or `aws_session_token`
+        Args:
+            text_lines: If `True`, it will return `DetectionResult`s of text lines as well.
+            credentials_kwargs: `aws_access_key_id`, `aws_secret_access_key` or `aws_session_token`
         """
         self.name = "textract"
         self.model_id = self.get_model_id()
@@ -133,10 +145,13 @@ class TextractOcrDetector(ObjectDetector):
 
     def predict(self, np_img: PixelValues) -> list[DetectionResult]:
         """
-        Transfer of a numpy array and call textract client. Return of the detection results.
+        Transfer of an `np.array` and call of the Textract `client`. Return of the `DetectionResult`s.
+
+        Args:
+            np_img: image as `np.array`
 
-        :param np_img: image as numpy array
-        :return: A list of DetectionResult
+        Returns:
+            A list of `DetectionResult`s
         """
 
         return predict_text(np_img, self.client, self.text_lines)
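A sketch of the credential pass-through described in `__init__` above (key values are placeholders; if none of the three documented kwargs are supplied, the usual `boto3` credential chain applies):

```python
from deepdoctection.extern.texocr import TextractOcrDetector

textract_predictor = TextractOcrDetector(
    text_lines=True,
    aws_access_key_id="AKIA...",   # placeholder
    aws_secret_access_key="...",   # placeholder
)
detection_results = textract_predictor.predict(bgr_image_as_np_array)
```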
deepdoctection/extern/tp/tfutils.py
@@ -37,7 +37,15 @@ with try_import() as tf_import_guard:
 
 def is_tfv2() -> bool:
     """
-    Returns whether TF is operating in V2 mode.
+    Returns whether TensorFlow is operating in V2 mode.
+
+    Returns:
+        Whether TensorFlow is operating in V2 mode.
+
+    Example:
+        ```python
+        is_tfv2()
+        ```
     """
     try:
         from tensorflow.python import tf2  # pylint: disable=C0415
@@ -49,7 +57,15 @@ def is_tfv2() -> bool:
 
 def disable_tfv2() -> bool:
     """
-    Disable TF in V2 mode.
+    Disables TensorFlow V2 mode.
+
+    Returns:
+        Whether TensorFlow V2 mode was disabled.
+
+    Example:
+        ```python
+        disable_tfv2()
+        ```
     """
 
     tfv1 = tf.compat.v1
@@ -62,20 +78,32 @@ def disable_tfv2() -> bool:
 
 def disable_tp_layer_logging() -> None:
     """
-    Disables TP layer logging, if not already set
+    Disables tensorpack layer logging, if not already set.
+
+    Example:
+        ```python
+        disable_tp_layer_logging()
+        ```
     """
     disable_layer_logging()
 
 
 def get_tf_device(device: Optional[Union[str, tf.device]] = None) -> tf.device:
     """
-    Selecting a device on which to load a model. The selection follows a cascade of priorities:
+    Selects a device on which to load a model. The selection follows a cascade of priorities:
+
+    - If a `device` string is provided, it is used. If the string is "cuda" or "GPU", the first GPU is used.
+    - If the environment variable `USE_CUDA` is set, a GPU is used. If more GPUs are available it will use the first
+      one.
+
+    Args:
+        device: Device string.
 
-    - If a device string is provided, it is used. If the string is "cuda" or "GPU", the first GPU is used.
-    - If the environment variable "USE_CUDA" is set, a GPU is used. If more GPUs are available it will use the first one
+    Returns:
+        TensorFlow device.
 
-    :param device: Device string
-    :return: Tensorflow device
+    Raises:
+        EnvironmentError: If `USE_CUDA` is set but no GPU device is found, or if no CPU device is found.
     """
     if device is not None:
         if isinstance(device, ContextManager):
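Finally, a sketch of the documented TensorFlow cascade, including the `EnvironmentError` case (values are illustrative):

```python
import os
from deepdoctection.extern.tp.tfutils import get_tf_device

device = get_tf_device("cuda")  # "cuda" or "GPU" selects the first GPU
with device:
    ...                         # build or load the model under this device scope

os.environ["USE_CUDA"] = "1"    # without an argument the env vars decide;
device = get_tf_device()        # raises EnvironmentError if no GPU is visible
```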