deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124) hide show
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
  120. deepdoctection/analyzer/_config.py +0 -146
  121. deepdoctection-0.42.0.dist-info/METADATA +0 -431
  122. deepdoctection-0.42.0.dist-info/RECORD +0 -148
  123. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -17,8 +17,9 @@
17
17
 
18
18
 
19
19
  """
20
- Abstract classes for unifying external base- and Doctection predictors
20
+ Base classes for unifying external predictors
21
21
  """
22
+
22
23
  from __future__ import annotations
23
24
 
24
25
  from abc import ABC, abstractmethod
@@ -50,15 +51,18 @@ if TYPE_CHECKING:
50
51
  @dataclass
51
52
  class ModelCategories:
52
53
  """
53
- Categories for models (except models for NER tasks) are managed in this class. Different to DatasetCategories,
54
- these members are immutable.
54
+ Categories for models (except models for NER tasks) are managed in this class.
55
+ Different to `DatasetCategories`, these members are immutable.
55
56
 
56
- **Example**:
57
+ Example:
57
58
 
59
+ ```python
58
60
  categories = ModelCategories(init_categories={1: "text", 2: "title"})
59
61
  cats = categories.get_categories(as_dict=True) # {1: LayoutType.text, 2: LayoutType.title}
60
62
  categories.filter_categories = [LayoutType.text] # filter out text
61
63
  cats = categories.get_categories(as_dict=True) # {2: LayoutType.title}
64
+ ```
65
+
62
66
  """
63
67
 
64
68
  init_categories: Optional[Mapping[int, TypeOrStr]] = field(repr=False)
@@ -94,9 +98,12 @@ class ModelCategories:
94
98
  """
95
99
  Get the categories
96
100
 
97
- :param as_dict: return as dict
98
- :param name_as_key: if as_dict=`True` and name_as_key=`True` will swap key and value
99
- :return: categories dict
101
+ Args:
102
+ as_dict: return as dict
103
+ name_as_key: if `as_dict=True` and `name_as_key=True` will swap key and value
104
+
105
+ Returns:
106
+ categories dict
100
107
  """
101
108
  if as_dict:
102
109
  if name_as_key:
@@ -110,26 +117,30 @@ class ModelCategories:
110
117
 
111
118
  @property
112
119
  def filter_categories(self) -> Sequence[ObjectTypes]:
113
- """filter_categories"""
120
+ """`filter_categories`"""
114
121
  return self._filter_categories
115
122
 
116
123
  @filter_categories.setter
117
124
  def filter_categories(self, categories: Sequence[ObjectTypes]) -> None:
118
- """categories setter"""
125
+ """`categories` setter"""
119
126
  self._filter_categories = categories
120
127
  self.categories = self.get_categories()
121
128
 
122
129
  def shift_category_ids(self, shift_by: int) -> MappingProxyType[int, ObjectTypes]:
123
130
  """
124
- Shift category ids
125
-
126
- **Example**:
131
+ Shift `category_id`s
127
132
 
133
+ Example:
134
+ ```python
128
135
  categories = ModelCategories(init_categories={1: "text", 2: "title"})
129
136
  cats = categories.shift_category_ids(1) # {2: LayoutType.text, 3: LayoutType.title}
137
+ ```
130
138
 
131
- :param shift_by: The value to shift the category id to the left or to the right
132
- :return: shifted categories
139
+ Args:
140
+ shift_by: The value to shift the category id to the left or to the right
141
+
142
+ Returns:
143
+ shifted categories
133
144
  """
134
145
  return MappingProxyType({k + shift_by: v for k, v in self.get_categories().items()})
135
146
 
@@ -140,21 +151,23 @@ class NerModelCategories(ModelCategories):
140
151
  Categories for models for NER tasks. It can handle the merging of token classes and bio tags to build a new set
141
152
  of categories.
142
153
 
143
- **Example**:
144
-
154
+ Example:
155
+ ```python
145
156
  categories = NerModelCategories(categories_semantics=["question", "answer"], categories_bio=["B", "I"])
146
157
  cats = categories.get_categories(as_dict=True) # {"1": TokenClassWithTag.b_question,
147
158
  "2": TokenClassWithTag.i_question,
148
159
  "3": TokenClassWithTag.b_answer,
149
160
  "4": TokenClassWithTag.i_answer}
161
+ ```
150
162
 
151
163
  You can also leave the categories unchanged:
152
164
 
153
- **Example**:
154
-
165
+ Example:
166
+ ```python
155
167
  categories = NerModelCategories(init_categories={"1": "question", "2": "answer"})
156
168
  cats = categories.get_categories(as_dict=True) # {"1": TokenClasses.question,
157
169
  "2": TokenClasses.answer}
170
+ ```
158
171
  """
159
172
 
160
173
  categories_semantics: Optional[Sequence[TypeOrStr]] = field(default=None)
@@ -191,16 +204,22 @@ class NerModelCategories(ModelCategories):
191
204
  """
192
205
  Merge bio and semantics categories
193
206
 
194
- **Example**:
207
+ Example:
195
208
 
209
+ ```python
196
210
  categories = NerModelCategories(categories_semantics=["question", "answer"], categories_bio=["B", "I"])
197
211
  cats = categories.get_categories(as_dict=True) # {"1": TokenClassWithTag.b_question,
198
212
  "2": TokenClassWithTag.i_question,
199
213
  "3": TokenClassWithTag.b_answer,
200
214
  "4": TokenClassWithTag.i_answer}
201
- :param categories_semantics: semantic categories (without tags)
202
- :param categories_bio: bio tags
203
- :return: A mapping of categories with tags
215
+ ```
216
+
217
+ Args:
218
+ categories_semantics: semantic categories (without tags)
219
+ categories_bio: bio tags
220
+
221
+ Returns:
222
+ A mapping of categories with tags
204
223
  """
205
224
  categories_list = sorted(
206
225
  {
@@ -216,13 +235,18 @@ class NerModelCategories(ModelCategories):
216
235
  """
217
236
  Disentangle token class and tag. It will return separate ObjectTypes for token class and tag.
218
237
 
219
- **Example**:
238
+ Example:
220
239
 
240
+ ```python
221
241
  NerModelCategories.disentangle_token_class_and_tag(TokenClassWithTag.b_question)
222
242
  # (TokenClasses.question, TokenTags.begin)
243
+ ```
223
244
 
224
- :param category_name: A category name with token class and tag
225
- :return: Tuple of disentangled token class and tag
245
+ Args:
246
+ category_name: A category name with token class and tag
247
+
248
+ Returns:
249
+ Tuple of disentangled token class and tag
226
250
  """
227
251
  return token_class_with_tag_to_token_class_and_tag(category_name)
228
252
 
@@ -252,6 +276,13 @@ class PredictorBase(ABC):
252
276
  def get_requirements(cls) -> list[Requirement]:
253
277
  """
254
278
  Get a list of requirements for running the detector
279
+
280
+ Returns:
281
+ A list of requirements, where each requirement is a tuple of the form:
282
+ (requirement_name, is_available, description)
283
+ - `requirement_name`: The name of the requirement.
284
+ - `is_available`: A boolean indicating whether the requirement is available.
285
+ - `description`: A string describing the error code.
255
286
  """
256
287
  raise NotImplementedError()
257
288
 
@@ -265,6 +296,12 @@ class PredictorBase(ABC):
265
296
  def get_model_id(self) -> str:
266
297
  """
267
298
  Get the generating model
299
+
300
+ Returns:
301
+ A string representing the `model_id`, which is derived from the name of the predictor.
302
+
303
+ Raises:
304
+ ValueError: If the name is not set
268
305
  """
269
306
  if self.name is not None:
270
307
  return get_uuid_from_str(self.name)[:8]
@@ -286,27 +323,17 @@ class DetectionResult:
286
323
  """
287
324
  Simple mutable storage for detection results.
288
325
 
289
- `box`: [ulx,uly,lrx,lry]
290
-
291
- `class_id`: category id
292
-
293
- `score`: prediction score
294
-
295
- `mask`: binary mask
296
-
297
- `absolute_coords` : absolute coordinates
298
-
299
- `class_name`: category name
300
-
301
- `text`: text string. Used for OCR predictors
302
-
303
- `block`: block number. For reading order from some ocr predictors
304
-
305
- `line`: line number. For reading order from some ocr predictors
306
-
307
- `uuid`: uuid. For assigning detection result (e.g. text to image annotations)
308
-
309
-
326
+ Attributes:
327
+ box: [ulx,uly,lrx,lry]
328
+ class_id: category id
329
+ score: prediction score
330
+ mask: binary mask
331
+ absolute_coords: absolute coordinates
332
+ class_name: category name
333
+ text: text string. Used for OCR predictors
334
+ block: block number. For reading order from some ocr predictors
335
+ line: line number. For reading order from some ocr predictors
336
+ uuid: uuid. For assigning detection result (e.g. text to image annotations)
310
337
  """
311
338
 
312
339
  box: Optional[list[float]] = None
@@ -328,9 +355,10 @@ class ObjectDetector(PredictorBase, ABC):
328
355
  Abstract base class for object detection. This can be anything ranging from layout detection to OCR.
329
356
  Use this to connect external detectors with deepdoctection predictors on images.
330
357
 
331
- **Example:**
332
-
333
- MyFancyTensorpackPredictor(TensorpackPredictor,ObjectDetector)
358
+ Example:
359
+ ```python
360
+ MyFancyTensorpackPredictor(TensorpackPredictor,ObjectDetector)
361
+ ```
334
362
 
335
363
  and implement the `predict`.
336
364
  """
@@ -341,20 +369,23 @@ class ObjectDetector(PredictorBase, ABC):
341
369
  def predict(self, np_img: PixelValues) -> list[DetectionResult]:
342
370
  """
343
371
  Abstract method predict
372
+
373
+ Args:
374
+ np_img: A numpy array representing the image to be processed by the predictor.
344
375
  """
345
376
  raise NotImplementedError()
346
377
 
347
378
  @property
348
379
  def accepts_batch(self) -> bool:
349
380
  """
350
- whether to accept batches in `predict`
381
+ Whether to accept batches in `predict`
351
382
  """
352
383
  return False
353
384
 
354
385
  @abstractmethod
355
386
  def get_category_names(self) -> tuple[ObjectTypes, ...]:
356
387
  """
357
- Abstract method get_category_names
388
+ `get_category_names`
358
389
  """
359
390
  raise NotImplementedError()
360
391
 
@@ -369,7 +400,11 @@ class ObjectDetector(PredictorBase, ABC):
369
400
  class PdfMiner(PredictorBase, ABC):
370
401
  """
371
402
  Abstract base class for mining information from PDF documents. Reads in a bytes stream from a PDF document page.
372
- Use this to connect external pdf miners and wrap them into Deep-Doctection predictors.
403
+ Use this to connect external pdf miners and wrap them into deepdoctection predictors.
404
+
405
+ Attributes:
406
+ categories: ModelCategories
407
+ _pdf_bytes: Optional[bytes]: Bytes of the PDF document page to be processed by the predictor.
373
408
  """
374
409
 
375
410
  categories: ModelCategories
@@ -379,6 +414,12 @@ class PdfMiner(PredictorBase, ABC):
379
414
  def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
380
415
  """
381
416
  Abstract method predict
417
+
418
+ Args:
419
+ pdf_bytes: A bytes stream representing the PDF document page to be processed by the predictor.
420
+
421
+ Returns:
422
+ A list of DetectionResult objects containing the results of the prediction.
382
423
  """
383
424
  raise NotImplementedError()
384
425
 
@@ -386,6 +427,12 @@ class PdfMiner(PredictorBase, ABC):
386
427
  def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
387
428
  """
388
429
  Abstract method get_width_height
430
+
431
+ Args:
432
+ pdf_bytes: A bytes stream representing the PDF document page.
433
+
434
+ Returns:
435
+ A tuple containing the width and height of the PDF document page.
389
436
  """
390
437
  raise NotImplementedError()
391
438
 
@@ -395,36 +442,43 @@ class PdfMiner(PredictorBase, ABC):
395
442
  @property
396
443
  def accepts_batch(self) -> bool:
397
444
  """
398
- whether to accept batches in `predict`
445
+ Whether to accept batches in `predict`
446
+
447
+ Returns:
448
+ bool: True if the predictor accepts batches, False otherwise.
399
449
  """
400
450
  return False
401
451
 
402
452
  @abstractmethod
403
453
  def get_category_names(self) -> tuple[ObjectTypes, ...]:
404
454
  """
405
- Abstract method get_category_names
455
+ `get_category_names`
406
456
  """
407
457
  raise NotImplementedError()
408
458
 
409
459
 
410
460
  class TextRecognizer(PredictorBase, ABC):
411
461
  """
412
- Abstract base class for text recognition. In contrast to ObjectDetector one assumes that `predict` accepts
413
- batches of numpy arrays. More precisely, when using `predict` pass a list of tuples with uuids (e.g. image_id,
414
- or annotation_id) or numpy arrays.
462
+ Abstract base class for text recognition. In contrast to `ObjectDetector` one assumes that `predict` accepts
463
+ batches of `np.arrays`. More precisely, when using `predict` pass a list of tuples with uuids (e.g. `image_id`,
464
+ or `annotation_id`) or `np.array`s.
415
465
  """
416
466
 
417
467
  @abstractmethod
418
468
  def predict(self, images: list[tuple[str, PixelValues]]) -> list[DetectionResult]:
419
469
  """
420
470
  Abstract method predict
471
+
472
+ Args:
473
+ images: A list of tuples, where each tuple contains a unique identifier (e.g., `annotation_id`)
474
+ and a `np.array` representing the image to be processed by the predictor.
421
475
  """
422
476
  raise NotImplementedError()
423
477
 
424
478
  @property
425
479
  def accepts_batch(self) -> bool:
426
480
  """
427
- whether to accept batches in `predict`
481
+ Whether to accept batches in `predict`
428
482
  """
429
483
  return True
430
484
 
@@ -439,21 +493,15 @@ class TokenClassResult:
439
493
  """
440
494
  Simple mutable storage for token classification results
441
495
 
442
- `id`: uuid of token (not unique)
443
-
444
- `token_id`: token id
445
-
446
- `token`: token
447
-
448
- `class_id`: category id
449
-
450
- `class_name`: category name
451
-
452
- `semantic_name`: semantic name
453
-
454
- `bio_tag`: bio tag
455
-
456
- `score`: prediction score
496
+ Attributes:
497
+ id: uuid of token (not unique)
498
+ token_id: token id
499
+ token: token
500
+ class_id: category id
501
+ class_name: category name
502
+ semantic_name: semantic name
503
+ bio_tag: bio tag
504
+ score: prediction score
457
505
  """
458
506
 
459
507
  uuid: str
@@ -471,10 +519,11 @@ class SequenceClassResult:
471
519
  """
472
520
  Storage for sequence classification results
473
521
 
474
- `class_id`: category id
475
- `class_name`: category name
476
- `score`: prediction score
477
- `class_name_orig`: original class name
522
+ Attributes:
523
+ class_id: category_id
524
+ class_name: category_name
525
+ score: prediction score
526
+ class_name_orig: original class name
478
527
  """
479
528
 
480
529
  class_id: int
@@ -485,14 +534,19 @@ class SequenceClassResult:
485
534
 
486
535
  class LMTokenClassifier(PredictorBase, ABC):
487
536
  """
488
- Abstract base class for token classifiers. If you want to connect external token classifiers with Deepdoctection
489
- predictors wrap them into a class derived from this class. Note, that this class is still DL library agnostic.
537
+ Abstract base class for token classifiers. If you want to connect external token classifiers with deepdoctection
538
+ predictors wrap them into a class derived from this class.
490
539
  """
491
540
 
492
541
  @abstractmethod
493
542
  def predict(self, **encodings: Union[list[list[str]], torch.Tensor]) -> list[TokenClassResult]:
494
543
  """
495
544
  Abstract method predict
545
+
546
+ Args:
547
+ encodings: A dictionary of encodings, where each key is a string representing the encoding type
548
+ (e.g., "input_ids", "attention_mask") and the value is a list of lists of strings or a
549
+ torch.Tensor representing the encoded input data.
496
550
  """
497
551
  raise NotImplementedError()
498
552
 
@@ -514,10 +568,11 @@ class LMTokenClassifier(PredictorBase, ABC):
514
568
 
515
569
  @staticmethod
516
570
  def image_to_raw_features_mapping() -> str:
517
- """Converting image into model features must often be divided into several steps. This is because the process
571
+ """
572
+ Converting image into model features must often be divided into several steps. This is because the process
518
573
  method during training and serving might differ: For training there might be additional augmentation steps
519
574
  required or one might add some data batching. For this reason we have added two methods
520
- `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
575
+ `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name either for
521
576
  training or inference purposes:
522
577
 
523
578
  `image_to_raw_features_mapping` is used for training and transforms an image into raw features that can be
@@ -528,7 +583,8 @@ class LMTokenClassifier(PredictorBase, ABC):
528
583
 
529
584
  @staticmethod
530
585
  def image_to_features_mapping() -> str:
531
- """Converting image into model features must often be divided into several steps. This is because the process
586
+ """
587
+ Converting image into model features must often be divided into several steps. This is because the process
532
588
  method during training and serving might differ: For training there might be additional augmentation steps
533
589
  required or one might add some data batching. For this reason we have added two methods
534
590
  `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
@@ -574,7 +630,8 @@ class LMSequenceClassifier(PredictorBase, ABC):
574
630
 
575
631
  @staticmethod
576
632
  def image_to_raw_features_mapping() -> str:
577
- """Converting image into model features must often be divided into several steps. This is because the process
633
+ """
634
+ Converting image into model features must often be divided into several steps. This is because the process
578
635
  method during training and serving might differ: For training there might be additional augmentation steps
579
636
  required or one might add some data batching. For this reason we have added two methods
580
637
  `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
@@ -588,7 +645,8 @@ class LMSequenceClassifier(PredictorBase, ABC):
588
645
 
589
646
  @staticmethod
590
647
  def image_to_features_mapping() -> str:
591
- """Converting image into model features must often be divided into several steps. This is because the process
648
+ """
649
+ Converting image into model features must often be divided into several steps. This is because the process
592
650
  method during training and serving might differ: For training there might be additional augmentation steps
593
651
  required or one might add some data batching. For this reason we have added two methods
594
652
  `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
@@ -605,27 +663,39 @@ class LMSequenceClassifier(PredictorBase, ABC):
605
663
 
606
664
  class LanguageDetector(PredictorBase, ABC):
607
665
  """
608
- Abstract base class for language detectors. The `predict` accepts a string of arbitrary length and returns an
609
- ISO-639 code for the detected language.
666
+ Abstract base class for language detectors.
610
667
  """
611
668
 
612
669
  @abstractmethod
613
670
  def predict(self, text_string: str) -> DetectionResult:
614
671
  """
615
672
  Abstract method predict
673
+
674
+ Args:
675
+ text_string: A string representing the text to be processed by the predictor.
676
+
677
+ Returns:
678
+ A DetectionResult object containing the detected language information (ISO-639 code).
616
679
  """
617
680
  raise NotImplementedError()
618
681
 
619
682
 
620
683
  class ImageTransformer(PredictorBase, ABC):
621
684
  """
622
- Abstract base class for transforming an image. The `transform` accepts and returns a numpy array
685
+ Abstract base class for transforming an image.
623
686
  """
624
687
 
625
688
  @abstractmethod
626
689
  def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
627
690
  """
628
691
  Abstract method transform
692
+
693
+ Args:
694
+ np_img: A `np.array` representing the image to be transformed.
695
+ specification: A `DetectionResult` instance containing specifications for the transformation.
696
+
697
+ Returns:
698
+ A `np.array` representing the transformed image.
629
699
  """
630
700
  raise NotImplementedError()
631
701
 
@@ -633,6 +703,11 @@ class ImageTransformer(PredictorBase, ABC):
633
703
  def predict(self, np_img: PixelValues) -> DetectionResult:
634
704
  """
635
705
  Abstract method predict
706
+ Args:
707
+ np_img: A `np.array` representing the image to be processed by the predictor.
708
+
709
+ Returns:
710
+ A `DetectionResult` object containing the prediction results regarding the transformation.
636
711
  """
637
712
  raise NotImplementedError()
638
713
 
@@ -648,19 +723,25 @@ class ImageTransformer(PredictorBase, ABC):
648
723
  """
649
724
  Transform coordinates aligned with the transform_image method.
650
725
 
651
- :param detect_results: List of DetectionResults
652
- :return: List of DetectionResults. If you pass uuid it is possible to track the transformed bounding boxes.
726
+ Args:
727
+ detect_results: List of `DetectionResult`s
728
+
729
+ Returns:
730
+ List of DetectionResults. If you pass `uuid` it is possible to track the transformed bounding boxes.
653
731
  """
654
732
 
655
733
  raise NotImplementedError()
656
734
 
657
735
  def inverse_transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
658
736
  """
659
- Inverse transform coordinates aligned with the transform_image method. Composing transform_coords with
737
+ Inverse transform coordinates aligned with the `transform_image` method. Composing transform_coords with
660
738
  inverse_transform_coords should return the original coordinates.
661
739
 
662
- :param detect_results: List of DetectionResults
663
- :return: List of DetectionResults. If you pass uuid it is possible to track the transformed bounding boxes.
740
+ Args:
741
+ detect_results: List of `DetectionResult`s
742
+
743
+ Returns:
744
+ List of `DetectionResult`s. If you pass `uuid` it is possible to track the transformed bounding boxes.
664
745
  """
665
746
 
666
747
  raise NotImplementedError()
@@ -678,16 +759,14 @@ class DeterministicImageTransformer(ImageTransformer):
678
759
  The transformer performs deterministic transformations on images and their associated coordinates,
679
760
  enabling operations like padding, rotation, and other geometric transformations while maintaining
680
761
  the relationship between image content and annotation coordinates.
681
-
682
- :param base_transform: A BaseTransform instance that defines the actual transformation operations
683
- to be applied to images and coordinates.
684
762
  """
685
763
 
686
- def __init__(self, base_transform: BaseTransform):
764
+ def __init__(self, base_transform: BaseTransform) -> None:
687
765
  """
688
766
  Initialize the DeterministicImageTransformer with a BaseTransform instance.
689
767
 
690
- :param base_transform: A BaseTransform instance that defines the actual transformation operations
768
+ Args:
769
+ base_transform: A BaseTransform instance that defines the actual transformation operations
691
770
  """
692
771
  self.base_transform = base_transform
693
772
  self.name = base_transform.__class__.__name__