deepdoctection 0.31__py3-none-any.whl → 0.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic.

Files changed (131)
  1. deepdoctection/__init__.py +16 -29
  2. deepdoctection/analyzer/dd.py +70 -59
  3. deepdoctection/configs/conf_dd_one.yaml +34 -31
  4. deepdoctection/dataflow/common.py +9 -5
  5. deepdoctection/dataflow/custom.py +5 -5
  6. deepdoctection/dataflow/custom_serialize.py +75 -18
  7. deepdoctection/dataflow/parallel_map.py +3 -3
  8. deepdoctection/dataflow/serialize.py +4 -4
  9. deepdoctection/dataflow/stats.py +3 -3
  10. deepdoctection/datapoint/annotation.py +41 -56
  11. deepdoctection/datapoint/box.py +9 -8
  12. deepdoctection/datapoint/convert.py +6 -6
  13. deepdoctection/datapoint/image.py +56 -44
  14. deepdoctection/datapoint/view.py +245 -150
  15. deepdoctection/datasets/__init__.py +1 -4
  16. deepdoctection/datasets/adapter.py +35 -26
  17. deepdoctection/datasets/base.py +14 -12
  18. deepdoctection/datasets/dataflow_builder.py +3 -3
  19. deepdoctection/datasets/info.py +24 -26
  20. deepdoctection/datasets/instances/doclaynet.py +51 -51
  21. deepdoctection/datasets/instances/fintabnet.py +46 -46
  22. deepdoctection/datasets/instances/funsd.py +25 -24
  23. deepdoctection/datasets/instances/iiitar13k.py +13 -10
  24. deepdoctection/datasets/instances/layouttest.py +4 -3
  25. deepdoctection/datasets/instances/publaynet.py +5 -5
  26. deepdoctection/datasets/instances/pubtables1m.py +24 -21
  27. deepdoctection/datasets/instances/pubtabnet.py +32 -30
  28. deepdoctection/datasets/instances/rvlcdip.py +30 -30
  29. deepdoctection/datasets/instances/xfund.py +26 -26
  30. deepdoctection/datasets/save.py +6 -6
  31. deepdoctection/eval/__init__.py +1 -4
  32. deepdoctection/eval/accmetric.py +32 -33
  33. deepdoctection/eval/base.py +8 -9
  34. deepdoctection/eval/cocometric.py +15 -13
  35. deepdoctection/eval/eval.py +41 -37
  36. deepdoctection/eval/tedsmetric.py +30 -23
  37. deepdoctection/eval/tp_eval_callback.py +16 -19
  38. deepdoctection/extern/__init__.py +2 -7
  39. deepdoctection/extern/base.py +339 -134
  40. deepdoctection/extern/d2detect.py +85 -113
  41. deepdoctection/extern/deskew.py +14 -11
  42. deepdoctection/extern/doctrocr.py +141 -130
  43. deepdoctection/extern/fastlang.py +27 -18
  44. deepdoctection/extern/hfdetr.py +71 -62
  45. deepdoctection/extern/hflayoutlm.py +504 -211
  46. deepdoctection/extern/hflm.py +230 -0
  47. deepdoctection/extern/model.py +488 -302
  48. deepdoctection/extern/pdftext.py +23 -19
  49. deepdoctection/extern/pt/__init__.py +1 -3
  50. deepdoctection/extern/pt/nms.py +6 -2
  51. deepdoctection/extern/pt/ptutils.py +29 -19
  52. deepdoctection/extern/tessocr.py +39 -38
  53. deepdoctection/extern/texocr.py +18 -18
  54. deepdoctection/extern/tp/tfutils.py +57 -9
  55. deepdoctection/extern/tp/tpcompat.py +21 -14
  56. deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
  57. deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
  58. deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
  59. deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
  60. deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
  61. deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
  62. deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
  63. deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
  64. deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
  65. deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
  66. deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
  67. deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
  68. deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
  69. deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
  70. deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
  71. deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
  72. deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
  73. deepdoctection/extern/tpdetect.py +45 -53
  74. deepdoctection/mapper/__init__.py +3 -8
  75. deepdoctection/mapper/cats.py +27 -29
  76. deepdoctection/mapper/cocostruct.py +10 -10
  77. deepdoctection/mapper/d2struct.py +27 -26
  78. deepdoctection/mapper/hfstruct.py +13 -8
  79. deepdoctection/mapper/laylmstruct.py +178 -37
  80. deepdoctection/mapper/maputils.py +12 -11
  81. deepdoctection/mapper/match.py +2 -2
  82. deepdoctection/mapper/misc.py +11 -9
  83. deepdoctection/mapper/pascalstruct.py +4 -4
  84. deepdoctection/mapper/prodigystruct.py +5 -5
  85. deepdoctection/mapper/pubstruct.py +84 -92
  86. deepdoctection/mapper/tpstruct.py +5 -5
  87. deepdoctection/mapper/xfundstruct.py +33 -33
  88. deepdoctection/pipe/__init__.py +1 -1
  89. deepdoctection/pipe/anngen.py +12 -14
  90. deepdoctection/pipe/base.py +52 -106
  91. deepdoctection/pipe/common.py +72 -59
  92. deepdoctection/pipe/concurrency.py +16 -11
  93. deepdoctection/pipe/doctectionpipe.py +24 -21
  94. deepdoctection/pipe/language.py +20 -25
  95. deepdoctection/pipe/layout.py +20 -16
  96. deepdoctection/pipe/lm.py +75 -105
  97. deepdoctection/pipe/order.py +194 -89
  98. deepdoctection/pipe/refine.py +111 -124
  99. deepdoctection/pipe/segment.py +156 -161
  100. deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
  101. deepdoctection/pipe/text.py +37 -36
  102. deepdoctection/pipe/transform.py +19 -16
  103. deepdoctection/train/__init__.py +6 -12
  104. deepdoctection/train/d2_frcnn_train.py +48 -41
  105. deepdoctection/train/hf_detr_train.py +41 -30
  106. deepdoctection/train/hf_layoutlm_train.py +153 -135
  107. deepdoctection/train/tp_frcnn_train.py +32 -31
  108. deepdoctection/utils/concurrency.py +1 -1
  109. deepdoctection/utils/context.py +13 -6
  110. deepdoctection/utils/develop.py +4 -4
  111. deepdoctection/utils/env_info.py +87 -125
  112. deepdoctection/utils/file_utils.py +6 -11
  113. deepdoctection/utils/fs.py +22 -18
  114. deepdoctection/utils/identifier.py +2 -2
  115. deepdoctection/utils/logger.py +16 -15
  116. deepdoctection/utils/metacfg.py +7 -7
  117. deepdoctection/utils/mocks.py +93 -0
  118. deepdoctection/utils/pdf_utils.py +11 -11
  119. deepdoctection/utils/settings.py +185 -181
  120. deepdoctection/utils/tqdm.py +1 -1
  121. deepdoctection/utils/transform.py +14 -9
  122. deepdoctection/utils/types.py +104 -0
  123. deepdoctection/utils/utils.py +7 -7
  124. deepdoctection/utils/viz.py +74 -72
  125. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
  126. deepdoctection-0.33.dist-info/RECORD +146 -0
  127. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
  128. deepdoctection/utils/detection_types.py +0 -68
  129. deepdoctection-0.31.dist-info/RECORD +0 -144
  130. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
  131. {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
@@ -19,14 +19,211 @@
19
19
  """
20
20
  Abstract classes for unifying external base- and Doctection predictors
21
21
  """
22
+ from __future__ import annotations
22
23
 
23
24
  from abc import ABC, abstractmethod
24
- from dataclasses import dataclass
25
- from typing import Any, Dict, List, Mapping, Optional, Tuple, Union
25
+ from dataclasses import dataclass, field
26
+ from types import MappingProxyType
27
+ from typing import TYPE_CHECKING, Any, Literal, Mapping, Optional, Sequence, Union, overload
28
+
29
+ from lazy_imports import try_import
26
30
 
27
- from ..utils.detection_types import ImageType, JsonDict, Requirement
28
31
  from ..utils.identifier import get_uuid_from_str
29
- from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
32
+ from ..utils.logger import logger
33
+ from ..utils.settings import (
34
+ DefaultType,
35
+ ObjectTypes,
36
+ TypeOrStr,
37
+ get_type,
38
+ token_class_tag_to_token_class_with_tag,
39
+ token_class_with_tag_to_token_class_and_tag,
40
+ )
41
+ from ..utils.types import JsonDict, PixelValues, Requirement
42
+
43
+ if TYPE_CHECKING:
44
+ with try_import() as import_guard:
45
+ import torch
46
+
47
+
48
+ @dataclass
49
+ class ModelCategories:
50
+ """
51
+ Categories for models (except models for NER tasks) are managed in this class. Different to DatasetCategories,
52
+ these members are immutable.
53
+
54
+ **Example**:
55
+
56
+ categories = ModelCategories(init_categories={1: "text", 2: "title"})
57
+ cats = categories.get_categories(as_dict=True) # {1: LayoutType.text, 2: LayoutType.title}
58
+ categories.filter_categories = [LayoutType.text] # filter out text
59
+ cats = categories.get_categories(as_dict=True) # {2: LayoutType.title}
60
+ """
61
+
62
+ init_categories: Optional[Mapping[int, TypeOrStr]] = field(repr=False)
63
+ _init_categories: MappingProxyType[int, ObjectTypes] = field(init=False, repr=False)
64
+ _filter_categories: Sequence[ObjectTypes] = field(init=False, repr=False, default_factory=tuple)
65
+ categories: MappingProxyType[int, ObjectTypes] = field(init=False)
66
+
67
+ def __post_init__(self) -> None:
68
+ """post init method"""
69
+ if self.init_categories:
70
+ self._init_categories = MappingProxyType({key: get_type(val) for key, val in self.init_categories.items()})
71
+ else:
72
+ if self._init_categories is None:
73
+ self._init_categories = MappingProxyType({})
74
+ self.categories = self._init_categories
75
+
76
+ @overload
77
+ def get_categories(self, as_dict: Literal[False]) -> tuple[ObjectTypes, ...]:
78
+ ...
79
+
80
+ @overload
81
+ def get_categories(
82
+ self, as_dict: Literal[True] = ..., name_as_key: Literal[False] = False
83
+ ) -> MappingProxyType[int, ObjectTypes]:
84
+ ...
85
+
86
+ @overload
87
+ def get_categories(self, as_dict: Literal[True], name_as_key: Literal[True]) -> MappingProxyType[ObjectTypes, int]:
88
+ ...
89
+
90
+ def get_categories(
91
+ self, as_dict: bool = True, name_as_key: bool = False
92
+ ) -> Union[MappingProxyType[int, ObjectTypes], MappingProxyType[ObjectTypes, int], tuple[ObjectTypes, ...]]:
93
+ """
94
+ Get the categories
95
+
96
+ :param as_dict: return as dict
97
+ :param name_as_key: if as_dict=`True` and name_as_key=`True` will swap key and value
98
+ :return: categories dict
99
+ """
100
+ if as_dict:
101
+ if name_as_key:
102
+ return MappingProxyType(
103
+ {value: key for key, value in self._init_categories.items() if value not in self.filter_categories}
104
+ )
105
+ return MappingProxyType(
106
+ {key: value for key, value in self._init_categories.items() if value not in self.filter_categories}
107
+ )
108
+ return tuple(val for val in self._init_categories.values() if val not in self.filter_categories)
109
+
110
+ @property
111
+ def filter_categories(self) -> Sequence[ObjectTypes]:
112
+ """filter_categories"""
113
+ return self._filter_categories
114
+
115
+ @filter_categories.setter
116
+ def filter_categories(self, categories: Sequence[ObjectTypes]) -> None:
117
+ """categories setter"""
118
+ self._filter_categories = categories
119
+ self.categories = self.get_categories()
120
+
121
+ def shift_category_ids(self, shift_by: int) -> MappingProxyType[int, ObjectTypes]:
122
+ """
123
+ Shift category ids
124
+
125
+ **Example**:
126
+
127
+ categories = ModelCategories(init_categories={"1": "text", "2": "title"})
128
+ cats = categories.shift_category_ids(1) # {"2": LayoutType.text, "3": LayoutType.title}
129
+
130
+ :param shift_by: The value to shift the category id to the left or to the right
131
+ :return: shifted categories
132
+ """
133
+ return MappingProxyType({k + shift_by: v for k, v in self.get_categories().items()})
134
+
135
+
136
+ @dataclass
137
+ class NerModelCategories(ModelCategories):
138
+ """
139
+ Categories for models for NER tasks. It can handle the merging of token classes and bio tags to build a new set
140
+ of categories.
141
+
142
+ **Example**:
143
+
144
+ categories = NerModelCategories(categories_semantics=["question", "answer"], categories_bio=["B", "I"])
145
+ cats = categories.get_categories(as_dict=True) # {"1": TokenClassWithTag.b_question,
146
+ "2": TokenClassWithTag.i_question,
147
+ "3": TokenClassWithTag.b_answer,
148
+ "4": TokenClassWithTag.i_answer}
149
+
150
+ You can also leave the categories unchanged:
151
+
152
+ **Example**:
153
+
154
+ categories = NerModelCategories(init_categories={"1": "question", "2": "answer"})
155
+ cats = categories.get_categories(as_dict=True) # {"1": TokenClasses.question,
156
+ "2": TokenClasses.answer}
157
+ """
158
+
159
+ categories_semantics: Optional[Sequence[TypeOrStr]] = field(default=None)
160
+ categories_bio: Optional[Sequence[TypeOrStr]] = field(default=None)
161
+ _categories_semantics: tuple[ObjectTypes, ...] = field(init=False, repr=False)
162
+ _categories_bio: tuple[ObjectTypes, ...] = field(init=False, repr=False)
163
+
164
+ def __post_init__(self) -> None:
165
+ if not self.init_categories:
166
+ if not self.categories_semantics:
167
+ raise ValueError("If categories is None then categories_semantics cannot be None")
168
+ if not self.categories_bio:
169
+ raise ValueError("If categories is None then categories_bio cannot be None")
170
+ else:
171
+ self._init_categories = MappingProxyType({key: get_type(val) for key, val in self.init_categories.items()})
172
+
173
+ if self.categories_bio:
174
+ self._categories_bio = tuple((get_type(cat) for cat in self.categories_bio))
175
+ if self.categories_semantics:
176
+ self._categories_semantics = tuple((get_type(cat) for cat in self.categories_semantics))
177
+ if self.categories_bio and self.categories_semantics and self.init_categories:
178
+ logger.info("Will disregard categories_bio and categories_semantics")
179
+
180
+ if self.categories_bio and self.categories_semantics:
181
+ self._init_categories = self.merge_bio_semantics_categories(
182
+ self._categories_semantics, self._categories_bio
183
+ )
184
+ super().__post_init__()
185
+
186
+ @staticmethod
187
+ def merge_bio_semantics_categories(
188
+ categories_semantics: tuple[ObjectTypes, ...], categories_bio: tuple[ObjectTypes, ...]
189
+ ) -> MappingProxyType[int, ObjectTypes]:
190
+ """
191
+ Merge bio and semantics categories
192
+
193
+ **Example**:
194
+
195
+ categories = NerModelCategories(categories_semantics=["question", "answer"], categories_bio=["B", "I"])
196
+ cats = categories.get_categories(as_dict=True) # {"1": TokenClassWithTag.b_question,
197
+ "2": TokenClassWithTag.i_question,
198
+ "3": TokenClassWithTag.b_answer,
199
+ "4": TokenClassWithTag.i_answer}
200
+ :param categories_semantics: semantic categories (without tags)
201
+ :param categories_bio: bio tags
202
+ :return: A mapping of categories with tags
203
+ """
204
+ categories_list = sorted(
205
+ {
206
+ token_class_tag_to_token_class_with_tag(token, tag)
207
+ for token in categories_semantics
208
+ for tag in categories_bio
209
+ }
210
+ )
211
+ return MappingProxyType(dict(enumerate(categories_list, 1)))
212
+
213
+ @staticmethod
214
+ def disentangle_token_class_and_tag(category_name: ObjectTypes) -> Optional[tuple[ObjectTypes, ObjectTypes]]:
215
+ """
216
+ Disentangle token class and tag. It will return separate ObjectTypes for token class and tag.
217
+
218
+ **Example**:
219
+
220
+ NerModelCategories.disentangle_token_class_and_tag(TokenClassWithTag.b_question)
221
+ # (TokenClasses.question, TokenTags.begin)
222
+
223
+ :param category_name: A category name with token class and tag
224
+ :return: Tuple of disentangled token class and tag
225
+ """
226
+ return token_class_with_tag_to_token_class_and_tag(category_name)
30
227
 
31
228
 
32
229
  class PredictorBase(ABC):
@@ -51,14 +248,14 @@ class PredictorBase(ABC):
51
248
 
52
249
  @classmethod
53
250
  @abstractmethod
54
- def get_requirements(cls) -> List[Requirement]:
251
+ def get_requirements(cls) -> list[Requirement]:
55
252
  """
56
253
  Get a list of requirements for running the detector
57
254
  """
58
255
  raise NotImplementedError()
59
256
 
60
257
  @abstractmethod
61
- def clone(self) -> "PredictorBase":
258
+ def clone(self) -> PredictorBase:
62
259
  """
63
260
  Clone an instance
64
261
  """
@@ -72,6 +269,16 @@ class PredictorBase(ABC):
72
269
  return get_uuid_from_str(self.name)[:8]
73
270
  raise ValueError("name must be set before calling get_model_id")
74
271
 
272
+ def clear_model(self) -> None:
273
+ """
274
+ Clear the inner model of the model wrapper if it has one. Needed for model updates during training.
275
+ """
276
+ raise NotImplementedError(
277
+ "Maybe you forgot to implement this method in your pipeline component. This might "
278
+ "be the case when you run evaluation during training and need to update the "
279
+ "trained model in your pipeline component."
280
+ )
281
+
75
282
 
76
283
  @dataclass
77
284
  class DetectionResult:
@@ -101,24 +308,24 @@ class DetectionResult:
101
308
 
102
309
  """
103
310
 
104
- box: Optional[List[float]] = None
311
+ box: Optional[list[float]] = None
105
312
  class_id: Optional[int] = None
106
313
  score: Optional[float] = None
107
- mask: Optional[List[float]] = None
314
+ mask: Optional[list[float]] = None
108
315
  absolute_coords: bool = True
109
- class_name: ObjectTypes = DefaultType.default_type
316
+ class_name: ObjectTypes = DefaultType.DEFAULT_TYPE
110
317
  text: Optional[Union[str, ObjectTypes]] = None
111
318
  block: Optional[str] = None
112
319
  line: Optional[str] = None
113
320
  uuid: Optional[str] = None
114
- relationships: Optional[Dict[str, Any]] = None
321
+ relationships: Optional[dict[str, Any]] = None
115
322
  angle: Optional[float] = None
116
323
 
117
324
 
118
- class ObjectDetector(PredictorBase):
325
+ class ObjectDetector(PredictorBase, ABC):
119
326
  """
120
327
  Abstract base class for object detection. This can be anything ranging from layout detection to OCR.
121
- Use this to connect external detectors with Deep-Doctection predictors on images.
328
+ Use this to connect external detectors with deepdoctection predictors on images.
122
329
 
123
330
  **Example:**
124
331
 
@@ -127,20 +334,10 @@ class ObjectDetector(PredictorBase):
127
334
  and implement the `predict`.
128
335
  """
129
336
 
130
- _categories: Mapping[str, ObjectTypes]
131
-
132
- @property
133
- def categories(self) -> Mapping[str, ObjectTypes]:
134
- """categories"""
135
- return self._categories
136
-
137
- @categories.setter
138
- def categories(self, categories: Mapping[str, TypeOrStr]) -> None:
139
- """categories setter"""
140
- self._categories = {key: get_type(value) for key, value in categories.items()}
337
+ categories: ModelCategories
141
338
 
142
339
  @abstractmethod
143
- def predict(self, np_img: ImageType) -> List[DetectionResult]:
340
+ def predict(self, np_img: PixelValues) -> list[DetectionResult]:
144
341
  """
145
342
  Abstract method predict
146
343
  """
@@ -153,48 +350,45 @@ class ObjectDetector(PredictorBase):
153
350
  """
154
351
  return False
155
352
 
156
- def possible_categories(self) -> List[ObjectTypes]:
353
+ @abstractmethod
354
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
355
+ """
356
+ Abstract method get_category_names
157
357
  """
158
- Abstract method possible_categories. Must implement a method that returns a list of possible detectable
159
- categories
358
+ raise NotImplementedError()
359
+
360
+ @abstractmethod
361
+ def clone(self) -> ObjectDetector:
362
+ """
363
+ Clone an instance
160
364
  """
161
- return list(self.categories.values())
365
+ raise NotImplementedError()
162
366
 
163
367
 
164
- class PdfMiner(PredictorBase):
368
+ class PdfMiner(PredictorBase, ABC):
165
369
  """
166
370
  Abstract base class for mining information from PDF documents. Reads in a bytes stream from a PDF document page.
167
371
  Use this to connect external pdf miners and wrap them into Deep-Doctection predictors.
168
372
  """
169
373
 
170
- _categories: Mapping[str, ObjectTypes]
374
+ categories: ModelCategories
171
375
  _pdf_bytes: Optional[bytes] = None
172
- _page: Any = None
173
-
174
- @property
175
- def categories(self) -> Mapping[str, ObjectTypes]:
176
- """categories"""
177
- return self._categories
178
-
179
- @categories.setter
180
- def categories(self, categories: Mapping[str, TypeOrStr]) -> None:
181
- self._categories = {key: get_type(value) for key, value in categories.items()}
182
376
 
183
377
  @abstractmethod
184
- def predict(self, pdf_bytes: bytes) -> List[DetectionResult]:
378
+ def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
185
379
  """
186
380
  Abstract method predict
187
381
  """
188
382
  raise NotImplementedError()
189
383
 
190
384
  @abstractmethod
191
- def get_width_height(self, pdf_bytes: bytes) -> Tuple[float, float]:
385
+ def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
192
386
  """
193
387
  Abstract method get_width_height
194
388
  """
195
389
  raise NotImplementedError()
196
390
 
197
- def clone(self) -> PredictorBase:
391
+ def clone(self) -> PdfMiner:
198
392
  return self.__class__()
199
393
 
200
394
  @property
@@ -204,14 +398,15 @@ class PdfMiner(PredictorBase):
204
398
  """
205
399
  return False
206
400
 
207
- def possible_categories(self) -> List[ObjectTypes]:
401
+ @abstractmethod
402
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
208
403
  """
209
- Returns a list of possible detectable categories
404
+ Abstract method get_category_names
210
405
  """
211
- return list(self.categories.values())
406
+ raise NotImplementedError()
212
407
 
213
408
 
214
- class TextRecognizer(PredictorBase):
409
+ class TextRecognizer(PredictorBase, ABC):
215
410
  """
216
411
  Abstract base class for text recognition. In contrast to ObjectDetector one assumes that `predict` accepts
217
412
  batches of numpy arrays. More precisely, when using `predict` pass a list of tuples with uuids (e.g. image_id,
@@ -219,7 +414,7 @@ class TextRecognizer(PredictorBase):
219
414
  """
220
415
 
221
416
  @abstractmethod
222
- def predict(self, images: List[Tuple[str, ImageType]]) -> List[DetectionResult]:
417
+ def predict(self, images: list[tuple[str, PixelValues]]) -> list[DetectionResult]:
223
418
  """
224
419
  Abstract method predict
225
420
  """
@@ -232,6 +427,11 @@ class TextRecognizer(PredictorBase):
232
427
  """
233
428
  return True
234
429
 
430
+ @staticmethod
431
+ def get_category_names() -> tuple[ObjectTypes, ...]:
432
+ """return category names"""
433
+ return ()
434
+
235
435
 
236
436
  @dataclass
237
437
  class TokenClassResult:
@@ -258,9 +458,9 @@ class TokenClassResult:
258
458
  uuid: str
259
459
  token: str
260
460
  class_id: int
261
- class_name: ObjectTypes = DefaultType.default_type
262
- semantic_name: ObjectTypes = DefaultType.default_type
263
- bio_tag: ObjectTypes = DefaultType.default_type
461
+ class_name: ObjectTypes = DefaultType.DEFAULT_TYPE
462
+ semantic_name: ObjectTypes = DefaultType.DEFAULT_TYPE
463
+ bio_tag: ObjectTypes = DefaultType.DEFAULT_TYPE
264
464
  score: Optional[float] = None
265
465
  token_id: Optional[int] = None
266
466
 
@@ -277,123 +477,137 @@ class SequenceClassResult:
277
477
  """
278
478
 
279
479
  class_id: int
280
- class_name: ObjectTypes = DefaultType.default_type
480
+ class_name: ObjectTypes = DefaultType.DEFAULT_TYPE
281
481
  score: Optional[float] = None
282
482
  class_name_orig: Optional[str] = None
283
483
 
284
484
 
285
- class LMTokenClassifier(PredictorBase):
485
+ class LMTokenClassifier(PredictorBase, ABC):
286
486
  """
287
487
  Abstract base class for token classifiers. If you want to connect external token classifiers with Deepdoctection
288
488
  predictors wrap them into a class derived from this class. Note, that this class is still DL library agnostic.
289
489
  """
290
490
 
291
- _categories: Mapping[str, ObjectTypes]
292
-
293
- @property
294
- def categories(self) -> Mapping[str, ObjectTypes]:
295
- """categories"""
296
- return self._categories
297
-
298
- @categories.setter
299
- def categories(self, categories: Mapping[str, TypeOrStr]) -> None:
300
- """categories setter"""
301
- self._categories = {key: get_type(value) for key, value in categories.items()}
302
-
303
491
  @abstractmethod
304
- def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> List[TokenClassResult]: # type: ignore
492
+ def predict(self, **encodings: Union[list[list[str]], torch.Tensor]) -> list[TokenClassResult]:
305
493
  """
306
494
  Abstract method predict
307
495
  """
308
496
  raise NotImplementedError()
309
497
 
310
- def possible_tokens(self) -> List[ObjectTypes]:
311
- """
312
- Returns a list of possible detectable tokens
498
+ @staticmethod
499
+ def default_kwargs_for_image_to_features_mapping() -> JsonDict:
313
500
  """
314
- return list(self.categories.values())
501
+ Some models require that their inputs must be pre-processed in a specific way. Responsible for converting
502
+ an `Image` datapoint into the input format in inference mode is a mapper function which is called
503
+ in a pipeline component. The mapper function's name, which has to be used must be specified in the returned
504
+ value of `image_to_features_mapping`.
505
+ This mapper function is often implemented for various models and can therefore have various parameters.
506
+ Some parameters can be inferred from the config file of the model parametrization. Some other might not be
507
+ in the parametrization and therefore have to be specified here.
315
508
 
316
- @abstractmethod
317
- def clone(self) -> "LMTokenClassifier":
318
- """
319
- Clone an instance
509
+ This method therefore returns a dictionary that contains as keys some arguments of the function
510
+ `image_to_features_mapping` and as values the values necessary for providing the model with the required input.
320
511
  """
321
- raise NotImplementedError()
512
+ return {}
322
513
 
323
514
  @staticmethod
324
- def default_kwargs_for_input_mapping() -> JsonDict:
515
+ def image_to_raw_features_mapping() -> str:
516
+ """Converting image into model features must often be divided into several steps. This is because the process
517
+ method during training and serving might differ: For training there might be additional augmentation steps
518
+ required or one might add some data batching. For this reason we have added two methods
519
+ `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
520
+ training or inference purposes:
521
+
522
+ `image_to_raw_features_mapping` is used for training and transforms an image into raw features that can be
523
+ further processed through augmentation or batching. It should not be used when running inference, i.e. when
524
+ running the model in a pipeline component.
325
525
  """
326
- Add some default arguments that might be necessary when preparing a sample. Overwrite this method
327
- for some custom setting. `default_arguments_for_input_mapping` in `LMTokenClassifierService`.
526
+ return ""
527
+
528
+ @staticmethod
529
+ def image_to_features_mapping() -> str:
530
+ """Converting image into model features must often be divided into several steps. This is because the process
531
+ method during training and serving might differ: For training there might be additional augmentation steps
532
+ required or one might add some data batching. For this reason we have added two methods
533
+ `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
534
+ training or inference purposes:
535
+
536
+ `image_to_features_mapping` is a mapping function that converts a single image into ready features that can
537
+ be directly fed into the model. We use this function to determine the input format of the model in a pipeline
538
+ component. Note that this function will also require specific parameters, which can be specified in
539
+ `default_kwargs_for_image_to_features_mapping`.
540
+
328
541
  """
329
- return {}
542
+ return ""
330
543
 
331
544
 
332
- class LMSequenceClassifier(PredictorBase):
545
+ class LMSequenceClassifier(PredictorBase, ABC):
333
546
  """
334
547
  Abstract base class for sequence classification. If you want to connect external sequence classifiers with
335
548
  deepdoctection predictors, wrap them into a class derived from this class.
336
549
  """
337
550
 
338
- _categories: Mapping[str, ObjectTypes]
339
-
340
- @property
341
- def categories(self) -> Mapping[str, ObjectTypes]:
342
- """categories"""
343
- return self._categories
344
-
345
- @categories.setter
346
- def categories(self, categories: Mapping[str, TypeOrStr]) -> None:
347
- """categories setter"""
348
- self._categories = {key: get_type(value) for key, value in categories.items()}
349
-
350
551
  @abstractmethod
351
- def predict(self, **encodings: Union[List[List[str]], "torch.Tensor"]) -> SequenceClassResult: # type: ignore
552
+ def predict(self, **encodings: Union[list[list[str]], torch.Tensor]) -> SequenceClassResult:
352
553
  """
353
554
  Abstract method predict
354
555
  """
355
556
  raise NotImplementedError()
356
557
 
357
- def possible_categories(self) -> List[ObjectTypes]:
358
- """
359
- Returns a list of possible detectable categories for a sequence
558
+ @staticmethod
559
+ def default_kwargs_for_image_to_features_mapping() -> JsonDict:
360
560
  """
361
- return list(self.categories.values())
561
+ Some models require that their inputs must be pre-processed in a specific way. Responsible for converting
562
+ an `Image` datapoint into the input format in inference mode is a mapper function which is called
563
+ in a pipeline component. The mapper function's name, which has to be used must be specified in the returned
564
+ value of `image_to_features_mapping`.
565
+ This mapper function is often implemented for various models and can therefore have various parameters.
566
+ Some parameters can be inferred from the config file of the model parametrization. Some other might not be
567
+ in the parametrization and therefore have to be specified here.
362
568
 
363
- @abstractmethod
364
- def clone(self) -> "LMSequenceClassifier":
365
- """
366
- Clone an instance
569
+ This method therefore returns a dictionary that contains as keys some arguments of the function
570
+ `image_to_features_mapping` and as values the values necessary for providing the model with the required input.
367
571
  """
368
- raise NotImplementedError()
572
+ return {}
369
573
 
370
574
  @staticmethod
371
- def default_kwargs_for_input_mapping() -> JsonDict:
575
+ def image_to_raw_features_mapping() -> str:
576
+ """Converting image into model features must often be divided into several steps. This is because the process
577
+ method during training and serving might differ: For training there might be additional augmentation steps
578
+ required or one might add some data batching. For this reason we have added two methods
579
+ `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
580
+ training or inference purposes:
581
+
582
+ `image_to_raw_features_mapping` is used for training and transforms an image into raw features that can be
583
+ further processed through augmentation or batching. It should not be used when running inference, i.e. when
584
+ running the model in a pipeline component.
372
585
  """
373
- Add some default arguments that might be necessary when preparing a sample. Overwrite this method
374
- for some custom setting. `default_arguments_for_input_mapping` in `LMTokenClassifierService`.
586
+ return ""
587
+
588
+ @staticmethod
589
+ def image_to_features_mapping() -> str:
590
+ """Converting image into model features must often be divided into several steps. This is because the process
591
+ method during training and serving might differ: For training there might be additional augmentation steps
592
+ required or one might add some data batching. For this reason we have added two methods
593
+ `image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name for either for
594
+ training or inference purposes:
595
+
596
+ `image_to_features_mapping` is a mapping function that converts a single image into ready features that can
597
+ be directly fed into the model. We use this function to determine the input format of the model in a pipeline
598
+ component. Note that this function will also require specific parameters, which can be specified in
599
+ `default_kwargs_for_image_to_features_mapping`.
600
+
375
601
  """
376
- return {}
602
+ return ""
377
603
 
378
604
 
379
- class LanguageDetector(PredictorBase):
605
+ class LanguageDetector(PredictorBase, ABC):
380
606
  """
381
607
  Abstract base class for language detectors. The `predict` accepts a string of arbitrary length and returns an
382
608
  ISO-639 code for the detected language.
383
609
  """
384
610
 
385
- _categories: Mapping[str, ObjectTypes]
386
-
387
- @property
388
- def categories(self) -> Mapping[str, ObjectTypes]:
389
- """categories"""
390
- return self._categories
391
-
392
- @categories.setter
393
- def categories(self, categories: Mapping[str, TypeOrStr]) -> None:
394
- """categories setter"""
395
- self._categories = {key: get_type(value) for key, value in categories.items()}
396
-
397
611
  @abstractmethod
398
612
  def predict(self, text_string: str) -> DetectionResult:
399
613
  """
@@ -401,39 +615,30 @@ class LanguageDetector(PredictorBase):
401
615
  """
402
616
  raise NotImplementedError()
403
617
 
404
- def possible_languages(self) -> List[ObjectTypes]:
405
- """
406
- Returns a list of possible detectable languages
407
- """
408
- return list(self.categories.values())
409
618
 
410
-
411
- class ImageTransformer(PredictorBase):
619
+ class ImageTransformer(PredictorBase, ABC):
412
620
  """
413
621
  Abstract base class for transforming an image. The `transform` accepts and returns a numpy array
414
622
  """
415
623
 
416
624
  @abstractmethod
417
- def transform(self, np_img: ImageType, specification: DetectionResult) -> ImageType:
625
+ def transform(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
418
626
  """
419
627
  Abstract method transform
420
628
  """
421
629
  raise NotImplementedError()
422
630
 
423
631
  @abstractmethod
424
- def predict(self, np_img: ImageType) -> DetectionResult:
632
+ def predict(self, np_img: PixelValues) -> DetectionResult:
425
633
  """
426
634
  Abstract method predict
427
635
  """
428
636
  raise NotImplementedError()
429
637
 
430
- def clone(self) -> PredictorBase:
638
+ def clone(self) -> ImageTransformer:
431
639
  return self.__class__()
432
640
 
433
- @staticmethod
434
641
  @abstractmethod
435
- def possible_category() -> ObjectTypes:
436
- """
437
- Returns a (single) category the `ImageTransformer` can predict
438
- """
642
+ def get_category_names(self) -> tuple[ObjectTypes, ...]:
643
+ """returns category names"""
439
644
  raise NotImplementedError()