deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/extern/base.py
CHANGED
|
@@ -17,8 +17,9 @@
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
"""
|
|
20
|
-
|
|
20
|
+
Base classes for unifying external predictors
|
|
21
21
|
"""
|
|
22
|
+
|
|
22
23
|
from __future__ import annotations
|
|
23
24
|
|
|
24
25
|
from abc import ABC, abstractmethod
|
|
@@ -50,15 +51,18 @@ if TYPE_CHECKING:
|
|
|
50
51
|
@dataclass
|
|
51
52
|
class ModelCategories:
|
|
52
53
|
"""
|
|
53
|
-
Categories for models (except models for NER tasks) are managed in this class.
|
|
54
|
-
these members are immutable.
|
|
54
|
+
Categories for models (except models for NER tasks) are managed in this class.
|
|
55
|
+
Different to `DatasetCategories`, these members are immutable.
|
|
55
56
|
|
|
56
|
-
|
|
57
|
+
Example:
|
|
57
58
|
|
|
59
|
+
```python
|
|
58
60
|
categories = ModelCategories(init_categories={1: "text", 2: "title"})
|
|
59
61
|
cats = categories.get_categories(as_dict=True) # {1: LayoutType.text, 2: LayoutType.title}
|
|
60
62
|
categories.filter_categories = [LayoutType.text] # filter out text
|
|
61
63
|
cats = categories.get_categories(as_dict=True) # {2: LayoutType.title}
|
|
64
|
+
```
|
|
65
|
+
|
|
62
66
|
"""
|
|
63
67
|
|
|
64
68
|
init_categories: Optional[Mapping[int, TypeOrStr]] = field(repr=False)
|
|
@@ -94,9 +98,12 @@ class ModelCategories:
|
|
|
94
98
|
"""
|
|
95
99
|
Get the categories
|
|
96
100
|
|
|
97
|
-
:
|
|
98
|
-
|
|
99
|
-
|
|
101
|
+
Args:
|
|
102
|
+
as_dict: return as dict
|
|
103
|
+
name_as_key: if `as_dict=True` and `name_as_key=True` will swap key and value
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
categories dict
|
|
100
107
|
"""
|
|
101
108
|
if as_dict:
|
|
102
109
|
if name_as_key:
|
|
@@ -110,26 +117,30 @@ class ModelCategories:
|
|
|
110
117
|
|
|
111
118
|
@property
|
|
112
119
|
def filter_categories(self) -> Sequence[ObjectTypes]:
|
|
113
|
-
"""filter_categories"""
|
|
120
|
+
"""`filter_categories`"""
|
|
114
121
|
return self._filter_categories
|
|
115
122
|
|
|
116
123
|
@filter_categories.setter
|
|
117
124
|
def filter_categories(self, categories: Sequence[ObjectTypes]) -> None:
|
|
118
|
-
"""categories setter"""
|
|
125
|
+
"""`categories` setter"""
|
|
119
126
|
self._filter_categories = categories
|
|
120
127
|
self.categories = self.get_categories()
|
|
121
128
|
|
|
122
129
|
def shift_category_ids(self, shift_by: int) -> MappingProxyType[int, ObjectTypes]:
|
|
123
130
|
"""
|
|
124
|
-
Shift
|
|
125
|
-
|
|
126
|
-
**Example**:
|
|
131
|
+
Shift `category_id`s
|
|
127
132
|
|
|
133
|
+
Example:
|
|
134
|
+
```python
|
|
128
135
|
categories = ModelCategories(init_categories={"1": "text", "2": "title"})
|
|
129
136
|
cats = categories.shift_category_ids(1) # {"2": LayoutType.text, "3": LayoutType.title}
|
|
137
|
+
```
|
|
130
138
|
|
|
131
|
-
|
|
132
|
-
|
|
139
|
+
Args:
|
|
140
|
+
shift_by: The value to shift the category id to the left or to the right
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
shifted categories
|
|
133
144
|
"""
|
|
134
145
|
return MappingProxyType({k + shift_by: v for k, v in self.get_categories().items()})
|
|
135
146
|
|
|
@@ -140,21 +151,23 @@ class NerModelCategories(ModelCategories):
|
|
|
140
151
|
Categories for models for NER tasks. It can handle the merging of token classes and bio tags to build a new set
|
|
141
152
|
of categories.
|
|
142
153
|
|
|
143
|
-
|
|
144
|
-
|
|
154
|
+
Example:
|
|
155
|
+
```python
|
|
145
156
|
categories = NerModelCategories(categories_semantics=["question", "answer"], categories_bio=["B", "I"])
|
|
146
157
|
cats = categories.get_categories(as_dict=True) # {"1": TokenClassWithTag.b_question,
|
|
147
158
|
"2": TokenClassWithTag.i_question,
|
|
148
159
|
"3": TokenClassWithTag.b_answer,
|
|
149
160
|
"4": TokenClassWithTag.i_answer}
|
|
161
|
+
```
|
|
150
162
|
|
|
151
163
|
You can also leave the categories unchanged:
|
|
152
164
|
|
|
153
|
-
|
|
154
|
-
|
|
165
|
+
Example:
|
|
166
|
+
```python
|
|
155
167
|
categories = NerModelCategories(init_categories={"1": "question", "2": "answer"})
|
|
156
168
|
cats = categories.get_categories(as_dict=True) # {"1": TokenClasses.question,
|
|
157
169
|
"2": TokenClasses.answer}
|
|
170
|
+
```
|
|
158
171
|
"""
|
|
159
172
|
|
|
160
173
|
categories_semantics: Optional[Sequence[TypeOrStr]] = field(default=None)
|
|
@@ -191,16 +204,22 @@ class NerModelCategories(ModelCategories):
|
|
|
191
204
|
"""
|
|
192
205
|
Merge bio and semantics categories
|
|
193
206
|
|
|
194
|
-
|
|
207
|
+
Example:
|
|
195
208
|
|
|
209
|
+
```python
|
|
196
210
|
categories = NerModelCategories(categories_semantics=["question", "answer"], categories_bio=["B", "I"])
|
|
197
211
|
cats = categories.get_categories(as_dict=True) # {"1": TokenClassWithTag.b_question,
|
|
198
212
|
"2": TokenClassWithTag.i_question,
|
|
199
213
|
"3": TokenClassWithTag.b_answer,
|
|
200
214
|
"4": TokenClassWithTag.i_answer}
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
:
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
Args:
|
|
218
|
+
categories_semantics: semantic categories (without tags)
|
|
219
|
+
categories_bio: bio tags
|
|
220
|
+
|
|
221
|
+
Returns:
|
|
222
|
+
A mapping of categories with tags
|
|
204
223
|
"""
|
|
205
224
|
categories_list = sorted(
|
|
206
225
|
{
|
|
@@ -216,13 +235,18 @@ class NerModelCategories(ModelCategories):
|
|
|
216
235
|
"""
|
|
217
236
|
Disentangle token class and tag. It will return separate ObjectTypes for token class and tag.
|
|
218
237
|
|
|
219
|
-
|
|
238
|
+
Example:
|
|
220
239
|
|
|
240
|
+
```python
|
|
221
241
|
NerModelCategories.disentangle_token_class_and_tag(TokenClassWithTag.b_question)
|
|
222
242
|
# (TokenClasses.question, TokenTags.begin)
|
|
243
|
+
```
|
|
223
244
|
|
|
224
|
-
:
|
|
225
|
-
|
|
245
|
+
Args:
|
|
246
|
+
category_name: A category name with token class and tag
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
Tuple of disentangled token class and tag
|
|
226
250
|
"""
|
|
227
251
|
return token_class_with_tag_to_token_class_and_tag(category_name)
|
|
228
252
|
|
|
@@ -252,6 +276,13 @@ class PredictorBase(ABC):
|
|
|
252
276
|
def get_requirements(cls) -> list[Requirement]:
|
|
253
277
|
"""
|
|
254
278
|
Get a list of requirements for running the detector
|
|
279
|
+
|
|
280
|
+
Returns:
|
|
281
|
+
A list of requirements, where each requirement is a tuple of the form:
|
|
282
|
+
(requirement_name, is_available, description)
|
|
283
|
+
- `requirement_name`: The name of the requirement.
|
|
284
|
+
- `is_available`: A boolean indicating whether the requirement is available.
|
|
285
|
+
- `description`: A string describing the error code.
|
|
255
286
|
"""
|
|
256
287
|
raise NotImplementedError()
|
|
257
288
|
|
|
@@ -265,6 +296,12 @@ class PredictorBase(ABC):
|
|
|
265
296
|
def get_model_id(self) -> str:
|
|
266
297
|
"""
|
|
267
298
|
Get the generating model
|
|
299
|
+
|
|
300
|
+
Returns:
|
|
301
|
+
A string representing the `model_id`, which is derived from the name of the predictor.
|
|
302
|
+
|
|
303
|
+
Raises:
|
|
304
|
+
ValueError: If the name is not set
|
|
268
305
|
"""
|
|
269
306
|
if self.name is not None:
|
|
270
307
|
return get_uuid_from_str(self.name)[:8]
|
|
@@ -286,27 +323,17 @@ class DetectionResult:
|
|
|
286
323
|
"""
|
|
287
324
|
Simple mutable storage for detection results.
|
|
288
325
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
`text`: text string. Used for OCR predictors
|
|
302
|
-
|
|
303
|
-
`block`: block number. For reading order from some ocr predictors
|
|
304
|
-
|
|
305
|
-
`line`: line number. For reading order from some ocr predictors
|
|
306
|
-
|
|
307
|
-
`uuid`: uuid. For assigning detection result (e.g. text to image annotations)
|
|
308
|
-
|
|
309
|
-
|
|
326
|
+
Attributes:
|
|
327
|
+
box: [ulx,uly,lrx,lry]
|
|
328
|
+
class_id: category id
|
|
329
|
+
score: prediction score
|
|
330
|
+
mask: binary mask
|
|
331
|
+
absolute_coords: absolute coordinates
|
|
332
|
+
class_name: category name
|
|
333
|
+
text: text string. Used for OCR predictors
|
|
334
|
+
block: block number. For reading order from some ocr predictors
|
|
335
|
+
line: line number. For reading order from some ocr predictors
|
|
336
|
+
uuid: uuid. For assigning detection result (e.g. text to image annotations)
|
|
310
337
|
"""
|
|
311
338
|
|
|
312
339
|
box: Optional[list[float]] = None
|
|
@@ -328,9 +355,10 @@ class ObjectDetector(PredictorBase, ABC):
|
|
|
328
355
|
Abstract base class for object detection. This can be anything ranging from layout detection to OCR.
|
|
329
356
|
Use this to connect external detectors with deepdoctection predictors on images.
|
|
330
357
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
358
|
+
Example:
|
|
359
|
+
```python
|
|
360
|
+
MyFancyTensorpackPredictor(TensorpackPredictor,ObjectDetector)
|
|
361
|
+
```
|
|
334
362
|
|
|
335
363
|
and implement the `predict`.
|
|
336
364
|
"""
|
|
@@ -341,20 +369,23 @@ class ObjectDetector(PredictorBase, ABC):
|
|
|
341
369
|
def predict(self, np_img: PixelValues) -> list[DetectionResult]:
|
|
342
370
|
"""
|
|
343
371
|
Abstract method predict
|
|
372
|
+
|
|
373
|
+
Args:
|
|
374
|
+
np_img: A numpy array representing the image to be processed by the predictor.
|
|
344
375
|
"""
|
|
345
376
|
raise NotImplementedError()
|
|
346
377
|
|
|
347
378
|
@property
|
|
348
379
|
def accepts_batch(self) -> bool:
|
|
349
380
|
"""
|
|
350
|
-
|
|
381
|
+
Whether to accept batches in `predict`
|
|
351
382
|
"""
|
|
352
383
|
return False
|
|
353
384
|
|
|
354
385
|
@abstractmethod
|
|
355
386
|
def get_category_names(self) -> tuple[ObjectTypes, ...]:
|
|
356
387
|
"""
|
|
357
|
-
|
|
388
|
+
`get_category_names`
|
|
358
389
|
"""
|
|
359
390
|
raise NotImplementedError()
|
|
360
391
|
|
|
@@ -369,7 +400,11 @@ class ObjectDetector(PredictorBase, ABC):
|
|
|
369
400
|
class PdfMiner(PredictorBase, ABC):
|
|
370
401
|
"""
|
|
371
402
|
Abstract base class for mining information from PDF documents. Reads in a bytes stream from a PDF document page.
|
|
372
|
-
Use this to connect external pdf miners and wrap them into
|
|
403
|
+
Use this to connect external pdf miners and wrap them into deepdoctection predictors.
|
|
404
|
+
|
|
405
|
+
Attributes:
|
|
406
|
+
categories: ModelCategories
|
|
407
|
+
_pdf_bytes: Optional[bytes]: Bytes of the PDF document page to be processed by the predictor.
|
|
373
408
|
"""
|
|
374
409
|
|
|
375
410
|
categories: ModelCategories
|
|
@@ -379,6 +414,12 @@ class PdfMiner(PredictorBase, ABC):
|
|
|
379
414
|
def predict(self, pdf_bytes: bytes) -> list[DetectionResult]:
|
|
380
415
|
"""
|
|
381
416
|
Abstract method predict
|
|
417
|
+
|
|
418
|
+
Args:
|
|
419
|
+
pdf_bytes: A bytes stream representing the PDF document page to be processed by the predictor.
|
|
420
|
+
|
|
421
|
+
Returns:
|
|
422
|
+
A list of DetectionResult objects containing the results of the prediction.
|
|
382
423
|
"""
|
|
383
424
|
raise NotImplementedError()
|
|
384
425
|
|
|
@@ -386,6 +427,12 @@ class PdfMiner(PredictorBase, ABC):
|
|
|
386
427
|
def get_width_height(self, pdf_bytes: bytes) -> tuple[float, float]:
|
|
387
428
|
"""
|
|
388
429
|
Abstract method get_width_height
|
|
430
|
+
|
|
431
|
+
Args:
|
|
432
|
+
pdf_bytes: A bytes stream representing the PDF document page.
|
|
433
|
+
|
|
434
|
+
Returns:
|
|
435
|
+
A tuple containing the width and height of the PDF document page.
|
|
389
436
|
"""
|
|
390
437
|
raise NotImplementedError()
|
|
391
438
|
|
|
@@ -395,36 +442,43 @@ class PdfMiner(PredictorBase, ABC):
|
|
|
395
442
|
@property
|
|
396
443
|
def accepts_batch(self) -> bool:
|
|
397
444
|
"""
|
|
398
|
-
|
|
445
|
+
Whether to accept batches in `predict`
|
|
446
|
+
|
|
447
|
+
Returns:
|
|
448
|
+
bool: True if the predictor accepts batches, False otherwise.
|
|
399
449
|
"""
|
|
400
450
|
return False
|
|
401
451
|
|
|
402
452
|
@abstractmethod
|
|
403
453
|
def get_category_names(self) -> tuple[ObjectTypes, ...]:
|
|
404
454
|
"""
|
|
405
|
-
|
|
455
|
+
`get_category_names`
|
|
406
456
|
"""
|
|
407
457
|
raise NotImplementedError()
|
|
408
458
|
|
|
409
459
|
|
|
410
460
|
class TextRecognizer(PredictorBase, ABC):
|
|
411
461
|
"""
|
|
412
|
-
Abstract base class for text recognition. In contrast to ObjectDetector one assumes that `predict` accepts
|
|
413
|
-
batches of
|
|
414
|
-
or annotation_id) or
|
|
462
|
+
Abstract base class for text recognition. In contrast to `ObjectDetector` one assumes that `predict` accepts
|
|
463
|
+
batches of `np.arrays`. More precisely, when using `predict` pass a list of tuples with uuids (e.g. `image_id`,
|
|
464
|
+
or `annotation_id`) and `np.array`s.
|
|
415
465
|
"""
|
|
416
466
|
|
|
417
467
|
@abstractmethod
|
|
418
468
|
def predict(self, images: list[tuple[str, PixelValues]]) -> list[DetectionResult]:
|
|
419
469
|
"""
|
|
420
470
|
Abstract method predict
|
|
471
|
+
|
|
472
|
+
Args:
|
|
473
|
+
images: A list of tuples, where each tuple contains a unique identifier (e.g., `annotation_id`)
|
|
474
|
+
and a `np.array` representing the image to be processed by the predictor.
|
|
421
475
|
"""
|
|
422
476
|
raise NotImplementedError()
|
|
423
477
|
|
|
424
478
|
@property
|
|
425
479
|
def accepts_batch(self) -> bool:
|
|
426
480
|
"""
|
|
427
|
-
|
|
481
|
+
Whether to accept batches in `predict`
|
|
428
482
|
"""
|
|
429
483
|
return True
|
|
430
484
|
|
|
@@ -439,21 +493,15 @@ class TokenClassResult:
|
|
|
439
493
|
"""
|
|
440
494
|
Simple mutable storage for token classification results
|
|
441
495
|
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
`semantic_name`: semantic name
|
|
453
|
-
|
|
454
|
-
`bio_tag`: bio tag
|
|
455
|
-
|
|
456
|
-
`score`: prediction score
|
|
496
|
+
Attributes:
|
|
497
|
+
id: uuid of token (not unique)
|
|
498
|
+
token_id: token id
|
|
499
|
+
token: token
|
|
500
|
+
class_id: category id
|
|
501
|
+
class_name: category name
|
|
502
|
+
semantic_name: semantic name
|
|
503
|
+
bio_tag: bio tag
|
|
504
|
+
score: prediction score
|
|
457
505
|
"""
|
|
458
506
|
|
|
459
507
|
uuid: str
|
|
@@ -471,10 +519,11 @@ class SequenceClassResult:
|
|
|
471
519
|
"""
|
|
472
520
|
Storage for sequence classification results
|
|
473
521
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
522
|
+
Attributes:
|
|
523
|
+
class_id: category_id
|
|
524
|
+
class_name: category_name
|
|
525
|
+
score: prediction score
|
|
526
|
+
class_name_orig: original class name
|
|
478
527
|
"""
|
|
479
528
|
|
|
480
529
|
class_id: int
|
|
@@ -485,14 +534,19 @@ class SequenceClassResult:
|
|
|
485
534
|
|
|
486
535
|
class LMTokenClassifier(PredictorBase, ABC):
|
|
487
536
|
"""
|
|
488
|
-
Abstract base class for token classifiers. If you want to connect external token classifiers with
|
|
489
|
-
predictors wrap them into a class derived from this class.
|
|
537
|
+
Abstract base class for token classifiers. If you want to connect external token classifiers with deepdoctection
|
|
538
|
+
predictors wrap them into a class derived from this class.
|
|
490
539
|
"""
|
|
491
540
|
|
|
492
541
|
@abstractmethod
|
|
493
542
|
def predict(self, **encodings: Union[list[list[str]], torch.Tensor]) -> list[TokenClassResult]:
|
|
494
543
|
"""
|
|
495
544
|
Abstract method predict
|
|
545
|
+
|
|
546
|
+
Args:
|
|
547
|
+
encodings: A dictionary of encodings, where each key is a string representing the encoding type
|
|
548
|
+
(e.g., "input_ids", "attention_mask") and the value is a list of lists of strings or a
|
|
549
|
+
torch.Tensor representing the encoded input data.
|
|
496
550
|
"""
|
|
497
551
|
raise NotImplementedError()
|
|
498
552
|
|
|
@@ -514,10 +568,11 @@ class LMTokenClassifier(PredictorBase, ABC):
|
|
|
514
568
|
|
|
515
569
|
@staticmethod
|
|
516
570
|
def image_to_raw_features_mapping() -> str:
|
|
517
|
-
"""
|
|
571
|
+
"""
|
|
572
|
+
Converting image into model features must often be divided into several steps. This is because the process
|
|
518
573
|
method during training and serving might differ: For training there might be additional augmentation steps
|
|
519
574
|
required or one might add some data batching. For this reason we have added two methods
|
|
520
|
-
`image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name
|
|
575
|
+
`image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name either for
|
|
521
576
|
training or inference purposes:
|
|
522
577
|
|
|
523
578
|
`image_to_raw_features_mapping` is used for training and transforms an image into raw features that can be
|
|
@@ -528,7 +583,8 @@ class LMTokenClassifier(PredictorBase, ABC):
|
|
|
528
583
|
|
|
529
584
|
@staticmethod
|
|
530
585
|
def image_to_features_mapping() -> str:
|
|
531
|
-
"""
|
|
586
|
+
"""
|
|
587
|
+
Converting image into model features must often be divided into several steps. This is because the process
|
|
532
588
|
method during training and serving might differ: For training there might be additional augmentation steps
|
|
533
589
|
required or one might add some data batching. For this reason we have added two methods
|
|
534
590
|
`image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name either for
|
|
@@ -574,7 +630,8 @@ class LMSequenceClassifier(PredictorBase, ABC):
|
|
|
574
630
|
|
|
575
631
|
@staticmethod
|
|
576
632
|
def image_to_raw_features_mapping() -> str:
|
|
577
|
-
"""
|
|
633
|
+
"""
|
|
634
|
+
Converting image into model features must often be divided into several steps. This is because the process
|
|
578
635
|
method during training and serving might differ: For training there might be additional augmentation steps
|
|
579
636
|
required or one might add some data batching. For this reason we have added two methods
|
|
580
637
|
`image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name either for
|
|
@@ -588,7 +645,8 @@ class LMSequenceClassifier(PredictorBase, ABC):
|
|
|
588
645
|
|
|
589
646
|
@staticmethod
|
|
590
647
|
def image_to_features_mapping() -> str:
|
|
591
|
-
"""
|
|
648
|
+
"""
|
|
649
|
+
Converting image into model features must often be divided into several steps. This is because the process
|
|
592
650
|
method during training and serving might differ: For training there might be additional augmentation steps
|
|
593
651
|
required or one might add some data batching. For this reason we have added two methods
|
|
594
652
|
`image_to_raw_features_mapping`, `image_to_features_mapping` that return a mapping function name either for
|
|
@@ -605,27 +663,39 @@ class LMSequenceClassifier(PredictorBase, ABC):
|
|
|
605
663
|
|
|
606
664
|
class LanguageDetector(PredictorBase, ABC):
|
|
607
665
|
"""
|
|
608
|
-
Abstract base class for language detectors.
|
|
609
|
-
ISO-639 code for the detected language.
|
|
666
|
+
Abstract base class for language detectors.
|
|
610
667
|
"""
|
|
611
668
|
|
|
612
669
|
@abstractmethod
|
|
613
670
|
def predict(self, text_string: str) -> DetectionResult:
|
|
614
671
|
"""
|
|
615
672
|
Abstract method predict
|
|
673
|
+
|
|
674
|
+
Args:
|
|
675
|
+
text_string: A string representing the text to be processed by the predictor.
|
|
676
|
+
|
|
677
|
+
Returns:
|
|
678
|
+
A DetectionResult object containing the detected language information (ISO-639 code).
|
|
616
679
|
"""
|
|
617
680
|
raise NotImplementedError()
|
|
618
681
|
|
|
619
682
|
|
|
620
683
|
class ImageTransformer(PredictorBase, ABC):
|
|
621
684
|
"""
|
|
622
|
-
Abstract base class for transforming an image.
|
|
685
|
+
Abstract base class for transforming an image.
|
|
623
686
|
"""
|
|
624
687
|
|
|
625
688
|
@abstractmethod
|
|
626
689
|
def transform_image(self, np_img: PixelValues, specification: DetectionResult) -> PixelValues:
|
|
627
690
|
"""
|
|
628
691
|
Abstract method transform
|
|
692
|
+
|
|
693
|
+
Args:
|
|
694
|
+
np_img: A `np.array` representing the image to be transformed.
|
|
695
|
+
specification: A `DetectionResult` instance containing specifications for the transformation.
|
|
696
|
+
|
|
697
|
+
Returns:
|
|
698
|
+
A `np.array` representing the transformed image.
|
|
629
699
|
"""
|
|
630
700
|
raise NotImplementedError()
|
|
631
701
|
|
|
@@ -633,6 +703,11 @@ class ImageTransformer(PredictorBase, ABC):
|
|
|
633
703
|
def predict(self, np_img: PixelValues) -> DetectionResult:
|
|
634
704
|
"""
|
|
635
705
|
Abstract method predict
|
|
706
|
+
Args:
|
|
707
|
+
np_img: A `np.array` representing the image to be processed by the predictor.
|
|
708
|
+
|
|
709
|
+
Returns:
|
|
710
|
+
A `DetectionResult` object containing the prediction results regarding the transformation.
|
|
636
711
|
"""
|
|
637
712
|
raise NotImplementedError()
|
|
638
713
|
|
|
@@ -648,19 +723,25 @@ class ImageTransformer(PredictorBase, ABC):
|
|
|
648
723
|
"""
|
|
649
724
|
Transform coordinates aligned with the transform_image method.
|
|
650
725
|
|
|
651
|
-
:
|
|
652
|
-
|
|
726
|
+
Args:
|
|
727
|
+
detect_results: List of `DetectionResult`s
|
|
728
|
+
|
|
729
|
+
Returns:
|
|
730
|
+
List of `DetectionResult`s. If you pass `uuid`, it is possible to track the transformed bounding boxes.
|
|
653
731
|
"""
|
|
654
732
|
|
|
655
733
|
raise NotImplementedError()
|
|
656
734
|
|
|
657
735
|
def inverse_transform_coords(self, detect_results: Sequence[DetectionResult]) -> Sequence[DetectionResult]:
|
|
658
736
|
"""
|
|
659
|
-
Inverse transform coordinates aligned with the transform_image method. Composing transform_coords with
|
|
737
|
+
Inverse transform coordinates aligned with the `transform_image` method. Composing transform_coords with
|
|
660
738
|
inverse_transform_coords should return the original coordinates.
|
|
661
739
|
|
|
662
|
-
:
|
|
663
|
-
|
|
740
|
+
Args:
|
|
741
|
+
detect_results: List of `DetectionResult`s
|
|
742
|
+
|
|
743
|
+
Returns:
|
|
744
|
+
List of `DetectionResult`s. If you pass `uuid` it is possible to track the transformed bounding boxes.
|
|
664
745
|
"""
|
|
665
746
|
|
|
666
747
|
raise NotImplementedError()
|
|
@@ -678,16 +759,14 @@ class DeterministicImageTransformer(ImageTransformer):
|
|
|
678
759
|
The transformer performs deterministic transformations on images and their associated coordinates,
|
|
679
760
|
enabling operations like padding, rotation, and other geometric transformations while maintaining
|
|
680
761
|
the relationship between image content and annotation coordinates.
|
|
681
|
-
|
|
682
|
-
:param base_transform: A BaseTransform instance that defines the actual transformation operations
|
|
683
|
-
to be applied to images and coordinates.
|
|
684
762
|
"""
|
|
685
763
|
|
|
686
|
-
def __init__(self, base_transform: BaseTransform):
|
|
764
|
+
def __init__(self, base_transform: BaseTransform) -> None:
|
|
687
765
|
"""
|
|
688
766
|
Initialize the DeterministicImageTransformer with a BaseTransform instance.
|
|
689
767
|
|
|
690
|
-
:
|
|
768
|
+
Args:
|
|
769
|
+
base_transform: A BaseTransform instance that defines the actual transformation operations
|
|
691
770
|
"""
|
|
692
771
|
self.base_transform = base_transform
|
|
693
772
|
self.name = base_transform.__class__.__name__
|