deepdoctection 0.32__py3-none-any.whl → 0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +8 -25
- deepdoctection/analyzer/dd.py +84 -71
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +78 -56
- deepdoctection/datapoint/box.py +7 -7
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +157 -75
- deepdoctection/datapoint/view.py +175 -151
- deepdoctection/datasets/adapter.py +30 -24
- deepdoctection/datasets/base.py +10 -10
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +23 -25
- deepdoctection/datasets/instances/doclaynet.py +48 -49
- deepdoctection/datasets/instances/fintabnet.py +44 -45
- deepdoctection/datasets/instances/funsd.py +23 -23
- deepdoctection/datasets/instances/iiitar13k.py +8 -8
- deepdoctection/datasets/instances/layouttest.py +2 -2
- deepdoctection/datasets/instances/publaynet.py +3 -3
- deepdoctection/datasets/instances/pubtables1m.py +18 -18
- deepdoctection/datasets/instances/pubtabnet.py +30 -29
- deepdoctection/datasets/instances/rvlcdip.py +28 -29
- deepdoctection/datasets/instances/xfund.py +51 -30
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +13 -12
- deepdoctection/eval/eval.py +32 -26
- deepdoctection/eval/tedsmetric.py +16 -12
- deepdoctection/eval/tp_eval_callback.py +7 -16
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +69 -89
- deepdoctection/extern/deskew.py +11 -10
- deepdoctection/extern/doctrocr.py +81 -64
- deepdoctection/extern/fastlang.py +23 -16
- deepdoctection/extern/hfdetr.py +53 -38
- deepdoctection/extern/hflayoutlm.py +216 -155
- deepdoctection/extern/hflm.py +35 -30
- deepdoctection/extern/model.py +433 -255
- deepdoctection/extern/pdftext.py +15 -15
- deepdoctection/extern/pt/ptutils.py +4 -2
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +14 -16
- deepdoctection/extern/tp/tfutils.py +16 -2
- deepdoctection/extern/tp/tpcompat.py +11 -7
- deepdoctection/extern/tp/tpfrcnn/config/config.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +1 -1
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +5 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +4 -4
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +5 -3
- deepdoctection/extern/tp/tpfrcnn/preproc.py +5 -5
- deepdoctection/extern/tpdetect.py +40 -45
- deepdoctection/mapper/cats.py +36 -40
- deepdoctection/mapper/cocostruct.py +16 -12
- deepdoctection/mapper/d2struct.py +22 -22
- deepdoctection/mapper/hfstruct.py +7 -7
- deepdoctection/mapper/laylmstruct.py +22 -24
- deepdoctection/mapper/maputils.py +9 -10
- deepdoctection/mapper/match.py +33 -2
- deepdoctection/mapper/misc.py +6 -7
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +6 -6
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +3 -3
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/anngen.py +39 -14
- deepdoctection/pipe/base.py +68 -99
- deepdoctection/pipe/common.py +181 -85
- deepdoctection/pipe/concurrency.py +14 -10
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +18 -16
- deepdoctection/pipe/lm.py +49 -47
- deepdoctection/pipe/order.py +63 -65
- deepdoctection/pipe/refine.py +102 -109
- deepdoctection/pipe/segment.py +157 -162
- deepdoctection/pipe/sub_layout.py +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/d2_frcnn_train.py +27 -25
- deepdoctection/train/hf_detr_train.py +22 -18
- deepdoctection/train/hf_layoutlm_train.py +49 -48
- deepdoctection/train/tp_frcnn_train.py +10 -11
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +52 -14
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +41 -14
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +15 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/pdf_utils.py +39 -14
- deepdoctection/utils/settings.py +188 -182
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +70 -69
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/METADATA +7 -4
- deepdoctection-0.34.dist-info/RECORD +146 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.32.dist-info/RECORD +0 -146
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/LICENSE +0 -0
- {deepdoctection-0.32.dist-info → deepdoctection-0.34.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/anngen.py
CHANGED
|
@@ -19,11 +19,11 @@
|
|
|
19
19
|
Module for datapoint populating helpers
|
|
20
20
|
"""
|
|
21
21
|
from dataclasses import asdict
|
|
22
|
-
from typing import
|
|
22
|
+
from typing import Mapping, Optional, Union
|
|
23
23
|
|
|
24
24
|
import numpy as np
|
|
25
25
|
|
|
26
|
-
from ..datapoint.annotation import CategoryAnnotation, ContainerAnnotation, ImageAnnotation
|
|
26
|
+
from ..datapoint.annotation import DEFAULT_CATEGORY_ID, CategoryAnnotation, ContainerAnnotation, ImageAnnotation
|
|
27
27
|
from ..datapoint.box import BoundingBox, local_to_global_coords, rescale_coords
|
|
28
28
|
from ..datapoint.image import Image
|
|
29
29
|
from ..extern.base import DetectionResult
|
|
@@ -44,7 +44,7 @@ class DatapointManager:
|
|
|
44
44
|
|
|
45
45
|
def __init__(self, service_id: str, model_id: Optional[str] = None) -> None:
|
|
46
46
|
self._datapoint: Optional[Image] = None
|
|
47
|
-
self._cache_anns:
|
|
47
|
+
self._cache_anns: dict[str, ImageAnnotation] = {}
|
|
48
48
|
self.datapoint_is_passed: bool = False
|
|
49
49
|
self.category_id_mapping: Optional[Mapping[int, int]] = None
|
|
50
50
|
self.service_id = service_id
|
|
@@ -155,7 +155,7 @@ class DatapointManager:
|
|
|
155
155
|
ann = ImageAnnotation(
|
|
156
156
|
category_name=detect_result.class_name,
|
|
157
157
|
bounding_box=box,
|
|
158
|
-
category_id=
|
|
158
|
+
category_id=detect_result.class_id,
|
|
159
159
|
score=detect_result.score,
|
|
160
160
|
service_id=self.service_id,
|
|
161
161
|
model_id=self.model_id,
|
|
@@ -174,7 +174,7 @@ class DatapointManager:
|
|
|
174
174
|
raise ValueError("image cannot be None")
|
|
175
175
|
ann.image.set_embedding(parent_ann.annotation_id, ann.bounding_box)
|
|
176
176
|
ann.image.set_embedding(self.datapoint.image_id, ann_global_box)
|
|
177
|
-
parent_ann.dump_relationship(Relationships.
|
|
177
|
+
parent_ann.dump_relationship(Relationships.CHILD, ann.annotation_id)
|
|
178
178
|
|
|
179
179
|
self.datapoint.dump(ann)
|
|
180
180
|
self._cache_anns[ann.annotation_id] = ann
|
|
@@ -189,7 +189,7 @@ class DatapointManager:
|
|
|
189
189
|
def set_category_annotation(
|
|
190
190
|
self,
|
|
191
191
|
category_name: ObjectTypes,
|
|
192
|
-
category_id: Optional[
|
|
192
|
+
category_id: Optional[int],
|
|
193
193
|
sub_cat_key: ObjectTypes,
|
|
194
194
|
annotation_id: str,
|
|
195
195
|
score: Optional[float] = None,
|
|
@@ -216,7 +216,7 @@ class DatapointManager:
|
|
|
216
216
|
) as annotation_context:
|
|
217
217
|
cat_ann = CategoryAnnotation(
|
|
218
218
|
category_name=category_name,
|
|
219
|
-
category_id=
|
|
219
|
+
category_id=category_id if category_id is not None else DEFAULT_CATEGORY_ID,
|
|
220
220
|
score=score,
|
|
221
221
|
service_id=self.service_id,
|
|
222
222
|
model_id=self.model_id,
|
|
@@ -230,10 +230,10 @@ class DatapointManager:
|
|
|
230
230
|
def set_container_annotation(
|
|
231
231
|
self,
|
|
232
232
|
category_name: ObjectTypes,
|
|
233
|
-
category_id: Optional[
|
|
233
|
+
category_id: Optional[int],
|
|
234
234
|
sub_cat_key: ObjectTypes,
|
|
235
235
|
annotation_id: str,
|
|
236
|
-
value: Union[str,
|
|
236
|
+
value: Union[str, list[str]],
|
|
237
237
|
score: Optional[float] = None,
|
|
238
238
|
) -> Optional[str]:
|
|
239
239
|
"""
|
|
@@ -260,7 +260,7 @@ class DatapointManager:
|
|
|
260
260
|
) as annotation_context:
|
|
261
261
|
cont_ann = ContainerAnnotation(
|
|
262
262
|
category_name=category_name,
|
|
263
|
-
category_id=
|
|
263
|
+
category_id=category_id if category_id is not None else DEFAULT_CATEGORY_ID,
|
|
264
264
|
value=value,
|
|
265
265
|
score=score,
|
|
266
266
|
service_id=self.service_id,
|
|
@@ -272,6 +272,33 @@ class DatapointManager:
|
|
|
272
272
|
return None
|
|
273
273
|
return cont_ann.annotation_id
|
|
274
274
|
|
|
275
|
+
def set_relationship_annotation(
|
|
276
|
+
self, relationship_name: ObjectTypes, target_annotation_id: str, annotation_id: str
|
|
277
|
+
) -> Optional[str]:
|
|
278
|
+
"""
|
|
279
|
+
Create a relationship annotation and dump it to the target annotation.
|
|
280
|
+
|
|
281
|
+
:param relationship_name: The relationship key
|
|
282
|
+
:param target_annotation_id: Annotation_id of the parent `ImageAnnotation`
|
|
283
|
+
:param annotation_id: The annotation_id that is stored under the relationship key on the target annotation
|
|
284
|
+
|
|
285
|
+
:return: Annotation_id of the parent `ImageAnnotation` for references if the dump has been successful
|
|
286
|
+
"""
|
|
287
|
+
self.assert_datapoint_passed()
|
|
288
|
+
with MappingContextManager(
|
|
289
|
+
dp_name=self.datapoint.file_name,
|
|
290
|
+
filter_level="annotation",
|
|
291
|
+
relationship_annotation={
|
|
292
|
+
"relationship_name": relationship_name.value,
|
|
293
|
+
"target_annotation_id": target_annotation_id,
|
|
294
|
+
"annotation_id": annotation_id,
|
|
295
|
+
},
|
|
296
|
+
) as annotation_context:
|
|
297
|
+
self._cache_anns[target_annotation_id].dump_relationship(relationship_name, annotation_id)
|
|
298
|
+
if annotation_context.context_error:
|
|
299
|
+
return None
|
|
300
|
+
return target_annotation_id
|
|
301
|
+
|
|
275
302
|
def set_summary_annotation(
|
|
276
303
|
self,
|
|
277
304
|
summary_key: ObjectTypes,
|
|
@@ -299,8 +326,6 @@ class DatapointManager:
|
|
|
299
326
|
else:
|
|
300
327
|
image = self.datapoint
|
|
301
328
|
assert image is not None, image
|
|
302
|
-
if image.summary is None:
|
|
303
|
-
image.summary = SummaryAnnotation()
|
|
304
329
|
|
|
305
330
|
ann: Union[CategoryAnnotation, ContainerAnnotation]
|
|
306
331
|
with MappingContextManager(
|
|
@@ -316,7 +341,7 @@ class DatapointManager:
|
|
|
316
341
|
if summary_value is not None:
|
|
317
342
|
ann = ContainerAnnotation(
|
|
318
343
|
category_name=summary_name,
|
|
319
|
-
category_id=
|
|
344
|
+
category_id=summary_number if summary_number else DEFAULT_CATEGORY_ID,
|
|
320
345
|
value=summary_value,
|
|
321
346
|
score=summary_score,
|
|
322
347
|
service_id=self.service_id,
|
|
@@ -326,7 +351,7 @@ class DatapointManager:
|
|
|
326
351
|
else:
|
|
327
352
|
ann = CategoryAnnotation(
|
|
328
353
|
category_name=summary_name,
|
|
329
|
-
category_id=
|
|
354
|
+
category_id=summary_number if summary_number is not None else DEFAULT_CATEGORY_ID,
|
|
330
355
|
score=summary_score,
|
|
331
356
|
service_id=self.service_id,
|
|
332
357
|
model_id=self.model_id,
|
deepdoctection/pipe/base.py
CHANGED
|
@@ -19,21 +19,33 @@
|
|
|
19
19
|
"""
|
|
20
20
|
Module for the base class for building pipelines
|
|
21
21
|
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
22
24
|
from abc import ABC, abstractmethod
|
|
23
25
|
from collections import defaultdict
|
|
24
|
-
from
|
|
25
|
-
from typing import Any,
|
|
26
|
+
from dataclasses import dataclass, field
|
|
27
|
+
from typing import Any, Mapping, Optional, Union
|
|
26
28
|
from uuid import uuid1
|
|
27
29
|
|
|
28
30
|
from ..dataflow import DataFlow, MapData
|
|
29
31
|
from ..datapoint.image import Image
|
|
30
|
-
from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner, TextRecognizer
|
|
31
32
|
from ..utils.context import timed_operation
|
|
32
|
-
from ..utils.detection_types import JsonDict
|
|
33
33
|
from ..utils.identifier import get_uuid_from_str
|
|
34
|
+
from ..utils.settings import ObjectTypes
|
|
34
35
|
from .anngen import DatapointManager
|
|
35
36
|
|
|
36
37
|
|
|
38
|
+
@dataclass(frozen=True)
|
|
39
|
+
class MetaAnnotation:
|
|
40
|
+
"""A immutable dataclass that stores information about what `Image` are being
|
|
41
|
+
modified through a pipeline component."""
|
|
42
|
+
|
|
43
|
+
image_annotations: tuple[ObjectTypes, ...] = field(default=())
|
|
44
|
+
sub_categories: dict[ObjectTypes, set[ObjectTypes]] = field(default_factory=dict)
|
|
45
|
+
relationships: dict[ObjectTypes, set[ObjectTypes]] = field(default_factory=dict)
|
|
46
|
+
summaries: tuple[ObjectTypes, ...] = field(default=())
|
|
47
|
+
|
|
48
|
+
|
|
37
49
|
class PipelineComponent(ABC):
|
|
38
50
|
"""
|
|
39
51
|
Base class for pipeline components. Pipeline components are the parts that make up a pipeline. They contain the
|
|
@@ -54,15 +66,14 @@ class PipelineComponent(ABC):
|
|
|
54
66
|
planned.
|
|
55
67
|
"""
|
|
56
68
|
|
|
57
|
-
def __init__(self, name: str):
|
|
69
|
+
def __init__(self, name: str, model_id: Optional[str] = None) -> None:
|
|
58
70
|
"""
|
|
59
71
|
:param name: The name of the pipeline component. The name will be used to identify a pipeline component in a
|
|
60
72
|
pipeline. Use something that describe the task of the pipeline.
|
|
61
73
|
"""
|
|
62
74
|
self.name = name
|
|
63
75
|
self.service_id = self.get_service_id()
|
|
64
|
-
self.
|
|
65
|
-
self.dp_manager = DatapointManager(self.service_id)
|
|
76
|
+
self.dp_manager = DatapointManager(self.service_id, model_id)
|
|
66
77
|
self.timer_on = False
|
|
67
78
|
|
|
68
79
|
@abstractmethod
|
|
@@ -108,14 +119,14 @@ class PipelineComponent(ABC):
|
|
|
108
119
|
return MapData(df, self.pass_datapoint)
|
|
109
120
|
|
|
110
121
|
@abstractmethod
|
|
111
|
-
def clone(self) ->
|
|
122
|
+
def clone(self) -> PipelineComponent:
|
|
112
123
|
"""
|
|
113
124
|
Clone an instance
|
|
114
125
|
"""
|
|
115
126
|
raise NotImplementedError()
|
|
116
127
|
|
|
117
128
|
@abstractmethod
|
|
118
|
-
def get_meta_annotation(self) ->
|
|
129
|
+
def get_meta_annotation(self) -> MetaAnnotation:
|
|
119
130
|
"""
|
|
120
131
|
Get a dict of list of annotation type. The dict must contain
|
|
121
132
|
|
|
@@ -127,96 +138,53 @@ class PipelineComponent(ABC):
|
|
|
127
138
|
"""
|
|
128
139
|
raise NotImplementedError()
|
|
129
140
|
|
|
130
|
-
def _meta_has_all_types(self) -> None:
|
|
131
|
-
if not {"image_annotations", "sub_categories", "relationships", "summaries"}.issubset(
|
|
132
|
-
set(self.get_meta_annotation().keys())
|
|
133
|
-
):
|
|
134
|
-
raise TypeError(
|
|
135
|
-
f" 'get_meta_annotation' must return dict with all required keys. "
|
|
136
|
-
f"Got {self.get_meta_annotation().keys()}"
|
|
137
|
-
)
|
|
138
|
-
|
|
139
141
|
def get_service_id(self) -> str:
|
|
140
142
|
"""
|
|
141
143
|
Get the generating model
|
|
142
144
|
"""
|
|
143
145
|
return get_uuid_from_str(self.name)[:8]
|
|
144
146
|
|
|
145
|
-
|
|
146
|
-
class PredictorPipelineComponent(PipelineComponent, ABC):
|
|
147
|
-
"""
|
|
148
|
-
Lightweight abstract pipeline component class with `predictor`. Object detectors that only read in images as
|
|
149
|
-
numpy array and return `DetectResult`s are currently permitted.
|
|
150
|
-
"""
|
|
151
|
-
|
|
152
|
-
def __init__(
|
|
153
|
-
self,
|
|
154
|
-
name: str,
|
|
155
|
-
predictor: Union[ObjectDetector, PdfMiner, TextRecognizer],
|
|
156
|
-
) -> None:
|
|
147
|
+
def clear_predictor(self) -> None:
|
|
157
148
|
"""
|
|
158
|
-
|
|
159
|
-
:param predictor: An Object detector for predicting
|
|
149
|
+
Clear the predictor of the pipeline component if it has one. Needed for model updates during training.
|
|
160
150
|
"""
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
151
|
+
raise NotImplementedError(
|
|
152
|
+
"Maybe you forgot to implement this method in your pipeline component. This might "
|
|
153
|
+
"be the case when you run evaluation during training and need to update the "
|
|
154
|
+
"trained model in your pipeline component."
|
|
155
|
+
)
|
|
164
156
|
|
|
165
|
-
|
|
166
|
-
def clone(self) -> "PredictorPipelineComponent":
|
|
167
|
-
raise NotImplementedError()
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class LanguageModelPipelineComponent(PipelineComponent, ABC):
|
|
171
|
-
"""
|
|
172
|
-
Abstract pipeline component class with two attributes `tokenizer` and `language_model` .
|
|
173
|
-
"""
|
|
174
|
-
|
|
175
|
-
def __init__(
|
|
176
|
-
self,
|
|
177
|
-
name: str,
|
|
178
|
-
tokenizer: Any,
|
|
179
|
-
mapping_to_lm_input_func: Callable[..., Callable[[Image], Optional[Any]]],
|
|
180
|
-
):
|
|
157
|
+
def has_predictor(self) -> bool:
|
|
181
158
|
"""
|
|
182
|
-
|
|
183
|
-
:param tokenizer: Tokenizer, typing allows currently anything. This will be changed in the future
|
|
184
|
-
:param mapping_to_lm_input_func: Function mapping image to layout language model features
|
|
159
|
+
Check if the pipeline component has a predictor
|
|
185
160
|
"""
|
|
161
|
+
if hasattr(self, "predictor"):
|
|
162
|
+
if self.predictor is not None:
|
|
163
|
+
return True
|
|
164
|
+
return False
|
|
186
165
|
|
|
187
|
-
|
|
188
|
-
super().__init__(name)
|
|
189
|
-
self.mapping_to_lm_input_func = mapping_to_lm_input_func
|
|
190
|
-
|
|
191
|
-
@abstractmethod
|
|
192
|
-
def clone(self) -> "LanguageModelPipelineComponent":
|
|
166
|
+
def _undo(self, dp: Image) -> Image:
|
|
193
167
|
"""
|
|
194
|
-
|
|
168
|
+
Undo the processing of the pipeline component. It will remove `ImageAnnotation`, `CategoryAnnotation` and
|
|
169
|
+
`ContainerAnnotation` with the service_id of the pipeline component.
|
|
195
170
|
"""
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
171
|
+
if self.timer_on:
|
|
172
|
+
with timed_operation(self.__class__.__name__):
|
|
173
|
+
self.dp_manager.datapoint = dp
|
|
174
|
+
dp.remove(service_ids=self.service_id)
|
|
175
|
+
else:
|
|
176
|
+
self.dp_manager.datapoint = dp
|
|
177
|
+
dp.remove(service_ids=self.service_id)
|
|
178
|
+
return self.dp_manager.datapoint
|
|
204
179
|
|
|
205
|
-
def
|
|
180
|
+
def undo(self, df: DataFlow) -> DataFlow:
|
|
206
181
|
"""
|
|
207
|
-
|
|
208
|
-
:param transform_predictor: An `ImageTransformer` for image transformation
|
|
209
|
-
"""
|
|
210
|
-
|
|
211
|
-
self.transform_predictor = transform_predictor
|
|
212
|
-
super().__init__(name)
|
|
182
|
+
Mapping a datapoint via `_undo` within a dataflow pipeline
|
|
213
183
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
"""
|
|
217
|
-
Clone an instance
|
|
184
|
+
:param df: An input dataflow of Images
|
|
185
|
+
:return: An output dataflow of Images
|
|
218
186
|
"""
|
|
219
|
-
|
|
187
|
+
return MapData(df, self._undo)
|
|
220
188
|
|
|
221
189
|
|
|
222
190
|
class Pipeline(ABC):
|
|
@@ -262,7 +230,7 @@ class Pipeline(ABC):
|
|
|
262
230
|
df = pipe.analyze(input = "path/to/dir") # session_id is generated automatically
|
|
263
231
|
"""
|
|
264
232
|
|
|
265
|
-
def __init__(self, pipeline_component_list:
|
|
233
|
+
def __init__(self, pipeline_component_list: list[PipelineComponent]) -> None:
|
|
266
234
|
"""
|
|
267
235
|
:param pipeline_component_list: A list of pipeline components.
|
|
268
236
|
"""
|
|
@@ -305,7 +273,7 @@ class Pipeline(ABC):
|
|
|
305
273
|
"""
|
|
306
274
|
raise NotImplementedError()
|
|
307
275
|
|
|
308
|
-
def get_meta_annotation(self) ->
|
|
276
|
+
def get_meta_annotation(self) -> MetaAnnotation:
|
|
309
277
|
"""
|
|
310
278
|
Collects meta annotations from all pipeline components and summarizes the returned results
|
|
311
279
|
|
|
@@ -313,23 +281,24 @@ class Pipeline(ABC):
|
|
|
313
281
|
names and generated sub categories), relationships (dict with category names and generated
|
|
314
282
|
relationships) as well as summaries (list with sub categories)
|
|
315
283
|
"""
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
"summaries": [],
|
|
321
|
-
}
|
|
284
|
+
image_annotations: list[ObjectTypes] = []
|
|
285
|
+
sub_categories = defaultdict(set)
|
|
286
|
+
relationships = defaultdict(set)
|
|
287
|
+
summaries: list[ObjectTypes] = []
|
|
322
288
|
for component in self.pipe_component_list:
|
|
323
|
-
meta_anns =
|
|
324
|
-
|
|
325
|
-
for key, value in meta_anns
|
|
326
|
-
|
|
327
|
-
for key, value in meta_anns
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
289
|
+
meta_anns = component.get_meta_annotation()
|
|
290
|
+
image_annotations.extend(meta_anns.image_annotations)
|
|
291
|
+
for key, value in meta_anns.sub_categories.items():
|
|
292
|
+
sub_categories[key].update(value)
|
|
293
|
+
for key, value in meta_anns.relationships.items():
|
|
294
|
+
relationships[key].update(value)
|
|
295
|
+
summaries.extend(meta_anns.summaries)
|
|
296
|
+
return MetaAnnotation(
|
|
297
|
+
image_annotations=tuple(image_annotations),
|
|
298
|
+
sub_categories=dict(sub_categories),
|
|
299
|
+
relationships=dict(relationships),
|
|
300
|
+
summaries=tuple(summaries),
|
|
301
|
+
)
|
|
333
302
|
|
|
334
303
|
def get_pipeline_info(
|
|
335
304
|
self, service_id: Optional[str] = None, name: Optional[str] = None
|