deltafi 1.1.17__tar.gz → 2.0rc1705013410531__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deltafi might be problematic. Click here for more details.
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/PKG-INFO +1 -1
- deltafi-2.0rc1705013410531/deltafi/action.py +131 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/actiontype.py +0 -6
- deltafi-2.0rc1705013410531/deltafi/input.py +52 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/plugin.py +0 -2
- deltafi-2.0rc1705013410531/deltafi/result.py +229 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/pyproject.toml +1 -1
- deltafi-1.1.17/deltafi/action.py +0 -237
- deltafi-1.1.17/deltafi/input.py +0 -216
- deltafi-1.1.17/deltafi/result.py +0 -474
- deltafi-1.1.17/deltafi/test_kit/format.py +0 -100
- deltafi-1.1.17/deltafi/test_kit/load.py +0 -122
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/README.md +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/__init__.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/actioneventqueue.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/domain.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/exception.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/genericmodel.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/logger.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/metric.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/storage.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/test_kit/__init__.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/test_kit/assertions.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/test_kit/compare_helpers.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/test_kit/constants.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/test_kit/framework.py +0 -0
- {deltafi-1.1.17 → deltafi-2.0rc1705013410531}/deltafi/test_kit/transform.py +0 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#
|
|
2
|
+
# DeltaFi - Data transformation and enrichment platform
|
|
3
|
+
#
|
|
4
|
+
# Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
from abc import ABC, abstractmethod
|
|
20
|
+
from typing import Any, List
|
|
21
|
+
|
|
22
|
+
from deltafi.actiontype import ActionType
|
|
23
|
+
from deltafi.genericmodel import GenericModel
|
|
24
|
+
from deltafi.domain import Context, DeltaFileMessage
|
|
25
|
+
from deltafi.input import EgressInput, TransformInput
|
|
26
|
+
from deltafi.result import *
|
|
27
|
+
from pydantic import BaseModel
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Action(ABC):
    """Common base class for plugin actions.

    A concrete action supplies ``build_input`` (message -> input object) and
    ``execute`` (input -> result).  ``execute_action`` ties the two together
    for a single event and checks the type of the returned result.
    """

    def __init__(self, action_type: ActionType, description: str, valid_result_types: tuple):
        """Record the action type, description and permitted result classes.

        ``valid_result_types`` is handed directly to ``isinstance`` by
        ``validate_type``, so it must be a tuple of classes.
        """
        self.action_type = action_type
        self.description = description
        self.valid_result_types = valid_result_types
        self.action_execution = None

    @abstractmethod
    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build this action's input object from one DeltaFile message."""
        pass

    def collect(self, action_inputs: List[Any]):
        """Combine several inputs into one.

        The base implementation always raises ``RuntimeError``; subclasses
        that support collection override it.
        """
        raise RuntimeError(f"Collect is not supported for {self.__class__.__name__}")

    @abstractmethod
    def execute(self, context: Context, action_input: Any, params: BaseModel):
        """Run the action on ``action_input`` and return a result object."""
        pass

    def execute_action(self, event):
        """Build the input for ``event``, execute the action and return the result.

        When ``event.context.collect`` is set, an input is built for every
        message and merged through ``collect``; otherwise only the first
        message is used.

        Raises
        ------
        RuntimeError
            if the event carries no DeltaFile messages
        ValueError
            if ``execute`` returns an object of a type not listed in
            ``valid_result_types``
        """
        messages = event.delta_file_messages
        if not messages:
            raise RuntimeError(f"Received event with no delta file messages for did {event.context.did}")

        context = event.context
        if context.collect is not None:
            action_input = self.collect([self.build_input(context, message) for message in messages])
        else:
            action_input = self.build_input(context, messages[0])

        # Parameters are validated only after the input has been built, which
        # keeps the original evaluation order of the two steps.
        params = self.param_class().model_validate(event.params)
        result = self.execute(context, action_input, params)

        self.validate_type(result)
        return result

    @staticmethod
    def param_class():
        """Return the model class used to validate this action's parameters.

        The class itself is returned, not an instance: ``execute_action``
        calls ``model_validate`` on it.  Subclasses override this to supply
        their own parameter model.

        Returns
        -------
        type
            the ``GenericModel`` class
        """
        return GenericModel

    def validate_type(self, result):
        """Raise ``ValueError`` unless ``result`` is one of the permitted result types."""
        if isinstance(result, self.valid_result_types):
            return
        allowed_names = [result_type.__name__ for result_type in self.valid_result_types]
        raise ValueError(f"{self.__class__.__name__} must return one of "
                         f"{allowed_names} "
                         f"but a {result.__class__.__name__} was returned")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class EgressAction(Action, ABC):
    """Base class for EGRESS-type actions.

    Implementations override ``egress``; the permitted result types are
    ``EgressResult``, ``ErrorResult`` and ``FilterResult``.
    """

    def __init__(self, description: str):
        allowed_results = (EgressResult, ErrorResult, FilterResult)
        super().__init__(ActionType.EGRESS, description, allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message's first content entry and its metadata in an ``EgressInput``."""
        # Only the first element of the content list is passed on.
        first_content = delta_file_message.content_list[0]
        return EgressInput(content=first_content, metadata=delta_file_message.metadata)

    @abstractmethod
    def egress(self, context: Context, params: BaseModel, egress_input: EgressInput):
        """Perform the egress; implemented by subclasses."""
        pass

    def execute(self, context: Context, egress_input: EgressInput, params: BaseModel):
        """Delegate to ``egress``, which takes ``params`` before the input."""
        outcome = self.egress(context, params, egress_input)
        return outcome
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class TimedIngressAction(Action, ABC):
    """Base class for TIMED_INGRESS-type actions.

    Implementations override ``ingress``; the permitted result types are
    ``IngressResult`` and ``ErrorResult``.
    """

    def __init__(self, description: str):
        allowed_results = (IngressResult, ErrorResult)
        super().__init__(ActionType.TIMED_INGRESS, description, allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Return ``None``: ``ingress`` receives no input object."""
        return None

    @abstractmethod
    def ingress(self, context: Context, params: BaseModel):
        """Perform the ingress; implemented by subclasses."""
        pass

    def execute(self, context: Context, input_placeholder: Any, params: BaseModel):
        """Delegate to ``ingress``; ``input_placeholder`` is ignored."""
        outcome = self.ingress(context, params)
        return outcome
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class TransformAction(Action, ABC):
    """Base class for TRANSFORM-type actions.

    Implementations override ``transform``; the permitted result types are
    ``TransformResult``, ``ErrorResult`` and ``FilterResult``.
    """

    def __init__(self, description: str):
        allowed_results = (TransformResult, ErrorResult, FilterResult)
        super().__init__(ActionType.TRANSFORM, description, allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message's full content list and metadata in a ``TransformInput``."""
        content_list = delta_file_message.content_list
        return TransformInput(content=content_list, metadata=delta_file_message.metadata)

    def collect(self, transform_inputs: List[TransformInput]):
        """Merge several inputs into a single ``TransformInput``.

        Content lists are concatenated in order; metadata dicts are merged
        with ``dict.update``, so later inputs win on duplicate keys.
        """
        merged_content = []
        merged_metadata = {}
        for single_input in transform_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
        return TransformInput(content=merged_content, metadata=merged_metadata)

    @abstractmethod
    def transform(self, context: Context, params: BaseModel, transform_input: TransformInput):
        """Perform the transform; implemented by subclasses."""
        pass

    def execute(self, context: Context, transform_input: TransformInput, params: BaseModel):
        """Delegate to ``transform``, which takes ``params`` before the input."""
        outcome = self.transform(context, params, transform_input)
        return outcome
|
|
@@ -22,11 +22,5 @@ from enum import Enum
|
|
|
22
22
|
class ActionType(Enum):
|
|
23
23
|
TIMED_INGRESS = "TIMED_INGRESS"
|
|
24
24
|
TRANSFORM = "TRANSFORM"
|
|
25
|
-
LOAD = "LOAD"
|
|
26
|
-
DOMAIN = "DOMAIN"
|
|
27
|
-
ENRICH = "ENRICH"
|
|
28
|
-
FORMAT = "FORMAT"
|
|
29
|
-
VALIDATE = "VALIDATE"
|
|
30
25
|
EGRESS = "EGRESS"
|
|
31
|
-
DELETE = "DELETE"
|
|
32
26
|
UNKNOWN = "UNKNOWN"
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#
|
|
2
|
+
# DeltaFi - Data transformation and enrichment platform
|
|
3
|
+
#
|
|
4
|
+
# Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
from deltafi.domain import *
|
|
20
|
+
from deltafi.exception import MissingMetadataException, ExpectedContentException
|
|
21
|
+
|
|
22
|
+
class EgressInput(NamedTuple):
    """Immutable pair of one content object and a metadata dict."""

    # The single content object carried by this input.
    content: Content
    # Metadata dict that accompanies the content.
    metadata: dict
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TransformInput(NamedTuple):
    """Immutable pair of a content list and a metadata dict, with lookup helpers."""

    # Content objects, in order.
    content: List[Content]
    # Metadata dict that accompanies the content.
    metadata: dict

    def has_content(self) -> bool:
        """Return True when the content list is non-empty."""
        return len(self.content) > 0

    def content_at(self, index: int) -> Content:
        """Return the content at ``index``.

        Negative indices count from the end, as with list indexing.

        Raises
        ------
        ExpectedContentException
            if ``index`` falls outside the content list in either direction
        """
        count = len(self.content)
        # Check both bounds so an out-of-range negative index is reported the
        # same way as an out-of-range positive one, not as a bare IndexError.
        if not -count <= index < count:
            raise ExpectedContentException(index, count)
        return self.content[index]

    def first_content(self) -> Content:
        """Return the first content; raises ``ExpectedContentException`` when there is none."""
        return self.content_at(0)

    def get_metadata(self, key: str):
        """Return the metadata value stored under ``key``.

        Raises
        ------
        MissingMetadataException
            if ``key`` is not present in the metadata
        """
        if key not in self.metadata:
            raise MissingMetadataException(key)
        return self.metadata[key]

    def get_metadata_or_else(self, key: str, default: str) -> str:
        """Return the metadata value for ``key``, or ``default`` when the key is absent."""
        return self.metadata.get(key, default)
|
|
@@ -165,8 +165,6 @@ class Plugin(object):
|
|
|
165
165
|
'name': self.action_name(action),
|
|
166
166
|
'description': action.description,
|
|
167
167
|
'type': action.action_type.name,
|
|
168
|
-
'requiresDomains': action.requires_domains,
|
|
169
|
-
'requiresEnrichments': action.requires_enrichments,
|
|
170
168
|
'schema': action.param_class().model_json_schema()
|
|
171
169
|
}
|
|
172
170
|
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
#
|
|
2
|
+
# DeltaFi - Data transformation and enrichment platform
|
|
3
|
+
#
|
|
4
|
+
# Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
import abc
|
|
20
|
+
from enum import Enum
|
|
21
|
+
import uuid
|
|
22
|
+
from typing import Dict, List
|
|
23
|
+
|
|
24
|
+
from deltafi.domain import Content, Context
|
|
25
|
+
from deltafi.metric import Metric
|
|
26
|
+
|
|
27
|
+
ENDPOINT_TAG = "endpoint"
|
|
28
|
+
FILES_OUT = "files_out"
|
|
29
|
+
BYTES_OUT = "bytes_out"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Result:
    """Base type for action results: a key, a type, a context and a list of metrics."""

    # NOTE(review): a class-level ``__metaclass__`` attribute is the Python 2
    # spelling and has no effect on Python 3, so ``response`` is not actually
    # enforced as abstract here.  Left unchanged to preserve behavior.
    __metaclass__ = abc.ABCMeta

    def __init__(self, result_key, result_type, context):
        self.context = context
        self.result_key = result_key
        self.result_type = result_type
        # Metrics accumulated through ``add_metric``.
        self.metrics = []

    @abc.abstractmethod
    def response(self):
        """Return the payload for this result; overridden by subclasses."""
        pass

    def add_metric(self, metric: Metric):
        """Append ``metric`` to this result's metric list."""
        self.metrics.append(metric)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class EgressResult(Result):
    """Result of an egress: carries no payload, only two metrics."""

    def __init__(self, context: Context, destination: str, bytes_egressed: int):
        super().__init__(None, 'EGRESS', context)
        # One file-count metric and one byte-count metric, each tagged with
        # the destination as its endpoint.
        for metric_name, metric_value in ((FILES_OUT, 1), (BYTES_OUT, bytes_egressed)):
            self.add_metric(Metric(metric_name, metric_value, {ENDPOINT_TAG: destination}))

    def response(self):
        """Return ``None``; this result has no response payload."""
        return None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class ErrorResult(Result):
    """Result carrying an error cause, an error context and optional annotations."""

    def __init__(self, context: Context, error_cause: str, error_context: str):
        super().__init__('error', 'ERROR', context)
        self.annotations = {}
        self.error_cause = error_cause
        self.error_context = error_context

    def annotate(self, key: str, value: str):
        """Set annotation ``key`` to ``value``; returns ``self`` for chaining."""
        self.annotations[key] = value
        return self

    def response(self):
        """Return the cause, context and annotations as a dict."""
        payload = {}
        payload['cause'] = self.error_cause
        payload['context'] = self.error_context
        payload['annotations'] = self.annotations
        return payload
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class FilterResult(Result):
    """Result carrying a filter cause, an optional filter context and annotations."""

    def __init__(self, context: Context, filtered_cause: str, filtered_context: str = None):
        super().__init__('filter', 'FILTER', context)
        self.annotations = {}
        self.filtered_cause = filtered_cause
        self.filtered_context = filtered_context

    def annotate(self, key: str, value: str):
        """Set annotation ``key`` to ``value``; returns ``self`` for chaining."""
        self.annotations[key] = value
        return self

    def response(self):
        """Return the cause (under ``'message'``), context and annotations as a dict."""
        payload = {}
        payload['message'] = self.filtered_cause
        payload['context'] = self.filtered_context
        payload['annotations'] = self.annotations
        return payload
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class IngressResultItem:
    """One ingested item: a filename, a generated did, content and metadata.

    All mutators return ``self`` so calls can be chained.
    """

    def __init__(self, context: Context, filename: str):
        self.context = context
        self.filename = filename
        # A fresh random UUID string is generated for every item.
        self._did = str(uuid.uuid4())
        self.content = []
        self.metadata = {}

    @property
    def did(self):
        """The did generated for this item (read-only)."""
        return self._did

    # content can be a single Content or a List[Content]
    def add_content(self, content):
        """Add one content object, or every element of a list of them.

        Falsy values (``None``, an empty list) are ignored.  Any ``list``
        instance, including subclasses, is unpacked element by element.
        """
        if content:
            # isinstance (not an exact type comparison) so list subclasses
            # are unpacked as well instead of being nested as a single entry.
            if isinstance(content, list):
                self.content.extend(content)
            else:
                self.content.append(content)

        return self

    def save_string_content(self, string_data: str, name: str, media_type: str):
        """Store ``string_data`` via the context's content service and add it as content."""
        segment = self.context.content_service.put_str(self.context.did, string_data)
        self.content.append(
            Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
        return self

    def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
        """Store ``byte_data`` via the context's content service and add it as content."""
        segment = self.context.content_service.put_bytes(self.context.did, byte_data)
        self.content.append(
            Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
        return self

    def set_metadata(self, metadata: dict):
        """Replace the metadata dict with ``metadata`` (the same object is kept, not a copy)."""
        self.metadata = metadata
        return self

    def add_metadata(self, key: str, value: str):
        """Set metadata ``key`` to ``value``."""
        self.metadata[key] = value
        return self

    def response(self):
        """Return the did, filename, metadata and each content's ``json()`` as a dict."""
        return {
            'did': self._did,
            'filename': self.filename,
            'metadata': self.metadata,
            'content': [content.json() for content in self.content]
        }
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class IngressStatusEnum(Enum):
    """Status values an ``IngressResult`` can report; each value equals its name."""

    HEALTHY = 'HEALTHY'
    DEGRADED = 'DEGRADED'
    UNHEALTHY = 'UNHEALTHY'
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class IngressResult(Result):
    """Result of a timed ingress: a list of ingested items plus memo and status fields."""

    def __init__(self, context: Context):
        super().__init__('ingress', 'INGRESS', context)
        self.ingress_result_items = []
        self.memo = None
        self.execute_immediate = False
        # Status defaults to HEALTHY with no message.
        self.status = IngressStatusEnum.HEALTHY
        self.statusMessage = None

    def add_item(self, ingress_result_item: IngressResultItem):
        """Append an item to this result; returns ``self`` for chaining."""
        self.ingress_result_items.append(ingress_result_item)
        return self

    def response(self):
        """Return the memo, flags, status and each item's ``response()`` as a dict."""
        item_payloads = [item.response() for item in self.ingress_result_items]
        payload = {}
        payload['memo'] = self.memo
        payload['executeImmediate'] = self.execute_immediate
        payload['ingressItems'] = item_payloads
        payload['status'] = self.status.value
        payload['statusMessage'] = self.statusMessage
        return payload
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class TransformResult(Result):
    """Result of a transform: content, metadata, annotations and metadata keys to delete.

    All mutators return ``self`` so calls can be chained.
    """

    def __init__(self, context: Context):
        super().__init__('transform', 'TRANSFORM', context)
        self.content = []
        self.metadata = {}
        self.annotations = {}
        self.delete_metadata_keys = []

    # content can be a single Content or a List[Content]
    def add_content(self, content):
        """Add one content object, or every element of a list of them.

        Falsy values (``None``, an empty list) are ignored.  Any ``list``
        instance, including subclasses, is unpacked element by element.
        """
        if content:
            # isinstance (not an exact type comparison) so list subclasses
            # are unpacked as well instead of being nested as a single entry.
            if isinstance(content, list):
                self.content.extend(content)
            else:
                self.content.append(content)

        return self

    def save_string_content(self, string_data: str, name: str, media_type: str):
        """Store ``string_data`` via the context's content service and add it as content."""
        segment = self.context.content_service.put_str(self.context.did, string_data)
        self.content.append(
            Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
        return self

    def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
        """Store ``byte_data`` via the context's content service and add it as content."""
        segment = self.context.content_service.put_bytes(self.context.did, byte_data)
        self.content.append(
            Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
        return self

    def set_metadata(self, metadata: dict):
        """Replace the metadata dict with ``metadata`` (the same object is kept, not a copy)."""
        self.metadata = metadata
        return self

    def add_metadata(self, key: str, value: str):
        """Set metadata ``key`` to ``value``."""
        self.metadata[key] = value
        return self

    def annotate(self, key: str, value: str):
        """Set annotation ``key`` to ``value``."""
        self.annotations[key] = value
        return self

    def delete_metadata_key(self, key: str):
        """Record ``key`` in the list of metadata keys to delete."""
        self.delete_metadata_keys.append(key)
        return self

    def response(self):
        """Return content (each via ``json()``), metadata, annotations and keys to delete."""
        return {
            'content': [content.json() for content in self.content],
            'metadata': self.metadata,
            'annotations': self.annotations,
            'deleteMetadataKeys': self.delete_metadata_keys
        }
|
deltafi-1.1.17/deltafi/action.py
DELETED
|
@@ -1,237 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# DeltaFi - Data transformation and enrichment platform
|
|
3
|
-
#
|
|
4
|
-
# Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
|
-
#
|
|
6
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
-
# you may not use this file except in compliance with the License.
|
|
8
|
-
# You may obtain a copy of the License at
|
|
9
|
-
#
|
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
#
|
|
12
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
-
# See the License for the specific language governing permissions and
|
|
16
|
-
# limitations under the License.
|
|
17
|
-
#
|
|
18
|
-
|
|
19
|
-
from abc import ABC, abstractmethod
|
|
20
|
-
from typing import Any, List
|
|
21
|
-
|
|
22
|
-
from deltafi.actiontype import ActionType
|
|
23
|
-
from deltafi.genericmodel import GenericModel
|
|
24
|
-
from deltafi.domain import Context, DeltaFileMessage
|
|
25
|
-
from deltafi.input import DomainInput, EgressInput, EnrichInput, FormatInput, LoadInput, TransformInput, ValidateInput
|
|
26
|
-
from deltafi.result import *
|
|
27
|
-
from pydantic import BaseModel
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class Action(ABC):
    """Common base class for plugin actions.

    A concrete action supplies ``build_input`` (message -> input object) and
    ``execute`` (input -> result).  ``execute_action`` ties the two together
    for a single event and checks the type of the returned result.
    """

    def __init__(self, action_type: ActionType, description: str, requires_domains: List[str],
                 requires_enrichments: List[str], valid_result_types: tuple):
        """Record the action type, description, requirements and permitted result classes.

        ``valid_result_types`` is handed directly to ``isinstance`` by
        ``validate_type``, so it must be a tuple of classes.
        """
        self.action_type = action_type
        self.description = description
        self.requires_domains = requires_domains
        self.requires_enrichments = requires_enrichments
        self.valid_result_types = valid_result_types
        self.action_execution = None

    @abstractmethod
    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build this action's input object from one DeltaFile message."""
        pass

    def collect(self, action_inputs: List[Any]):
        """Combine several inputs into one.

        The base implementation always raises ``RuntimeError``; subclasses
        that support collection override it.
        """
        raise RuntimeError(f"Collect is not supported for {self.__class__.__name__}")

    @abstractmethod
    def execute(self, context: Context, action_input: Any, params: BaseModel):
        """Run the action on ``action_input`` and return a result object."""
        pass

    def execute_action(self, event):
        """Build the input for ``event``, execute the action and return the result.

        When ``event.context.collect`` is set, an input is built for every
        message and merged through ``collect``; otherwise only the first
        message is used.

        Raises
        ------
        RuntimeError
            if the event carries no DeltaFile messages
        ValueError
            if ``execute`` returns an object of a type not listed in
            ``valid_result_types``
        """
        messages = event.delta_file_messages
        if not messages:
            raise RuntimeError(f"Received event with no delta file messages for did {event.context.did}")

        context = event.context
        if context.collect is not None:
            action_input = self.collect([self.build_input(context, message) for message in messages])
        else:
            action_input = self.build_input(context, messages[0])

        # Parameters are validated only after the input has been built, which
        # keeps the original evaluation order of the two steps.
        params = self.param_class().model_validate(event.params)
        result = self.execute(context, action_input, params)

        self.validate_type(result)
        return result

    @staticmethod
    def param_class():
        """Return the model class used to validate this action's parameters.

        The class itself is returned, not an instance: ``execute_action``
        calls ``model_validate`` on it.  Subclasses override this to supply
        their own parameter model.

        Returns
        -------
        type
            the ``GenericModel`` class
        """
        return GenericModel

    def validate_type(self, result):
        """Raise ``ValueError`` unless ``result`` is one of the permitted result types."""
        if isinstance(result, self.valid_result_types):
            return
        allowed_names = [result_type.__name__ for result_type in self.valid_result_types]
        raise ValueError(f"{self.__class__.__name__} must return one of "
                         f"{allowed_names} "
                         f"but a {result.__class__.__name__} was returned")
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
class DomainAction(Action, ABC):
    """Base class for DOMAIN-type actions.

    Implementations override ``domain``; the permitted result types are
    ``DomainResult`` and ``ErrorResult``.
    """

    def __init__(self, description: str, requires_domains: List[str]):
        allowed_results = (DomainResult, ErrorResult)
        # Domain actions declare no required enrichments.
        super().__init__(ActionType.DOMAIN, description, requires_domains, [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build a ``DomainInput`` with the message's domains keyed by name."""
        content_list = delta_file_message.content_list
        metadata = delta_file_message.metadata
        domains_by_name = {entry.name: entry for entry in delta_file_message.domains}
        return DomainInput(content=content_list, metadata=metadata, domains=domains_by_name)

    @abstractmethod
    def domain(self, context: Context, params: BaseModel, domain_input: DomainInput):
        """Perform the domain step; implemented by subclasses."""
        pass

    def execute(self, context: Context, domain_input: DomainInput, params: BaseModel):
        """Delegate to ``domain``, which takes ``params`` before the input."""
        outcome = self.domain(context, params, domain_input)
        return outcome
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
class EgressAction(Action, ABC):
    """Base class for EGRESS-type actions.

    Implementations override ``egress``; the permitted result types are
    ``EgressResult``, ``ErrorResult`` and ``FilterResult``.
    """

    def __init__(self, description: str):
        allowed_results = (EgressResult, ErrorResult, FilterResult)
        # Egress actions declare no required domains or enrichments.
        super().__init__(ActionType.EGRESS, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message's first content entry and its metadata in an ``EgressInput``."""
        # Only the first element of the content list is passed on.
        first_content = delta_file_message.content_list[0]
        return EgressInput(content=first_content, metadata=delta_file_message.metadata)

    @abstractmethod
    def egress(self, context: Context, params: BaseModel, egress_input: EgressInput):
        """Perform the egress; implemented by subclasses."""
        pass

    def execute(self, context: Context, egress_input: EgressInput, params: BaseModel):
        """Delegate to ``egress``, which takes ``params`` before the input."""
        outcome = self.egress(context, params, egress_input)
        return outcome
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
class EnrichAction(Action, ABC):
    """Base class for ENRICH-type actions.

    Implementations override ``enrich``; the permitted result types are
    ``EnrichResult`` and ``ErrorResult``.
    """

    def __init__(self, description: str, requires_domains: List[str], requires_enrichments: List[str]):
        allowed_results = (EnrichResult, ErrorResult)
        super().__init__(ActionType.ENRICH, description, requires_domains, requires_enrichments,
                         allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build an ``EnrichInput`` with domains and enrichments keyed by name."""
        content_list = delta_file_message.content_list
        metadata = delta_file_message.metadata
        domains_by_name = {entry.name: entry for entry in delta_file_message.domains}
        enrichments_by_name = {entry.name: entry for entry in delta_file_message.enrichments}
        return EnrichInput(content=content_list, metadata=metadata,
                           domains=domains_by_name, enrichments=enrichments_by_name)

    @abstractmethod
    def enrich(self, context: Context, params: BaseModel, enrich_input: EnrichInput):
        """Perform the enrichment; implemented by subclasses."""
        pass

    def execute(self, context: Context, enrich_input: EnrichInput, params: BaseModel):
        """Delegate to ``enrich``, which takes ``params`` before the input."""
        outcome = self.enrich(context, params, enrich_input)
        return outcome
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
class FormatAction(Action, ABC):
    """Base class for FORMAT-type actions.

    Implementations override ``format``; the permitted result types are
    ``FormatResult``, ``FormatManyResult``, ``ErrorResult`` and ``FilterResult``.
    """

    def __init__(self, description: str, requires_domains: List[str], requires_enrichments: List[str]):
        allowed_results = (FormatResult, FormatManyResult, ErrorResult, FilterResult)
        super().__init__(ActionType.FORMAT, description, requires_domains, requires_enrichments,
                         allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build a ``FormatInput`` with domains and enrichments keyed by name."""
        content_list = delta_file_message.content_list
        metadata = delta_file_message.metadata
        domains_by_name = {entry.name: entry for entry in delta_file_message.domains}
        enrichments_by_name = {entry.name: entry for entry in delta_file_message.enrichments}
        return FormatInput(content=content_list, metadata=metadata,
                           domains=domains_by_name, enrichments=enrichments_by_name)

    def collect(self, format_inputs: List[FormatInput]):
        """Merge several inputs into a single ``FormatInput``.

        Content lists are concatenated in order; metadata, domains and
        enrichments are merged with ``dict.update``, so later inputs win on
        duplicate keys.
        """
        merged_content = []
        merged_metadata = {}
        merged_domains = {}
        merged_enrichments = {}
        for single_input in format_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
            merged_domains.update(single_input.domains)
            merged_enrichments.update(single_input.enrichments)
        return FormatInput(content=merged_content, metadata=merged_metadata,
                           domains=merged_domains, enrichments=merged_enrichments)

    @abstractmethod
    def format(self, context: Context, params: BaseModel, format_input: FormatInput):
        """Perform the formatting; implemented by subclasses."""
        pass

    def execute(self, context: Context, format_input: FormatInput, params: BaseModel):
        """Delegate to ``format``, which takes ``params`` before the input."""
        outcome = self.format(context, params, format_input)
        return outcome
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
class LoadAction(Action, ABC):
    """Base class for LOAD-type actions.

    Implementations override ``load``; the permitted result types are
    ``LoadResult``, ``LoadManyResult``, ``ErrorResult``, ``FilterResult`` and
    ``ReinjectResult``.
    """

    def __init__(self, description: str):
        allowed_results = (LoadResult, LoadManyResult, ErrorResult, FilterResult, ReinjectResult)
        # Load actions declare no required domains or enrichments.
        super().__init__(ActionType.LOAD, description, [], [],
                         allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message's full content list and metadata in a ``LoadInput``."""
        content_list = delta_file_message.content_list
        return LoadInput(content=content_list, metadata=delta_file_message.metadata)

    def collect(self, load_inputs: List[LoadInput]):
        """Merge several inputs into a single ``LoadInput``.

        Content lists are concatenated in order; metadata dicts are merged
        with ``dict.update``, so later inputs win on duplicate keys.
        """
        merged_content = []
        merged_metadata = {}
        for single_input in load_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
        return LoadInput(content=merged_content, metadata=merged_metadata)

    @abstractmethod
    def load(self, context: Context, params: BaseModel, load_input: LoadInput):
        """Perform the load; implemented by subclasses."""
        pass

    def execute(self, context: Context, load_input: LoadInput, params: BaseModel):
        """Delegate to ``load``, which takes ``params`` before the input."""
        outcome = self.load(context, params, load_input)
        return outcome
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
class TimedIngressAction(Action, ABC):
    """Base class for TIMED_INGRESS-type actions.

    Implementations override ``ingress``; the permitted result types are
    ``IngressResult`` and ``ErrorResult``.
    """

    def __init__(self, description: str):
        allowed_results = (IngressResult, ErrorResult)
        # Timed ingress actions declare no required domains or enrichments.
        super().__init__(ActionType.TIMED_INGRESS, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Return ``None``: ``ingress`` receives no input object."""
        return None

    @abstractmethod
    def ingress(self, context: Context, params: BaseModel):
        """Perform the ingress; implemented by subclasses."""
        pass

    def execute(self, context: Context, input_placeholder: Any, params: BaseModel):
        """Delegate to ``ingress``; ``input_placeholder`` is ignored."""
        outcome = self.ingress(context, params)
        return outcome
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
class TransformAction(Action, ABC):
    """Base class for TRANSFORM-type actions.

    Implementations override ``transform``; the permitted result types are
    ``TransformResult``, ``ErrorResult``, ``FilterResult`` and ``ReinjectResult``.
    """

    def __init__(self, description: str):
        allowed_results = (TransformResult, ErrorResult, FilterResult, ReinjectResult)
        # Transform actions declare no required domains or enrichments.
        super().__init__(ActionType.TRANSFORM, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message's full content list and metadata in a ``TransformInput``."""
        content_list = delta_file_message.content_list
        return TransformInput(content=content_list, metadata=delta_file_message.metadata)

    def collect(self, transform_inputs: List[TransformInput]):
        """Merge several inputs into a single ``TransformInput``.

        Content lists are concatenated in order; metadata dicts are merged
        with ``dict.update``, so later inputs win on duplicate keys.
        """
        merged_content = []
        merged_metadata = {}
        for single_input in transform_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
        return TransformInput(content=merged_content, metadata=merged_metadata)

    @abstractmethod
    def transform(self, context: Context, params: BaseModel, transform_input: TransformInput):
        """Perform the transform; implemented by subclasses."""
        pass

    def execute(self, context: Context, transform_input: TransformInput, params: BaseModel):
        """Delegate to ``transform``, which takes ``params`` before the input."""
        outcome = self.transform(context, params, transform_input)
        return outcome
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
class ValidateAction(Action, ABC):
    """Base class for VALIDATE-type actions.

    Implementations override ``validate``; the permitted result types are
    ``ValidateResult``, ``ErrorResult`` and ``FilterResult``.
    """

    def __init__(self, description: str):
        allowed_results = (ValidateResult, ErrorResult, FilterResult)
        # Validate actions declare no required domains or enrichments.
        super().__init__(ActionType.VALIDATE, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message's first content entry and its metadata in a ``ValidateInput``."""
        # Only the first element of the content list is passed on.
        first_content = delta_file_message.content_list[0]
        return ValidateInput(content=first_content, metadata=delta_file_message.metadata)

    @abstractmethod
    def validate(self, context: Context, params: BaseModel, validate_input: ValidateInput):
        """Perform the validation; implemented by subclasses."""
        pass

    def execute(self, context: Context, validate_input: ValidateInput, params: BaseModel):
        """Delegate to ``validate``, which takes ``params`` before the input."""
        outcome = self.validate(context, params, validate_input)
        return outcome
|