deltafi 2.0rc1719271450675__tar.gz → 2.0rc1720728217472__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deltafi might be problematic. Click here for more details.
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/PKG-INFO +5 -5
- deltafi-2.0rc1720728217472/deltafi/action.py +237 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/actiontype.py +6 -2
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/domain.py +52 -56
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/exception.py +10 -0
- deltafi-2.0rc1720728217472/deltafi/input.py +216 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/plugin.py +17 -9
- deltafi-2.0rc1720728217472/deltafi/result.py +474 -0
- deltafi-2.0rc1720728217472/deltafi/test_kit/format.py +105 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/framework.py +50 -27
- deltafi-2.0rc1720728217472/deltafi/test_kit/load.py +128 -0
- deltafi-2.0rc1720728217472/deltafi/test_kit/transform.py +75 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/pyproject.toml +8 -8
- deltafi-2.0rc1719271450675/deltafi/action.py +0 -135
- deltafi-2.0rc1719271450675/deltafi/input.py +0 -52
- deltafi-2.0rc1719271450675/deltafi/result.py +0 -261
- deltafi-2.0rc1719271450675/deltafi/test_kit/transform.py +0 -103
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/README.md +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/__init__.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/actioneventqueue.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/genericmodel.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/logger.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/metric.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/storage.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/__init__.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/assertions.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/compare_helpers.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/constants.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/domain.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/egress.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/enrich.py +0 -0
- {deltafi-2.0rc1719271450675 → deltafi-2.0rc1720728217472}/deltafi/test_kit/validate.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deltafi
|
|
3
|
-
Version: 2.0rc1719271450675
|
|
3
|
+
Version: 2.0rc1720728217472
|
|
4
4
|
Summary: SDK for DeltaFi plugins and actions
|
|
5
5
|
License: Apache License, Version 2.0
|
|
6
6
|
Keywords: deltafi
|
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
20
20
|
Classifier: Topic :: Software Development
|
|
21
21
|
Requires-Dist: deepdiff (>=6.7.1)
|
|
22
22
|
Requires-Dist: json-logging (>=1.3.0)
|
|
23
|
-
Requires-Dist: minio (>=7.2.
|
|
24
|
-
Requires-Dist: pydantic (>=2.
|
|
25
|
-
Requires-Dist: redis (>=5.0.
|
|
23
|
+
Requires-Dist: minio (>=7.2.3)
|
|
24
|
+
Requires-Dist: pydantic (>=2.5.3)
|
|
25
|
+
Requires-Dist: redis (>=5.0.1)
|
|
26
26
|
Requires-Dist: requests (>=2.31.0)
|
|
27
|
-
Requires-Dist: urllib3 (>=2.
|
|
27
|
+
Requires-Dist: urllib3 (>=2.1.0)
|
|
28
28
|
Project-URL: Bug Reports, https://chat.deltafi.org/deltafi/channels/bug-reports
|
|
29
29
|
Project-URL: Documentation, https://docs.deltafi.org/#/
|
|
30
30
|
Project-URL: Source Code, https://gitlab.com/deltafi/deltafi
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
#
|
|
2
|
+
# DeltaFi - Data transformation and enrichment platform
|
|
3
|
+
#
|
|
4
|
+
# Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
from abc import ABC, abstractmethod
|
|
20
|
+
from typing import Any, List
|
|
21
|
+
|
|
22
|
+
from deltafi.actiontype import ActionType
|
|
23
|
+
from deltafi.genericmodel import GenericModel
|
|
24
|
+
from deltafi.domain import Context, DeltaFileMessage
|
|
25
|
+
from deltafi.input import DomainInput, EgressInput, EnrichInput, FormatInput, LoadInput, TransformInput, ValidateInput
|
|
26
|
+
from deltafi.result import *
|
|
27
|
+
from pydantic import BaseModel
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Action(ABC):
    """Common base class of the action types defined in this module.

    Subclasses provide ``build_input`` and ``execute``; ``execute_action``
    drives both for an incoming event and checks the type of the result.
    """

    def __init__(self, action_type: ActionType, description: str, requires_domains: List[str],
                 requires_enrichments: List[str], valid_result_types: tuple):
        """Store the static description of the action.

        Parameters
        ----------
        action_type
            the ActionType of the concrete action
        description
            free-text description of the action
        requires_domains
            names of the domains the action requires
        requires_enrichments
            names of the enrichments the action requires
        valid_result_types
            tuple of result classes ``execute`` is allowed to return
        """
        self.action_type = action_type
        self.description = description
        self.requires_domains = requires_domains
        self.requires_enrichments = requires_enrichments
        self.valid_result_types = valid_result_types
        # Not set by this class; starts out as None.
        self.action_execution = None

    @abstractmethod
    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build the action-specific input object from one delta file message."""
        pass

    def collect(self, action_inputs: List[Any]):
        """Merge several inputs into a single input.

        The base implementation always raises; subclasses that support
        collecting override it.

        Raises
        ------
        RuntimeError
            always, naming the concrete class
        """
        raise RuntimeError(f"Collect is not supported for {self.__class__.__name__}")

    @abstractmethod
    def execute(self, context: Context, action_input: Any, params: BaseModel):
        """Run the action on the prepared input and return its result."""
        pass

    def execute_action(self, event):
        """Build the input for ``event``, execute the action and validate the result.

        When ``event.context.collect`` is set, one input is built per delta
        file message and the inputs are merged with ``collect``; otherwise only
        the first message is used.

        Returns
        -------
        the result returned by ``execute``

        Raises
        ------
        RuntimeError
            if the event carries no delta file messages
        ValueError
            if the result is not one of ``valid_result_types``
        """
        if not event.delta_file_messages:
            raise RuntimeError(f"Received event with no delta file messages for did {event.context.did}")

        if event.context.collect is not None:
            action_input = self.collect([self.build_input(event.context, delta_file_message)
                                         for delta_file_message in event.delta_file_messages])
        else:
            action_input = self.build_input(event.context, event.delta_file_messages[0])

        # Parameters are validated after the input is built, as before.
        params = self.param_class().model_validate(event.params)
        result = self.execute(event.context, action_input, params)

        self.validate_type(result)
        return result

    @staticmethod
    def param_class():
        """Return the model class used to validate the action parameters.

        Returns
        -------
        type
            the GenericModel class itself (not an instance);
            ``execute_action`` calls ``model_validate`` on it
        """
        return GenericModel

    def validate_type(self, result):
        """Check that ``result`` is an instance of one of ``valid_result_types``.

        Raises
        ------
        ValueError
            if the result has any other type
        """
        if not isinstance(result, self.valid_result_types):
            raise ValueError(f"{self.__class__.__name__} must return one of "
                             f"{[result_type.__name__ for result_type in self.valid_result_types]} "
                             f"but a {result.__class__.__name__} was returned")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class DomainAction(Action, ABC):
    """Action of type DOMAIN; the work is delegated to the abstract ``domain`` hook."""

    def __init__(self, description: str, requires_domains: List[str]):
        """Register a DOMAIN action that may return DomainResult or ErrorResult."""
        allowed_results = (DomainResult, ErrorResult)
        super().__init__(ActionType.DOMAIN, description, requires_domains, [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message content, metadata and name-keyed domains in a DomainInput."""
        domains_by_name = {entry.name: entry for entry in delta_file_message.domains}
        return DomainInput(
            content=delta_file_message.content_list,
            metadata=delta_file_message.metadata,
            domains=domains_by_name,
        )

    @abstractmethod
    def domain(self, context: Context, params: BaseModel, domain_input: DomainInput):
        """Hook implemented by concrete domain actions."""
        pass

    def execute(self, context: Context, domain_input: DomainInput, params: BaseModel):
        """Forward to ``domain``; note the hook takes params before the input."""
        return self.domain(context, params, domain_input)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class EgressAction(Action, ABC):
    """Action of type EGRESS; the work is delegated to the abstract ``egress`` hook."""

    def __init__(self, description: str):
        """Register an EGRESS action that may return EgressResult, ErrorResult or FilterResult."""
        allowed_results = (EgressResult, ErrorResult, FilterResult)
        super().__init__(ActionType.EGRESS, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build an EgressInput from the first content item and the metadata."""
        # Only the first entry of the content list is passed on.
        first_item = delta_file_message.content_list[0]
        return EgressInput(content=first_item, metadata=delta_file_message.metadata)

    @abstractmethod
    def egress(self, context: Context, params: BaseModel, egress_input: EgressInput):
        """Hook implemented by concrete egress actions."""
        pass

    def execute(self, context: Context, egress_input: EgressInput, params: BaseModel):
        """Forward to ``egress``; note the hook takes params before the input."""
        return self.egress(context, params, egress_input)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class EnrichAction(Action, ABC):
    """Action of type ENRICH; the work is delegated to the abstract ``enrich`` hook."""

    def __init__(self, description: str, requires_domains: List[str], requires_enrichments: List[str]):
        """Register an ENRICH action that may return EnrichResult or ErrorResult."""
        allowed_results = (EnrichResult, ErrorResult)
        super().__init__(ActionType.ENRICH, description, requires_domains, requires_enrichments,
                         allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap content, metadata and name-keyed domains/enrichments in an EnrichInput."""
        domains_by_name = {entry.name: entry for entry in delta_file_message.domains}
        enrichments_by_name = {entry.name: entry for entry in delta_file_message.enrichments}
        return EnrichInput(
            content=delta_file_message.content_list,
            metadata=delta_file_message.metadata,
            domains=domains_by_name,
            enrichments=enrichments_by_name,
        )

    @abstractmethod
    def enrich(self, context: Context, params: BaseModel, enrich_input: EnrichInput):
        """Hook implemented by concrete enrich actions."""
        pass

    def execute(self, context: Context, enrich_input: EnrichInput, params: BaseModel):
        """Forward to ``enrich``; note the hook takes params before the input."""
        return self.enrich(context, params, enrich_input)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class FormatAction(Action, ABC):
    """Action of type FORMAT; the work is delegated to the abstract ``format`` hook."""

    def __init__(self, description: str, requires_domains: List[str], requires_enrichments: List[str]):
        """Register a FORMAT action and the result types it may return."""
        allowed_results = (FormatResult, FormatManyResult, ErrorResult, FilterResult)
        super().__init__(ActionType.FORMAT, description, requires_domains, requires_enrichments,
                         allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap content, metadata and name-keyed domains/enrichments in a FormatInput."""
        domains_by_name = {entry.name: entry for entry in delta_file_message.domains}
        enrichments_by_name = {entry.name: entry for entry in delta_file_message.enrichments}
        return FormatInput(
            content=delta_file_message.content_list,
            metadata=delta_file_message.metadata,
            domains=domains_by_name,
            enrichments=enrichments_by_name,
        )

    def collect(self, format_inputs: List[FormatInput]):
        """Merge several FormatInputs into one.

        Content lists are concatenated in order; for metadata, domains and
        enrichments a later input overwrites an earlier one on equal keys.
        """
        merged_content = []
        merged_metadata = {}
        merged_domains = {}
        merged_enrichments = {}
        for single_input in format_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
            merged_domains.update(single_input.domains)
            merged_enrichments.update(single_input.enrichments)
        return FormatInput(
            content=merged_content,
            metadata=merged_metadata,
            domains=merged_domains,
            enrichments=merged_enrichments,
        )

    @abstractmethod
    def format(self, context: Context, params: BaseModel, format_input: FormatInput):
        """Hook implemented by concrete format actions."""
        pass

    def execute(self, context: Context, format_input: FormatInput, params: BaseModel):
        """Forward to ``format``; note the hook takes params before the input."""
        return self.format(context, params, format_input)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class LoadAction(Action, ABC):
    """Action of type LOAD; the work is delegated to the abstract ``load`` hook."""

    def __init__(self, description: str):
        """Register a LOAD action and the result types it may return."""
        allowed_results = (LoadResult, LoadManyResult, ErrorResult, FilterResult, ReinjectResult)
        super().__init__(ActionType.LOAD, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message content list and metadata in a LoadInput."""
        return LoadInput(
            content=delta_file_message.content_list,
            metadata=delta_file_message.metadata,
        )

    def collect(self, load_inputs: List[LoadInput]):
        """Merge several LoadInputs: content is concatenated, later metadata keys win."""
        merged_content = []
        merged_metadata = {}
        for single_input in load_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
        return LoadInput(content=merged_content, metadata=merged_metadata)

    @abstractmethod
    def load(self, context: Context, params: BaseModel, load_input: LoadInput):
        """Hook implemented by concrete load actions."""
        pass

    def execute(self, context: Context, load_input: LoadInput, params: BaseModel):
        """Forward to ``load``; note the hook takes params before the input."""
        return self.load(context, params, load_input)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class TimedIngressAction(Action, ABC):
    """Action of type TIMED_INGRESS; the work is delegated to the abstract ``ingress`` hook."""

    def __init__(self, description: str):
        """Register a TIMED_INGRESS action that may return IngressResult or ErrorResult."""
        allowed_results = (IngressResult, ErrorResult)
        super().__init__(ActionType.TIMED_INGRESS, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Return None: this action type builds no input object."""
        return None

    @abstractmethod
    def ingress(self, context: Context, params: BaseModel):
        """Hook implemented by concrete timed ingress actions."""
        pass

    def execute(self, context: Context, input_placeholder: Any, params: BaseModel):
        """Forward to ``ingress``; ``input_placeholder`` is ignored."""
        return self.ingress(context, params)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class TransformAction(Action, ABC):
    """Action of type TRANSFORM; the work is delegated to the abstract ``transform`` hook."""

    def __init__(self, description: str):
        """Register a TRANSFORM action and the result types it may return."""
        allowed_results = (TransformResult, ErrorResult, FilterResult, ReinjectResult)
        super().__init__(ActionType.TRANSFORM, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Wrap the message content list and metadata in a TransformInput."""
        return TransformInput(
            content=delta_file_message.content_list,
            metadata=delta_file_message.metadata,
        )

    def collect(self, transform_inputs: List[TransformInput]):
        """Merge several TransformInputs: content is concatenated, later metadata keys win."""
        merged_content = []
        merged_metadata = {}
        for single_input in transform_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
        return TransformInput(content=merged_content, metadata=merged_metadata)

    @abstractmethod
    def transform(self, context: Context, params: BaseModel, transform_input: TransformInput):
        """Hook implemented by concrete transform actions."""
        pass

    def execute(self, context: Context, transform_input: TransformInput, params: BaseModel):
        """Forward to ``transform``; note the hook takes params before the input."""
        return self.transform(context, params, transform_input)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
class ValidateAction(Action, ABC):
    """Action of type VALIDATE; the work is delegated to the abstract ``validate`` hook."""

    def __init__(self, description: str):
        """Register a VALIDATE action and the result types it may return."""
        allowed_results = (ValidateResult, ErrorResult, FilterResult)
        super().__init__(ActionType.VALIDATE, description, [], [], allowed_results)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build a ValidateInput from the first content item and the metadata."""
        # Only the first entry of the content list is passed on.
        first_item = delta_file_message.content_list[0]
        return ValidateInput(content=first_item, metadata=delta_file_message.metadata)

    @abstractmethod
    def validate(self, context: Context, params: BaseModel, validate_input: ValidateInput):
        """Hook implemented by concrete validate actions."""
        pass

    def execute(self, context: Context, validate_input: ValidateInput, params: BaseModel):
        """Forward to ``validate``; note the hook takes params before the input."""
        return self.validate(context, params, validate_input)
|
|
@@ -20,9 +20,13 @@ from enum import Enum
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class ActionType(Enum):
|
|
23
|
-
INGRESS = "INGRESS"
|
|
24
23
|
TIMED_INGRESS = "TIMED_INGRESS"
|
|
25
24
|
TRANSFORM = "TRANSFORM"
|
|
25
|
+
LOAD = "LOAD"
|
|
26
|
+
DOMAIN = "DOMAIN"
|
|
27
|
+
ENRICH = "ENRICH"
|
|
28
|
+
FORMAT = "FORMAT"
|
|
29
|
+
VALIDATE = "VALIDATE"
|
|
26
30
|
EGRESS = "EGRESS"
|
|
27
|
-
|
|
31
|
+
DELETE = "DELETE"
|
|
28
32
|
UNKNOWN = "UNKNOWN"
|
|
@@ -40,15 +40,13 @@ class ActionExecution(NamedTuple):
|
|
|
40
40
|
|
|
41
41
|
class Context(NamedTuple):
|
|
42
42
|
did: str
|
|
43
|
-
|
|
44
|
-
data_source: str
|
|
45
|
-
flow_name: str
|
|
46
|
-
flow_id: str
|
|
43
|
+
action_flow: str
|
|
47
44
|
action_name: str
|
|
48
|
-
|
|
49
|
-
|
|
45
|
+
source_filename: str
|
|
46
|
+
ingress_flow: str
|
|
47
|
+
egress_flow: str
|
|
48
|
+
system: str
|
|
50
49
|
hostname: str
|
|
51
|
-
system_name: str
|
|
52
50
|
content_service: ContentService
|
|
53
51
|
collect: dict = None
|
|
54
52
|
collected_dids: List[str] = None
|
|
@@ -56,44 +54,21 @@ class Context(NamedTuple):
|
|
|
56
54
|
logger: Logger = None
|
|
57
55
|
|
|
58
56
|
@classmethod
|
|
59
|
-
def create(cls, context: dict, content_service: ContentService, logger: Logger):
|
|
57
|
+
def create(cls, context: dict, hostname: str, content_service: ContentService, logger: Logger):
|
|
60
58
|
did = context['did']
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
action_name_parts = context['name'].split(".")
|
|
60
|
+
action_flow = action_name_parts[0]
|
|
61
|
+
action_name = action_name_parts[1]
|
|
62
|
+
if 'sourceFilename' in context:
|
|
63
|
+
source_filename = context['sourceFilename']
|
|
63
64
|
else:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
65
|
+
source_filename = None
|
|
66
|
+
ingress_flow = context['ingressFlow']
|
|
67
|
+
if 'egressFlow' in context:
|
|
68
|
+
egress_flow = context['egressFlow']
|
|
67
69
|
else:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
flow_name = context['flowName']
|
|
71
|
-
else:
|
|
72
|
-
flow_name = None
|
|
73
|
-
if 'flowId' in context:
|
|
74
|
-
flow_id = context['flowId']
|
|
75
|
-
else:
|
|
76
|
-
flow_id = None
|
|
77
|
-
if 'actionName' in context:
|
|
78
|
-
action_name = context['actionName']
|
|
79
|
-
else:
|
|
80
|
-
action_name = None
|
|
81
|
-
if 'actionId' in context:
|
|
82
|
-
action_id = context['actionId']
|
|
83
|
-
else:
|
|
84
|
-
action_id = None
|
|
85
|
-
if 'actionVersion' in context:
|
|
86
|
-
action_version = context['actionVersion']
|
|
87
|
-
else:
|
|
88
|
-
action_version = None
|
|
89
|
-
if 'hostname' in context:
|
|
90
|
-
hostname = context['hostname']
|
|
91
|
-
else:
|
|
92
|
-
hostname = None
|
|
93
|
-
if 'systemName' in context:
|
|
94
|
-
system_name = context['systemName']
|
|
95
|
-
else:
|
|
96
|
-
system_name = None
|
|
70
|
+
egress_flow = None
|
|
71
|
+
system = context['systemName']
|
|
97
72
|
if 'collect' in context:
|
|
98
73
|
collect = context['collect']
|
|
99
74
|
else:
|
|
@@ -106,21 +81,18 @@ class Context(NamedTuple):
|
|
|
106
81
|
memo = context['memo']
|
|
107
82
|
else:
|
|
108
83
|
memo = None
|
|
109
|
-
|
|
110
84
|
return Context(did=did,
|
|
111
|
-
|
|
112
|
-
data_source=data_source,
|
|
113
|
-
flow_name=flow_name,
|
|
114
|
-
flow_id=flow_id,
|
|
85
|
+
action_flow=action_flow,
|
|
115
86
|
action_name=action_name,
|
|
116
|
-
|
|
117
|
-
|
|
87
|
+
source_filename=source_filename,
|
|
88
|
+
ingress_flow=ingress_flow,
|
|
89
|
+
egress_flow=egress_flow,
|
|
90
|
+
system=system,
|
|
118
91
|
hostname=hostname,
|
|
119
|
-
|
|
92
|
+
content_service=content_service,
|
|
120
93
|
collect=collect,
|
|
121
94
|
collected_dids=collected_dids,
|
|
122
95
|
memo=memo,
|
|
123
|
-
content_service=content_service,
|
|
124
96
|
logger=logger)
|
|
125
97
|
|
|
126
98
|
|
|
@@ -225,6 +197,7 @@ class Content:
|
|
|
225
197
|
|
|
226
198
|
return new_segments
|
|
227
199
|
|
|
200
|
+
|
|
228
201
|
def get_size(self):
|
|
229
202
|
"""
|
|
230
203
|
Returns the size of the content in bytes.
|
|
@@ -323,17 +296,41 @@ class Content:
|
|
|
323
296
|
content_service=content_service)
|
|
324
297
|
|
|
325
298
|
|
|
299
|
+
class Domain(NamedTuple):
    """A named value together with its media type."""
    name: str
    value: str  # None when the source dict has no 'value' key
    media_type: str

    @classmethod
    def from_dict(cls, domain: dict):
        """Build a Domain from a dict with 'name', optional 'value' and 'mediaType' keys.

        Raises
        ------
        KeyError
            if 'name' or 'mediaType' is missing
        """
        return cls(name=domain['name'],
                   value=domain.get('value'),
                   media_type=domain['mediaType'])
|
|
315
|
+
|
|
316
|
+
|
|
326
317
|
class DeltaFileMessage(NamedTuple):
|
|
327
318
|
metadata: Dict[str, str]
|
|
328
319
|
content_list: List[Content]
|
|
320
|
+
domains: List[Domain]
|
|
321
|
+
enrichments: List[Domain]
|
|
329
322
|
|
|
330
323
|
@classmethod
|
|
331
324
|
def from_dict(cls, delta_file_message: dict, content_service: ContentService):
|
|
332
325
|
metadata = delta_file_message['metadata']
|
|
333
326
|
content_list = [Content.from_dict(content, content_service) for content in delta_file_message['contentList']]
|
|
327
|
+
domains = [Domain.from_dict(domain) for domain in delta_file_message['domains']] if 'domains' in delta_file_message else []
|
|
328
|
+
enrichments = [Domain.from_dict(domain) for domain in delta_file_message['enrichments']] if 'enrichments' in delta_file_message else []
|
|
334
329
|
|
|
335
330
|
return DeltaFileMessage(metadata=metadata,
|
|
336
|
-
content_list=content_list
|
|
331
|
+
content_list=content_list,
|
|
332
|
+
domains=domains,
|
|
333
|
+
enrichments=enrichments)
|
|
337
334
|
|
|
338
335
|
|
|
339
336
|
class Event(NamedTuple):
|
|
@@ -344,10 +341,9 @@ class Event(NamedTuple):
|
|
|
344
341
|
return_address: str
|
|
345
342
|
|
|
346
343
|
@classmethod
|
|
347
|
-
def create(cls, event: dict, content_service: ContentService, logger: Logger):
|
|
348
|
-
delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in
|
|
349
|
-
|
|
350
|
-
context = Context.create(event['actionContext'], content_service, logger)
|
|
344
|
+
def create(cls, event: dict, hostname: str, content_service: ContentService, logger: Logger):
|
|
345
|
+
delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in event['deltaFileMessages']]
|
|
346
|
+
context = Context.create(event['actionContext'], hostname, content_service, logger)
|
|
351
347
|
params = event['actionParams']
|
|
352
348
|
queue_name = None
|
|
353
349
|
if 'queueName' in event:
|
|
@@ -23,6 +23,16 @@ class ExpectedContentException(RuntimeError):
|
|
|
23
23
|
self.size = size
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
class MissingDomainException(RuntimeError):
    """Error carrying the name of a domain that was not found.

    Attributes
    ----------
    name
        the domain name passed to the constructor
    """

    def __init__(self, name):
        # Initialise the base class explicitly so args/str() carry the name.
        super().__init__(name)
        self.name = name
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class MissingEnrichmentException(RuntimeError):
    """Error carrying the name of an enrichment that was not found.

    Attributes
    ----------
    name
        the enrichment name passed to the constructor
    """

    def __init__(self, name):
        # Initialise the base class explicitly so args/str() carry the name.
        super().__init__(name)
        self.name = name
|
|
34
|
+
|
|
35
|
+
|
|
26
36
|
class MissingMetadataException(RuntimeError):
|
|
27
37
|
def __init__(self, key):
|
|
28
38
|
self.key = key
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#
|
|
2
|
+
# DeltaFi - Data transformation and enrichment platform
|
|
3
|
+
#
|
|
4
|
+
# Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
from deltafi.domain import *
|
|
20
|
+
from deltafi.exception import MissingMetadataException, ExpectedContentException, MissingDomainException, \
|
|
21
|
+
MissingEnrichmentException
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DomainInput(NamedTuple):
    """Content list, metadata and name-keyed domains, with lookup helpers."""
    content: List[Content]
    metadata: Dict[str, str]
    domains: Dict[str, Domain]

    def has_content(self) -> bool:
        """Return True when the content list is not empty."""
        return len(self.content) >= 1

    def content_at(self, index: int) -> Content:
        """Return the content at ``index``.

        Raises ExpectedContentException when ``index`` is at or past the end
        of the list; negative indexes go straight to the list lookup.
        """
        available = len(self.content)
        if index >= available:
            raise ExpectedContentException(index, available)
        return self.content[index]

    def first_content(self):
        """Return the content at index 0 (see ``content_at``)."""
        return self.content_at(0)

    def get_metadata(self, key: str):
        """Return the metadata value for ``key`` or raise MissingMetadataException."""
        if key not in self.metadata:
            raise MissingMetadataException(key)
        return self.metadata[key]

    def get_metadata_or_else(self, key: str, default: str) -> str:
        """Return the metadata value for ``key``, or ``default`` when absent."""
        return self.metadata[key] if key in self.metadata else default

    def has_domain(self, name: str) -> bool:
        """Return True when a domain called ``name`` is present."""
        return name in self.domains

    def domain(self, name: str) -> Domain:
        """Return the domain called ``name`` or raise MissingDomainException."""
        if self.has_domain(name):
            return self.domains[name]
        raise MissingDomainException(name)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class EgressInput(NamedTuple):
    """A single content item plus a metadata dict."""
    content: Content  # one Content, not a list
    metadata: dict
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class EnrichInput(NamedTuple):
    """Content list, metadata and name-keyed domains/enrichments, with lookup helpers."""
    content: List[Content]
    metadata: dict
    domains: Dict[str, Domain]
    enrichments: Dict[str, Domain]

    def has_content(self) -> bool:
        """Return True when the content list is not empty."""
        return len(self.content) >= 1

    def content_at(self, index: int) -> Content:
        """Return the content at ``index``.

        Raises ExpectedContentException when ``index`` is at or past the end
        of the list; negative indexes go straight to the list lookup.
        """
        available = len(self.content)
        if index >= available:
            raise ExpectedContentException(index, available)
        return self.content[index]

    def first_content(self):
        """Return the content at index 0 (see ``content_at``)."""
        return self.content_at(0)

    def get_metadata(self, key: str):
        """Return the metadata value for ``key`` or raise MissingMetadataException."""
        if key not in self.metadata:
            raise MissingMetadataException(key)
        return self.metadata[key]

    def get_metadata_or_else(self, key: str, default: str) -> str:
        """Return the metadata value for ``key``, or ``default`` when absent."""
        return self.metadata[key] if key in self.metadata else default

    def has_domain(self, name: str) -> bool:
        """Return True when a domain called ``name`` is present."""
        return name in self.domains

    def domain(self, name: str) -> Domain:
        """Return the domain called ``name`` or raise MissingDomainException."""
        if self.has_domain(name):
            return self.domains[name]
        raise MissingDomainException(name)

    def has_enrichment(self, name: str) -> bool:
        """Return True when an enrichment called ``name`` is present."""
        return name in self.enrichments

    def enrichment(self, name: str) -> Domain:
        """Return the enrichment called ``name`` or raise MissingEnrichmentException."""
        if self.has_enrichment(name):
            return self.enrichments[name]
        raise MissingEnrichmentException(name)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class FormatInput(NamedTuple):
    """Content list, metadata and name-keyed domains/enrichments, with lookup helpers."""
    content: List[Content]
    metadata: dict
    domains: Dict[str, Domain]
    enrichments: Dict[str, Domain]

    def has_content(self) -> bool:
        """Return True when the content list is not empty."""
        return len(self.content) >= 1

    def content_at(self, index: int) -> Content:
        """Return the content at ``index``.

        Raises ExpectedContentException when ``index`` is at or past the end
        of the list; negative indexes go straight to the list lookup.
        """
        available = len(self.content)
        if index >= available:
            raise ExpectedContentException(index, available)
        return self.content[index]

    def first_content(self):
        """Return the content at index 0 (see ``content_at``)."""
        return self.content_at(0)

    def get_metadata(self, key: str):
        """Return the metadata value for ``key`` or raise MissingMetadataException."""
        if key not in self.metadata:
            raise MissingMetadataException(key)
        return self.metadata[key]

    def get_metadata_or_else(self, key: str, default: str) -> str:
        """Return the metadata value for ``key``, or ``default`` when absent."""
        return self.metadata[key] if key in self.metadata else default

    def has_domain(self, name: str) -> bool:
        """Return True when a domain called ``name`` is present."""
        return name in self.domains

    def domain(self, name: str) -> Domain:
        """Return the domain called ``name`` or raise MissingDomainException."""
        if self.has_domain(name):
            return self.domains[name]
        raise MissingDomainException(name)

    def has_enrichment(self, name: str) -> bool:
        """Return True when an enrichment called ``name`` is present."""
        return name in self.enrichments

    def enrichment(self, name: str) -> Domain:
        """Return the enrichment called ``name`` or raise MissingEnrichmentException."""
        if self.has_enrichment(name):
            return self.enrichments[name]
        raise MissingEnrichmentException(name)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class LoadInput(NamedTuple):
    """Content list and metadata, with lookup helpers."""
    content: List[Content]
    metadata: dict

    def has_content(self) -> bool:
        """Return True when the content list is not empty."""
        return len(self.content) >= 1

    def content_at(self, index: int) -> Content:
        """Return the content at ``index``.

        Raises ExpectedContentException when ``index`` is at or past the end
        of the list; negative indexes go straight to the list lookup.
        """
        available = len(self.content)
        if index >= available:
            raise ExpectedContentException(index, available)
        return self.content[index]

    def first_content(self):
        """Return the content at index 0 (see ``content_at``)."""
        return self.content_at(0)

    def get_metadata(self, key: str):
        """Return the metadata value for ``key`` or raise MissingMetadataException."""
        if key not in self.metadata:
            raise MissingMetadataException(key)
        return self.metadata[key]

    def get_metadata_or_else(self, key: str, default: str) -> str:
        """Return the metadata value for ``key``, or ``default`` when absent."""
        return self.metadata[key] if key in self.metadata else default
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class TransformInput(NamedTuple):
    """Content list and metadata, with lookup helpers."""
    content: List[Content]
    metadata: dict

    def has_content(self) -> bool:
        """Return True when the content list is not empty."""
        return len(self.content) >= 1

    def content_at(self, index: int) -> Content:
        """Return the content at ``index``.

        Raises ExpectedContentException when ``index`` is at or past the end
        of the list; negative indexes go straight to the list lookup.
        """
        available = len(self.content)
        if index >= available:
            raise ExpectedContentException(index, available)
        return self.content[index]

    def first_content(self):
        """Return the content at index 0 (see ``content_at``)."""
        return self.content_at(0)

    def get_metadata(self, key: str):
        """Return the metadata value for ``key`` or raise MissingMetadataException."""
        if key not in self.metadata:
            raise MissingMetadataException(key)
        return self.metadata[key]

    def get_metadata_or_else(self, key: str, default: str) -> str:
        """Return the metadata value for ``key``, or ``default`` when absent."""
        return self.metadata[key] if key in self.metadata else default
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class ValidateInput(NamedTuple):
    """A single content item plus a metadata dict."""
    content: Content  # one Content, not a list
    metadata: dict
|