deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltafi/__init__.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -15,3 +15,5 @@
15
15
  # See the License for the specific language governing permissions and
16
16
  # limitations under the License.
17
17
  #
18
+
19
+ #
deltafi/action.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,148 +17,308 @@
17
17
  #
18
18
 
19
19
  from abc import ABC, abstractmethod
20
+ from typing import Any, List
21
+
22
+ from pydantic import BaseModel
20
23
 
21
24
  from deltafi.actiontype import ActionType
22
- from deltafi.domain import Context, DeltaFileMessage
23
- from deltafi.input import DomainInput, EgressInput, EnrichInput, FormatInput, LoadInput, TransformInput, ValidateInput
25
+ from deltafi.domain import DeltaFileMessage
26
+ from deltafi.genericmodel import GenericModel
27
+ from deltafi.input import EgressInput, TransformInput
24
28
  from deltafi.result import *
25
- from pydantic import BaseModel
26
29
 
27
30
 
28
- class Action(ABC):
29
- def __init__(self, action_type: ActionType, description: str, requires_domains: List[str],
30
- requires_enrichments: List[str]):
31
- self.action_type = action_type
32
- self.description = description
33
- self.requires_domains = requires_domains
34
- self.requires_enrichments = requires_enrichments
31
class Join(ABC):
    """Marker base class that supplies a default way to combine inputs.

    TransformAction.execute_join_action checks ``isinstance(self, Join)``
    and, when true, calls join() on the inputs built from every delta file
    message of the event.
    """

    def join(self, transform_inputs: List[TransformInput]):
        """Collapse several TransformInput objects into a single one.

        Content is concatenated in input order. Metadata is merged with
        dict.update, so when the same key occurs in more than one input the
        value from the later input is the one that is kept.
        """
        merged_content = []
        merged_metadata = {}
        for single_input in transform_inputs:
            merged_content.extend(single_input.content)
            merged_metadata.update(single_input.metadata)
        return TransformInput(content=merged_content, metadata=merged_metadata)
35
39
 
36
- @abstractmethod
37
- def execute(self, event):
38
- pass
39
40
 
40
- def param_class(self):
41
- return BaseModel
41
class ContentSpec:
    """Describes one piece of content by name, media type and description.

    Every field is optional and defaults to None.
    """
    name: str
    media_type: str
    description: str

    def __init__(self, name: str = None, media_type: str = None, description: str = None):
        self.name = name
        self.media_type = media_type
        self.description = description

    def json(self):
        """Return a dict holding only the fields that are not None.

        Keys are emitted in the order ``name``, ``mediaType``, ``description``.
        """
        candidates = (
            ('name', self.name),
            ('mediaType', self.media_type),
            ('description', self.description),
        )
        return {key: value for key, value in candidates if value is not None}
49
60
 
50
- class DomainAction(Action):
51
- def __init__(self, description: str, requires_domains: List[str]):
52
- super().__init__(ActionType.DOMAIN, description, requires_domains, [])
53
61
 
54
- def execute(self, event):
55
- domain_input = DomainInput(content=event.delta_file_messages[0].content_list,
56
- metadata=event.delta_file_messages[0].metadata,
57
- domains={domain.name: domain for domain in event.delta_file_messages[0].domains})
58
- result = self.domain(event.context, self.param_class().parse_obj(event.params), domain_input)
59
- self.validate_type(result, (DomainResult, ErrorResult))
60
- return result
62
class KeyedDescription:
    """Pairs a key with a human-readable description of that key."""
    key: str
    description: str

    def __init__(self, key: str, description: str):
        self.key = key
        self.description = description

    def json(self):
        """Return the dict form of this entry.

        ``description`` is always present (even when it is None); ``key`` is
        included, ahead of ``description``, only when it is not None.
        """
        described = {'description': self.description}
        if self.key is None:
            return described
        return {'key': self.key, **described}
76
+
77
+
78
class InputSpec:
    """Describes the content and metadata an action takes as input.

    Every field is optional and defaults to None.
    """
    content_summary: str
    content_specs: List[ContentSpec]
    metadata_summary: str
    metadata_descriptions: List[KeyedDescription]

    def __init__(self, content_summary: str = None, content_specs: List[ContentSpec] = None,
                 metadata_summary: str = None, metadata_descriptions: List[KeyedDescription] = None):
        self.content_summary = content_summary
        self.content_specs = content_specs
        self.metadata_summary = metadata_summary
        self.metadata_descriptions = metadata_descriptions

    def json(self):
        """Return a dict of the fields that are not None.

        Summaries are copied as they are; the spec and description lists are
        replaced by a list of each element's own json() output.
        """
        def unchanged(value):
            return value

        def each_as_json(items):
            return [item.json() for item in items]

        # (output key, current value, converter) in the order the keys are emitted.
        layout = (
            ('contentSummary', self.content_summary, unchanged),
            ('contentSpecs', self.content_specs, each_as_json),
            ('metadataSummary', self.metadata_summary, unchanged),
            ('metadataDescriptions', self.metadata_descriptions, each_as_json),
        )
        return {key: convert(value) for key, value, convert in layout if value is not None}
102
+
103
+
104
class OutputSpec:
    """Describes the content, metadata and annotations an action produces.

    All fields default to None except ``passthrough``, which defaults to False.
    """
    content_summary: str
    content_specs: List[ContentSpec]
    metadata_summary: str
    metadata_descriptions: List[KeyedDescription]
    passthrough: bool
    annotations_summary: str
    annotation_descriptions: List[KeyedDescription]

    def __init__(self, content_summary: str = None, content_specs: List[ContentSpec] = None,
                 metadata_summary: str = None, metadata_descriptions: List[KeyedDescription] = None,
                 passthrough: bool = False, annotations_summary: str = None,
                 annotation_descriptions: List[KeyedDescription] = None):
        self.content_summary = content_summary
        self.content_specs = content_specs
        self.metadata_summary = metadata_summary
        self.metadata_descriptions = metadata_descriptions
        self.passthrough = passthrough
        self.annotations_summary = annotations_summary
        self.annotation_descriptions = annotation_descriptions

    def json(self):
        """Return a dict of the fields that are not None.

        Because ``passthrough`` defaults to False rather than None, it is
        emitted unless a caller explicitly sets it to None. List fields are
        replaced by a list of each element's own json() output.
        """
        def unchanged(value):
            return value

        def each_as_json(items):
            return [item.json() for item in items]

        # (output key, current value, converter) in the order the keys are emitted.
        layout = (
            ('contentSummary', self.content_summary, unchanged),
            ('contentSpecs', self.content_specs, each_as_json),
            ('metadataSummary', self.metadata_summary, unchanged),
            ('metadataDescriptions', self.metadata_descriptions, each_as_json),
            ('passthrough', self.passthrough, unchanged),
            ('annotationsSummary', self.annotations_summary, unchanged),
            ('annotationDescriptions', self.annotation_descriptions, each_as_json),
        )
        return {key: convert(value) for key, value, convert in layout if value is not None}
142
+
143
+
144
class DescriptionWithConditions:
    """A description together with an optional list of condition strings."""
    description: str
    conditions: List[str]

    def __init__(self, description: str = None, conditions: List[str] = None):
        self.description = description
        self.conditions = conditions

    def json(self):
        """Return a dict of the fields that are not None.

        ``conditions`` is emitted as a new list, so the returned value does
        not alias the list stored on this object.
        """
        result = {}
        if self.description is not None:
            result['description'] = self.description
        if self.conditions is not None:
            result['conditions'] = list(self.conditions)
        return result
159
+
160
+
161
class ActionOptions:
    """Descriptive options for an action: description, input/output specs,
    filter and error descriptions, notes and details.

    ``filters`` and ``errors`` default to None at class level and are only
    set on the instance when a non-None argument is supplied.
    """
    description: str
    input_spec: InputSpec
    output_spec: OutputSpec
    filters: List[DescriptionWithConditions] = None
    errors: List[DescriptionWithConditions] = None
    notes: List[str]
    details: str

    def __init__(self, description: str = None, input_spec: InputSpec = None, output_spec: OutputSpec = None,
                 filters: List = None, errors: List = None, notes: List[str] = None, details: str = None):
        """Store the options.

        Entries of ``filters`` and ``errors`` that are not already
        DescriptionWithConditions objects are wrapped in one, with the entry
        used as its description.
        """
        self.description = description
        self.input_spec = input_spec
        self.output_spec = output_spec
        if filters is not None:
            self.filters = [self._with_conditions(entry) for entry in filters]
        if errors is not None:
            self.errors = [self._with_conditions(entry) for entry in errors]
        self.notes = notes
        self.details = details

    @staticmethod
    def _with_conditions(entry):
        """Return entry unchanged if it is a DescriptionWithConditions, else wrap it."""
        if isinstance(entry, DescriptionWithConditions):
            return entry
        return DescriptionWithConditions(description=entry)

    def json(self):
        """Return a dict of the fields that are not None.

        Nested specs and filter/error entries are converted through their own
        json() methods; ``notes`` is emitted as a new list.
        """
        def unchanged(value):
            return value

        def own_json(value):
            return value.json()

        def each_as_json(items):
            return [item.json() for item in items]

        # (output key, current value, converter) in the order the keys are emitted.
        layout = (
            ('description', self.description, unchanged),
            ('inputSpec', self.input_spec, own_json),
            ('outputSpec', self.output_spec, own_json),
            ('filters', self.filters, each_as_json),
            ('errors', self.errors, each_as_json),
            ('notes', self.notes, list),
            ('details', self.details, unchanged),
        )
        return {key: convert(value) for key, value, convert in layout if value is not None}
66
209
 
67
- class EgressAction(Action):
68
- def __init__(self, description: str):
69
- super().__init__(ActionType.EGRESS, description, [], [])
70
210
 
71
- def execute(self, event):
72
- egress_input = EgressInput(content=event.delta_file_messages[0].content_list[0],
73
- metadata=event.delta_file_messages[0].metadata)
74
- result = self.egress(event.context, self.param_class().parse_obj(event.params), egress_input)
75
- self.validate_type(result, (EgressResult, ErrorResult, FilterResult))
76
- return result
211
class Action(ABC):
    """Common base class for the action kinds defined in this module.

    Holds the action type, the descriptive ActionOptions and the tuple of
    result types that an execution is allowed to return.
    """

    def __init__(self, action_type: ActionType, description: str, valid_result_types: tuple,
                 action_options: ActionOptions = None):
        """Store the action configuration.

        When ``action_options`` is None a new ActionOptions carrying only
        ``description`` is created; otherwise the supplied options are used
        and ``description`` is not stored separately.
        """
        self.action_type = action_type
        self.action_options = (ActionOptions(description=description)
                               if action_options is None else action_options)
        self.valid_result_types = valid_result_types

    @abstractmethod
    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build the action-specific input object from one delta file message."""
        pass

    def execute_join_action(self, event):
        """Handle an event whose context carries a join; unsupported by default.

        Raises
        ------
        RuntimeError
            always, unless a subclass overrides this method
        """
        raise RuntimeError(f"Join is not supported for {self.__class__.__name__}")

    @abstractmethod
    def execute(self, context: Context, action_input: Any, params: BaseModel):
        """Run the action on an already built input with validated params."""
        pass

    def execute_action(self, event):
        """Run this action for ``event`` and return the type-checked result.

        Raises
        ------
        RuntimeError
            if the event has no delta file messages
        ValueError
            if the result is not an instance of one of valid_result_types
        """
        messages = event.delta_file_messages
        if messages is None or len(messages) == 0:
            raise RuntimeError(f"Received event with no delta file messages for did {event.context.did}")

        if event.context.join is not None:
            result = self.execute_join_action(event)
        else:
            # Only the first delta file message is used on the non-join path.
            action_input = self.build_input(event.context, event.delta_file_messages[0])
            params = self.param_class().model_validate(event.params)
            result = self.execute(event.context, action_input, params)

        self.validate_type(result)
        return result

    @staticmethod
    def param_class():
        """Return the model class used to validate an event's params.

        All action parameter classes must inherit pydantic.BaseModel.
        Use of complex types in custom action parameter classes must specify
        the internal types when defined. E.g., dict[str, str], or List[str]

        Returns
        -------
        type
            the GenericModel class itself (not an instance); execute_action
            calls ``model_validate`` on the returned class
        """
        return GenericModel

    def validate_type(self, result):
        """Raise ValueError unless ``result`` is one of valid_result_types."""
        if isinstance(result, self.valid_result_types):
            return
        permitted = [allowed.__name__ for allowed in self.valid_result_types]
        raise ValueError(f"{self.__class__.__name__} must return one of {permitted} "
                         f"but a {result.__class__.__name__} was returned")
266
 
119
- class LoadAction(Action):
120
- def __init__(self, description: str):
121
- super().__init__(ActionType.LOAD, description, [], [])
122
267
 
123
- def execute(self, event):
124
- load_input = LoadInput(content=event.delta_file_messages[0].content_list,
125
- metadata=event.delta_file_messages[0].metadata)
126
- result = self.load(event.context, self.param_class().parse_obj(event.params), load_input)
127
- self.validate_type(result, (LoadResult, LoadManyResult, ErrorResult, FilterResult, ReinjectResult))
128
- return result
268
class EgressAction(Action, ABC):
    """Base class for egress actions; subclasses implement egress()."""

    def __init__(self, description: str, action_options: ActionOptions = None):
        """Register as ActionType.EGRESS, allowing EgressResult, ErrorResult or FilterResult."""
        allowed_results = (EgressResult, ErrorResult, FilterResult)
        super().__init__(ActionType.EGRESS, description, allowed_results, action_options)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build an EgressInput from the message.

        The input's content is the first element of the message's
        content_list, or None when that list is None or empty.
        """
        content_list = delta_file_message.content_list
        has_content = content_list is not None and len(content_list) > 0
        first_content = content_list[0] if has_content else None
        return EgressInput(content=first_content, metadata=delta_file_message.metadata)

    @abstractmethod
    def egress(self, context: Context, params: BaseModel, egress_input: EgressInput):
        """Perform the egress; implemented by concrete actions."""
        pass

    def execute(self, context: Context, egress_input: EgressInput, params: BaseModel):
        """Delegate to egress(); note that egress takes params before the input."""
        return self.egress(context, params, egress_input)
134
284
 
135
- class TransformAction(Action):
136
- def __init__(self, description: str):
137
- super().__init__(ActionType.TRANSFORM, description, [], [])
138
285
 
139
- def execute(self, event):
140
- transform_input = TransformInput(content=event.delta_file_messages[0].content_list,
141
- metadata=event.delta_file_messages[0].metadata)
142
- result = self.transform(event.context, self.param_class().parse_obj(event.params), transform_input)
143
- self.validate_type(result, (TransformResult, ErrorResult, FilterResult))
144
- return result
286
class TimedIngressAction(Action, ABC):
    """Base class for timed ingress actions; subclasses implement ingress()."""

    def __init__(self, description: str, action_options: ActionOptions = None):
        """Register as ActionType.TIMED_INGRESS, allowing IngressResult or ErrorResult."""
        allowed_results = (IngressResult, ErrorResult)
        super().__init__(ActionType.TIMED_INGRESS, description, allowed_results, action_options)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Return None: ingress() takes no input object."""
        return None

    @abstractmethod
    def ingress(self, context: Context, params: BaseModel):
        """Perform the ingress; implemented by concrete actions."""
        pass

    def execute(self, context: Context, input_placeholder: Any, params: BaseModel):
        """Delegate to ingress(); ``input_placeholder`` is ignored."""
        return self.ingress(context, params)
150
299
 
151
- class ValidateAction(Action):
152
- def __init__(self, description: str):
153
- super().__init__(ActionType.VALIDATE, description, [], [])
154
300
 
155
- def execute(self, event):
156
- validate_input = ValidateInput(content=event.delta_file_messages[0].content_list[0],
157
- metadata=event.delta_file_messages[0].metadata)
158
- result = self.validate(event.context, self.param_class().parse_obj(event.params), validate_input)
159
- self.validate_type(result, (ValidateResult, ErrorResult, FilterResult))
160
- return result
301
class TransformAction(Action, ABC):
    """Base class for transform actions; subclasses implement transform()."""

    def __init__(self, description: str, action_options: ActionOptions = None):
        """Register as ActionType.TRANSFORM.

        Allowed results are TransformResult, TransformResults, ErrorResult
        and FilterResult.
        """
        allowed_results = (TransformResult, TransformResults, ErrorResult, FilterResult)
        super().__init__(ActionType.TRANSFORM, description, allowed_results, action_options)

    def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
        """Build a TransformInput from the message's content_list and metadata."""
        return TransformInput(content=delta_file_message.content_list, metadata=delta_file_message.metadata)

    def execute_join_action(self, event):
        """Join the inputs of every delta file message, then execute once.

        Only available when the concrete action also derives from Join;
        otherwise the base class implementation is invoked, which raises
        RuntimeError.
        """
        if not isinstance(self, Join):
            super().execute_join_action(event)
            # The base implementation raises; this return mirrors the
            # original else-branch, which produced no value.
            return None

        per_message_inputs = [self.build_input(event.context, message)
                              for message in event.delta_file_messages]
        joined_input = self.join(per_message_inputs)
        params = self.param_class().model_validate(event.params)
        return self.execute(event.context, joined_input, params)

    @abstractmethod
    def transform(self, context: Context, params: BaseModel, transform_input: TransformInput):
        """Perform the transform; implemented by concrete actions."""
        pass

    def execute(self, context: Context, transform_input: TransformInput, params: BaseModel):
        """Delegate to transform(); note that transform takes params before the input."""
        return self.transform(context, params, transform_input)
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,14 +17,19 @@
17
17
  #
18
18
 
19
19
  from datetime import datetime, timezone
20
+ from typing import List
20
21
  from urllib.parse import urlparse
21
22
 
23
+ import json
22
24
  import redis
23
25
  import time
24
26
 
27
+ HEARTBEAT_HASH = "org.deltafi.action-queue.heartbeat"
28
+ LONG_RUNNING_TASKS_HASH = "org.deltafi.action-queue.long-running-tasks"
29
+
25
30
 
26
31
  class ActionEventQueue:
27
- def __init__(self, url, max_connections, password):
32
+ def __init__(self, url, max_connections, password, app_name):
28
33
  parsed = urlparse(url)
29
34
  self.pool = redis.ConnectionPool(
30
35
  max_connections=max_connections,
@@ -32,6 +37,7 @@ class ActionEventQueue:
32
37
  port=parsed.port,
33
38
  password=password)
34
39
  self.connection = None
40
+ self.app_name = app_name
35
41
 
36
42
  def get_connection(self):
37
43
  if self.connection is None:
@@ -49,7 +55,7 @@ class ActionEventQueue:
49
55
  added = conn.zadd(name, {item: now}, nx=True)
50
56
  return added
51
57
 
52
- def take(self, name: str) -> str:
58
+ def take(self, name: List[str]) -> str:
53
59
  conn = self.get_connection()
54
60
  setkey, item, score = conn.bzpopmin(name, 0)
55
61
  return item
@@ -57,4 +63,23 @@ class ActionEventQueue:
57
63
  def heartbeat(self, name: str):
58
64
  conn = self.get_connection()
59
65
  utcnow = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
60
- conn.hset("org.deltafi.action-queue.heartbeat", name, utcnow)
66
+ conn.hset(HEARTBEAT_HASH, name, utcnow)
67
+
68
+ def record_long_running_task(self, action_execution):
69
+ try:
70
+ key = action_execution.key
71
+ start_time = action_execution.start_time.isoformat().replace("+00:00", "Z")
72
+ heartbeat_time = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
73
+ values = [start_time, heartbeat_time]
74
+ if self.app_name is not None:
75
+ values.append(self.app_name)
76
+ value = json.dumps(values)
77
+ conn = self.get_connection()
78
+ conn.hset(LONG_RUNNING_TASKS_HASH, key, value)
79
+ except Exception as e:
80
+ print(f"Unable to convert long running task information to JSON: {str(e)}")
81
+
82
+ def remove_long_running_task(self, action_execution):
83
+ key = action_execution.key
84
+ conn = self.get_connection()
85
+ conn.hdel(LONG_RUNNING_TASKS_HASH, key)
deltafi/actiontype.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -20,13 +20,9 @@ from enum import Enum
20
20
 
21
21
 
22
22
class ActionType(Enum):
    """The kinds of action known to this package.

    Each member's value is the upper-case string equal to its name.
    """
    INGRESS = "INGRESS"
    TIMED_INGRESS = "TIMED_INGRESS"
    TRANSFORM = "TRANSFORM"
    EGRESS = "EGRESS"
    PUBLISH = "PUBLISH"
    UNKNOWN = "UNKNOWN"