deltafi 2.0rc1720728217472__tar.gz → 2.0rc1720817063181__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deltafi might be problematic. Click here for more details.

Files changed (32)
  1. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/PKG-INFO +5 -5
  2. deltafi-2.0rc1720817063181/deltafi/action.py +135 -0
  3. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/actiontype.py +2 -6
  4. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/domain.py +66 -62
  5. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/exception.py +0 -10
  6. deltafi-2.0rc1720817063181/deltafi/input.py +52 -0
  7. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/plugin.py +9 -17
  8. deltafi-2.0rc1720817063181/deltafi/result.py +261 -0
  9. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/test_kit/framework.py +69 -66
  10. deltafi-2.0rc1720817063181/deltafi/test_kit/transform.py +103 -0
  11. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/pyproject.toml +8 -8
  12. deltafi-2.0rc1720728217472/deltafi/action.py +0 -237
  13. deltafi-2.0rc1720728217472/deltafi/input.py +0 -216
  14. deltafi-2.0rc1720728217472/deltafi/result.py +0 -474
  15. deltafi-2.0rc1720728217472/deltafi/test_kit/domain.py +0 -59
  16. deltafi-2.0rc1720728217472/deltafi/test_kit/enrich.py +0 -70
  17. deltafi-2.0rc1720728217472/deltafi/test_kit/format.py +0 -105
  18. deltafi-2.0rc1720728217472/deltafi/test_kit/load.py +0 -128
  19. deltafi-2.0rc1720728217472/deltafi/test_kit/transform.py +0 -75
  20. deltafi-2.0rc1720728217472/deltafi/test_kit/validate.py +0 -54
  21. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/README.md +0 -0
  22. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/__init__.py +0 -0
  23. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/actioneventqueue.py +0 -0
  24. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/genericmodel.py +0 -0
  25. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/logger.py +0 -0
  26. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/metric.py +0 -0
  27. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/storage.py +0 -0
  28. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/test_kit/__init__.py +0 -0
  29. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/test_kit/assertions.py +0 -0
  30. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/test_kit/compare_helpers.py +0 -0
  31. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/test_kit/constants.py +0 -0
  32. {deltafi-2.0rc1720728217472 → deltafi-2.0rc1720817063181}/deltafi/test_kit/egress.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltafi
3
- Version: 2.0rc1720728217472
3
+ Version: 2.0rc1720817063181
4
4
  Summary: SDK for DeltaFi plugins and actions
5
5
  License: Apache License, Version 2.0
6
6
  Keywords: deltafi
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Topic :: Software Development
21
21
  Requires-Dist: deepdiff (>=6.7.1)
22
22
  Requires-Dist: json-logging (>=1.3.0)
23
- Requires-Dist: minio (>=7.2.3)
24
- Requires-Dist: pydantic (>=2.5.3)
25
- Requires-Dist: redis (>=5.0.1)
23
+ Requires-Dist: minio (>=7.2.5)
24
+ Requires-Dist: pydantic (>=2.7.1)
25
+ Requires-Dist: redis (>=5.0.4)
26
26
  Requires-Dist: requests (>=2.31.0)
27
- Requires-Dist: urllib3 (>=2.1.0)
27
+ Requires-Dist: urllib3 (>=2.2.1)
28
28
  Project-URL: Bug Reports, https://chat.deltafi.org/deltafi/channels/bug-reports
29
29
  Project-URL: Documentation, https://docs.deltafi.org/#/
30
30
  Project-URL: Source Code, https://gitlab.com/deltafi/deltafi
@@ -0,0 +1,135 @@
1
+ #
2
+ # DeltaFi - Data transformation and enrichment platform
3
+ #
4
+ # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+
19
+ from abc import ABC, abstractmethod
20
+ from typing import Any, List
21
+
22
+ from deltafi.actiontype import ActionType
23
+ from deltafi.domain import Context, DeltaFileMessage
24
+ from deltafi.genericmodel import GenericModel
25
+ from deltafi.input import EgressInput, TransformInput
26
+ from deltafi.result import *
27
+ from pydantic import BaseModel
28
+
29
+
30
+ class Action(ABC):
31
+ def __init__(self, action_type: ActionType, description: str, valid_result_types: tuple):
32
+ self.action_type = action_type
33
+ self.description = description
34
+ self.valid_result_types = valid_result_types
35
+ self.action_execution = None
36
+
37
+ @abstractmethod
38
+ def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
39
+ pass
40
+
41
+ def join(self, action_inputs: List[Any]):
42
+ raise RuntimeError(f"Join is not supported for {self.__class__.__name__}")
43
+
44
+ @abstractmethod
45
+ def execute(self, context: Context, action_input: Any, params: BaseModel):
46
+ pass
47
+
48
+ def execute_action(self, event):
49
+ if event.delta_file_messages is None or not len(event.delta_file_messages):
50
+ raise RuntimeError(f"Received event with no delta file messages for did {event.context.did}")
51
+ if event.context.join is not None:
52
+ result = self.execute(
53
+ event.context,
54
+ self.join([self.build_input(event.context, delta_file_message)
55
+ for delta_file_message in event.delta_file_messages]),
56
+ self.param_class().model_validate(event.params))
57
+ else:
58
+ result = self.execute(
59
+ event.context,
60
+ self.build_input(event.context, event.delta_file_messages[0]),
61
+ self.param_class().model_validate(event.params))
62
+
63
+ self.validate_type(result)
64
+ return result
65
+
66
+ @staticmethod
67
+ def param_class():
68
+ """Factory method to create and return an empty GenericModel instance.
69
+
70
+ Returns
71
+ -------
72
+ GenericModel
73
+ an empty GenericModel instance
74
+ """
75
+ return GenericModel
76
+
77
+ def validate_type(self, result):
78
+ if not isinstance(result, self.valid_result_types):
79
+ raise ValueError(f"{self.__class__.__name__} must return one of "
80
+ f"{[result_type.__name__ for result_type in self.valid_result_types]} "
81
+ f"but a {result.__class__.__name__} was returned")
82
+
83
+
84
+ class EgressAction(Action, ABC):
85
+ def __init__(self, description: str):
86
+ super().__init__(ActionType.EGRESS, description, (EgressResult, ErrorResult, FilterResult))
87
+
88
+ def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
89
+ return EgressInput(content=delta_file_message.content_list[0], metadata=delta_file_message.metadata)
90
+
91
+ @abstractmethod
92
+ def egress(self, context: Context, params: BaseModel, egress_input: EgressInput):
93
+ pass
94
+
95
+ def execute(self, context: Context, egress_input: EgressInput, params: BaseModel):
96
+ return self.egress(context, params, egress_input)
97
+
98
+
99
+ class TimedIngressAction(Action, ABC):
100
+ def __init__(self, description: str):
101
+ super().__init__(ActionType.TIMED_INGRESS, description, (IngressResult, ErrorResult))
102
+
103
+ def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
104
+ return None
105
+
106
+ @abstractmethod
107
+ def ingress(self, context: Context, params: BaseModel):
108
+ pass
109
+
110
+ def execute(self, context: Context, input_placeholder: Any, params: BaseModel):
111
+ return self.ingress(context, params)
112
+
113
+
114
+ class TransformAction(Action, ABC):
115
+ def __init__(self, description: str):
116
+ super().__init__(ActionType.TRANSFORM, description,
117
+ (TransformResult, TransformResults, ErrorResult, FilterResult))
118
+
119
+ def build_input(self, context: Context, delta_file_message: DeltaFileMessage):
120
+ return TransformInput(content=delta_file_message.content_list, metadata=delta_file_message.metadata)
121
+
122
+ def join(self, transform_inputs: List[TransformInput]):
123
+ all_content = []
124
+ all_metadata = {}
125
+ for transform_input in transform_inputs:
126
+ all_content += transform_input.content
127
+ all_metadata.update(transform_input.metadata)
128
+ return TransformInput(content=all_content, metadata=all_metadata)
129
+
130
+ @abstractmethod
131
+ def transform(self, context: Context, params: BaseModel, transform_input: TransformInput):
132
+ pass
133
+
134
+ def execute(self, context: Context, transform_input: TransformInput, params: BaseModel):
135
+ return self.transform(context, params, transform_input)
@@ -20,13 +20,9 @@ from enum import Enum
20
20
 
21
21
 
22
22
  class ActionType(Enum):
23
+ INGRESS = "INGRESS"
23
24
  TIMED_INGRESS = "TIMED_INGRESS"
24
25
  TRANSFORM = "TRANSFORM"
25
- LOAD = "LOAD"
26
- DOMAIN = "DOMAIN"
27
- ENRICH = "ENRICH"
28
- FORMAT = "FORMAT"
29
- VALIDATE = "VALIDATE"
30
26
  EGRESS = "EGRESS"
31
- DELETE = "DELETE"
27
+ PUBLISH = "PUBLISH"
32
28
  UNKNOWN = "UNKNOWN"
@@ -40,59 +40,87 @@ class ActionExecution(NamedTuple):
40
40
 
41
41
  class Context(NamedTuple):
42
42
  did: str
43
- action_flow: str
43
+ delta_file_name: str
44
+ data_source: str
45
+ flow_name: str
46
+ flow_id: str
44
47
  action_name: str
45
- source_filename: str
46
- ingress_flow: str
47
- egress_flow: str
48
- system: str
48
+ action_id: str
49
+ action_version: str
49
50
  hostname: str
51
+ system_name: str
50
52
  content_service: ContentService
51
- collect: dict = None
52
- collected_dids: List[str] = None
53
+ join: dict = None
54
+ joined_dids: List[str] = None
53
55
  memo: str = None
54
56
  logger: Logger = None
55
57
 
56
58
  @classmethod
57
- def create(cls, context: dict, hostname: str, content_service: ContentService, logger: Logger):
59
+ def create(cls, context: dict, content_service: ContentService, logger: Logger):
58
60
  did = context['did']
59
- action_name_parts = context['name'].split(".")
60
- action_flow = action_name_parts[0]
61
- action_name = action_name_parts[1]
62
- if 'sourceFilename' in context:
63
- source_filename = context['sourceFilename']
61
+ if 'deltaFileName' in context:
62
+ delta_file_name = context['deltaFileName']
64
63
  else:
65
- source_filename = None
66
- ingress_flow = context['ingressFlow']
67
- if 'egressFlow' in context:
68
- egress_flow = context['egressFlow']
64
+ delta_file_name = None
65
+ if 'dataSource' in context:
66
+ data_source = context['dataSource']
69
67
  else:
70
- egress_flow = None
71
- system = context['systemName']
72
- if 'collect' in context:
73
- collect = context['collect']
68
+ data_source = None
69
+ if 'flowName' in context:
70
+ flow_name = context['flowName']
74
71
  else:
75
- collect = None
76
- if 'collectedDids' in context:
77
- collected_dids = context['collectedDids']
72
+ flow_name = None
73
+ if 'flowId' in context:
74
+ flow_id = context['flowId']
78
75
  else:
79
- collected_dids = None
76
+ flow_id = None
77
+ if 'actionName' in context:
78
+ action_name = context['actionName']
79
+ else:
80
+ action_name = None
81
+ if 'actionId' in context:
82
+ action_id = context['actionId']
83
+ else:
84
+ action_id = None
85
+ if 'actionVersion' in context:
86
+ action_version = context['actionVersion']
87
+ else:
88
+ action_version = None
89
+ if 'hostname' in context:
90
+ hostname = context['hostname']
91
+ else:
92
+ hostname = None
93
+ if 'systemName' in context:
94
+ system_name = context['systemName']
95
+ else:
96
+ system_name = None
97
+ if 'join' in context:
98
+ join = context['join']
99
+ else:
100
+ join = None
101
+ if 'joinedDids' in context:
102
+ joined_dids = context['joinedDids']
103
+ else:
104
+ joined_dids = None
80
105
  if 'memo' in context:
81
106
  memo = context['memo']
82
107
  else:
83
108
  memo = None
109
+
84
110
  return Context(did=did,
85
- action_flow=action_flow,
111
+ delta_file_name=delta_file_name,
112
+ data_source=data_source,
113
+ flow_name=flow_name,
114
+ flow_id=flow_id,
86
115
  action_name=action_name,
87
- source_filename=source_filename,
88
- ingress_flow=ingress_flow,
89
- egress_flow=egress_flow,
90
- system=system,
116
+ action_id=action_id,
117
+ action_version=action_version,
91
118
  hostname=hostname,
92
- content_service=content_service,
93
- collect=collect,
94
- collected_dids=collected_dids,
119
+ system_name=system_name,
120
+ join=join,
121
+ joined_dids=joined_dids,
95
122
  memo=memo,
123
+ content_service=content_service,
96
124
  logger=logger)
97
125
 
98
126
 
@@ -197,7 +225,6 @@ class Content:
197
225
 
198
226
  return new_segments
199
227
 
200
-
201
228
  def get_size(self):
202
229
  """
203
230
  Returns the size of the content in bytes.
@@ -296,41 +323,17 @@ class Content:
296
323
  content_service=content_service)
297
324
 
298
325
 
299
- class Domain(NamedTuple):
300
- name: str
301
- value: str
302
- media_type: str
303
-
304
- @classmethod
305
- def from_dict(cls, domain: dict):
306
- name = domain['name']
307
- if 'value' in domain:
308
- value = domain['value']
309
- else:
310
- value = None
311
- media_type = domain['mediaType']
312
- return Domain(name=name,
313
- value=value,
314
- media_type=media_type)
315
-
316
-
317
326
  class DeltaFileMessage(NamedTuple):
318
327
  metadata: Dict[str, str]
319
328
  content_list: List[Content]
320
- domains: List[Domain]
321
- enrichments: List[Domain]
322
329
 
323
330
  @classmethod
324
331
  def from_dict(cls, delta_file_message: dict, content_service: ContentService):
325
332
  metadata = delta_file_message['metadata']
326
333
  content_list = [Content.from_dict(content, content_service) for content in delta_file_message['contentList']]
327
- domains = [Domain.from_dict(domain) for domain in delta_file_message['domains']] if 'domains' in delta_file_message else []
328
- enrichments = [Domain.from_dict(domain) for domain in delta_file_message['enrichments']] if 'enrichments' in delta_file_message else []
329
334
 
330
335
  return DeltaFileMessage(metadata=metadata,
331
- content_list=content_list,
332
- domains=domains,
333
- enrichments=enrichments)
336
+ content_list=content_list)
334
337
 
335
338
 
336
339
  class Event(NamedTuple):
@@ -341,9 +344,10 @@ class Event(NamedTuple):
341
344
  return_address: str
342
345
 
343
346
  @classmethod
344
- def create(cls, event: dict, hostname: str, content_service: ContentService, logger: Logger):
345
- delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in event['deltaFileMessages']]
346
- context = Context.create(event['actionContext'], hostname, content_service, logger)
347
+ def create(cls, event: dict, content_service: ContentService, logger: Logger):
348
+ delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in
349
+ event['deltaFileMessages']]
350
+ context = Context.create(event['actionContext'], content_service, logger)
347
351
  params = event['actionParams']
348
352
  queue_name = None
349
353
  if 'queueName' in event:
@@ -23,16 +23,6 @@ class ExpectedContentException(RuntimeError):
23
23
  self.size = size
24
24
 
25
25
 
26
- class MissingDomainException(RuntimeError):
27
- def __init__(self, name):
28
- self.name = name
29
-
30
-
31
- class MissingEnrichmentException(RuntimeError):
32
- def __init__(self, name):
33
- self.name = name
34
-
35
-
36
26
  class MissingMetadataException(RuntimeError):
37
27
  def __init__(self, key):
38
28
  self.key = key
@@ -0,0 +1,52 @@
1
+ #
2
+ # DeltaFi - Data transformation and enrichment platform
3
+ #
4
+ # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+
19
+ from deltafi.domain import *
20
+ from deltafi.exception import MissingMetadataException, ExpectedContentException
21
+
22
+ class EgressInput(NamedTuple):
23
+ content: Content
24
+ metadata: dict
25
+
26
+
27
+ class TransformInput(NamedTuple):
28
+ content: List[Content]
29
+ metadata: dict
30
+
31
+ def has_content(self) -> bool:
32
+ return len(self.content) > 0
33
+
34
+ def content_at(self, index: int) -> Content:
35
+ if len(self.content) < index + 1:
36
+ raise ExpectedContentException(index, len(self.content))
37
+ return self.content[index]
38
+
39
+ def first_content(self):
40
+ return self.content_at(0)
41
+
42
+ def get_metadata(self, key: str):
43
+ if key in self.metadata:
44
+ return self.metadata[key]
45
+ else:
46
+ raise MissingMetadataException(key)
47
+
48
+ def get_metadata_or_else(self, key: str, default: str) -> str:
49
+ if key in self.metadata:
50
+ return self.metadata[key]
51
+ else:
52
+ return default
@@ -34,8 +34,7 @@ from importlib import metadata
34
34
  import requests
35
35
  from deltafi.actioneventqueue import ActionEventQueue
36
36
  from deltafi.domain import Event, ActionExecution
37
- from deltafi.exception import ExpectedContentException, MissingDomainException, MissingEnrichmentException, \
38
- MissingMetadataException
37
+ from deltafi.exception import ExpectedContentException, MissingMetadataException
39
38
  from deltafi.logger import get_logger
40
39
  from deltafi.result import ErrorResult
41
40
  from deltafi.storage import ContentService
@@ -47,9 +46,9 @@ def _coordinates():
47
46
 
48
47
 
49
48
  def _setup_queue(max_connections):
50
- redis_url = os.getenv('REDIS_URL', 'http://deltafi-redis-master:6379')
51
- password = os.getenv('REDIS_PASSWORD')
52
- return ActionEventQueue(redis_url, max_connections, password)
49
+ url = os.getenv('VALKEY_URL', 'http://deltafi-valkey-master:6379')
50
+ password = os.getenv('VALKEY_PASSWORD')
51
+ return ActionEventQueue(url, max_connections, password)
53
52
 
54
53
 
55
54
  def _setup_content_service():
@@ -165,8 +164,6 @@ class Plugin(object):
165
164
  'name': self.action_name(action),
166
165
  'description': action.description,
167
166
  'type': action.action_type.name,
168
- 'requiresDomains': action.requires_domains,
169
- 'requiresEnrichments': action.requires_enrichments,
170
167
  'schema': action.param_class().model_json_schema()
171
168
  }
172
169
 
@@ -263,7 +260,7 @@ class Plugin(object):
263
260
  while True:
264
261
  try:
265
262
  event_string = self.queue.take(self.action_name(action))
266
- event = Event.create(json.loads(event_string), self.hostname, self.content_service, action_logger)
263
+ event = Event.create(json.loads(event_string), self.content_service, action_logger)
267
264
  start_time = time.time()
268
265
  action_logger.debug(f"Processing event for did {event.context.did}")
269
266
 
@@ -277,14 +274,6 @@ class Plugin(object):
277
274
  f"Action attempted to look up element {e.index + 1} (index {e.index}) from "
278
275
  f"content list of size {e.size}",
279
276
  f"{str(e)}\n{traceback.format_exc()}")
280
- except MissingDomainException as e:
281
- result = ErrorResult(event.context,
282
- f"Action attempted to access domain {e.name}, which does not exist",
283
- f"{str(e)}\n{traceback.format_exc()}")
284
- except MissingEnrichmentException as e:
285
- result = ErrorResult(event.context,
286
- f"Action attempted to access enrichment {e.name}, which does not exist",
287
- f"{str(e)}\n{traceback.format_exc()}")
288
277
  except MissingMetadataException as e:
289
278
  result = ErrorResult(event.context,
290
279
  f"Missing metadata with key {e.key}",
@@ -297,7 +286,10 @@ class Plugin(object):
297
286
 
298
287
  response = {
299
288
  'did': event.context.did,
300
- 'action': event.context.action_flow + "." + event.context.action_name,
289
+ 'flowName': event.context.flow_name,
290
+ 'flowId': event.context.flow_id,
291
+ 'actionName': event.context.action_name,
292
+ 'actionId': event.context.action_id,
301
293
  'start': start_time,
302
294
  'stop': time.time(),
303
295
  'type': result.result_type,