deltafi 1.2.19__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deltafi might be problematic. Click here for more details.

deltafi/input.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,172 +17,13 @@
17
17
  #
18
18
 
19
19
  from deltafi.domain import *
20
- from deltafi.exception import MissingMetadataException, ExpectedContentException, MissingDomainException, \
21
- MissingEnrichmentException
22
-
23
-
24
- class DomainInput(NamedTuple):
25
- content: List[Content]
26
- metadata: Dict[str, str]
27
- domains: Dict[str, Domain]
28
-
29
- def has_content(self) -> bool:
30
- return len(self.content) > 0
31
-
32
- def content_at(self, index: int) -> Content:
33
- if len(self.content) < index + 1:
34
- raise ExpectedContentException(index, len(self.content))
35
- return self.content[index]
36
-
37
- def first_content(self):
38
- return self.content_at(0)
39
-
40
- def get_metadata(self, key: str):
41
- if key in self.metadata:
42
- return self.metadata[key]
43
- else:
44
- raise MissingMetadataException(key)
45
-
46
- def get_metadata_or_else(self, key: str, default: str) -> str:
47
- if key in self.metadata:
48
- return self.metadata[key]
49
- else:
50
- return default
51
-
52
- def has_domain(self, name: str) -> bool:
53
- return name in self.domains
54
-
55
- def domain(self, name: str) -> Domain:
56
- if not self.has_domain(name):
57
- raise MissingDomainException(name)
58
- return self.domains[name]
59
-
20
+ from deltafi.exception import MissingMetadataException, ExpectedContentException
60
21
 
61
22
  class EgressInput(NamedTuple):
62
23
  content: Content
63
24
  metadata: dict
64
25
 
65
26
 
66
- class EnrichInput(NamedTuple):
67
- content: List[Content]
68
- metadata: dict
69
- domains: Dict[str, Domain]
70
- enrichments: Dict[str, Domain]
71
-
72
- def has_content(self) -> bool:
73
- return len(self.content) > 0
74
-
75
- def content_at(self, index: int) -> Content:
76
- if len(self.content) < index + 1:
77
- raise ExpectedContentException(index, len(self.content))
78
- return self.content[index]
79
-
80
- def first_content(self):
81
- return self.content_at(0)
82
-
83
- def get_metadata(self, key: str):
84
- if key in self.metadata:
85
- return self.metadata[key]
86
- else:
87
- raise MissingMetadataException(key)
88
-
89
- def get_metadata_or_else(self, key: str, default: str) -> str:
90
- if key in self.metadata:
91
- return self.metadata[key]
92
- else:
93
- return default
94
-
95
- def has_domain(self, name: str) -> bool:
96
- return name in self.domains
97
-
98
- def domain(self, name: str) -> Domain:
99
- if not self.has_domain(name):
100
- raise MissingDomainException(name)
101
- return self.domains[name]
102
-
103
- def has_enrichment(self, name: str) -> bool:
104
- return name in self.enrichments
105
-
106
- def enrichment(self, name: str) -> Domain:
107
- if not self.has_enrichment(name):
108
- raise MissingEnrichmentException(name)
109
- return self.enrichments[name]
110
-
111
-
112
- class FormatInput(NamedTuple):
113
- content: List[Content]
114
- metadata: dict
115
- domains: Dict[str, Domain]
116
- enrichments: Dict[str, Domain]
117
-
118
- def has_content(self) -> bool:
119
- return len(self.content) > 0
120
-
121
- def content_at(self, index: int) -> Content:
122
- if len(self.content) < index + 1:
123
- raise ExpectedContentException(index, len(self.content))
124
- return self.content[index]
125
-
126
- def first_content(self):
127
- return self.content_at(0)
128
-
129
- def get_metadata(self, key: str):
130
- if key in self.metadata:
131
- return self.metadata[key]
132
- else:
133
- raise MissingMetadataException(key)
134
-
135
- def get_metadata_or_else(self, key: str, default: str) -> str:
136
- if key in self.metadata:
137
- return self.metadata[key]
138
- else:
139
- return default
140
-
141
- def has_domain(self, name: str) -> bool:
142
- return name in self.domains
143
-
144
- def domain(self, name: str) -> Domain:
145
- if not self.has_domain(name):
146
- raise MissingDomainException(name)
147
- return self.domains[name]
148
-
149
- def has_enrichment(self, name: str) -> bool:
150
- return name in self.enrichments
151
-
152
- def enrichment(self, name: str) -> Domain:
153
- if not self.has_enrichment(name):
154
- raise MissingEnrichmentException(name)
155
- return self.enrichments[name]
156
-
157
-
158
- class LoadInput(NamedTuple):
159
- content: List[Content]
160
- metadata: dict
161
-
162
- def has_content(self) -> bool:
163
- return len(self.content) > 0
164
-
165
- def content_at(self, index: int) -> Content:
166
- if len(self.content) < index + 1:
167
- raise ExpectedContentException(index, len(self.content))
168
- return self.content[index]
169
-
170
- def first_content(self):
171
- return self.content_at(0)
172
-
173
- def get_metadata(self, key: str):
174
- if key in self.metadata:
175
- return self.metadata[key]
176
- else:
177
- raise MissingMetadataException(key)
178
-
179
- def get_metadata_or_else(self, key: str, default: str) -> str:
180
- if key in self.metadata:
181
- return self.metadata[key]
182
- else:
183
- return default
184
-
185
-
186
27
  class TransformInput(NamedTuple):
187
28
  content: List[Content]
188
29
  metadata: dict
@@ -209,8 +50,3 @@ class TransformInput(NamedTuple):
209
50
  return self.metadata[key]
210
51
  else:
211
52
  return default
212
-
213
-
214
- class ValidateInput(NamedTuple):
215
- content: Content
216
- metadata: dict
deltafi/logger.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -18,14 +18,14 @@
18
18
 
19
19
  import logging
20
20
  import sys
21
- from datetime import datetime
21
+ from datetime import datetime, UTC
22
22
 
23
23
  import json_logging
24
24
 
25
25
 
26
26
  def get_logger(name: str = None) -> logging.Logger:
27
27
  logger = logging.getLogger(name)
28
- logger.setLevel(logging.DEBUG)
28
+ logger.setLevel(logging.INFO)
29
29
  logger.addHandler(logging.StreamHandler(sys.stdout))
30
30
  logger.propagate = False
31
31
 
@@ -42,7 +42,7 @@ def _sanitize_log_msg(record):
42
42
  class JSONLogFormatter(json_logging.JSONLogFormatter):
43
43
 
44
44
  def _format_log_object(self, record, request_util):
45
- utcnow = datetime.utcnow()
45
+ utcnow = datetime.now(UTC)
46
46
 
47
47
  json_log_object = {
48
48
  'timestamp': json_logging.util.iso_time_format(utcnow),
deltafi/metric.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ from typing import Dict, NamedTuple
22
22
  class Metric(NamedTuple):
23
23
  name: str
24
24
  value: int
25
- tags: Dict[str, str]
25
+ tags: Dict[str, str] = {}
26
26
 
27
27
  def json(self):
28
28
  return {
deltafi/plugin.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -16,30 +16,30 @@
16
16
  # limitations under the License.
17
17
  #
18
18
 
19
+ import importlib
20
+ import inspect
19
21
  import json
20
22
  import os
23
+ import pkgutil
21
24
  import sys
22
25
  import threading
23
26
  import time
24
27
  import traceback
28
+ import yaml
25
29
  from datetime import datetime, timezone, timedelta
30
+ from importlib import metadata
26
31
  from os.path import isdir, isfile, join
27
32
  from pathlib import Path
28
33
  from typing import List
29
- import importlib
30
- import inspect
31
- import pkgutil
32
34
 
33
- from importlib import metadata
34
35
  import requests
36
+ from deltafi.action import Action, Join
35
37
  from deltafi.actioneventqueue import ActionEventQueue
36
38
  from deltafi.domain import Event, ActionExecution
37
- from deltafi.exception import ExpectedContentException, MissingDomainException, MissingEnrichmentException, \
38
- MissingMetadataException
39
+ from deltafi.exception import ExpectedContentException, MissingMetadataException
39
40
  from deltafi.logger import get_logger
40
- from deltafi.result import ErrorResult
41
+ from deltafi.result import ErrorResult, IngressResult, TransformResult, TransformResults
41
42
  from deltafi.storage import ContentService
42
- from deltafi.action import Action
43
43
 
44
44
 
45
45
  def _coordinates():
@@ -47,9 +47,9 @@ def _coordinates():
47
47
 
48
48
 
49
49
  def _setup_queue(max_connections):
50
- redis_url = os.getenv('REDIS_URL', 'http://deltafi-redis-master:6379')
51
- password = os.getenv('REDIS_PASSWORD')
52
- return ActionEventQueue(redis_url, max_connections, password)
50
+ url = os.getenv('VALKEY_URL', 'http://deltafi-valkey-master:6379')
51
+ password = os.getenv('VALKEY_PASSWORD')
52
+ return ActionEventQueue(url, max_connections, password)
53
53
 
54
54
 
55
55
  def _setup_content_service():
@@ -94,6 +94,8 @@ class Plugin(object):
94
94
  self.queue = None
95
95
  self.actions = []
96
96
  self.core_url = os.getenv('CORE_URL')
97
+ self.image = os.getenv('IMAGE')
98
+ self.image_pull_secret = os.getenv('IMAGE_PULL_SECRET')
97
99
  action_classes = []
98
100
  if actions is not None and len(actions):
99
101
  action_classes.extend(actions)
@@ -160,16 +162,39 @@ class Plugin(object):
160
162
  def action_name(self, action):
161
163
  return f"{self.coordinates.group_id}.{action.__class__.__name__}"
162
164
 
165
+ def _load_action_docs(self, action):
166
+ docs_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'docs')
167
+ if not isdir(docs_path):
168
+ return None
169
+
170
+ action_docs_file = join(docs_path, action.__class__.__name__ + '.md')
171
+ if not isfile(action_docs_file):
172
+ return None
173
+
174
+ return open(action_docs_file).read()
175
+
163
176
  def _action_json(self, action):
164
177
  return {
165
178
  'name': self.action_name(action),
166
179
  'description': action.description,
167
180
  'type': action.action_type.name,
168
- 'requiresDomains': action.requires_domains,
169
- 'requiresEnrichments': action.requires_enrichments,
170
- 'schema': action.param_class().model_json_schema()
181
+ 'supportsJoin': isinstance(action, Join),
182
+ 'schema': action.param_class().model_json_schema(),
183
+ 'docsMarkdown': self._load_action_docs(action)
171
184
  }
172
185
 
186
+ def _integration_tests(self):
187
+ tests_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'integration')
188
+
189
+ test_files = []
190
+ if isdir(tests_path):
191
+ test_files = [f for f in os.listdir(tests_path) if isfile(join(tests_path, f))]
192
+ else:
193
+ self.logger.warning(f"tests directory ({tests_path}) does not exist. No tests will be installed.")
194
+
195
+ tests = [json.load(open(join(tests_path, f))) for f in test_files]
196
+ return tests
197
+
173
198
  def registration_json(self):
174
199
  flows_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'flows')
175
200
 
@@ -187,15 +212,18 @@ class Plugin(object):
187
212
  actions = [self._action_json(action) for action in self.actions]
188
213
 
189
214
  return {
190
- 'pluginCoordinates': self.coordinates.__json__(),
191
- 'displayName': self.display_name,
192
- 'description': self.description,
193
- 'actionKitVersion': metadata.version('deltafi'),
194
- 'dependencies': [],
195
- 'actions': actions,
196
- 'variables': variables,
197
- 'flowPlans': flows
198
- }
215
+ 'pluginCoordinates': self.coordinates.__json__(),
216
+ 'displayName': self.display_name,
217
+ 'description': self.description,
218
+ 'actionKitVersion': metadata.version('deltafi'),
219
+ 'image': self.image,
220
+ 'imagePullSecret': self.image_pull_secret,
221
+ 'dependencies': [],
222
+ 'actions': actions,
223
+ 'variables': variables,
224
+ 'flowPlans': flows,
225
+ 'integrationTests': self._integration_tests()
226
+ }
199
227
 
200
228
  def _register(self):
201
229
  url = f"{self.core_url}/plugins"
@@ -258,6 +286,22 @@ class Plugin(object):
258
286
  finally:
259
287
  time.sleep(10)
260
288
 
289
+ @staticmethod
290
+ def to_response(event, start_time, stop_time, result):
291
+ response = {
292
+ 'did': event.context.did,
293
+ 'flowName': event.context.flow_name,
294
+ 'flowId': event.context.flow_id,
295
+ 'actionName': event.context.action_name,
296
+ 'start': start_time,
297
+ 'stop': stop_time,
298
+ 'type': result.result_type,
299
+ 'metrics': [metric.json() for metric in result.metrics]
300
+ }
301
+ if result.result_key is not None:
302
+ response[result.result_key] = result.response()
303
+ return response
304
+
261
305
  def _do_action(self, action):
262
306
  action_logger = get_logger(self.action_name(action))
263
307
 
@@ -265,7 +309,7 @@ class Plugin(object):
265
309
  while True:
266
310
  try:
267
311
  event_string = self.queue.take(self.action_name(action))
268
- event = Event.create(json.loads(event_string), self.hostname, self.content_service, action_logger)
312
+ event = Event.create(json.loads(event_string), self.content_service, action_logger)
269
313
  start_time = time.time()
270
314
  action_logger.debug(f"Processing event for did {event.context.did}")
271
315
 
@@ -279,14 +323,6 @@ class Plugin(object):
279
323
  f"Action attempted to look up element {e.index + 1} (index {e.index}) from "
280
324
  f"content list of size {e.size}",
281
325
  f"{str(e)}\n{traceback.format_exc()}")
282
- except MissingDomainException as e:
283
- result = ErrorResult(event.context,
284
- f"Action attempted to access domain {e.name}, which does not exist",
285
- f"{str(e)}\n{traceback.format_exc()}")
286
- except MissingEnrichmentException as e:
287
- result = ErrorResult(event.context,
288
- f"Action attempted to access enrichment {e.name}, which does not exist",
289
- f"{str(e)}\n{traceback.format_exc()}")
290
326
  except MissingMetadataException as e:
291
327
  result = ErrorResult(event.context,
292
328
  f"Missing metadata with key {e.key}",
@@ -297,16 +333,10 @@ class Plugin(object):
297
333
 
298
334
  action.action_execution = None
299
335
 
300
- response = {
301
- 'did': event.context.did,
302
- 'action': event.context.action_flow + "." + event.context.action_name,
303
- 'start': start_time,
304
- 'stop': time.time(),
305
- 'type': result.result_type,
306
- 'metrics': [metric.json() for metric in result.metrics]
307
- }
308
- if result.result_key is not None:
309
- response[result.result_key] = result.response()
336
+ response = Plugin.to_response(
337
+ event, start_time, time.time(), result)
338
+
339
+ Plugin.orphaned_content_check(action_logger, event.context, result, response)
310
340
 
311
341
  topic = 'dgs'
312
342
  if event.return_address:
@@ -315,3 +345,42 @@ class Plugin(object):
315
345
  except BaseException as e:
316
346
  action_logger.error(f"Unexpected {type(e)} error: {str(e)}\n{traceback.format_exc()}")
317
347
  time.sleep(1)
348
+
349
+ @staticmethod
350
+ def orphaned_content_check(logger, context, result, response):
351
+ if len(context.saved_content) > 0:
352
+ to_delete = Plugin.find_unused_content(context.saved_content, result)
353
+ if len(to_delete) > 0:
354
+ errors = context.content_service.delete_all(to_delete)
355
+ for e in errors:
356
+ logger.error(f"Unable to delete object(s), {e}")
357
+ logger.warning(
358
+ f"Deleted {len(to_delete)} unused content entries for did {context.did} due to a {response['type']} event by {response['actionName']}")
359
+
360
+ @staticmethod
361
+ def find_unused_content(saved_content, result):
362
+ segments_in_use = Plugin.used_segment_names(result)
363
+ saved_segments = Plugin.get_segment_names(saved_content)
364
+ to_delete = []
365
+ for key, value in saved_segments.items():
366
+ if key not in segments_in_use:
367
+ to_delete.append(value)
368
+ return to_delete
369
+
370
+ @staticmethod
371
+ def used_segment_names(result):
372
+ segment_names = {}
373
+ if isinstance(result, TransformResult):
374
+ segment_names.update(result.get_segment_names())
375
+ elif isinstance(result, TransformResults):
376
+ segment_names.update(result.get_segment_names())
377
+ elif isinstance(result, IngressResult):
378
+ segment_names.update(result.get_segment_names())
379
+ return segment_names
380
+
381
+ @staticmethod
382
+ def get_segment_names(content_list):
383
+ segment_names = {}
384
+ for content in content_list:
385
+ segment_names.update(content.get_segment_names())
386
+ return segment_names