deltafi 2.0rc1705024454242__py3-none-any.whl → 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltafi/__init__.py +1 -1
- deltafi/action.py +36 -20
- deltafi/actioneventqueue.py +1 -1
- deltafi/actiontype.py +3 -1
- deltafi/domain.py +73 -63
- deltafi/exception.py +1 -11
- deltafi/genericmodel.py +4 -2
- deltafi/input.py +1 -1
- deltafi/logger.py +4 -4
- deltafi/metric.py +2 -2
- deltafi/plugin.py +198 -50
- deltafi/result.py +95 -31
- deltafi/storage.py +6 -1
- deltafi/test_kit/__init__.py +1 -1
- deltafi/test_kit/assertions.py +10 -2
- deltafi/test_kit/compare_helpers.py +251 -8
- deltafi/test_kit/constants.py +1 -1
- deltafi/test_kit/egress.py +54 -0
- deltafi/test_kit/framework.py +146 -112
- deltafi/test_kit/timed_ingress.py +101 -0
- deltafi/test_kit/transform.py +45 -15
- {deltafi-2.0rc1705024454242.dist-info → deltafi-2.4.0.dist-info}/METADATA +13 -12
- deltafi-2.4.0.dist-info/RECORD +24 -0
- {deltafi-2.0rc1705024454242.dist-info → deltafi-2.4.0.dist-info}/WHEEL +1 -1
- deltafi-2.0rc1705024454242.dist-info/RECORD +0 -22
deltafi/plugin.py
CHANGED
@@ -1,7 +1,7 @@
 #
 # DeltaFi - Data transformation and enrichment platform
 #
-# Copyright 2021-
+# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,40 +16,109 @@
 # limitations under the License.
 #
 
+import importlib
+import inspect
 import json
 import os
+import pkgutil
 import sys
 import threading
 import time
 import traceback
 from datetime import datetime, timezone, timedelta
+from importlib import metadata
 from os.path import isdir, isfile, join
 from pathlib import Path
 from typing import List
-import importlib
-import inspect
-import pkgutil
 
-from importlib import metadata
 import requests
+import yaml
+from deltafi.action import Action, Join
 from deltafi.actioneventqueue import ActionEventQueue
 from deltafi.domain import Event, ActionExecution
-from deltafi.exception import ExpectedContentException,
-    MissingMetadataException
+from deltafi.exception import ExpectedContentException, MissingMetadataException
 from deltafi.logger import get_logger
-from deltafi.result import ErrorResult
+from deltafi.result import ErrorResult, IngressResult, TransformResult, TransformResults
 from deltafi.storage import ContentService
-from deltafi.action import Action
 
 
 def _coordinates():
     return PluginCoordinates(os.getenv('PROJECT_GROUP'), os.getenv('PROJECT_NAME'), os.getenv('PROJECT_VERSION'))
 
 
+def _valid_file(filename: str):
+    return isfile(filename) and \
+        (filename.endswith(".json")
+         or filename.endswith(".yaml")
+         or filename.endswith(".yml"))
+
+
+def _read_valid_files(path: str):
+    """
+    Read the contents of a directory, and returns a filtered list of files
+    that can be read/parsed for plugin usage, and ignores everything else.
+    :param path: name of the directory to scan
+    :return: list of filtered, parsable files
+    """
+    files = []
+    if isdir(path):
+        files = [f for f in os.listdir(path) if _valid_file(join(path, f))]
+    return files
+
+
+def _load_resource(path: str, filename: str):
+    """
+    Read the content of a JSON or YAML file, and return a Python
+    object of its contents, typically as a dict or list.
+    To avoid exceptions, use only files returned by _read_valid_files().
+    :param path: directory which contains the file to load
+    :param filename: name of the file to load
+    :return: dict or list of file contents
+    """
+    with open(join(path, filename)) as file_in:
+        if filename.endswith(".json"):
+            return json.load(file_in)
+        elif filename.endswith(".yaml") or filename.endswith(".yml"):
+            results = []
+            yaml_docs = yaml.safe_load_all(file_in)
+            for doc_iter in yaml_docs:
+                # yaml_docs must be iterated
+                results.append(doc_iter)
+            if len(results) == 1:
+                # Single document YAML file
+                return results[0]
+            else:
+                # Multi-document YAML file
+                return results
+    raise RuntimeError(f"File type not supported: {filename}")
+
+
+def _load__all_resource(path: str, file_list: List[str]):
+    resources = []
+    for f in file_list:
+        r = _load_resource(path, f)
+        if isinstance(r, list):
+            resources.extend(r)
+        else:
+            resources.append(r)
+    return resources
+
+
+def _find_variables_filename(names: List[str]):
+    if 'variables.json' in names:
+        return 'variables.json'
+    elif 'variables.yaml' in names:
+        return 'variables.yaml'
+    elif 'variables.yml' in names:
+        return 'variables.yml'
+    else:
+        return None
+
+
 def _setup_queue(max_connections):
-
-    password = os.getenv('
-    return ActionEventQueue(
+    url = os.getenv('VALKEY_URL', 'http://deltafi-valkey-master:6379')
+    password = os.getenv('VALKEY_PASSWORD')
+    return ActionEventQueue(url, max_connections, password)
 
 
 def _setup_content_service():
@@ -94,6 +163,8 @@ class Plugin(object):
         self.queue = None
         self.actions = []
         self.core_url = os.getenv('CORE_URL')
+        self.image = os.getenv('IMAGE')
+        self.image_pull_secret = os.getenv('IMAGE_PULL_SECRET')
         action_classes = []
         if actions is not None and len(actions):
             action_classes.extend(actions)
@@ -160,40 +231,74 @@ class Plugin(object):
     def action_name(self, action):
         return f"{self.coordinates.group_id}.{action.__class__.__name__}"
 
+    def _load_action_docs(self, action):
+        docs_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'docs')
+        if not isdir(docs_path):
+            return None
+
+        action_docs_file = join(docs_path, action.__class__.__name__ + '.md')
+        if not isfile(action_docs_file):
+            return None
+
+        return open(action_docs_file).read()
+
     def _action_json(self, action):
         return {
             'name': self.action_name(action),
             'description': action.description,
             'type': action.action_type.name,
-            '
+            'supportsJoin': isinstance(action, Join),
+            'schema': action.param_class().model_json_schema(),
+            'docsMarkdown': self._load_action_docs(action)
         }
 
+    @staticmethod
+    def load_integration_tests(tests_path: str):
+        test_files = _read_valid_files(tests_path)
+        return _load__all_resource(tests_path, test_files)
+
+    @staticmethod
+    def load_variables(flows_path: str, flow_files: List[str]):
+        variables = []
+        variables_filename = _find_variables_filename(flow_files)
+        if variables_filename is not None:
+            flow_files.remove(variables_filename)
+            variables = _load__all_resource(flows_path, [variables_filename])
+        return variables
+
     def registration_json(self):
         flows_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'flows')
+        tests_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'integration')
 
-        flow_files = []
         variables = []
-
-
-
-
-            variables = json.load(open(join(flows_path, 'variables.json')))
+        flow_files = _read_valid_files(flows_path)
+        if len(flow_files) == 0:
+            self.logger.warning(
+                f"Flows directory ({flows_path}) does not exist or contains no valid files. No flows will be installed.")
         else:
-            self.
+            variables = self.load_variables(flows_path, flow_files)
 
-        flows =
+        flows = _load__all_resource(flows_path, flow_files)
         actions = [self._action_json(action) for action in self.actions]
 
+        test_files = self.load_integration_tests(tests_path)
+        if len(test_files) == 0:
+            self.logger.warning(
+                f"tests directory ({tests_path}) does not exist or contains no valid files. No tests will be installed.")
+
         return {
-
-
-
-
-
-
-
-
-
+            'pluginCoordinates': self.coordinates.__json__(),
+            'displayName': self.display_name,
+            'description': self.description,
+            'actionKitVersion': metadata.version('deltafi'),
+            'image': self.image,
+            'imagePullSecret': self.image_pull_secret,
+            'dependencies': [],
+            'actions': actions,
+            'variables': variables,
+            'flowPlans': flows,
+            'integrationTests': test_files
+        }
 
     def _register(self):
         url = f"{self.core_url}/plugins"
@@ -217,7 +322,8 @@ class Plugin(object):
         for action in self.actions:
            threading.Thread(target=self._do_action, args=(action,)).start()
 
-        threading.Thread(target=self._heartbeat)
+        hb_thread = threading.Thread(target=self._heartbeat)
+        hb_thread.start()
 
         self.logger.info("All threads running")
@@ -225,6 +331,7 @@ class Plugin(object):
             f.close()
 
         self.logger.info("Application initialization complete")
+        hb_thread.join()
 
     def _heartbeat(self):
         long_running_actions = set()
@@ -254,6 +361,22 @@ class Plugin(object):
             finally:
                 time.sleep(10)
 
+    @staticmethod
+    def to_response(event, start_time, stop_time, result):
+        response = {
+            'did': event.context.did,
+            'flowName': event.context.flow_name,
+            'flowId': event.context.flow_id,
+            'actionName': event.context.action_name,
+            'start': start_time,
+            'stop': stop_time,
+            'type': result.result_type,
+            'metrics': [metric.json() for metric in result.metrics]
+        }
+        if result.result_key is not None:
+            response[result.result_key] = result.response()
+        return response
+
     def _do_action(self, action):
         action_logger = get_logger(self.action_name(action))
@@ -261,7 +384,7 @@ class Plugin(object):
         while True:
             try:
                 event_string = self.queue.take(self.action_name(action))
-                event = Event.create(json.loads(event_string), self.
+                event = Event.create(json.loads(event_string), self.content_service, action_logger)
                 start_time = time.time()
                 action_logger.debug(f"Processing event for did {event.context.did}")
@@ -275,14 +398,6 @@ class Plugin(object):
                                          f"Action attempted to look up element {e.index + 1} (index {e.index}) from "
                                          f"content list of size {e.size}",
                                          f"{str(e)}\n{traceback.format_exc()}")
-            except MissingDomainException as e:
-                result = ErrorResult(event.context,
-                                     f"Action attempted to access domain {e.name}, which does not exist",
-                                     f"{str(e)}\n{traceback.format_exc()}")
-            except MissingEnrichmentException as e:
-                result = ErrorResult(event.context,
-                                     f"Action attempted to access enrichment {e.name}, which does not exist",
-                                     f"{str(e)}\n{traceback.format_exc()}")
             except MissingMetadataException as e:
                 result = ErrorResult(event.context,
                                      f"Missing metadata with key {e.key}",
@@ -293,16 +408,10 @@ class Plugin(object):
 
                 action.action_execution = None
 
-                response =
-
-
-
-                    'stop': time.time(),
-                    'type': result.result_type,
-                    'metrics': [metric.json() for metric in result.metrics]
-                }
-                if result.result_key is not None:
-                    response[result.result_key] = result.response()
+                response = Plugin.to_response(
+                    event, start_time, time.time(), result)
+
+                Plugin.orphaned_content_check(action_logger, event.context, result, response)
 
                 topic = 'dgs'
                 if event.return_address:
@@ -311,3 +420,42 @@ class Plugin(object):
             except BaseException as e:
                 action_logger.error(f"Unexpected {type(e)} error: {str(e)}\n{traceback.format_exc()}")
                 time.sleep(1)
+
+    @staticmethod
+    def orphaned_content_check(logger, context, result, response):
+        if len(context.saved_content) > 0:
+            to_delete = Plugin.find_unused_content(context.saved_content, result)
+            if len(to_delete) > 0:
+                errors = context.content_service.delete_all(to_delete)
+                for e in errors:
+                    logger.error(f"Unable to delete object(s), {e}")
+                logger.warning(
+                    f"Deleted {len(to_delete)} unused content entries for did {context.did} due to a {response['type']} event by {response['actionName']}")
+
+    @staticmethod
+    def find_unused_content(saved_content, result):
+        segments_in_use = Plugin.used_segment_names(result)
+        saved_segments = Plugin.get_segment_names(saved_content)
+        to_delete = []
+        for key, value in saved_segments.items():
+            if key not in segments_in_use:
+                to_delete.append(value)
+        return to_delete
+
+    @staticmethod
+    def used_segment_names(result):
+        segment_names = {}
+        if isinstance(result, TransformResult):
+            segment_names.update(result.get_segment_names())
+        elif isinstance(result, TransformResults):
+            segment_names.update(result.get_segment_names())
+        elif isinstance(result, IngressResult):
+            segment_names.update(result.get_segment_names())
+        return segment_names
+
+    @staticmethod
+    def get_segment_names(content_list):
+        segment_names = {}
+        for content in content_list:
+            segment_names.update(content.get_segment_names())
+        return segment_names
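The new flow-loading helpers above (_valid_file, _read_valid_files, _load_resource, _load__all_resource) replace the old variables.json-only path: JSON files load as a single object, while YAML files are read with yaml.safe_load_all, so a multi-document file contributes one resource per document. The following is a minimal, self-contained sketch of that behavior; it deliberately reimplements the logic instead of calling the private helpers, and the directory layout and flow names are invented for illustration.

import json
import os
import tempfile

import yaml  # PyYAML, the same dependency this diff adds via "import yaml"


def load_dir(path):
    # Mirrors the filtering and loading rules added in this diff:
    # *.json loads as one object; *.yaml / *.yml is read with
    # yaml.safe_load_all, so each YAML document becomes its own entry.
    resources = []
    for name in sorted(os.listdir(path)):
        full = os.path.join(path, name)
        if not os.path.isfile(full):
            continue
        if name.endswith(".json"):
            with open(full) as f:
                resources.append(json.load(f))
        elif name.endswith((".yaml", ".yml")):
            with open(full) as f:
                resources.extend(yaml.safe_load_all(f))
    return resources


with tempfile.TemporaryDirectory() as flows:
    with open(os.path.join(flows, "flow.json"), "w") as f:
        json.dump({"name": "passthrough", "type": "TRANSFORM"}, f)
    with open(os.path.join(flows, "more-flows.yaml"), "w") as f:
        f.write("name: flow-a\n---\nname: flow-b\n")
    print(load_dir(flows))  # three flow plans: one from JSON, two from the multi-document YAML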
deltafi/result.py
CHANGED
@@ -1,7 +1,7 @@
 #
 # DeltaFi - Data transformation and enrichment platform
 #
-# Copyright 2021-
+# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,17 +17,13 @@
 #
 
 import abc
-from enum import Enum
 import uuid
-from
+from enum import Enum
+from typing import NamedTuple
 
 from deltafi.domain import Content, Context
 from deltafi.metric import Metric
 
-ENDPOINT_TAG = "endpoint"
-FILES_OUT = "files_out"
-BYTES_OUT = "bytes_out"
-
 
 class Result:
     __metaclass__ = abc.ABCMeta
@@ -44,13 +40,12 @@ class Result:
 
     def add_metric(self, metric: Metric):
         self.metrics.append(metric)
+        return self
 
 
 class EgressResult(Result):
-    def __init__(self, context: Context
+    def __init__(self, context: Context):
         super().__init__(None, 'EGRESS', context)
-        self.add_metric(Metric(FILES_OUT, 1, {ENDPOINT_TAG: destination}))
-        self.add_metric(Metric(BYTES_OUT, bytes_egressed, {ENDPOINT_TAG: destination}))
 
     def response(self):
         return None
@@ -76,7 +71,7 @@ class ErrorResult(Result):
 
 
 class FilterResult(Result):
-    def __init__(self, context: Context, filtered_cause: str, filtered_context: str=None):
+    def __init__(self, context: Context, filtered_cause: str, filtered_context: str = None):
         super().__init__('filter', 'FILTER', context)
         self.filtered_cause = filtered_cause
         self.filtered_context = filtered_context
@@ -95,12 +90,13 @@ class FilterResult(Result):
 
 
 class IngressResultItem:
-    def __init__(self, context: Context,
+    def __init__(self, context: Context, delta_file_name: str):
         self.context = context
-        self.filename = filename
         self._did = str(uuid.uuid4())
         self.content = []
         self.metadata = {}
+        self.annotations = {}
+        self.delta_file_name = delta_file_name
 
     @property
     def did(self):
@@ -117,15 +113,17 @@ class IngressResultItem:
         return self
 
     def save_string_content(self, string_data: str, name: str, media_type: str):
-        segment = self.context.content_service.put_str(self.
-        self.
-
+        segment = self.context.content_service.put_str(self._did, string_data)
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        self.content.append(c)
+        self.context.saved_content.append(c)
         return self
 
     def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
-        segment = self.context.content_service.put_bytes(self.
-        self.
-
+        segment = self.context.content_service.put_bytes(self._did, byte_data)
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        self.content.append(c)
+        self.context.saved_content.append(c)
         return self
 
     def set_metadata(self, metadata: dict):
@@ -136,12 +134,23 @@ class IngressResultItem:
         self.metadata[key] = value
         return self
 
+    def get_segment_names(self):
+        segment_names = {}
+        for c in self.content:
+            segment_names.update(c.get_segment_names())
+        return segment_names
+
+    def annotate(self, key: str, value: str):
+        self.annotations[key] = value
+        return self
+
     def response(self):
         return {
             'did': self._did,
-            '
+            'deltaFileName': self.delta_file_name,
             'metadata': self.metadata,
-            'content': [content.json() for content in self.content]
+            'content': [content.json() for content in self.content],
+            'annotations': self.annotations
         }
 
 
@@ -155,22 +164,28 @@ class IngressResult(Result):
     def __init__(self, context: Context):
         super().__init__('ingress', 'INGRESS', context)
         self.memo = None
-        self.execute_immediate = False
         self.ingress_result_items = []
+        self.execute_immediate = False
         self.status = IngressStatusEnum.HEALTHY
-        self.
+        self.status_message = None
 
     def add_item(self, ingress_result_item: IngressResultItem):
         self.ingress_result_items.append(ingress_result_item)
         return self
 
+    def get_segment_names(self):
+        segment_names = {}
+        for ingress_item in self.ingress_result_items:
+            segment_names.update(ingress_item.get_segment_names())
+        return segment_names
+
     def response(self):
         return {
             'memo': self.memo,
             'executeImmediate': self.execute_immediate,
             'ingressItems': [ingress_result_item.response() for ingress_result_item in self.ingress_result_items],
             'status': self.status.value,
-            'statusMessage': self.
+            'statusMessage': self.status_message
         }
 
 
@@ -178,8 +193,8 @@ class TransformResult(Result):
     def __init__(self, context: Context):
         super().__init__('transform', 'TRANSFORM', context)
         self.content = []
-        self.metadata = {}
         self.annotations = {}
+        self.metadata = {}
         self.delete_metadata_keys = []
 
     # content can be a single Content or a List[Content]
@@ -194,14 +209,16 @@ class TransformResult(Result):
 
     def save_string_content(self, string_data: str, name: str, media_type: str):
         segment = self.context.content_service.put_str(self.context.did, string_data)
-        self.
-
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        self.content.append(c)
+        self.context.saved_content.append(c)
         return self
 
     def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
         segment = self.context.content_service.put_bytes(self.context.did, byte_data)
-        self.
-
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        self.content.append(c)
+        self.context.saved_content.append(c)
         return self
 
     def set_metadata(self, metadata: dict):
@@ -220,10 +237,57 @@ class TransformResult(Result):
         self.delete_metadata_keys.append(key)
         return self
 
-    def
+    def get_segment_names(self):
+        segment_names = {}
+        for c in self.content:
+            segment_names.update(c.get_segment_names())
+        return segment_names
+
+    def json(self):
         return {
+            'did': self.context.did,
             'content': [content.json() for content in self.content],
-            'metadata': self.metadata,
             'annotations': self.annotations,
+            'metadata': self.metadata,
             'deleteMetadataKeys': self.delete_metadata_keys
         }
+
+    def response(self):
+        return [self.json()]
+
+
+class ChildTransformResult(TransformResult):
+    delta_file_name: str
+
+    def __init__(self, context: Context, delta_file_name: str = None):
+        super().__init__(context.child_context())
+        self.delta_file_name = delta_file_name
+
+    def json(self):
+        j = super().json()
+        if self.delta_file_name is not None:
+            j['name'] = self.delta_file_name
+        return j
+
+
+class TransformResults(Result):
+    def __init__(self, context: Context):
+        super().__init__('transform', 'TRANSFORM', context)
+        self.child_results = []
+
+    def add_result(self, result: ChildTransformResult):
+        self.child_results.append(result)
+        return self
+
+    def get_segment_names(self):
+        segment_names = {}
+        for child_result in self.child_results:
+            segment_names.update(child_result.get_segment_names())
+        return segment_names
+
+    def response(self):
+        transform_events = []
+        for child_result in self.child_results:
+            json_dict = child_result.json()
+            transform_events.append(json_dict)
+        return transform_events
deltafi/storage.py
CHANGED
@@ -1,7 +1,7 @@
 #
 # DeltaFi - Data transformation and enrichment platform
 #
-# Copyright 2021-
+# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ from typing import List, NamedTuple
 from urllib.parse import urlparse
 
 import minio
+from minio.deleteobjects import DeleteObject
 
 BUCKET = 'storage'
 
@@ -86,3 +87,7 @@ class ContentService:
 
     def put_str(self, did, string_data):
         return self.put_bytes(did, string_data.encode('utf-8'))
+
+    def delete_all(self, segments: List[Segment]):
+        delete_objects = [DeleteObject(seg.id()) for seg in segments]
+        return self.minio_client.remove_objects(BUCKET, delete_objects)
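ContentService.delete_all is the storage-side half of the orphaned-content cleanup added in plugin.py: it wraps each Segment in a minio DeleteObject and hands the batch to remove_objects. A short sketch of how a caller consumes it, assuming a ContentService built the way the plugin builds one at startup and a list of Segment objects to drop:

from deltafi.storage import ContentService


def delete_orphans(content_service: ContentService, orphaned_segments):
    # delete_all returns minio's remove_objects result, a lazy iterator of
    # delete errors; iterating it both drives the deletes and surfaces any
    # failures, which is why plugin.py loops over it even just to log.
    errors = content_service.delete_all(orphaned_segments)
    for err in errors:
        print(f"Unable to delete object: {err}")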
deltafi/test_kit/__init__.py
CHANGED
@@ -1,7 +1,7 @@
 #
 # DeltaFi - Data transformation and enrichment platform
 #
-# Copyright 2021-
+# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
deltafi/test_kit/assertions.py
CHANGED
@@ -1,7 +1,7 @@
 #
 # DeltaFi - Data transformation and enrichment platform
 #
-# Copyright 2021-
+# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -29,10 +29,18 @@ def assert_equal_with_label(e, a, l):
     assert e == a, f"{l}. Expected:\n<<{e}>>\nBut was:\n<<{a}>>"
 
 
+def assert_equal_short(e, a, l):
+    assert e == a, f"{l}. E:{e}, A::{a}"
+
+
 def assert_equal_len(e, a):
     assert len(e) == len(a), f"{len(e)} != {len(a)}"
 
 
+def assert_equal_len_with_label(e, a, l):
+    assert len(e) == len(a), f"{l}. {len(e)} != {len(a)}"
+
+
 def assert_key_in(k, m):
     assert k in m, f"{k} not found"
 
@@ -45,4 +53,4 @@ def assert_keys_and_values(expected: Dict, actual: Dict):
     for key in expected:
         assert_key_in(key, actual)
         if expected[key] != IGNORE_VALUE:
-
+            assert_equal_short(expected[key], actual[key], f"invalid value for key {key}")