deltafi 2.0rc1705024454242__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deltafi might be problematic. Click here for more details.

deltafi/plugin.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -16,40 +16,109 @@
16
16
  # limitations under the License.
17
17
  #
18
18
 
19
+ import importlib
20
+ import inspect
19
21
  import json
20
22
  import os
23
+ import pkgutil
21
24
  import sys
22
25
  import threading
23
26
  import time
24
27
  import traceback
25
28
  from datetime import datetime, timezone, timedelta
29
+ from importlib import metadata
26
30
  from os.path import isdir, isfile, join
27
31
  from pathlib import Path
28
32
  from typing import List
29
- import importlib
30
- import inspect
31
- import pkgutil
32
33
 
33
- from importlib import metadata
34
34
  import requests
35
+ import yaml
36
+ from deltafi.action import Action, Join
35
37
  from deltafi.actioneventqueue import ActionEventQueue
36
38
  from deltafi.domain import Event, ActionExecution
37
- from deltafi.exception import ExpectedContentException, MissingDomainException, MissingEnrichmentException, \
38
- MissingMetadataException
39
+ from deltafi.exception import ExpectedContentException, MissingMetadataException
39
40
  from deltafi.logger import get_logger
40
- from deltafi.result import ErrorResult
41
+ from deltafi.result import ErrorResult, IngressResult, TransformResult, TransformResults
41
42
  from deltafi.storage import ContentService
42
- from deltafi.action import Action
43
43
 
44
44
 
45
45
  def _coordinates():
46
46
  return PluginCoordinates(os.getenv('PROJECT_GROUP'), os.getenv('PROJECT_NAME'), os.getenv('PROJECT_VERSION'))
47
47
 
48
48
 
49
+ def _valid_file(filename: str):
50
+ return isfile(filename) and \
51
+ (filename.endswith(".json")
52
+ or filename.endswith(".yaml")
53
+ or filename.endswith(".yml"))
54
+
55
+
56
+ def _read_valid_files(path: str):
57
+ """
58
+ Read the contents of a directory, and returns a filtered list of files
59
+ that can be read/parsed for plugin usage, and ignores everything else.
60
+ :param path: name of the directory to scan
61
+ :return: list of filtered, parsable files
62
+ """
63
+ files = []
64
+ if isdir(path):
65
+ files = [f for f in os.listdir(path) if _valid_file(join(path, f))]
66
+ return files
67
+
68
+
69
+ def _load_resource(path: str, filename: str):
70
+ """
71
+ Read the content of a JSON or YAML file, and return a Python
72
+ object of its contents, typically as a dict or list.
73
+ To avoid exceptions, use only files returned by _read_valid_files().
74
+ :param path: directory which contains the file to load
75
+ :param filename: name of the file to load
76
+ :return: dict or list of file contents
77
+ """
78
+ with open(join(path, filename)) as file_in:
79
+ if filename.endswith(".json"):
80
+ return json.load(file_in)
81
+ elif filename.endswith(".yaml") or filename.endswith(".yml"):
82
+ results = []
83
+ yaml_docs = yaml.safe_load_all(file_in)
84
+ for doc_iter in yaml_docs:
85
+ # yaml_docs must be iterated
86
+ results.append(doc_iter)
87
+ if len(results) == 1:
88
+ # Single document YAML file
89
+ return results[0]
90
+ else:
91
+ # Multi-document YAML file
92
+ return results
93
+ raise RuntimeError(f"File type not supported: {filename}")
94
+
95
+
96
+ def _load__all_resource(path: str, file_list: List[str]):
97
+ resources = []
98
+ for f in file_list:
99
+ r = _load_resource(path, f)
100
+ if isinstance(r, list):
101
+ resources.extend(r)
102
+ else:
103
+ resources.append(r)
104
+ return resources
105
+
106
+
107
+ def _find_variables_filename(names: List[str]):
108
+ if 'variables.json' in names:
109
+ return 'variables.json'
110
+ elif 'variables.yaml' in names:
111
+ return 'variables.yaml'
112
+ elif 'variables.yml' in names:
113
+ return 'variables.yml'
114
+ else:
115
+ return None
116
+
117
+
49
118
  def _setup_queue(max_connections):
50
- redis_url = os.getenv('REDIS_URL', 'http://deltafi-redis-master:6379')
51
- password = os.getenv('REDIS_PASSWORD')
52
- return ActionEventQueue(redis_url, max_connections, password)
119
+ url = os.getenv('VALKEY_URL', 'http://deltafi-valkey-master:6379')
120
+ password = os.getenv('VALKEY_PASSWORD')
121
+ return ActionEventQueue(url, max_connections, password)
53
122
 
54
123
 
55
124
  def _setup_content_service():
@@ -94,6 +163,8 @@ class Plugin(object):
94
163
  self.queue = None
95
164
  self.actions = []
96
165
  self.core_url = os.getenv('CORE_URL')
166
+ self.image = os.getenv('IMAGE')
167
+ self.image_pull_secret = os.getenv('IMAGE_PULL_SECRET')
97
168
  action_classes = []
98
169
  if actions is not None and len(actions):
99
170
  action_classes.extend(actions)
@@ -160,40 +231,74 @@ class Plugin(object):
160
231
  def action_name(self, action):
161
232
  return f"{self.coordinates.group_id}.{action.__class__.__name__}"
162
233
 
234
+ def _load_action_docs(self, action):
235
+ docs_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'docs')
236
+ if not isdir(docs_path):
237
+ return None
238
+
239
+ action_docs_file = join(docs_path, action.__class__.__name__ + '.md')
240
+ if not isfile(action_docs_file):
241
+ return None
242
+
243
+ return open(action_docs_file).read()
244
+
163
245
  def _action_json(self, action):
164
246
  return {
165
247
  'name': self.action_name(action),
166
248
  'description': action.description,
167
249
  'type': action.action_type.name,
168
- 'schema': action.param_class().model_json_schema()
250
+ 'supportsJoin': isinstance(action, Join),
251
+ 'schema': action.param_class().model_json_schema(),
252
+ 'docsMarkdown': self._load_action_docs(action)
169
253
  }
170
254
 
255
+ @staticmethod
256
+ def load_integration_tests(tests_path: str):
257
+ test_files = _read_valid_files(tests_path)
258
+ return _load__all_resource(tests_path, test_files)
259
+
260
+ @staticmethod
261
+ def load_variables(flows_path: str, flow_files: List[str]):
262
+ variables = []
263
+ variables_filename = _find_variables_filename(flow_files)
264
+ if variables_filename is not None:
265
+ flow_files.remove(variables_filename)
266
+ variables = _load__all_resource(flows_path, [variables_filename])
267
+ return variables
268
+
171
269
  def registration_json(self):
172
270
  flows_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'flows')
271
+ tests_path = str(Path(os.path.dirname(os.path.abspath(sys.argv[0]))) / 'integration')
173
272
 
174
- flow_files = []
175
273
  variables = []
176
- if isdir(flows_path):
177
- flow_files = [f for f in os.listdir(flows_path) if isfile(join(flows_path, f))]
178
- if 'variables.json' in flow_files:
179
- flow_files.remove('variables.json')
180
- variables = json.load(open(join(flows_path, 'variables.json')))
274
+ flow_files = _read_valid_files(flows_path)
275
+ if len(flow_files) == 0:
276
+ self.logger.warning(
277
+ f"Flows directory ({flows_path}) does not exist or contains no valid files. No flows will be installed.")
181
278
  else:
182
- self.logger.warning(f"Flows directory ({flows_path}) does not exist. No flows will be installed.")
279
+ variables = self.load_variables(flows_path, flow_files)
183
280
 
184
- flows = [json.load(open(join(flows_path, f))) for f in flow_files]
281
+ flows = _load__all_resource(flows_path, flow_files)
185
282
  actions = [self._action_json(action) for action in self.actions]
186
283
 
284
+ test_files = self.load_integration_tests(tests_path)
285
+ if len(test_files) == 0:
286
+ self.logger.warning(
287
+ f"tests directory ({tests_path}) does not exist or contains no valid files. No tests will be installed.")
288
+
187
289
  return {
188
- 'pluginCoordinates': self.coordinates.__json__(),
189
- 'displayName': self.display_name,
190
- 'description': self.description,
191
- 'actionKitVersion': metadata.version('deltafi'),
192
- 'dependencies': [],
193
- 'actions': actions,
194
- 'variables': variables,
195
- 'flowPlans': flows
196
- }
290
+ 'pluginCoordinates': self.coordinates.__json__(),
291
+ 'displayName': self.display_name,
292
+ 'description': self.description,
293
+ 'actionKitVersion': metadata.version('deltafi'),
294
+ 'image': self.image,
295
+ 'imagePullSecret': self.image_pull_secret,
296
+ 'dependencies': [],
297
+ 'actions': actions,
298
+ 'variables': variables,
299
+ 'flowPlans': flows,
300
+ 'integrationTests': test_files
301
+ }
197
302
 
198
303
  def _register(self):
199
304
  url = f"{self.core_url}/plugins"
@@ -217,7 +322,8 @@ class Plugin(object):
217
322
  for action in self.actions:
218
323
  threading.Thread(target=self._do_action, args=(action,)).start()
219
324
 
220
- threading.Thread(target=self._heartbeat).start()
325
+ hb_thread = threading.Thread(target=self._heartbeat)
326
+ hb_thread.start()
221
327
 
222
328
  self.logger.info("All threads running")
223
329
 
@@ -225,6 +331,7 @@ class Plugin(object):
225
331
  f.close()
226
332
 
227
333
  self.logger.info("Application initialization complete")
334
+ hb_thread.join()
228
335
 
229
336
  def _heartbeat(self):
230
337
  long_running_actions = set()
@@ -254,6 +361,22 @@ class Plugin(object):
254
361
  finally:
255
362
  time.sleep(10)
256
363
 
364
+ @staticmethod
365
+ def to_response(event, start_time, stop_time, result):
366
+ response = {
367
+ 'did': event.context.did,
368
+ 'flowName': event.context.flow_name,
369
+ 'flowId': event.context.flow_id,
370
+ 'actionName': event.context.action_name,
371
+ 'start': start_time,
372
+ 'stop': stop_time,
373
+ 'type': result.result_type,
374
+ 'metrics': [metric.json() for metric in result.metrics]
375
+ }
376
+ if result.result_key is not None:
377
+ response[result.result_key] = result.response()
378
+ return response
379
+
257
380
  def _do_action(self, action):
258
381
  action_logger = get_logger(self.action_name(action))
259
382
 
@@ -261,7 +384,7 @@ class Plugin(object):
261
384
  while True:
262
385
  try:
263
386
  event_string = self.queue.take(self.action_name(action))
264
- event = Event.create(json.loads(event_string), self.hostname, self.content_service, action_logger)
387
+ event = Event.create(json.loads(event_string), self.content_service, action_logger)
265
388
  start_time = time.time()
266
389
  action_logger.debug(f"Processing event for did {event.context.did}")
267
390
 
@@ -275,14 +398,6 @@ class Plugin(object):
275
398
  f"Action attempted to look up element {e.index + 1} (index {e.index}) from "
276
399
  f"content list of size {e.size}",
277
400
  f"{str(e)}\n{traceback.format_exc()}")
278
- except MissingDomainException as e:
279
- result = ErrorResult(event.context,
280
- f"Action attempted to access domain {e.name}, which does not exist",
281
- f"{str(e)}\n{traceback.format_exc()}")
282
- except MissingEnrichmentException as e:
283
- result = ErrorResult(event.context,
284
- f"Action attempted to access enrichment {e.name}, which does not exist",
285
- f"{str(e)}\n{traceback.format_exc()}")
286
401
  except MissingMetadataException as e:
287
402
  result = ErrorResult(event.context,
288
403
  f"Missing metadata with key {e.key}",
@@ -293,16 +408,10 @@ class Plugin(object):
293
408
 
294
409
  action.action_execution = None
295
410
 
296
- response = {
297
- 'did': event.context.did,
298
- 'action': event.context.action_flow + "." + event.context.action_name,
299
- 'start': start_time,
300
- 'stop': time.time(),
301
- 'type': result.result_type,
302
- 'metrics': [metric.json() for metric in result.metrics]
303
- }
304
- if result.result_key is not None:
305
- response[result.result_key] = result.response()
411
+ response = Plugin.to_response(
412
+ event, start_time, time.time(), result)
413
+
414
+ Plugin.orphaned_content_check(action_logger, event.context, result, response)
306
415
 
307
416
  topic = 'dgs'
308
417
  if event.return_address:
@@ -311,3 +420,42 @@ class Plugin(object):
311
420
  except BaseException as e:
312
421
  action_logger.error(f"Unexpected {type(e)} error: {str(e)}\n{traceback.format_exc()}")
313
422
  time.sleep(1)
423
+
424
+ @staticmethod
425
+ def orphaned_content_check(logger, context, result, response):
426
+ if len(context.saved_content) > 0:
427
+ to_delete = Plugin.find_unused_content(context.saved_content, result)
428
+ if len(to_delete) > 0:
429
+ errors = context.content_service.delete_all(to_delete)
430
+ for e in errors:
431
+ logger.error(f"Unable to delete object(s), {e}")
432
+ logger.warning(
433
+ f"Deleted {len(to_delete)} unused content entries for did {context.did} due to a {response['type']} event by {response['actionName']}")
434
+
435
+ @staticmethod
436
+ def find_unused_content(saved_content, result):
437
+ segments_in_use = Plugin.used_segment_names(result)
438
+ saved_segments = Plugin.get_segment_names(saved_content)
439
+ to_delete = []
440
+ for key, value in saved_segments.items():
441
+ if key not in segments_in_use:
442
+ to_delete.append(value)
443
+ return to_delete
444
+
445
+ @staticmethod
446
+ def used_segment_names(result):
447
+ segment_names = {}
448
+ if isinstance(result, TransformResult):
449
+ segment_names.update(result.get_segment_names())
450
+ elif isinstance(result, TransformResults):
451
+ segment_names.update(result.get_segment_names())
452
+ elif isinstance(result, IngressResult):
453
+ segment_names.update(result.get_segment_names())
454
+ return segment_names
455
+
456
+ @staticmethod
457
+ def get_segment_names(content_list):
458
+ segment_names = {}
459
+ for content in content_list:
460
+ segment_names.update(content.get_segment_names())
461
+ return segment_names
deltafi/result.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,17 +17,13 @@
17
17
  #
18
18
 
19
19
  import abc
20
- from enum import Enum
21
20
  import uuid
22
- from typing import Dict, List
21
+ from enum import Enum
22
+ from typing import NamedTuple
23
23
 
24
24
  from deltafi.domain import Content, Context
25
25
  from deltafi.metric import Metric
26
26
 
27
- ENDPOINT_TAG = "endpoint"
28
- FILES_OUT = "files_out"
29
- BYTES_OUT = "bytes_out"
30
-
31
27
 
32
28
  class Result:
33
29
  __metaclass__ = abc.ABCMeta
@@ -44,13 +40,12 @@ class Result:
44
40
 
45
41
  def add_metric(self, metric: Metric):
46
42
  self.metrics.append(metric)
43
+ return self
47
44
 
48
45
 
49
46
  class EgressResult(Result):
50
- def __init__(self, context: Context, destination: str, bytes_egressed: int):
47
+ def __init__(self, context: Context):
51
48
  super().__init__(None, 'EGRESS', context)
52
- self.add_metric(Metric(FILES_OUT, 1, {ENDPOINT_TAG: destination}))
53
- self.add_metric(Metric(BYTES_OUT, bytes_egressed, {ENDPOINT_TAG: destination}))
54
49
 
55
50
  def response(self):
56
51
  return None
@@ -76,7 +71,7 @@ class ErrorResult(Result):
76
71
 
77
72
 
78
73
  class FilterResult(Result):
79
- def __init__(self, context: Context, filtered_cause: str, filtered_context: str=None):
74
+ def __init__(self, context: Context, filtered_cause: str, filtered_context: str = None):
80
75
  super().__init__('filter', 'FILTER', context)
81
76
  self.filtered_cause = filtered_cause
82
77
  self.filtered_context = filtered_context
@@ -95,12 +90,13 @@ class FilterResult(Result):
95
90
 
96
91
 
97
92
  class IngressResultItem:
98
- def __init__(self, context: Context, filename: str):
93
+ def __init__(self, context: Context, delta_file_name: str):
99
94
  self.context = context
100
- self.filename = filename
101
95
  self._did = str(uuid.uuid4())
102
96
  self.content = []
103
97
  self.metadata = {}
98
+ self.annotations = {}
99
+ self.delta_file_name = delta_file_name
104
100
 
105
101
  @property
106
102
  def did(self):
@@ -117,15 +113,17 @@ class IngressResultItem:
117
113
  return self
118
114
 
119
115
  def save_string_content(self, string_data: str, name: str, media_type: str):
120
- segment = self.context.content_service.put_str(self.context.did, string_data)
121
- self.content.append(
122
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
116
+ segment = self.context.content_service.put_str(self._did, string_data)
117
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
118
+ self.content.append(c)
119
+ self.context.saved_content.append(c)
123
120
  return self
124
121
 
125
122
  def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
126
- segment = self.context.content_service.put_bytes(self.context.did, byte_data)
127
- self.content.append(
128
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
123
+ segment = self.context.content_service.put_bytes(self._did, byte_data)
124
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
125
+ self.content.append(c)
126
+ self.context.saved_content.append(c)
129
127
  return self
130
128
 
131
129
  def set_metadata(self, metadata: dict):
@@ -136,12 +134,23 @@ class IngressResultItem:
136
134
  self.metadata[key] = value
137
135
  return self
138
136
 
137
+ def get_segment_names(self):
138
+ segment_names = {}
139
+ for c in self.content:
140
+ segment_names.update(c.get_segment_names())
141
+ return segment_names
142
+
143
+ def annotate(self, key: str, value: str):
144
+ self.annotations[key] = value
145
+ return self
146
+
139
147
  def response(self):
140
148
  return {
141
149
  'did': self._did,
142
- 'filename': self.filename,
150
+ 'deltaFileName': self.delta_file_name,
143
151
  'metadata': self.metadata,
144
- 'content': [content.json() for content in self.content]
152
+ 'content': [content.json() for content in self.content],
153
+ 'annotations': self.annotations
145
154
  }
146
155
 
147
156
 
@@ -155,22 +164,28 @@ class IngressResult(Result):
155
164
  def __init__(self, context: Context):
156
165
  super().__init__('ingress', 'INGRESS', context)
157
166
  self.memo = None
158
- self.execute_immediate = False
159
167
  self.ingress_result_items = []
168
+ self.execute_immediate = False
160
169
  self.status = IngressStatusEnum.HEALTHY
161
- self.statusMessage = None
170
+ self.status_message = None
162
171
 
163
172
  def add_item(self, ingress_result_item: IngressResultItem):
164
173
  self.ingress_result_items.append(ingress_result_item)
165
174
  return self
166
175
 
176
+ def get_segment_names(self):
177
+ segment_names = {}
178
+ for ingress_item in self.ingress_result_items:
179
+ segment_names.update(ingress_item.get_segment_names())
180
+ return segment_names
181
+
167
182
  def response(self):
168
183
  return {
169
184
  'memo': self.memo,
170
185
  'executeImmediate': self.execute_immediate,
171
186
  'ingressItems': [ingress_result_item.response() for ingress_result_item in self.ingress_result_items],
172
187
  'status': self.status.value,
173
- 'statusMessage': self.statusMessage
188
+ 'statusMessage': self.status_message
174
189
  }
175
190
 
176
191
 
@@ -178,8 +193,8 @@ class TransformResult(Result):
178
193
  def __init__(self, context: Context):
179
194
  super().__init__('transform', 'TRANSFORM', context)
180
195
  self.content = []
181
- self.metadata = {}
182
196
  self.annotations = {}
197
+ self.metadata = {}
183
198
  self.delete_metadata_keys = []
184
199
 
185
200
  # content can be a single Content or a List[Content]
@@ -194,14 +209,16 @@ class TransformResult(Result):
194
209
 
195
210
  def save_string_content(self, string_data: str, name: str, media_type: str):
196
211
  segment = self.context.content_service.put_str(self.context.did, string_data)
197
- self.content.append(
198
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
212
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
213
+ self.content.append(c)
214
+ self.context.saved_content.append(c)
199
215
  return self
200
216
 
201
217
  def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
202
218
  segment = self.context.content_service.put_bytes(self.context.did, byte_data)
203
- self.content.append(
204
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
219
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
220
+ self.content.append(c)
221
+ self.context.saved_content.append(c)
205
222
  return self
206
223
 
207
224
  def set_metadata(self, metadata: dict):
@@ -220,10 +237,57 @@ class TransformResult(Result):
220
237
  self.delete_metadata_keys.append(key)
221
238
  return self
222
239
 
223
- def response(self):
240
+ def get_segment_names(self):
241
+ segment_names = {}
242
+ for c in self.content:
243
+ segment_names.update(c.get_segment_names())
244
+ return segment_names
245
+
246
+ def json(self):
224
247
  return {
248
+ 'did': self.context.did,
225
249
  'content': [content.json() for content in self.content],
226
- 'metadata': self.metadata,
227
250
  'annotations': self.annotations,
251
+ 'metadata': self.metadata,
228
252
  'deleteMetadataKeys': self.delete_metadata_keys
229
253
  }
254
+
255
+ def response(self):
256
+ return [self.json()]
257
+
258
+
259
+ class ChildTransformResult(TransformResult):
260
+ delta_file_name: str
261
+
262
+ def __init__(self, context: Context, delta_file_name: str = None):
263
+ super().__init__(context.child_context())
264
+ self.delta_file_name = delta_file_name
265
+
266
+ def json(self):
267
+ j = super().json()
268
+ if self.delta_file_name is not None:
269
+ j['name'] = self.delta_file_name
270
+ return j
271
+
272
+
273
+ class TransformResults(Result):
274
+ def __init__(self, context: Context):
275
+ super().__init__('transform', 'TRANSFORM', context)
276
+ self.child_results = []
277
+
278
+ def add_result(self, result: ChildTransformResult):
279
+ self.child_results.append(result)
280
+ return self
281
+
282
+ def get_segment_names(self):
283
+ segment_names = {}
284
+ for child_result in self.child_results:
285
+ segment_names.update(child_result.get_segment_names())
286
+ return segment_names
287
+
288
+ def response(self):
289
+ transform_events = []
290
+ for child_result in self.child_results:
291
+ json_dict = child_result.json()
292
+ transform_events.append(json_dict)
293
+ return transform_events
deltafi/storage.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ from typing import List, NamedTuple
22
22
  from urllib.parse import urlparse
23
23
 
24
24
  import minio
25
+ from minio.deleteobjects import DeleteObject
25
26
 
26
27
  BUCKET = 'storage'
27
28
 
@@ -86,3 +87,7 @@ class ContentService:
86
87
 
87
88
  def put_str(self, did, string_data):
88
89
  return self.put_bytes(did, string_data.encode('utf-8'))
90
+
91
+ def delete_all(self, segments: List[Segment]):
92
+ delete_objects = [DeleteObject(seg.id()) for seg in segments]
93
+ return self.minio_client.remove_objects(BUCKET, delete_objects)
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -29,10 +29,18 @@ def assert_equal_with_label(e, a, l):
29
29
  assert e == a, f"{l}. Expected:\n<<{e}>>\nBut was:\n<<{a}>>"
30
30
 
31
31
 
32
+ def assert_equal_short(e, a, l):
33
+ assert e == a, f"{l}. E:{e}, A::{a}"
34
+
35
+
32
36
  def assert_equal_len(e, a):
33
37
  assert len(e) == len(a), f"{len(e)} != {len(a)}"
34
38
 
35
39
 
40
+ def assert_equal_len_with_label(e, a, l):
41
+ assert len(e) == len(a), f"{l}. {len(e)} != {len(a)}"
42
+
43
+
36
44
  def assert_key_in(k, m):
37
45
  assert k in m, f"{k} not found"
38
46
 
@@ -45,4 +53,4 @@ def assert_keys_and_values(expected: Dict, actual: Dict):
45
53
  for key in expected:
46
54
  assert_key_in(key, actual)
47
55
  if expected[key] != IGNORE_VALUE:
48
- assert_equal(expected[key], actual[key])
56
+ assert_equal_short(expected[key], actual[key], f"invalid value for key {key}")