deltafi 1.2.20__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deltafi might be problematic. Click here for more details.

deltafi/result.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,17 +17,13 @@
17
17
  #
18
18
 
19
19
  import abc
20
- from enum import Enum
21
20
  import uuid
22
- from typing import Dict, List
21
+ from enum import Enum
22
+ from typing import NamedTuple
23
23
 
24
24
  from deltafi.domain import Content, Context
25
25
  from deltafi.metric import Metric
26
26
 
27
- ENDPOINT_TAG = "endpoint"
28
- FILES_OUT = "files_out"
29
- BYTES_OUT = "bytes_out"
30
-
31
27
 
32
28
  class Result:
33
29
  __metaclass__ = abc.ABCMeta
@@ -44,58 +40,17 @@ class Result:
44
40
 
45
41
  def add_metric(self, metric: Metric):
46
42
  self.metrics.append(metric)
47
-
48
-
49
- class DomainResult(Result):
50
- def __init__(self, context: Context):
51
- super().__init__('domain', 'DOMAIN', context)
52
- self.annotations = {}
53
-
54
- def annotate(self, key: str, value: str):
55
- self.annotations[key] = value
56
43
  return self
57
44
 
58
- def response(self):
59
- return {
60
- 'annotations': self.annotations
61
- }
62
-
63
45
 
64
46
  class EgressResult(Result):
65
- def __init__(self, context: Context, destination: str, bytes_egressed: int):
47
+ def __init__(self, context: Context):
66
48
  super().__init__(None, 'EGRESS', context)
67
- self.add_metric(Metric(FILES_OUT, 1, {ENDPOINT_TAG: destination}))
68
- self.add_metric(Metric(BYTES_OUT, bytes_egressed, {ENDPOINT_TAG: destination}))
69
49
 
70
50
  def response(self):
71
51
  return None
72
52
 
73
53
 
74
- class EnrichResult(Result):
75
- def __init__(self, context: Context):
76
- super().__init__('enrich', 'ENRICH', context)
77
- self.enrichments = []
78
- self.annotations = {}
79
-
80
- def enrich(self, name: str, value: str, media_type: str):
81
- self.enrichments.append({
82
- 'name': name,
83
- 'value': value,
84
- 'mediaType': media_type
85
- })
86
- return self
87
-
88
- def annotate(self, key: str, value: str):
89
- self.annotations[key] = value
90
- return self
91
-
92
- def response(self):
93
- return {
94
- 'enrichments': self.enrichments,
95
- 'annotations': self.annotations
96
- }
97
-
98
-
99
54
  class ErrorResult(Result):
100
55
  def __init__(self, context: Context, error_cause: str, error_context: str):
101
56
  super().__init__('error', 'ERROR', context)
@@ -116,7 +71,7 @@ class ErrorResult(Result):
116
71
 
117
72
 
118
73
  class FilterResult(Result):
119
- def __init__(self, context: Context, filtered_cause: str, filtered_context: str=None):
74
+ def __init__(self, context: Context, filtered_cause: str, filtered_context: str = None):
120
75
  super().__init__('filter', 'FILTER', context)
121
76
  self.filtered_cause = filtered_cause
122
77
  self.filtered_context = filtered_context
@@ -134,87 +89,13 @@ class FilterResult(Result):
134
89
  }
135
90
 
136
91
 
137
- class FormatResult(Result):
138
- def __init__(self, context: Context):
139
- super().__init__('format', 'FORMAT', context)
140
- self.content = None
141
- self.delete_metadata_keys = []
142
- self.metadata = {}
143
-
144
- def set_metadata(self, metadata: dict):
145
- self.metadata = metadata
146
- return self
147
-
148
- def add_metadata(self, key: str, value: str):
149
- self.metadata[key] = value
150
- return self
151
-
152
- def delete_metadata_key(self, key: str):
153
- self.delete_metadata_keys.append(key)
154
- return self
155
-
156
- def set_content(self, content: Content):
157
- self.content = content
158
- return self
159
-
160
- def save_string_content(self, string_data: str, name: str, media_type: str):
161
- segment = self.context.content_service.put_str(self.context.did, string_data)
162
- self.content = Content(name=name, segments=[segment], media_type=media_type,
163
- content_service=self.context.content_service)
164
- return self
165
-
166
- def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
167
- segment = self.context.content_service.put_bytes(self.context.did, byte_data)
168
- self.content = Content(name=name, segments=[segment], media_type=media_type,
169
- content_service=self.context.content_service)
170
- return self
171
-
172
- def response(self):
173
- return {
174
- 'content': self.content.json(),
175
- 'metadata': self.metadata,
176
- 'deleteMetadataKeys': self.delete_metadata_keys
177
- }
178
-
179
-
180
- class ChildFormatResult:
181
- def __init__(self, format_result: FormatResult = None):
182
- self._did = str(uuid.uuid4())
183
- self.format_result = format_result
184
-
185
- @property
186
- def did(self):
187
- return self._did
188
-
189
- def response(self):
190
- res = self.format_result.response()
191
- res["did"] = self._did
192
- return res
193
-
194
-
195
- class FormatManyResult(Result):
196
- def __init__(self, context: Context):
197
- super().__init__('formatMany', 'FORMAT_MANY', context)
198
- self.format_results = []
199
-
200
- def add_format_result(self, format_result):
201
- if isinstance(format_result, ChildFormatResult):
202
- self.format_results.append(format_result)
203
- else:
204
- self.format_results.append(ChildFormatResult(format_result))
205
- return self
206
-
207
- def response(self):
208
- return [format_result.response() for format_result in self.format_results]
209
-
210
-
211
92
  class IngressResultItem:
212
- def __init__(self, context: Context, filename: str):
93
+ def __init__(self, context: Context, delta_file_name: str):
213
94
  self.context = context
214
- self.filename = filename
215
95
  self._did = str(uuid.uuid4())
216
96
  self.content = []
217
97
  self.metadata = {}
98
+ self.delta_file_name = delta_file_name
218
99
 
219
100
  @property
220
101
  def did(self):
@@ -231,15 +112,17 @@ class IngressResultItem:
231
112
  return self
232
113
 
233
114
  def save_string_content(self, string_data: str, name: str, media_type: str):
234
- segment = self.context.content_service.put_str(self.context.did, string_data)
235
- self.content.append(
236
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
115
+ segment = self.context.content_service.put_str(self._did, string_data)
116
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
117
+ self.content.append(c)
118
+ self.context.saved_content.append(c)
237
119
  return self
238
120
 
239
121
  def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
240
- segment = self.context.content_service.put_bytes(self.context.did, byte_data)
241
- self.content.append(
242
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
122
+ segment = self.context.content_service.put_bytes(self._did, byte_data)
123
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
124
+ self.content.append(c)
125
+ self.context.saved_content.append(c)
243
126
  return self
244
127
 
245
128
  def set_metadata(self, metadata: dict):
@@ -250,10 +133,16 @@ class IngressResultItem:
250
133
  self.metadata[key] = value
251
134
  return self
252
135
 
136
+ def get_segment_names(self):
137
+ segment_names = {}
138
+ for c in self.content:
139
+ segment_names.update(c.get_segment_names())
140
+ return segment_names
141
+
253
142
  def response(self):
254
143
  return {
255
144
  'did': self._did,
256
- 'filename': self.filename,
145
+ 'deltaFileName': self.delta_file_name,
257
146
  'metadata': self.metadata,
258
147
  'content': [content.json() for content in self.content]
259
148
  }
@@ -269,32 +158,37 @@ class IngressResult(Result):
269
158
  def __init__(self, context: Context):
270
159
  super().__init__('ingress', 'INGRESS', context)
271
160
  self.memo = None
272
- self.execute_immediate = False
273
161
  self.ingress_result_items = []
162
+ self.execute_immediate = False
274
163
  self.status = IngressStatusEnum.HEALTHY
275
- self.statusMessage = None
164
+ self.status_message = None
276
165
 
277
166
  def add_item(self, ingress_result_item: IngressResultItem):
278
167
  self.ingress_result_items.append(ingress_result_item)
279
168
  return self
280
169
 
170
+ def get_segment_names(self):
171
+ segment_names = {}
172
+ for ingress_item in self.ingress_result_items:
173
+ segment_names.update(ingress_item.get_segment_names())
174
+ return segment_names
175
+
281
176
  def response(self):
282
177
  return {
283
178
  'memo': self.memo,
284
179
  'executeImmediate': self.execute_immediate,
285
180
  'ingressItems': [ingress_result_item.response() for ingress_result_item in self.ingress_result_items],
286
181
  'status': self.status.value,
287
- 'statusMessage': self.statusMessage
182
+ 'statusMessage': self.status_message
288
183
  }
289
184
 
290
185
 
291
- class LoadResult(Result):
186
+ class TransformResult(Result):
292
187
  def __init__(self, context: Context):
293
- super().__init__('load', 'LOAD', context)
188
+ super().__init__('transform', 'TRANSFORM', context)
294
189
  self.content = []
295
- self.metadata = {}
296
- self.domains = []
297
190
  self.annotations = {}
191
+ self.metadata = {}
298
192
  self.delete_metadata_keys = []
299
193
 
300
194
  # content can be a single Content or a List[Content]
@@ -309,14 +203,16 @@ class LoadResult(Result):
309
203
 
310
204
  def save_string_content(self, string_data: str, name: str, media_type: str):
311
205
  segment = self.context.content_service.put_str(self.context.did, string_data)
312
- self.content.append(
313
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
206
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
207
+ self.content.append(c)
208
+ self.context.saved_content.append(c)
314
209
  return self
315
210
 
316
211
  def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
317
212
  segment = self.context.content_service.put_bytes(self.context.did, byte_data)
318
- self.content.append(
319
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
213
+ c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
214
+ self.content.append(c)
215
+ self.context.saved_content.append(c)
320
216
  return self
321
217
 
322
218
  def set_metadata(self, metadata: dict):
@@ -327,13 +223,6 @@ class LoadResult(Result):
327
223
  self.metadata[key] = value
328
224
  return self
329
225
 
330
- def add_domain(self, name: str, value: str, media_type: str):
331
- self.domains.append({
332
- 'name': name,
333
- 'value': value,
334
- 'mediaType': media_type})
335
- return self
336
-
337
226
  def annotate(self, key: str, value: str):
338
227
  self.annotations[key] = value
339
228
  return self
@@ -342,133 +231,57 @@ class LoadResult(Result):
342
231
  self.delete_metadata_keys.append(key)
343
232
  return self
344
233
 
345
- def response(self):
234
+ def get_segment_names(self):
235
+ segment_names = {}
236
+ for c in self.content:
237
+ segment_names.update(c.get_segment_names())
238
+ return segment_names
239
+
240
+ def json(self):
346
241
  return {
347
- 'domains': self.domains,
242
+ 'did': self.context.did,
348
243
  'content': [content.json() for content in self.content],
349
- 'metadata': self.metadata,
350
244
  'annotations': self.annotations,
245
+ 'metadata': self.metadata,
351
246
  'deleteMetadataKeys': self.delete_metadata_keys
352
247
  }
353
248
 
354
-
355
- class ChildLoadResult:
356
- def __init__(self, load_result: LoadResult = None):
357
- self._did = str(uuid.uuid4())
358
- self.load_result = load_result
359
-
360
- @property
361
- def did(self):
362
- return self._did
363
-
364
- def response(self):
365
- res = self.load_result.response()
366
- res["did"] = self._did
367
- return res
368
-
369
-
370
- class LoadManyResult(Result):
371
- def __init__(self, context: Context):
372
- super().__init__('loadMany', 'LOAD_MANY', context)
373
- self.load_results = []
374
-
375
- def add_load_result(self, load_result):
376
- if isinstance(load_result, ChildLoadResult):
377
- self.load_results.append(load_result)
378
- else:
379
- self.load_results.append(ChildLoadResult(load_result))
380
- return self
381
-
382
249
  def response(self):
383
- return [load_result.response() for load_result in self.load_results]
384
-
385
-
386
- class ReinjectResult(Result):
387
- class ReinjectChild:
388
- def __init__(self, filename: str, flow: str, content: List[Content], metadata: Dict[str, str]):
389
- self.filename = filename
390
- self.flow = flow
391
- self.content = content
392
- self.metadata = metadata
250
+ return [self.json()]
393
251
 
394
- def json(self):
395
- return {
396
- 'filename': self.filename,
397
- 'flow': self.flow,
398
- 'metadata': self.metadata,
399
- 'content': [content.json() for content in self.content]
400
- }
401
252
 
402
- def __init__(self, context: Context):
403
- super().__init__('reinject', 'REINJECT', context)
404
- self.children = []
253
+ class ChildTransformResult(TransformResult):
254
+ delta_file_name: str
405
255
 
406
- def add_child(self, filename: str, flow: str, content: List[Content], metadata: Dict[str, str]):
407
- child = ReinjectResult.ReinjectChild(filename, flow, content, metadata)
408
- self.children.append(child)
256
+ def __init__(self, context: Context, delta_file_name: str = None):
257
+ super().__init__(context.child_context())
258
+ self.delta_file_name = delta_file_name
409
259
 
410
- def response(self):
411
- return [child.json() for child in self.children]
260
+ def json(self):
261
+ j = super().json()
262
+ if self.delta_file_name is not None:
263
+ j['name'] = self.delta_file_name
264
+ return j
412
265
 
413
266
 
414
- class TransformResult(Result):
267
+ class TransformResults(Result):
415
268
  def __init__(self, context: Context):
416
269
  super().__init__('transform', 'TRANSFORM', context)
417
- self.content = []
418
- self.metadata = {}
419
- self.annotations = {}
420
- self.delete_metadata_keys = []
421
-
422
- # content can be a single Content or a List[Content]
423
- def add_content(self, content):
424
- if content:
425
- if type(content) == list:
426
- self.content.extend(content)
427
- else:
428
- self.content.append(content)
429
-
430
- return self
431
-
432
- def save_string_content(self, string_data: str, name: str, media_type: str):
433
- segment = self.context.content_service.put_str(self.context.did, string_data)
434
- self.content.append(
435
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
436
- return self
437
-
438
- def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
439
- segment = self.context.content_service.put_bytes(self.context.did, byte_data)
440
- self.content.append(
441
- Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service))
442
- return self
443
-
444
- def set_metadata(self, metadata: dict):
445
- self.metadata = metadata
446
- return self
447
-
448
- def add_metadata(self, key: str, value: str):
449
- self.metadata[key] = value
450
- return self
451
-
452
- def annotate(self, key: str, value: str):
453
- self.annotations[key] = value
454
- return self
270
+ self.child_results = []
455
271
 
456
- def delete_metadata_key(self, key: str):
457
- self.delete_metadata_keys.append(key)
272
+ def add_result(self, result: ChildTransformResult):
273
+ self.child_results.append(result)
458
274
  return self
459
275
 
460
- def response(self):
461
- return {
462
- 'content': [content.json() for content in self.content],
463
- 'metadata': self.metadata,
464
- 'annotations': self.annotations,
465
- 'deleteMetadataKeys': self.delete_metadata_keys
466
- }
467
-
468
-
469
- class ValidateResult(Result):
470
- def __init__(self, context: Context):
471
- super().__init__(None, 'VALIDATE', context)
276
+ def get_segment_names(self):
277
+ segment_names = {}
278
+ for child_result in self.child_results:
279
+ segment_names.update(child_result.get_segment_names())
280
+ return segment_names
472
281
 
473
282
  def response(self):
474
- return None
283
+ transform_events = []
284
+ for child_result in self.child_results:
285
+ json_dict = child_result.json()
286
+ transform_events.append(json_dict)
287
+ return transform_events
deltafi/storage.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ from typing import List, NamedTuple
22
22
  from urllib.parse import urlparse
23
23
 
24
24
  import minio
25
+ from minio.deleteobjects import DeleteObject
25
26
 
26
27
  BUCKET = 'storage'
27
28
 
@@ -86,3 +87,7 @@ class ContentService:
86
87
 
87
88
  def put_str(self, did, string_data):
88
89
  return self.put_bytes(did, string_data.encode('utf-8'))
90
+
91
+ def delete_all(self, segments: List[Segment]):
92
+ delete_objects = [DeleteObject(seg.id()) for seg in segments]
93
+ return self.minio_client.remove_objects(BUCKET, delete_objects)
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2024 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.