deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltafi/__init__.py +3 -1
- deltafi/action.py +262 -102
- deltafi/actioneventqueue.py +29 -4
- deltafi/actiontype.py +7 -11
- deltafi/domain.py +241 -88
- deltafi/exception.py +1 -11
- deltafi/genericmodel.py +38 -0
- deltafi/input.py +6 -163
- deltafi/logger.py +16 -4
- deltafi/lookuptable.py +292 -0
- deltafi/metric.py +2 -2
- deltafi/plugin.py +374 -87
- deltafi/result.py +174 -172
- deltafi/resultmessage.py +56 -0
- deltafi/storage.py +20 -90
- deltafi/test_kit/__init__.py +19 -0
- deltafi/test_kit/assertions.py +56 -0
- deltafi/test_kit/compare_helpers.py +293 -0
- deltafi/test_kit/constants.py +23 -0
- deltafi/test_kit/egress.py +54 -0
- deltafi/test_kit/framework.py +390 -0
- deltafi/test_kit/timed_ingress.py +104 -0
- deltafi/test_kit/transform.py +103 -0
- deltafi/types.py +31 -0
- deltafi-2.40.0.dist-info/METADATA +82 -0
- deltafi-2.40.0.dist-info/RECORD +27 -0
- {deltafi-0.109.0.dist-info → deltafi-2.40.0.dist-info}/WHEEL +1 -1
- deltafi-0.109.0.dist-info/METADATA +0 -41
- deltafi-0.109.0.dist-info/RECORD +0 -15
deltafi/result.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#
|
|
2
2
|
# DeltaFi - Data transformation and enrichment platform
|
|
3
3
|
#
|
|
4
|
-
# Copyright 2021-
|
|
4
|
+
# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -17,15 +17,12 @@
|
|
|
17
17
|
#
|
|
18
18
|
|
|
19
19
|
import abc
|
|
20
|
-
from typing import Dict, List
|
|
21
20
|
import uuid
|
|
21
|
+
from enum import Enum
|
|
22
22
|
|
|
23
|
-
from deltafi.domain import Content, Context
|
|
23
|
+
from deltafi.domain import Content, Context
|
|
24
24
|
from deltafi.metric import Metric
|
|
25
|
-
|
|
26
|
-
ENDPOINT_TAG = "endpoint"
|
|
27
|
-
FILES_OUT = "files_out"
|
|
28
|
-
BYTES_OUT = "bytes_out"
|
|
25
|
+
from deltafi.resultmessage import LogMessage
|
|
29
26
|
|
|
30
27
|
|
|
31
28
|
class Result:
|
|
@@ -34,6 +31,7 @@ class Result:
|
|
|
34
31
|
def __init__(self, result_key, result_type, context):
|
|
35
32
|
self.result_key = result_key
|
|
36
33
|
self.result_type = result_type
|
|
34
|
+
self.messages = []
|
|
37
35
|
self.metrics = []
|
|
38
36
|
self.context = context
|
|
39
37
|
|
|
@@ -43,133 +41,80 @@ class Result:
|
|
|
43
41
|
|
|
44
42
|
def add_metric(self, metric: Metric):
|
|
45
43
|
self.metrics.append(metric)
|
|
44
|
+
return self
|
|
46
45
|
|
|
46
|
+
def log_info(self, message: str):
|
|
47
|
+
self.messages.append(LogMessage.info(self.context.action_name, message))
|
|
48
|
+
return self
|
|
47
49
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
super().__init__('domain', 'DOMAIN', context)
|
|
51
|
-
self.indexed_metadata = {}
|
|
52
|
-
|
|
53
|
-
def index_metadata(self, key: str, value: str):
|
|
54
|
-
self.indexed_metadata[key] = value
|
|
50
|
+
def log_warning(self, message: str):
|
|
51
|
+
self.messages.append(LogMessage.warning(self.context.action_name, message))
|
|
55
52
|
return self
|
|
56
53
|
|
|
57
|
-
def
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
}
|
|
54
|
+
def log_error(self, message: str):
|
|
55
|
+
self.messages.append(LogMessage.error(self.context.action_name, message))
|
|
56
|
+
return self
|
|
61
57
|
|
|
62
58
|
|
|
63
59
|
class EgressResult(Result):
|
|
64
|
-
def __init__(self, context: Context
|
|
60
|
+
def __init__(self, context: Context):
|
|
65
61
|
super().__init__(None, 'EGRESS', context)
|
|
66
|
-
self.add_metric(Metric(FILES_OUT, 1, {ENDPOINT_TAG: destination}))
|
|
67
|
-
self.add_metric(Metric(BYTES_OUT, bytes_egressed, {ENDPOINT_TAG: destination}))
|
|
68
62
|
|
|
69
63
|
def response(self):
|
|
70
64
|
return None
|
|
71
65
|
|
|
72
66
|
|
|
73
|
-
class EnrichResult(Result):
|
|
74
|
-
def __init__(self, context: Context):
|
|
75
|
-
super().__init__('enrich', 'ENRICH', context)
|
|
76
|
-
self.enrichments = []
|
|
77
|
-
self.indexed_metadata = {}
|
|
78
|
-
|
|
79
|
-
def enrich(self, name: str, value: str, media_type: str):
|
|
80
|
-
self.enrichments.append({
|
|
81
|
-
'name': name,
|
|
82
|
-
'value': value,
|
|
83
|
-
'mediaType': media_type
|
|
84
|
-
})
|
|
85
|
-
return self
|
|
86
|
-
|
|
87
|
-
def index_metadata(self, key: str, value: str):
|
|
88
|
-
self.indexed_metadata[key] = value
|
|
89
|
-
return self
|
|
90
|
-
|
|
91
|
-
def response(self):
|
|
92
|
-
return {
|
|
93
|
-
'enrichments': self.enrichments,
|
|
94
|
-
'indexedMetadata': self.indexed_metadata
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
|
|
98
67
|
class ErrorResult(Result):
|
|
99
68
|
def __init__(self, context: Context, error_cause: str, error_context: str):
|
|
100
69
|
super().__init__('error', 'ERROR', context)
|
|
101
70
|
self.error_cause = error_cause
|
|
102
71
|
self.error_context = error_context
|
|
72
|
+
self.annotations = {}
|
|
73
|
+
|
|
74
|
+
def annotate(self, key: str, value: str):
|
|
75
|
+
self.annotations[key] = value
|
|
76
|
+
return self
|
|
103
77
|
|
|
104
78
|
def response(self):
|
|
79
|
+
self.log_error(self.error_cause + '\n' + self.error_context)
|
|
105
80
|
return {
|
|
106
81
|
'cause': self.error_cause,
|
|
107
|
-
'context': self.error_context
|
|
82
|
+
'context': self.error_context,
|
|
83
|
+
'annotations': self.annotations
|
|
108
84
|
}
|
|
109
85
|
|
|
110
86
|
|
|
111
87
|
class FilterResult(Result):
|
|
112
|
-
def __init__(self, context: Context, filtered_cause: str):
|
|
88
|
+
def __init__(self, context: Context, filtered_cause: str, filtered_context: str = None):
|
|
113
89
|
super().__init__('filter', 'FILTER', context)
|
|
114
90
|
self.filtered_cause = filtered_cause
|
|
91
|
+
self.filtered_context = filtered_context
|
|
92
|
+
self.annotations = {}
|
|
115
93
|
|
|
116
|
-
def
|
|
117
|
-
|
|
118
|
-
'message': self.filtered_cause
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
class FormatResult(Result):
|
|
123
|
-
def __init__(self, context: Context):
|
|
124
|
-
super().__init__('format', 'FORMAT', context)
|
|
125
|
-
self.content = None
|
|
126
|
-
self.metadata = {}
|
|
127
|
-
|
|
128
|
-
def add_metadata(self, key: str, value: str):
|
|
129
|
-
self.metadata[key] = value
|
|
130
|
-
return self
|
|
131
|
-
|
|
132
|
-
def set_content(self, content: Content):
|
|
133
|
-
self.content = content
|
|
134
|
-
return self
|
|
135
|
-
|
|
136
|
-
def save_string_content(self, string_data: str, name: str, media_type: str):
|
|
137
|
-
content_reference = self.context.content_service.put_str(self.context.did, string_data, media_type)
|
|
138
|
-
self.content = Content(name=name, content_reference=content_reference, content_service=self.context.content_service)
|
|
139
|
-
return self
|
|
140
|
-
|
|
141
|
-
def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
|
|
142
|
-
content_reference = self.context.content_service.put_bytes(self.context.did, byte_data, media_type)
|
|
143
|
-
self.content = Content(name=name, content_reference=content_reference, content_service=self.context.content_service)
|
|
94
|
+
def annotate(self, key: str, value: str):
|
|
95
|
+
self.annotations[key] = value
|
|
144
96
|
return self
|
|
145
97
|
|
|
146
98
|
def response(self):
|
|
147
99
|
return {
|
|
148
|
-
'
|
|
149
|
-
'
|
|
150
|
-
'
|
|
100
|
+
'message': self.filtered_cause,
|
|
101
|
+
'context': self.filtered_context,
|
|
102
|
+
'annotations': self.annotations
|
|
151
103
|
}
|
|
152
104
|
|
|
153
105
|
|
|
154
|
-
class
|
|
155
|
-
def __init__(self, context: Context):
|
|
156
|
-
|
|
157
|
-
self.
|
|
158
|
-
|
|
159
|
-
def add_format_result(self, format_result: FormatResult):
|
|
160
|
-
self.format_results.append(format_result)
|
|
161
|
-
return self
|
|
162
|
-
|
|
163
|
-
def response(self):
|
|
164
|
-
return [format_result.response() for format_result in self.format_results]
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
class LoadResult(Result):
|
|
168
|
-
def __init__(self, context: Context):
|
|
169
|
-
super().__init__('load', 'LOAD', context)
|
|
106
|
+
class IngressResultItem:
|
|
107
|
+
def __init__(self, context: Context, delta_file_name: str):
|
|
108
|
+
self.context = context
|
|
109
|
+
self._did = str(uuid.uuid4())
|
|
170
110
|
self.content = []
|
|
171
111
|
self.metadata = {}
|
|
172
|
-
self.
|
|
112
|
+
self.annotations = {}
|
|
113
|
+
self.delta_file_name = delta_file_name
|
|
114
|
+
|
|
115
|
+
@property
|
|
116
|
+
def did(self):
|
|
117
|
+
return self._did
|
|
173
118
|
|
|
174
119
|
# content can be a single Content or a List[Content]
|
|
175
120
|
def add_content(self, content):
|
|
@@ -181,97 +126,94 @@ class LoadResult(Result):
|
|
|
181
126
|
|
|
182
127
|
return self
|
|
183
128
|
|
|
184
|
-
def save_string_content(self, string_data: str, name: str, media_type: str):
|
|
185
|
-
|
|
186
|
-
|
|
129
|
+
def save_string_content(self, string_data: str, name: str, media_type: str, tags: set = None):
|
|
130
|
+
segment = self.context.content_service.put_str(self._did, string_data)
|
|
131
|
+
c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
|
|
132
|
+
if tags is not None:
|
|
133
|
+
c.add_tags(tags)
|
|
134
|
+
self.content.append(c)
|
|
135
|
+
self.context.saved_content.append(c)
|
|
136
|
+
return self
|
|
137
|
+
|
|
138
|
+
def save_byte_content(self, byte_data: bytes, name: str, media_type: str, tags: set = None):
|
|
139
|
+
segment = self.context.content_service.put_bytes(self._did, byte_data)
|
|
140
|
+
c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
|
|
141
|
+
if tags is not None:
|
|
142
|
+
c.add_tags(tags)
|
|
143
|
+
self.content.append(c)
|
|
144
|
+
self.context.saved_content.append(c)
|
|
187
145
|
return self
|
|
188
146
|
|
|
189
|
-
def
|
|
190
|
-
|
|
191
|
-
self.content.append(Content(name=name, content_reference=content_reference, content_service=self.context.content_service))
|
|
147
|
+
def set_metadata(self, metadata: dict):
|
|
148
|
+
self.metadata = metadata
|
|
192
149
|
return self
|
|
193
150
|
|
|
194
151
|
def add_metadata(self, key: str, value: str):
|
|
195
152
|
self.metadata[key] = value
|
|
196
153
|
return self
|
|
197
154
|
|
|
198
|
-
def
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
155
|
+
def get_segment_names(self):
|
|
156
|
+
segment_names = {}
|
|
157
|
+
for c in self.content:
|
|
158
|
+
segment_names.update(c.get_segment_names())
|
|
159
|
+
return segment_names
|
|
160
|
+
|
|
161
|
+
def annotate(self, key: str, value: str):
|
|
162
|
+
self.annotations[key] = value
|
|
203
163
|
return self
|
|
204
164
|
|
|
205
165
|
def response(self):
|
|
206
166
|
return {
|
|
207
|
-
'
|
|
208
|
-
'
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
167
|
+
'did': self._did,
|
|
168
|
+
'deltaFileName': self.delta_file_name,
|
|
169
|
+
'metadata': self.metadata,
|
|
170
|
+
'content': [content.json() for content in self.content],
|
|
171
|
+
'annotations': self.annotations
|
|
212
172
|
}
|
|
213
173
|
|
|
214
174
|
|
|
215
|
-
class
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
@property
|
|
221
|
-
def did(self):
|
|
222
|
-
return self._did
|
|
223
|
-
|
|
224
|
-
def response(self):
|
|
225
|
-
res = self.load_result.response()
|
|
226
|
-
res["did"] = self._did
|
|
227
|
-
return res
|
|
175
|
+
class IngressStatusEnum(Enum):
|
|
176
|
+
HEALTHY = 'HEALTHY'
|
|
177
|
+
DEGRADED = 'DEGRADED'
|
|
178
|
+
UNHEALTHY = 'UNHEALTHY'
|
|
228
179
|
|
|
229
180
|
|
|
230
|
-
class
|
|
181
|
+
class IngressResult(Result):
|
|
231
182
|
def __init__(self, context: Context):
|
|
232
|
-
super().__init__('
|
|
233
|
-
self.
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
183
|
+
super().__init__('ingress', 'INGRESS', context)
|
|
184
|
+
self.memo = None
|
|
185
|
+
self.ingress_result_items = []
|
|
186
|
+
self.execute_immediate = False
|
|
187
|
+
self.status = IngressStatusEnum.HEALTHY
|
|
188
|
+
self.status_message = None
|
|
189
|
+
|
|
190
|
+
def add_item(self, ingress_result_item: IngressResultItem):
|
|
191
|
+
self.ingress_result_items.append(ingress_result_item)
|
|
240
192
|
return self
|
|
241
193
|
|
|
242
|
-
def
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
class ReinjectChild:
|
|
248
|
-
def __init__(self, source_info: SourceInfo, content: List[Content]):
|
|
249
|
-
self.source_info = source_info
|
|
250
|
-
self.content = content
|
|
251
|
-
|
|
252
|
-
def json(self):
|
|
253
|
-
return {
|
|
254
|
-
'sourceInfo': self.source_info.json(),
|
|
255
|
-
'content': [content.json() for content in self.content]
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
def __init__(self, context: Context):
|
|
259
|
-
super().__init__('reinject', 'REINJECT', context)
|
|
260
|
-
self.children = []
|
|
261
|
-
|
|
262
|
-
def add_child(self, filename: str, flow: str, metadata: Dict[str, str], content: List[Content]):
|
|
263
|
-
child = ReinjectResult.ReinjectChild(SourceInfo(filename, flow, metadata), content)
|
|
264
|
-
self.children.append(child)
|
|
194
|
+
def get_segment_names(self):
|
|
195
|
+
segment_names = {}
|
|
196
|
+
for ingress_item in self.ingress_result_items:
|
|
197
|
+
segment_names.update(ingress_item.get_segment_names())
|
|
198
|
+
return segment_names
|
|
265
199
|
|
|
266
200
|
def response(self):
|
|
267
|
-
return
|
|
201
|
+
return {
|
|
202
|
+
'memo': self.memo,
|
|
203
|
+
'executeImmediate': self.execute_immediate,
|
|
204
|
+
'ingressItems': [ingress_result_item.response() for ingress_result_item in self.ingress_result_items],
|
|
205
|
+
'status': self.status.value,
|
|
206
|
+
'statusMessage': self.status_message
|
|
207
|
+
}
|
|
268
208
|
|
|
269
209
|
|
|
270
210
|
class TransformResult(Result):
|
|
271
211
|
def __init__(self, context: Context):
|
|
272
212
|
super().__init__('transform', 'TRANSFORM', context)
|
|
273
213
|
self.content = []
|
|
214
|
+
self.annotations = {}
|
|
274
215
|
self.metadata = {}
|
|
216
|
+
self.delete_metadata_keys = []
|
|
275
217
|
|
|
276
218
|
# content can be a single Content or a List[Content]
|
|
277
219
|
def add_content(self, content):
|
|
@@ -283,32 +225,92 @@ class TransformResult(Result):
|
|
|
283
225
|
|
|
284
226
|
return self
|
|
285
227
|
|
|
286
|
-
def save_string_content(self, string_data: str, name: str, media_type: str):
|
|
287
|
-
|
|
288
|
-
|
|
228
|
+
def save_string_content(self, string_data: str, name: str, media_type: str, tags: set = None):
|
|
229
|
+
segment = self.context.content_service.put_str(self.context.did, string_data)
|
|
230
|
+
c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
|
|
231
|
+
if tags is not None:
|
|
232
|
+
c.add_tags(tags)
|
|
233
|
+
self.content.append(c)
|
|
234
|
+
self.context.saved_content.append(c)
|
|
289
235
|
return self
|
|
290
236
|
|
|
291
|
-
def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
|
|
292
|
-
|
|
293
|
-
|
|
237
|
+
def save_byte_content(self, byte_data: bytes, name: str, media_type: str, tags: set = None):
|
|
238
|
+
segment = self.context.content_service.put_bytes(self.context.did, byte_data)
|
|
239
|
+
c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
|
|
240
|
+
if tags is not None:
|
|
241
|
+
c.add_tags(tags)
|
|
242
|
+
self.content.append(c)
|
|
243
|
+
self.context.saved_content.append(c)
|
|
244
|
+
return self
|
|
245
|
+
|
|
246
|
+
def set_metadata(self, metadata: dict):
|
|
247
|
+
self.metadata = metadata
|
|
294
248
|
return self
|
|
295
249
|
|
|
296
250
|
def add_metadata(self, key: str, value: str):
|
|
297
251
|
self.metadata[key] = value
|
|
298
252
|
return self
|
|
299
253
|
|
|
300
|
-
def
|
|
254
|
+
def annotate(self, key: str, value: str):
|
|
255
|
+
self.annotations[key] = value
|
|
256
|
+
return self
|
|
257
|
+
|
|
258
|
+
def delete_metadata_key(self, key: str):
|
|
259
|
+
self.delete_metadata_keys.append(key)
|
|
260
|
+
return self
|
|
261
|
+
|
|
262
|
+
def get_segment_names(self):
|
|
263
|
+
segment_names = {}
|
|
264
|
+
for c in self.content:
|
|
265
|
+
segment_names.update(c.get_segment_names())
|
|
266
|
+
return segment_names
|
|
267
|
+
|
|
268
|
+
def json(self):
|
|
301
269
|
return {
|
|
302
|
-
'
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
270
|
+
'did': self.context.did,
|
|
271
|
+
'content': [content.json() for content in self.content],
|
|
272
|
+
'annotations': self.annotations,
|
|
273
|
+
'metadata': self.metadata,
|
|
274
|
+
'deleteMetadataKeys': self.delete_metadata_keys
|
|
306
275
|
}
|
|
307
276
|
|
|
277
|
+
def response(self):
|
|
278
|
+
return [self.json()]
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class ChildTransformResult(TransformResult):
|
|
282
|
+
delta_file_name: str
|
|
283
|
+
|
|
284
|
+
def __init__(self, context: Context, delta_file_name: str = None):
|
|
285
|
+
super().__init__(context.child_context())
|
|
286
|
+
self.delta_file_name = delta_file_name
|
|
287
|
+
|
|
288
|
+
def json(self):
|
|
289
|
+
j = super().json()
|
|
290
|
+
j['messages'] = [message.json() for message in self.messages]
|
|
291
|
+
if self.delta_file_name is not None:
|
|
292
|
+
j['name'] = self.delta_file_name
|
|
293
|
+
return j
|
|
294
|
+
|
|
308
295
|
|
|
309
|
-
class
|
|
296
|
+
class TransformResults(Result):
|
|
310
297
|
def __init__(self, context: Context):
|
|
311
|
-
super().__init__(
|
|
298
|
+
super().__init__('transform', 'TRANSFORM', context)
|
|
299
|
+
self.child_results = []
|
|
300
|
+
|
|
301
|
+
def add_result(self, result: ChildTransformResult):
|
|
302
|
+
self.child_results.append(result)
|
|
303
|
+
return self
|
|
304
|
+
|
|
305
|
+
def get_segment_names(self):
|
|
306
|
+
segment_names = {}
|
|
307
|
+
for child_result in self.child_results:
|
|
308
|
+
segment_names.update(child_result.get_segment_names())
|
|
309
|
+
return segment_names
|
|
312
310
|
|
|
313
311
|
def response(self):
|
|
314
|
-
|
|
312
|
+
transform_events = []
|
|
313
|
+
for child_result in self.child_results:
|
|
314
|
+
json_dict = child_result.json()
|
|
315
|
+
transform_events.append(json_dict)
|
|
316
|
+
return transform_events
|
deltafi/resultmessage.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#
|
|
2
|
+
# DeltaFi - Data transformation and enrichment platform
|
|
3
|
+
#
|
|
4
|
+
# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
|
+
#
|
|
6
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
|
+
# you may not use this file except in compliance with the License.
|
|
8
|
+
# You may obtain a copy of the License at
|
|
9
|
+
#
|
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
#
|
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
14
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
15
|
+
# See the License for the specific language governing permissions and
|
|
16
|
+
# limitations under the License.
|
|
17
|
+
#
|
|
18
|
+
|
|
19
|
+
import time
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import NamedTuple
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class LogSeverity(Enum):
|
|
25
|
+
TRACE = "TRACE"
|
|
26
|
+
INFO = "INFO"
|
|
27
|
+
WARNING = "WARNING"
|
|
28
|
+
ERROR = "ERROR"
|
|
29
|
+
USER = "USER"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class LogMessage(NamedTuple):
|
|
33
|
+
severity: LogSeverity
|
|
34
|
+
created: int
|
|
35
|
+
source: str
|
|
36
|
+
message: str
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def info(cls, source: str, message: str):
|
|
40
|
+
return LogMessage(severity=LogSeverity.INFO, created=time.time(), source=source, message=message)
|
|
41
|
+
|
|
42
|
+
@classmethod
|
|
43
|
+
def warning(cls, source: str, message: str):
|
|
44
|
+
return LogMessage(severity=LogSeverity.WARNING, created=time.time(), source=source,
|
|
45
|
+
message=message)
|
|
46
|
+
|
|
47
|
+
@classmethod
|
|
48
|
+
def error(cls, source: str, message: str):
|
|
49
|
+
return LogMessage(severity=LogSeverity.ERROR, created=time.time(), source=source,
|
|
50
|
+
message=message)
|
|
51
|
+
|
|
52
|
+
def json(self):
|
|
53
|
+
return {'severity': self.severity.value,
|
|
54
|
+
'created': self.created,
|
|
55
|
+
'source': self.source,
|
|
56
|
+
'message': self.message}
|
deltafi/storage.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#
|
|
2
2
|
# DeltaFi - Data transformation and enrichment platform
|
|
3
3
|
#
|
|
4
|
-
# Copyright 2021-
|
|
4
|
+
# Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
|
|
5
5
|
#
|
|
6
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
7
7
|
# you may not use this file except in compliance with the License.
|
|
@@ -22,8 +22,7 @@ from typing import List, NamedTuple
|
|
|
22
22
|
from urllib.parse import urlparse
|
|
23
23
|
|
|
24
24
|
import minio
|
|
25
|
-
|
|
26
|
-
BUCKET = 'storage'
|
|
25
|
+
from minio.deleteobjects import DeleteObject
|
|
27
26
|
|
|
28
27
|
|
|
29
28
|
class Segment(NamedTuple):
|
|
@@ -55,81 +54,10 @@ class Segment(NamedTuple):
|
|
|
55
54
|
return f"{self.did[:3]}/{self.did}/{self.uuid}"
|
|
56
55
|
|
|
57
56
|
|
|
58
|
-
class ContentReference(NamedTuple):
|
|
59
|
-
segments: List[Segment]
|
|
60
|
-
media_type: str
|
|
61
|
-
|
|
62
|
-
def subreference_segments(self, offset: int, size: int):
|
|
63
|
-
if offset < 0:
|
|
64
|
-
raise ValueError(f"subreference offset must be positive, got {offset}")
|
|
65
|
-
|
|
66
|
-
if size < 0:
|
|
67
|
-
raise ValueError(f"subreference size must be positive, got {size}")
|
|
68
|
-
|
|
69
|
-
if size + offset > self.get_size():
|
|
70
|
-
raise ValueError(f"Size + offset ({size} + {offset}) exceeds total ContentReference size of {self.get_size()}")
|
|
71
|
-
|
|
72
|
-
if size == 0:
|
|
73
|
-
return []
|
|
74
|
-
|
|
75
|
-
new_segments = []
|
|
76
|
-
offset_remaining = offset
|
|
77
|
-
size_remaining = size
|
|
78
|
-
|
|
79
|
-
for segment in self.segments:
|
|
80
|
-
if offset_remaining > 0:
|
|
81
|
-
if segment.size < offset_remaining:
|
|
82
|
-
# the first offset is past this segment, skip it
|
|
83
|
-
offset_remaining -= segment.size
|
|
84
|
-
continue
|
|
85
|
-
else:
|
|
86
|
-
# chop off the front of this segment
|
|
87
|
-
segment = Segment(uuid=segment.uuid,
|
|
88
|
-
offset=segment.offset + offset_remaining,
|
|
89
|
-
size=segment.size - offset_remaining,
|
|
90
|
-
did=segment.did)
|
|
91
|
-
offset_remaining = 0
|
|
92
|
-
|
|
93
|
-
if size_remaining < segment.size:
|
|
94
|
-
# chop off the back of this segment
|
|
95
|
-
segment = Segment(uuid=segment.uuid,
|
|
96
|
-
offset=segment.offset,
|
|
97
|
-
size=size_remaining,
|
|
98
|
-
did=segment.did)
|
|
99
|
-
size_remaining -= segment.size
|
|
100
|
-
new_segments.append(segment)
|
|
101
|
-
if size_remaining == 0:
|
|
102
|
-
break
|
|
103
|
-
|
|
104
|
-
return new_segments
|
|
105
|
-
|
|
106
|
-
def subreference(self, offset: int, size: int):
|
|
107
|
-
return ContentReference(segments=self.subreference_segments(offset, size),
|
|
108
|
-
media_type=self.media_type)
|
|
109
|
-
|
|
110
|
-
def get_size(self):
|
|
111
|
-
sum = 0
|
|
112
|
-
for segment in self.segments:
|
|
113
|
-
sum = sum + segment.size
|
|
114
|
-
return sum
|
|
115
|
-
|
|
116
|
-
def json(self):
|
|
117
|
-
return {
|
|
118
|
-
'segments': [segment.json() for segment in self.segments],
|
|
119
|
-
'mediaType': self.media_type
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
@classmethod
|
|
123
|
-
def from_dict(cls, content_reference: dict):
|
|
124
|
-
segments = [Segment.from_dict(segment) for segment in content_reference['segments']]
|
|
125
|
-
media_type = content_reference['mediaType']
|
|
126
|
-
return ContentReference(segments=segments,
|
|
127
|
-
media_type=media_type)
|
|
128
|
-
|
|
129
|
-
|
|
130
57
|
class ContentService:
|
|
131
|
-
def __init__(self, url, access_key, secret_key):
|
|
58
|
+
def __init__(self, url, access_key, secret_key, bucket_name):
|
|
132
59
|
parsed = urlparse(url)
|
|
60
|
+
self.bucket_name = bucket_name
|
|
133
61
|
self.minio_client = minio.Minio(
|
|
134
62
|
f"{parsed.hostname}:{str(parsed.port)}",
|
|
135
63
|
access_key=access_key,
|
|
@@ -137,26 +65,28 @@ class ContentService:
|
|
|
137
65
|
secure=False
|
|
138
66
|
)
|
|
139
67
|
|
|
140
|
-
found = self.minio_client.bucket_exists(
|
|
68
|
+
found = self.minio_client.bucket_exists(self.bucket_name)
|
|
141
69
|
if not found:
|
|
142
|
-
raise RuntimeError(f"Minio bucket {
|
|
70
|
+
raise RuntimeError(f"Minio bucket {self.bucket_name} not found")
|
|
143
71
|
|
|
144
|
-
def get_bytes(self,
|
|
145
|
-
return b"".join([self.minio_client.get_object(
|
|
146
|
-
segment.size).read() for segment in
|
|
72
|
+
def get_bytes(self, segments: List[Segment]):
|
|
73
|
+
return b"".join([self.minio_client.get_object(self.bucket_name, segment.id(), segment.offset,
|
|
74
|
+
segment.size).read() for segment in segments])
|
|
147
75
|
|
|
148
|
-
def get_str(self,
|
|
149
|
-
return self.get_bytes(
|
|
76
|
+
def get_str(self, segments: List[Segment]):
|
|
77
|
+
return self.get_bytes(segments).decode('utf-8')
|
|
150
78
|
|
|
151
|
-
def put_bytes(self, did, bytes_data
|
|
79
|
+
def put_bytes(self, did, bytes_data):
|
|
152
80
|
segment = Segment(uuid=str(uuid.uuid4()),
|
|
153
81
|
offset=0,
|
|
154
82
|
size=len(bytes_data),
|
|
155
83
|
did=did)
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
84
|
+
self.minio_client.put_object(self.bucket_name, segment.id(), io.BytesIO(bytes_data), len(bytes_data))
|
|
85
|
+
return segment
|
|
86
|
+
|
|
87
|
+
def put_str(self, did, string_data):
|
|
88
|
+
return self.put_bytes(did, string_data.encode('utf-8'))
|
|
160
89
|
|
|
161
|
-
def
|
|
162
|
-
|
|
90
|
+
def delete_all(self, segments: List[Segment]):
|
|
91
|
+
delete_objects = [DeleteObject(seg.id()) for seg in segments]
|
|
92
|
+
return self.minio_client.remove_objects(self.bucket_name, delete_objects)
|