deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltafi/domain.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,60 +17,129 @@
17
17
  #
18
18
 
19
19
  import copy
20
+ from datetime import datetime, timedelta, timezone
20
21
  from logging import Logger
21
22
  from typing import Dict, List, NamedTuple
23
+ from uuid import uuid4
22
24
 
23
- from deltafi.storage import ContentService, ContentReference
25
+ from deltafi.storage import ContentService, Segment
26
+
27
+
28
+ class ActionExecution(NamedTuple):
29
+ clazz: str
30
+ action: str
31
+ thread_num: int
32
+ did: str
33
+ start_time: datetime
34
+
35
+ def exceeds_duration(self, duration: timedelta) -> bool:
36
+ return self.start_time + duration < datetime.now(timezone.utc)
37
+
38
+ @property
39
+ def key(self) -> str:
40
+ return f"{self.clazz}:{self.action}#{self.thread_num}:{self.did}"
24
41
 
25
42
 
26
43
  class Context(NamedTuple):
27
44
  did: str
45
+ delta_file_name: str
46
+ data_source: str
47
+ flow_name: str
48
+ flow_id: str
28
49
  action_name: str
29
- source_filename: str
30
- ingress_flow: str
31
- egress_flow: str
32
- system: str
50
+ action_version: str
33
51
  hostname: str
52
+ system_name: str
34
53
  content_service: ContentService
35
- logger: Logger
54
+ join: dict = None
55
+ joined_dids: List[str] = None
56
+ memo: str = None
57
+ logger: Logger = None
58
+ saved_content: List = []
36
59
 
37
60
  @classmethod
38
- def create(cls, context: dict, hostname: str, content_service: ContentService, logger: Logger):
61
+ def create(cls, context: dict, content_service: ContentService, logger: Logger):
39
62
  did = context['did']
40
- action_name = context['name']
41
- if 'sourceFilename' in context:
42
- source_filename = context['sourceFilename']
63
+ if 'deltaFileName' in context:
64
+ delta_file_name = context['deltaFileName']
65
+ else:
66
+ delta_file_name = None
67
+ if 'dataSource' in context:
68
+ data_source = context['dataSource']
69
+ else:
70
+ data_source = None
71
+ if 'flowName' in context:
72
+ flow_name = context['flowName']
73
+ else:
74
+ flow_name = None
75
+ if 'flowId' in context:
76
+ flow_id = context['flowId']
43
77
  else:
44
- source_filename = None
45
- ingress_flow = context['ingressFlow']
46
- if 'egressFlow' in context:
47
- egress_flow = context['egressFlow']
78
+ flow_id = None
79
+ if 'actionName' in context:
80
+ action_name = context['actionName']
48
81
  else:
49
- egress_flow = None
50
- system = context['systemName']
82
+ action_name = None
83
+ if 'actionVersion' in context:
84
+ action_version = context['actionVersion']
85
+ else:
86
+ action_version = None
87
+ if 'hostname' in context:
88
+ hostname = context['hostname']
89
+ else:
90
+ hostname = None
91
+ if 'systemName' in context:
92
+ system_name = context['systemName']
93
+ else:
94
+ system_name = None
95
+ if 'join' in context:
96
+ join = context['join']
97
+ else:
98
+ join = None
99
+ if 'joinedDids' in context:
100
+ joined_dids = context['joinedDids']
101
+ else:
102
+ joined_dids = None
103
+ if 'memo' in context:
104
+ memo = context['memo']
105
+ else:
106
+ memo = None
107
+
51
108
  return Context(did=did,
109
+ delta_file_name=delta_file_name,
110
+ data_source=data_source,
111
+ flow_name=flow_name,
112
+ flow_id=flow_id,
52
113
  action_name=action_name,
53
- source_filename=source_filename,
54
- ingress_flow=ingress_flow,
55
- egress_flow=egress_flow,
56
- system=system,
114
+ action_version=action_version,
57
115
  hostname=hostname,
116
+ system_name=system_name,
117
+ join=join,
118
+ joined_dids=joined_dids,
119
+ memo=memo,
58
120
  content_service=content_service,
121
+ saved_content=[],
59
122
  logger=logger)
60
123
 
124
+ def child_context(self):
125
+ return self._replace(did=str(uuid4()))
126
+
61
127
 
62
128
  class Content:
63
129
  """
64
- A Content class that holds information about a piece of content, including its name, reference, and service.
130
+ A Content class that holds information about a piece of content, including its name, segments, mediaType, and service.
65
131
  Attributes:
66
132
  name (str): The name of the content.
67
- content_reference (ContentReference): A ContentReference object that holds information about the content's data.
133
+ segments (List[Segment]): The list of segments in storage that make up the content.
134
+ media_type (str): The media type of the content
68
135
  content_service (ContentService): A ContentService object used to retrieve the content data.
69
136
  """
70
137
 
71
- def __init__(self, name: str, content_reference: ContentReference, content_service: ContentService):
138
+ def __init__(self, name: str, segments: List[Segment], media_type: str, content_service: ContentService):
72
139
  self.name = name
73
- self.content_reference = content_reference
140
+ self.segments = segments
141
+ self.media_type = media_type
142
+ self.tags = set()
74
143
  self.content_service = content_service
75
144
 
76
145
  def json(self):
@@ -78,11 +147,13 @@ class Content:
78
147
  Returns a dictionary representation of the Content object.
79
148
 
80
149
  Returns:
81
- dict: A dictionary containing 'name' and 'contentReference' keys.
150
+ dict: A dictionary containing 'name', 'segments', and 'mediaType' keys.
82
151
  """
83
152
  return {
84
153
  'name': self.name,
85
- 'contentReference': self.content_reference.json(),
154
+ 'segments': [segment.json() for segment in self.segments],
155
+ 'mediaType': self.media_type,
156
+ 'tags': list(self.tags)
86
157
  }
87
158
 
88
159
  def copy(self):
@@ -92,9 +163,12 @@ class Content:
92
163
  Returns:
93
164
  Content: A deep copy of the Content object.
94
165
  """
95
- return Content(name=self.name,
96
- content_reference=copy.deepcopy(self.content_reference),
97
- content_service=self.content_service)
166
+ new_copy = Content(name=self.name,
167
+ segments=copy.deepcopy(self.segments),
168
+ media_type=self.media_type,
169
+ content_service=self.content_service)
170
+ new_copy.add_tags(self.tags.copy())
171
+ return new_copy
98
172
 
99
173
  def subcontent(self, offset: int, size: int):
100
174
  """
@@ -108,9 +182,54 @@ class Content:
108
182
  Content: A new Content object with the specified subcontent.
109
183
  """
110
184
  return Content(name=self.name,
111
- content_reference=self.content_reference.subreference(offset, size),
185
+ segments=self.subsegments(offset, size),
186
+ media_type=self.media_type,
112
187
  content_service=self.content_service)
113
188
 
189
+ def subsegments(self, offset: int, size: int):
190
+ if offset < 0:
191
+ raise ValueError(f"subsegments offset must be positive, got {offset}")
192
+
193
+ if size < 0:
194
+ raise ValueError(f"subsegments size must be positive, got {size}")
195
+
196
+ if size + offset > self.get_size():
197
+ raise ValueError(f"Size + offset ({size} + {offset}) exceeds total Content size of {self.get_size()}")
198
+
199
+ if size == 0:
200
+ return []
201
+
202
+ new_segments = []
203
+ offset_remaining = offset
204
+ size_remaining = size
205
+
206
+ for segment in self.segments:
207
+ if offset_remaining > 0:
208
+ if segment.size < offset_remaining:
209
+ # the first offset is past this segment, skip it
210
+ offset_remaining -= segment.size
211
+ continue
212
+ else:
213
+ # chop off the front of this segment
214
+ segment = Segment(uuid=segment.uuid,
215
+ offset=segment.offset + offset_remaining,
216
+ size=segment.size - offset_remaining,
217
+ did=segment.did)
218
+ offset_remaining = 0
219
+
220
+ if size_remaining < segment.size:
221
+ # chop off the back of this segment
222
+ segment = Segment(uuid=segment.uuid,
223
+ offset=segment.offset,
224
+ size=size_remaining,
225
+ did=segment.did)
226
+ size_remaining -= segment.size
227
+ new_segments.append(segment)
228
+ if size_remaining == 0:
229
+ break
230
+
231
+ return new_segments
232
+
114
233
  def get_size(self):
115
234
  """
116
235
  Returns the size of the content in bytes.
@@ -118,7 +237,10 @@ class Content:
118
237
  Returns:
119
238
  int: The size of the content in bytes.
120
239
  """
121
- return self.content_reference.get_size()
240
+ sum = 0
241
+ for segment in self.segments:
242
+ sum = sum + segment.size
243
+ return sum
122
244
 
123
245
  def get_media_type(self):
124
246
  """
@@ -127,7 +249,7 @@ class Content:
127
249
  Returns:
128
250
  str: The media type of the content.
129
251
  """
130
- return self.content_reference.media_type
252
+ return self.media_type
131
253
 
132
254
  def set_media_type(self, media_type: str):
133
255
  """
@@ -136,7 +258,7 @@ class Content:
136
258
  Args:
137
259
  media_type (str): The media type to set.
138
260
  """
139
- self.content_reference = self.content_reference._replace(media_type=media_type)
261
+ self.media_type = media_type
140
262
 
141
263
  def load_bytes(self):
142
264
  """
@@ -145,7 +267,7 @@ class Content:
145
267
  Returns:
146
268
  bytes: The content as bytes.
147
269
  """
148
- return self.content_service.get_bytes(self.content_reference)
270
+ return self.content_service.get_bytes(self.segments)
149
271
 
150
272
  def load_str(self):
151
273
  """
@@ -154,7 +276,7 @@ class Content:
154
276
  Returns:
155
277
  str: The content as a string.
156
278
  """
157
- return self.content_service.get_str(self.content_reference)
279
+ return self.content_service.get_str(self.segments)
158
280
 
159
281
  def prepend(self, other_content):
160
282
  """
@@ -163,7 +285,7 @@ class Content:
163
285
  Args:
164
286
  other_content (Content): The Content object to prepend.
165
287
  """
166
- self.content_reference.segments[0:0] = other_content.content_reference.segments
288
+ self.segments[0:0] = other_content.segments
167
289
 
168
290
  def append(self, other_content):
169
291
  """
@@ -172,24 +294,86 @@ class Content:
172
294
  Args:
173
295
  other_content (Content): The Content object to append.
174
296
  """
175
- self.content_reference.segments.extend(other_content.content_reference.segments)
297
+ self.segments.extend(other_content.segments)
298
+
299
+ def get_segment_names(self):
300
+ segment_names = {}
301
+ for seg in self.segments:
302
+ segment_names[seg.id()] = seg
303
+ return segment_names
304
+
305
+ def add_tag(self, tag: str):
306
+ """
307
+ Adds a tag to the content.
308
+
309
+ Args:
310
+ tag (str): The tag to add.
311
+ """
312
+ self.tags.add(tag)
313
+
314
+ def add_tags(self, tags: set):
315
+ """
316
+ Adds multiple tags to the content.
317
+
318
+ Args:
319
+ tags (set): A set of tags to add.
320
+ """
321
+ self.tags.update(tags)
322
+
323
+ def remove_tag(self, tag: str):
324
+ """
325
+ Removes a tag from the content.
326
+
327
+ Args:
328
+ tag (str): The tag to remove.
329
+ """
330
+ self.tags.discard(tag)
331
+
332
+ def has_tag(self, tag: str) -> bool:
333
+ """
334
+ Checks if the content has a specific tag.
335
+
336
+ Args:
337
+ tag (str): The tag to check.
338
+
339
+ Returns:
340
+ bool: True if the content has the tag, False otherwise.
341
+ """
342
+ return tag in self.tags
343
+
344
+ def clear_tags(self):
345
+ """
346
+ Clears all tags from the content.
347
+ """
348
+ self.tags.clear()
349
+
350
+ def get_tags(self) -> set:
351
+ """
352
+ Returns the tags associated with the content.
353
+
354
+ Returns:
355
+ set: A set of tags.
356
+ """
357
+ return self.tags
176
358
 
177
359
  def __eq__(self, other):
178
360
  if isinstance(other, Content):
179
361
  return (self.name == other.name and
180
- self.content_reference == other.content_reference and
362
+ self.segments == other.segments and
363
+ self.media_type == other.media_type and
364
+ self.tags == other.tags and
181
365
  self.content_service == other.content_service)
182
366
  return False
183
367
 
184
368
  @classmethod
185
369
  def from_str(cls, context: Context, str_data: str, name: str, media_type: str):
186
- content_reference = context.content_service.put_str(context.did, str_data, media_type)
187
- return Content(name=name, content_reference=content_reference, content_service=context.content_service)
370
+ segment = context.content_service.put_str(context.did, str_data)
371
+ return Content(name=name, segments=[segment], media_type=media_type, content_service=context.content_service)
188
372
 
189
373
  @classmethod
190
374
  def from_bytes(cls, context: Context, byte_data: bytes, name: str, media_type: str):
191
- content_reference = context.content_service.put_bytes(context.did, byte_data, media_type)
192
- return Content(name=name, content_reference=content_reference, content_service=context.content_service)
375
+ segment = context.content_service.put_bytes(context.did, byte_data)
376
+ return Content(name=name, segments=[segment], media_type=media_type, content_service=context.content_service)
193
377
 
194
378
  @classmethod
195
379
  def from_dict(cls, content: dict, content_service: ContentService):
@@ -197,60 +381,28 @@ class Content:
197
381
  name = content['name']
198
382
  else:
199
383
  name = None
200
- content_reference = ContentReference.from_dict(content['contentReference'])
201
- return Content(name=name,
202
- content_reference=content_reference,
203
- content_service=content_service)
204
-
205
-
206
- class Domain(NamedTuple):
207
- name: str
208
- value: str
209
- media_type: str
210
-
211
- @classmethod
212
- def from_dict(cls, domain: dict):
213
- name = domain['name']
214
- if 'value' in domain:
215
- value = domain['value']
216
- else:
217
- value = None
218
- media_type = domain['mediaType']
219
- return Domain(name=name,
220
- value=value,
221
- media_type=media_type)
222
-
223
-
224
- class SourceInfo(NamedTuple):
225
- filename: str
226
- flow: str
227
- metadata: Dict[str, str]
228
-
229
- def json(self):
230
- return {
231
- 'filename': self.filename,
232
- 'flow': self.flow,
233
- 'metadata': self.metadata
234
- }
384
+ segments = [Segment.from_dict(segment) for segment in content['segments']]
385
+ media_type = content['mediaType']
386
+ action_content = Content(name=name,
387
+ segments=segments,
388
+ media_type=media_type,
389
+ content_service=content_service)
390
+ tags = set(content.get('tags', []))
391
+ action_content.add_tags(tags)
392
+ return action_content
235
393
 
236
394
 
237
395
  class DeltaFileMessage(NamedTuple):
238
396
  metadata: Dict[str, str]
239
397
  content_list: List[Content]
240
- domains: List[Domain]
241
- enrichment: List[Domain]
242
398
 
243
399
  @classmethod
244
400
  def from_dict(cls, delta_file_message: dict, content_service: ContentService):
245
401
  metadata = delta_file_message['metadata']
246
402
  content_list = [Content.from_dict(content, content_service) for content in delta_file_message['contentList']]
247
- domains = [Domain.from_dict(domain) for domain in delta_file_message['domains']] if 'domains' in delta_file_message else []
248
- enrichment = [Domain.from_dict(domain) for domain in delta_file_message['enrichment']] if 'enrichment' in delta_file_message else []
249
403
 
250
404
  return DeltaFileMessage(metadata=metadata,
251
- content_list=content_list,
252
- domains=domains,
253
- enrichment=enrichment)
405
+ content_list=content_list)
254
406
 
255
407
 
256
408
  class Event(NamedTuple):
@@ -261,9 +413,10 @@ class Event(NamedTuple):
261
413
  return_address: str
262
414
 
263
415
  @classmethod
264
- def create(cls, event: dict, hostname: str, content_service: ContentService, logger: Logger):
265
- delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in event['deltaFileMessages']]
266
- context = Context.create(event['actionContext'], hostname, content_service, logger)
416
+ def create(cls, event: dict, content_service: ContentService, logger: Logger):
417
+ delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in
418
+ event['deltaFileMessages']]
419
+ context = Context.create(event['actionContext'], content_service, logger)
267
420
  params = event['actionParams']
268
421
  queue_name = None
269
422
  if 'queueName' in event:
deltafi/exception.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -23,16 +23,6 @@ class ExpectedContentException(RuntimeError):
23
23
  self.size = size
24
24
 
25
25
 
26
- class MissingDomainException(RuntimeError):
27
- def __init__(self, name):
28
- self.name = name
29
-
30
-
31
- class MissingEnrichmentException(RuntimeError):
32
- def __init__(self, name):
33
- self.name = name
34
-
35
-
36
26
  class MissingMetadataException(RuntimeError):
37
27
  def __init__(self, key):
38
28
  self.key = key
@@ -0,0 +1,38 @@
1
+ #
2
+ # DeltaFi - Data transformation and enrichment platform
3
+ #
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+
19
+
20
+
21
+ """GenericModel
22
+
23
+ Provides an empty subclass of pydantic.BaseModel.
24
+
25
+ All action parameter classes must inherit pydantic.BaseModel.
26
+
27
+ Starting with Pydantic v2, BaseModel cannot be instantiated directly. GenericModel is an instantiable subclass of BaseModel provided for that purpose.
28
+
29
+ This class does not define fields for validation or any other purpose.
30
+ """
31
+
32
+
33
+ from pydantic import BaseModel
34
+
35
+
36
+
37
+ class GenericModel(BaseModel):
38
+ pass