PyPI - deltafi - Versions diffs - 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl - Mend

deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

deltafi/__init__.py +3 -1
deltafi/action.py +262 -102
deltafi/actioneventqueue.py +29 -4
deltafi/actiontype.py +7 -11
deltafi/domain.py +241 -88
deltafi/exception.py +1 -11
deltafi/genericmodel.py +38 -0
deltafi/input.py +6 -163
deltafi/logger.py +16 -4
deltafi/lookuptable.py +292 -0
deltafi/metric.py +2 -2
deltafi/plugin.py +374 -87
deltafi/result.py +174 -172
deltafi/resultmessage.py +56 -0
deltafi/storage.py +20 -90
deltafi/test_kit/__init__.py +19 -0
deltafi/test_kit/assertions.py +56 -0
deltafi/test_kit/compare_helpers.py +293 -0
deltafi/test_kit/constants.py +23 -0
deltafi/test_kit/egress.py +54 -0
deltafi/test_kit/framework.py +390 -0
deltafi/test_kit/timed_ingress.py +104 -0
deltafi/test_kit/transform.py +103 -0
deltafi/types.py +31 -0
deltafi-2.40.0.dist-info/METADATA +82 -0
deltafi-2.40.0.dist-info/RECORD +27 -0
{deltafi-0.109.0.dist-info → deltafi-2.40.0.dist-info}/WHEEL +1 -1
deltafi-0.109.0.dist-info/METADATA +0 -41
deltafi-0.109.0.dist-info/RECORD +0 -15

deltafi/domain.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #
 #    DeltaFi - Data transformation and enrichment platform
 #
-#    Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
+#    Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 #    Licensed under the Apache License, Version 2.0 (the "License");
 #    you may not use this file except in compliance with the License.
@@ -17,60 +17,129 @@
 #
 import copy
+from datetime import datetime, timedelta, timezone
 from logging import Logger
 from typing import Dict, List, NamedTuple
+from uuid import uuid4
-from deltafi.storage import ContentService, ContentReference
+from deltafi.storage import ContentService, Segment
+class ActionExecution(NamedTuple):
+    clazz: str
+    action: str
+    thread_num: int
+    did: str
+    start_time: datetime
+    def exceeds_duration(self, duration: timedelta) -> bool:
+        return self.start_time + duration < datetime.now(timezone.utc)
+    @property
+    def key(self) -> str:
+        return f"{self.clazz}:{self.action}#{self.thread_num}:{self.did}"
 class Context(NamedTuple):
     did: str
+    delta_file_name: str
+    data_source: str
+    flow_name: str
+    flow_id: str
     action_name: str
-    source_filename: str
-    ingress_flow: str
-    egress_flow: str
-    system: str
+    action_version: str
     hostname: str
+    system_name: str
     content_service: ContentService
-    logger: Logger
+    join: dict = None
+    joined_dids: List[str] = None
+    memo: str = None
+    logger: Logger = None
+    saved_content: List = []
     @classmethod
-    def create(cls, context: dict, hostname: str, content_service: ContentService, logger: Logger):
+    def create(cls, context: dict, content_service: ContentService, logger: Logger):
         did = context['did']
-        action_name = context['name']
-        if 'sourceFilename' in context:
-            source_filename = context['sourceFilename']
+        if 'deltaFileName' in context:
+            delta_file_name = context['deltaFileName']
+        else:
+            delta_file_name = None
+        if 'dataSource' in context:
+            data_source = context['dataSource']
+        else:
+            data_source = None
+        if 'flowName' in context:
+            flow_name = context['flowName']
+        else:
+            flow_name = None
+        if 'flowId' in context:
+            flow_id = context['flowId']
         else:
-            source_filename = None
-        ingress_flow = context['ingressFlow']
-        if 'egressFlow' in context:
-            egress_flow = context['egressFlow']
+            flow_id = None
+        if 'actionName' in context:
+            action_name = context['actionName']
         else:
-            egress_flow = None
-        system = context['systemName']
+            action_name = None
+        if 'actionVersion' in context:
+            action_version = context['actionVersion']
+        else:
+            action_version = None
+        if 'hostname' in context:
+            hostname = context['hostname']
+        else:
+            hostname = None
+        if 'systemName' in context:
+            system_name = context['systemName']
+        else:
+            system_name = None
+        if 'join' in context:
+            join = context['join']
+        else:
+            join = None
+        if 'joinedDids' in context:
+            joined_dids = context['joinedDids']
+        else:
+            joined_dids = None
+        if 'memo' in context:
+            memo = context['memo']
+        else:
+            memo = None
         return Context(did=did,
+                       delta_file_name=delta_file_name,
+                       data_source=data_source,
+                       flow_name=flow_name,
+                       flow_id=flow_id,
                        action_name=action_name,
-                       source_filename=source_filename,
-                       ingress_flow=ingress_flow,
-                       egress_flow=egress_flow,
-                       system=system,
+                       action_version=action_version,
                        hostname=hostname,
+                       system_name=system_name,
+                       join=join,
+                       joined_dids=joined_dids,
+                       memo=memo,
                        content_service=content_service,
+                       saved_content=[],
                        logger=logger)
+    def child_context(self):
+        return self._replace(did=str(uuid4()))
 class Content:
     """
-    A Content class that holds information about a piece of content, including its name, reference, and service.
+    A Content class that holds information about a piece of content, including its name, segments, mediaType, and service.
     Attributes:
         name (str): The name of the content.
-        content_reference (ContentReference): A ContentReference object that holds information about the content's data.
+        segments (List<Segment>): The list of segments in storage that make up the Content
+        media_type (str): The media type of the content
         content_service (ContentService): A ContentService object used to retrieve the content data.
     """
-    def __init__(self, name: str, content_reference: ContentReference, content_service: ContentService):
+    def __init__(self, name: str, segments: List[Segment], media_type: str, content_service: ContentService):
         self.name = name
-        self.content_reference = content_reference
+        self.segments = segments
+        self.media_type = media_type
+        self.tags = set()
         self.content_service = content_service
     def json(self):
@@ -78,11 +147,13 @@ class Content:
         Returns a dictionary representation of the Content object.
         Returns:
-            dict: A dictionary containing 'name' and 'contentReference' keys.
+            dict: A dictionary containing 'name', 'segments', and 'mediaType' keys.
         """
         return {
             'name': self.name,
-            'contentReference': self.content_reference.json(),
+            'segments': [segment.json() for segment in self.segments],
+            'mediaType': self.media_type,
+            'tags': list(self.tags)
         }
     def copy(self):
@@ -92,9 +163,12 @@ class Content:
         Returns:
             Content: A deep copy of the Content object.
         """
-        return Content(name=self.name,
-                       content_reference=copy.deepcopy(self.content_reference),
-                       content_service=self.content_service)
+        new_copy = Content(name=self.name,
+                           segments=copy.deepcopy(self.segments),
+                           media_type=self.media_type,
+                           content_service=self.content_service)
+        new_copy.add_tags(self.tags.copy())
+        return new_copy
     def subcontent(self, offset: int, size: int):
         """
@@ -108,9 +182,54 @@ class Content:
             Content: A new Content object with the specified subcontent.
         """
         return Content(name=self.name,
-                       content_reference=self.content_reference.subreference(offset, size),
+                       segments=self.subsegments(offset, size),
+                       media_type=self.media_type,
                        content_service=self.content_service)
+    def subsegments(self, offset: int, size: int):
+        if offset < 0:
+            raise ValueError(f"subsegments offset must be positive, got {offset}")
+        if size < 0:
+            raise ValueError(f"subsegments size must be positive, got {size}")
+        if size + offset > self.get_size():
+            raise ValueError(f"Size + offset ({size} + {offset}) exceeds total Content size of {self.get_size()}")
+        if size == 0:
+            return []
+        new_segments = []
+        offset_remaining = offset
+        size_remaining = size
+        for segment in self.segments:
+            if offset_remaining > 0:
+                if segment.size < offset_remaining:
+                    # the first offset is past this segment, skip it
+                    offset_remaining -= segment.size
+                    continue
+                else:
+                    # chop off the front of this segment
+                    segment = Segment(uuid=segment.uuid,
+                                      offset=segment.offset + offset_remaining,
+                                      size=segment.size - offset_remaining,
+                                      did=segment.did)
+                    offset_remaining = 0
+            if size_remaining < segment.size:
+                # chop off the back of this segment
+                segment = Segment(uuid=segment.uuid,
+                                  offset=segment.offset,
+                                  size=size_remaining,
+                                  did=segment.did)
+            size_remaining -= segment.size
+            new_segments.append(segment)
+            if size_remaining == 0:
+                break
+        return new_segments
     def get_size(self):
         """
         Returns the size of the content in bytes.
@@ -118,7 +237,10 @@ class Content:
         Returns:
             int: The size of the content in bytes.
         """
-        return self.content_reference.get_size()
+        sum = 0
+        for segment in self.segments:
+            sum = sum + segment.size
+        return sum
     def get_media_type(self):
         """
@@ -127,7 +249,7 @@ class Content:
         Returns:
         str: The media type of the content.
         """
-        return self.content_reference.media_type
+        return self.media_type
     def set_media_type(self, media_type: str):
         """
@@ -136,7 +258,7 @@ class Content:
         Args:
             media_type (str): The media type to set.
         """
-        self.content_reference = self.content_reference._replace(media_type=media_type)
+        self.media_type = media_type
     def load_bytes(self):
         """
@@ -145,7 +267,7 @@ class Content:
         Returns:
             bytes: The content as bytes.
         """
-        return self.content_service.get_bytes(self.content_reference)
+        return self.content_service.get_bytes(self.segments)
     def load_str(self):
         """
@@ -154,7 +276,7 @@ class Content:
         Returns:
             str: The content as a string.
         """
-        return self.content_service.get_str(self.content_reference)
+        return self.content_service.get_str(self.segments)
     def prepend(self, other_content):
         """
@@ -163,7 +285,7 @@ class Content:
         Args:
             other_content (Content): The Content object to prepend.
         """
-        self.content_reference.segments[0:0] = other_content.content_reference.segments
+        self.segments[0:0] = other_content.segments
     def append(self, other_content):
         """
@@ -172,24 +294,86 @@ class Content:
         Args:
             other_content (Content): The Content object to append.
         """
-        self.content_reference.segments.extend(other_content.content_reference.segments)
+        self.segments.extend(other_content.segments)
+    def get_segment_names(self):
+        segment_names = {}
+        for seg in self.segments:
+            segment_names[seg.id()] = seg
+        return segment_names
+    def add_tag(self, tag: str):
+        """
+        Adds a tag to the content.
+        Args:
+            tag (str): The tag to add.
+        """
+        self.tags.add(tag)
+    def add_tags(self, tags: set):
+        """
+        Adds multiple tags to the content.
+        Args:
+            tags (set): A set of tags to add.
+        """
+        self.tags.update(tags)
+    def remove_tag(self, tag: str):
+        """
+        Removes a tag from the content.
+        Args:
+            tag (str): The tag to remove.
+        """
+        self.tags.discard(tag)
+    def has_tag(self, tag: str) -> bool:
+        """
+        Checks if the content has a specific tag.
+        Args:
+            tag (str): The tag to check.
+        Returns:
+            bool: True if the content has the tag, False otherwise.
+        """
+        return tag in self.tags
+    def clear_tags(self):
+        """
+        Clears all tags from the content.
+        """
+        self.tags.clear()
+    def get_tags(self) -> set:
+        """
+        Returns the tags associated with the content.
+        Returns:
+            set: A set of tags.
+        """
+        return self.tags
     def __eq__(self, other):
         if isinstance(other, Content):
             return (self.name == other.name and
-                    self.content_reference == other.content_reference and
+                    self.segments == other.segments and
+                    self.media_type == other.media_type and
+                    self.tags == other.tags and
                     self.content_service == other.content_service)
         return False
     @classmethod
     def from_str(cls, context: Context, str_data: str, name: str, media_type: str):
-        content_reference = context.content_service.put_str(context.did, str_data, media_type)
-        return Content(name=name, content_reference=content_reference, content_service=context.content_service)
+        segment = context.content_service.put_str(context.did, str_data)
+        return Content(name=name, segments=[segment], media_type=media_type, content_service=context.content_service)
     @classmethod
     def from_bytes(cls, context: Context, byte_data: bytes, name: str, media_type: str):
-        content_reference = context.content_service.put_bytes(context.did, byte_data, media_type)
-        return Content(name=name, content_reference=content_reference, content_service=context.content_service)
+        segment = context.content_service.put_bytes(context.did, byte_data)
+        return Content(name=name, segments=[segment], media_type=media_type, content_service=context.content_service)
     @classmethod
     def from_dict(cls, content: dict, content_service: ContentService):
@@ -197,60 +381,28 @@ class Content:
             name = content['name']
         else:
             name = None
-        content_reference = ContentReference.from_dict(content['contentReference'])
-        return Content(name=name,
-                       content_reference=content_reference,
-                       content_service=content_service)
-class Domain(NamedTuple):
-    name: str
-    value: str
-    media_type: str
-    @classmethod
-    def from_dict(cls, domain: dict):
-        name = domain['name']
-        if 'value' in domain:
-            value = domain['value']
-        else:
-            value = None
-        media_type = domain['mediaType']
-        return Domain(name=name,
-                      value=value,
-                      media_type=media_type)
-class SourceInfo(NamedTuple):
-    filename: str
-    flow: str
-    metadata: Dict[str, str]
-    def json(self):
-        return {
-            'filename': self.filename,
-            'flow': self.flow,
-            'metadata': self.metadata
-        }
+        segments = [Segment.from_dict(segment) for segment in content['segments']]
+        media_type = content['mediaType']
+        action_content = Content(name=name,
+                                 segments=segments,
+                                 media_type=media_type,
+                                 content_service=content_service)
+        tags = set(content.get('tags', []))
+        action_content.add_tags(tags)
+        return action_content
 class DeltaFileMessage(NamedTuple):
     metadata: Dict[str, str]
     content_list: List[Content]
-    domains: List[Domain]
-    enrichment: List[Domain]
     @classmethod
     def from_dict(cls, delta_file_message: dict, content_service: ContentService):
         metadata = delta_file_message['metadata']
         content_list = [Content.from_dict(content, content_service) for content in delta_file_message['contentList']]
-        domains = [Domain.from_dict(domain) for domain in delta_file_message['domains']] if 'domains' in delta_file_message else []
-        enrichment = [Domain.from_dict(domain) for domain in delta_file_message['enrichment']] if 'enrichment' in delta_file_message else []
         return DeltaFileMessage(metadata=metadata,
-                                content_list=content_list,
-                                domains=domains,
-                                enrichment=enrichment)
+                                content_list=content_list)
 class Event(NamedTuple):
@@ -261,9 +413,10 @@ class Event(NamedTuple):
     return_address: str
     @classmethod
-    def create(cls, event: dict, hostname: str, content_service: ContentService, logger: Logger):
-        delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in event['deltaFileMessages']]
-        context = Context.create(event['actionContext'], hostname, content_service, logger)
+    def create(cls, event: dict, content_service: ContentService, logger: Logger):
+        delta_file_messages = [DeltaFileMessage.from_dict(delta_file_message, content_service) for delta_file_message in
+                               event['deltaFileMessages']]
+        context = Context.create(event['actionContext'], content_service, logger)
         params = event['actionParams']
         queue_name = None
         if 'queueName' in event:

deltafi/exception.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #
 #    DeltaFi - Data transformation and enrichment platform
 #
-#    Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
+#    Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 #    Licensed under the Apache License, Version 2.0 (the "License");
 #    you may not use this file except in compliance with the License.
@@ -23,16 +23,6 @@ class ExpectedContentException(RuntimeError):
         self.size = size
-class MissingDomainException(RuntimeError):
-    def __init__(self, name):
-        self.name = name
-class MissingEnrichmentException(RuntimeError):
-    def __init__(self, name):
-        self.name = name
 class MissingMetadataException(RuntimeError):
     def __init__(self, key):
         self.key = key

deltafi/genericmodel.py ADDED Viewed

@@ -0,0 +1,38 @@
+#
+#    DeltaFi - Data transformation and enrichment platform
+#
+#    Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+"""GenericModel
+Provides an empty subclass of pydantic.BaseModel.
+All action parameter classes must inherit pydantic.BaseModel.
+Starting Pydantic v2, the BaseModel cannot be directly instantiated. This class provides for instantiation of GenericModel objects that inherit from BaseModel.
+This class does not define fields for validation or any other purpose.
+"""
+from pydantic import BaseModel
+class GenericModel(BaseModel):
+    pass

deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl

deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl