PyPI - deltafi - Versions diffs - 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl - Mend

deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

deltafi/__init__.py +3 -1
deltafi/action.py +262 -102
deltafi/actioneventqueue.py +29 -4
deltafi/actiontype.py +7 -11
deltafi/domain.py +241 -88
deltafi/exception.py +1 -11
deltafi/genericmodel.py +38 -0
deltafi/input.py +6 -163
deltafi/logger.py +16 -4
deltafi/lookuptable.py +292 -0
deltafi/metric.py +2 -2
deltafi/plugin.py +374 -87
deltafi/result.py +174 -172
deltafi/resultmessage.py +56 -0
deltafi/storage.py +20 -90
deltafi/test_kit/__init__.py +19 -0
deltafi/test_kit/assertions.py +56 -0
deltafi/test_kit/compare_helpers.py +293 -0
deltafi/test_kit/constants.py +23 -0
deltafi/test_kit/egress.py +54 -0
deltafi/test_kit/framework.py +390 -0
deltafi/test_kit/timed_ingress.py +104 -0
deltafi/test_kit/transform.py +103 -0
deltafi/types.py +31 -0
deltafi-2.40.0.dist-info/METADATA +82 -0
deltafi-2.40.0.dist-info/RECORD +27 -0
{deltafi-0.109.0.dist-info → deltafi-2.40.0.dist-info}/WHEEL +1 -1
deltafi-0.109.0.dist-info/METADATA +0 -41
deltafi-0.109.0.dist-info/RECORD +0 -15

deltafi/result.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #
 #    DeltaFi - Data transformation and enrichment platform
 #
-#    Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
+#    Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 #    Licensed under the Apache License, Version 2.0 (the "License");
 #    you may not use this file except in compliance with the License.
@@ -17,15 +17,12 @@
 #
 import abc
-from typing import Dict, List
 import uuid
+from enum import Enum
-from deltafi.domain import Content, Context, SourceInfo
+from deltafi.domain import Content, Context
 from deltafi.metric import Metric
-ENDPOINT_TAG = "endpoint"
-FILES_OUT = "files_out"
-BYTES_OUT = "bytes_out"
+from deltafi.resultmessage import LogMessage
 class Result:
@@ -34,6 +31,7 @@ class Result:
     def __init__(self, result_key, result_type, context):
         self.result_key = result_key
         self.result_type = result_type
+        self.messages = []
         self.metrics = []
         self.context = context
@@ -43,133 +41,80 @@ class Result:
     def add_metric(self, metric: Metric):
         self.metrics.append(metric)
+        return self
+    def log_info(self, message: str):
+        self.messages.append(LogMessage.info(self.context.action_name, message))
+        return self
-class DomainResult(Result):
-    def __init__(self, context: Context):
-        super().__init__('domain', 'DOMAIN', context)
-        self.indexed_metadata = {}
-    def index_metadata(self, key: str, value: str):
-        self.indexed_metadata[key] = value
+    def log_warning(self, message: str):
+        self.messages.append(LogMessage.warning(self.context.action_name, message))
         return self
-    def response(self):
-        return {
-            'indexedMetadata': self.indexed_metadata
-        }
+    def log_error(self, message: str):
+        self.messages.append(LogMessage.error(self.context.action_name, message))
+        return self
 class EgressResult(Result):
-    def __init__(self, context: Context, destination: str, bytes_egressed: int):
+    def __init__(self, context: Context):
         super().__init__(None, 'EGRESS', context)
-        self.add_metric(Metric(FILES_OUT, 1, {ENDPOINT_TAG: destination}))
-        self.add_metric(Metric(BYTES_OUT, bytes_egressed, {ENDPOINT_TAG: destination}))
     def response(self):
         return None
-class EnrichResult(Result):
-    def __init__(self, context: Context):
-        super().__init__('enrich', 'ENRICH', context)
-        self.enrichments = []
-        self.indexed_metadata = {}
-    def enrich(self, name: str, value: str, media_type: str):
-        self.enrichments.append({
-            'name': name,
-            'value': value,
-            'mediaType': media_type
-        })
-        return self
-    def index_metadata(self, key: str, value: str):
-        self.indexed_metadata[key] = value
-        return self
-    def response(self):
-        return {
-            'enrichments': self.enrichments,
-            'indexedMetadata': self.indexed_metadata
-        }
 class ErrorResult(Result):
     def __init__(self, context: Context, error_cause: str, error_context: str):
         super().__init__('error', 'ERROR', context)
         self.error_cause = error_cause
         self.error_context = error_context
+        self.annotations = {}
+    def annotate(self, key: str, value: str):
+        self.annotations[key] = value
+        return self
     def response(self):
+        self.log_error(self.error_cause + '\n' + self.error_context)
         return {
             'cause': self.error_cause,
-            'context': self.error_context
+            'context': self.error_context,
+            'annotations': self.annotations
         }
 class FilterResult(Result):
-    def __init__(self, context: Context, filtered_cause: str):
+    def __init__(self, context: Context, filtered_cause: str, filtered_context: str = None):
         super().__init__('filter', 'FILTER', context)
         self.filtered_cause = filtered_cause
+        self.filtered_context = filtered_context
+        self.annotations = {}
-    def response(self):
-        return {
-            'message': self.filtered_cause
-        }
-class FormatResult(Result):
-    def __init__(self, context: Context):
-        super().__init__('format', 'FORMAT', context)
-        self.content = None
-        self.metadata = {}
-    def add_metadata(self, key: str, value: str):
-        self.metadata[key] = value
-        return self
-    def set_content(self, content: Content):
-        self.content = content
-        return self
-    def save_string_content(self, string_data: str, name: str, media_type: str):
-        content_reference = self.context.content_service.put_str(self.context.did, string_data, media_type)
-        self.content = Content(name=name, content_reference=content_reference, content_service=self.context.content_service)
-        return self
-    def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
-        content_reference = self.context.content_service.put_bytes(self.context.did, byte_data, media_type)
-        self.content = Content(name=name, content_reference=content_reference, content_service=self.context.content_service)
+    def annotate(self, key: str, value: str):
+        self.annotations[key] = value
         return self
     def response(self):
         return {
-            'filename': self.content.name,
-            'contentReference': self.content.content_reference.json(),
-            'metadata': self.metadata
+            'message': self.filtered_cause,
+            'context': self.filtered_context,
+            'annotations': self.annotations
         }
-class FormatManyResult(Result):
-    def __init__(self, context: Context):
-        super().__init__('formatMany', 'FORMAT_MANY', context)
-        self.format_results = []
-    def add_format_result(self, format_result: FormatResult):
-        self.format_results.append(format_result)
-        return self
-    def response(self):
-        return [format_result.response() for format_result in self.format_results]
-class LoadResult(Result):
-    def __init__(self, context: Context):
-        super().__init__('load', 'LOAD', context)
+class IngressResultItem:
+    def __init__(self, context: Context, delta_file_name: str):
+        self.context = context
+        self._did = str(uuid.uuid4())
         self.content = []
         self.metadata = {}
-        self.domains = []
+        self.annotations = {}
+        self.delta_file_name = delta_file_name
+    @property
+    def did(self):
+        return self._did
     # content can be a single Content or a List[Content]
     def add_content(self, content):
@@ -181,97 +126,94 @@ class LoadResult(Result):
         return self
-    def save_string_content(self, string_data: str, name: str, media_type: str):
-        content_reference = self.context.content_service.put_str(self.context.did, string_data, media_type)
-        self.content.append(Content(name=name, content_reference=content_reference, content_service=self.context.content_service))
+    def save_string_content(self, string_data: str, name: str, media_type: str, tags: set = None):
+        segment = self.context.content_service.put_str(self._did, string_data)
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        if tags is not None:
+            c.add_tags(tags)
+        self.content.append(c)
+        self.context.saved_content.append(c)
+        return self
+    def save_byte_content(self, byte_data: bytes, name: str, media_type: str, tags: set = None):
+        segment = self.context.content_service.put_bytes(self._did, byte_data)
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        if tags is not None:
+            c.add_tags(tags)
+        self.content.append(c)
+        self.context.saved_content.append(c)
         return self
-    def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
-        content_reference = self.context.content_service.put_bytes(self.context.did, byte_data, media_type)
-        self.content.append(Content(name=name, content_reference=content_reference, content_service=self.context.content_service))
+    def set_metadata(self, metadata: dict):
+        self.metadata = metadata
         return self
     def add_metadata(self, key: str, value: str):
         self.metadata[key] = value
         return self
-    def add_domain(self, name: str, value: str, media_type: str):
-        self.domains.append({
-            'name': name,
-            'value': value,
-            'mediaType': media_type})
+    def get_segment_names(self):
+        segment_names = {}
+        for c in self.content:
+            segment_names.update(c.get_segment_names())
+        return segment_names
+    def annotate(self, key: str, value: str):
+        self.annotations[key] = value
         return self
     def response(self):
         return {
-            'domains': self.domains,
-            'protocolLayer': {
-                'content': [content.json() for content in self.content],
-                'metadata': self.metadata
-            }
+            'did': self._did,
+            'deltaFileName': self.delta_file_name,
+            'metadata': self.metadata,
+            'content': [content.json() for content in self.content],
+            'annotations': self.annotations
         }
-class ChildLoadResult:
-    def __init__(self, load_result: LoadResult = None):
-        self._did = str(uuid.uuid4())
-        self.load_result = load_result
-    @property
-    def did(self):
-        return self._did
-    def response(self):
-        res = self.load_result.response()
-        res["did"] = self._did
-        return res
+class IngressStatusEnum(Enum):
+    HEALTHY = 'HEALTHY'
+    DEGRADED = 'DEGRADED'
+    UNHEALTHY = 'UNHEALTHY'
-class LoadManyResult(Result):
+class IngressResult(Result):
     def __init__(self, context: Context):
-        super().__init__('loadMany', 'LOAD_MANY', context)
-        self.load_results = []
-    def add_load_result(self, load_result):
-        if isinstance(load_result, ChildLoadResult):
-            self.load_results.append(load_result)
-        else:
-            self.load_results.append(ChildLoadResult(load_result))
+        super().__init__('ingress', 'INGRESS', context)
+        self.memo = None
+        self.ingress_result_items = []
+        self.execute_immediate = False
+        self.status = IngressStatusEnum.HEALTHY
+        self.status_message = None
+    def add_item(self, ingress_result_item: IngressResultItem):
+        self.ingress_result_items.append(ingress_result_item)
         return self
-    def response(self):
-        return [load_result.response() for load_result in self.load_results]
-class ReinjectResult(Result):
-    class ReinjectChild:
-        def __init__(self, source_info: SourceInfo, content: List[Content]):
-            self.source_info = source_info
-            self.content = content
-        def json(self):
-            return {
-                'sourceInfo': self.source_info.json(),
-                'content': [content.json() for content in self.content]
-            }
-    def __init__(self, context: Context):
-        super().__init__('reinject', 'REINJECT', context)
-        self.children = []
-    def add_child(self, filename: str, flow: str, metadata: Dict[str, str], content: List[Content]):
-        child = ReinjectResult.ReinjectChild(SourceInfo(filename, flow, metadata), content)
-        self.children.append(child)
+    def get_segment_names(self):
+        segment_names = {}
+        for ingress_item in self.ingress_result_items:
+            segment_names.update(ingress_item.get_segment_names())
+        return segment_names
     def response(self):
-        return [child.json() for child in self.children]
+        return {
+            'memo': self.memo,
+            'executeImmediate': self.execute_immediate,
+            'ingressItems': [ingress_result_item.response() for ingress_result_item in self.ingress_result_items],
+            'status': self.status.value,
+            'statusMessage': self.status_message
+        }
 class TransformResult(Result):
     def __init__(self, context: Context):
         super().__init__('transform', 'TRANSFORM', context)
         self.content = []
+        self.annotations = {}
         self.metadata = {}
+        self.delete_metadata_keys = []
     # content can be a single Content or a List[Content]
     def add_content(self, content):
@@ -283,32 +225,92 @@ class TransformResult(Result):
         return self
-    def save_string_content(self, string_data: str, name: str, media_type: str):
-        content_reference = self.context.content_service.put_str(self.context.did, string_data, media_type)
-        self.content.append(Content(name=name, content_reference=content_reference, content_service=self.context.content_service))
+    def save_string_content(self, string_data: str, name: str, media_type: str, tags: set = None):
+        segment = self.context.content_service.put_str(self.context.did, string_data)
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        if tags is not None:
+            c.add_tags(tags)
+        self.content.append(c)
+        self.context.saved_content.append(c)
         return self
-    def save_byte_content(self, byte_data: bytes, name: str, media_type: str):
-        content_reference = self.context.content_service.put_bytes(self.context.did, byte_data, media_type)
-        self.content.append(Content(name=name, content_reference=content_reference, content_service=self.context.content_service))
+    def save_byte_content(self, byte_data: bytes, name: str, media_type: str, tags: set = None):
+        segment = self.context.content_service.put_bytes(self.context.did, byte_data)
+        c = Content(name=name, segments=[segment], media_type=media_type, content_service=self.context.content_service)
+        if tags is not None:
+            c.add_tags(tags)
+        self.content.append(c)
+        self.context.saved_content.append(c)
+        return self
+    def set_metadata(self, metadata: dict):
+        self.metadata = metadata
         return self
     def add_metadata(self, key: str, value: str):
         self.metadata[key] = value
         return self
-    def response(self):
+    def annotate(self, key: str, value: str):
+        self.annotations[key] = value
+        return self
+    def delete_metadata_key(self, key: str):
+        self.delete_metadata_keys.append(key)
+        return self
+    def get_segment_names(self):
+        segment_names = {}
+        for c in self.content:
+            segment_names.update(c.get_segment_names())
+        return segment_names
+    def json(self):
         return {
-            'protocolLayer': {
-                'content': [content.json() for content in self.content],
-                'metadata': self.metadata
-            }
+            'did': self.context.did,
+            'content': [content.json() for content in self.content],
+            'annotations': self.annotations,
+            'metadata': self.metadata,
+            'deleteMetadataKeys': self.delete_metadata_keys
         }
+    def response(self):
+        return [self.json()]
+class ChildTransformResult(TransformResult):
+    delta_file_name: str
+    def __init__(self, context: Context, delta_file_name: str = None):
+        super().__init__(context.child_context())
+        self.delta_file_name = delta_file_name
+    def json(self):
+        j = super().json()
+        j['messages'] = [message.json() for message in self.messages]
+        if self.delta_file_name is not None:
+            j['name'] = self.delta_file_name
+        return j
-class ValidateResult(Result):
+class TransformResults(Result):
     def __init__(self, context: Context):
-        super().__init__(None, 'VALIDATE', context)
+        super().__init__('transform', 'TRANSFORM', context)
+        self.child_results = []
+    def add_result(self, result: ChildTransformResult):
+        self.child_results.append(result)
+        return self
+    def get_segment_names(self):
+        segment_names = {}
+        for child_result in self.child_results:
+            segment_names.update(child_result.get_segment_names())
+        return segment_names
     def response(self):
-        return None
+        transform_events = []
+        for child_result in self.child_results:
+            json_dict = child_result.json()
+            transform_events.append(json_dict)
+        return transform_events

deltafi/resultmessage.py ADDED Viewed

@@ -0,0 +1,56 @@
+#
+#    DeltaFi - Data transformation and enrichment platform
+#
+#    Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+import time
+from enum import Enum
+from typing import NamedTuple
+class LogSeverity(Enum):
+    TRACE = "TRACE"
+    INFO = "INFO"
+    WARNING = "WARNING"
+    ERROR = "ERROR"
+    USER = "USER"
+class LogMessage(NamedTuple):
+    severity: LogSeverity
+    created: int
+    source: str
+    message: str
+    @classmethod
+    def info(cls, source: str, message: str):
+        return LogMessage(severity=LogSeverity.INFO, created=time.time(), source=source, message=message)
+    @classmethod
+    def warning(cls, source: str, message: str):
+        return LogMessage(severity=LogSeverity.WARNING, created=time.time(), source=source,
+                          message=message)
+    @classmethod
+    def error(cls, source: str, message: str):
+        return LogMessage(severity=LogSeverity.ERROR, created=time.time(), source=source,
+                          message=message)
+    def json(self):
+        return {'severity': self.severity.value,
+                'created': self.created,
+                'source': self.source,
+                'message': self.message}

deltafi/storage.py CHANGED Viewed

@@ -1,7 +1,7 @@
 #
 #    DeltaFi - Data transformation and enrichment platform
 #
-#    Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
+#    Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
 #
 #    Licensed under the Apache License, Version 2.0 (the "License");
 #    you may not use this file except in compliance with the License.
@@ -22,8 +22,7 @@ from typing import List, NamedTuple
 from urllib.parse import urlparse
 import minio
-BUCKET = 'storage'
+from minio.deleteobjects import DeleteObject
 class Segment(NamedTuple):
@@ -55,81 +54,10 @@ class Segment(NamedTuple):
         return f"{self.did[:3]}/{self.did}/{self.uuid}"
-class ContentReference(NamedTuple):
-    segments: List[Segment]
-    media_type: str
-    def subreference_segments(self, offset: int, size: int):
-        if offset < 0:
-            raise ValueError(f"subreference offset must be positive, got {offset}")
-        if size < 0:
-            raise ValueError(f"subreference size must be positive, got {size}")
-        if size + offset > self.get_size():
-            raise ValueError(f"Size + offset ({size} + {offset}) exceeds total ContentReference size of {self.get_size()}")
-        if size == 0:
-            return []
-        new_segments = []
-        offset_remaining = offset
-        size_remaining = size
-        for segment in self.segments:
-            if offset_remaining > 0:
-                if segment.size < offset_remaining:
-                    # the first offset is past this segment, skip it
-                    offset_remaining -= segment.size
-                    continue
-                else:
-                    # chop off the front of this segment
-                    segment = Segment(uuid=segment.uuid,
-                                      offset=segment.offset + offset_remaining,
-                                      size=segment.size - offset_remaining,
-                                      did=segment.did)
-                    offset_remaining = 0
-            if size_remaining < segment.size:
-                # chop off the back of this segment
-                segment = Segment(uuid=segment.uuid,
-                                  offset=segment.offset,
-                                  size=size_remaining,
-                                  did=segment.did)
-            size_remaining -= segment.size
-            new_segments.append(segment)
-            if size_remaining == 0:
-                break
-        return new_segments
-    def subreference(self, offset: int, size: int):
-        return ContentReference(segments=self.subreference_segments(offset, size),
-                                media_type=self.media_type)
-    def get_size(self):
-        sum = 0
-        for segment in self.segments:
-            sum = sum + segment.size
-        return sum
-    def json(self):
-        return {
-            'segments': [segment.json() for segment in self.segments],
-            'mediaType': self.media_type
-        }
-    @classmethod
-    def from_dict(cls, content_reference: dict):
-        segments = [Segment.from_dict(segment) for segment in content_reference['segments']]
-        media_type = content_reference['mediaType']
-        return ContentReference(segments=segments,
-                                media_type=media_type)
 class ContentService:
-    def __init__(self, url, access_key, secret_key):
+    def __init__(self, url, access_key, secret_key, bucket_name):
         parsed = urlparse(url)
+        self.bucket_name = bucket_name
         self.minio_client = minio.Minio(
             f"{parsed.hostname}:{str(parsed.port)}",
             access_key=access_key,
@@ -137,26 +65,28 @@ class ContentService:
             secure=False
         )
-        found = self.minio_client.bucket_exists(BUCKET)
+        found = self.minio_client.bucket_exists(self.bucket_name)
         if not found:
-            raise RuntimeError(f"Minio bucket {BUCKET} not found")
+            raise RuntimeError(f"Minio bucket {self.bucket_name} not found")
-    def get_bytes(self, content_reference: ContentReference):
-        return b"".join([self.minio_client.get_object(BUCKET, segment.id(), segment.offset,
-                                                      segment.size).read() for segment in content_reference.segments])
+    def get_bytes(self, segments: List[Segment]):
+        return b"".join([self.minio_client.get_object(self.bucket_name, segment.id(), segment.offset,
+                                                      segment.size).read() for segment in segments])
-    def get_str(self, content_reference: ContentReference):
-        return self.get_bytes(content_reference).decode('utf-8')
+    def get_str(self, segments: List[Segment]):
+        return self.get_bytes(segments).decode('utf-8')
-    def put_bytes(self, did, bytes_data, media_type):
+    def put_bytes(self, did, bytes_data):
         segment = Segment(uuid=str(uuid.uuid4()),
                           offset=0,
                           size=len(bytes_data),
                           did=did)
-        content_reference = ContentReference(segments=[segment],
-                                             media_type=media_type)
-        self.minio_client.put_object(BUCKET, segment.id(), io.BytesIO(bytes_data), len(bytes_data))
-        return content_reference
+        self.minio_client.put_object(self.bucket_name, segment.id(), io.BytesIO(bytes_data), len(bytes_data))
+        return segment
+    def put_str(self, did, string_data):
+        return self.put_bytes(did, string_data.encode('utf-8'))
-    def put_str(self, did, string_data, media_type):
-        return self.put_bytes(did, string_data.encode('utf-8'), media_type)
+    def delete_all(self, segments: List[Segment]):
+        delete_objects = [DeleteObject(seg.id()) for seg in segments]
+        return self.minio_client.remove_objects(self.bucket_name, delete_objects)

deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl

deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl