PyPI - acryl-datahub-cloud - Versions diffs - 0.3.12.1rc3__py3-none-any.whl → 0.3.12.2__py3-none-any.whl - Mend - Supply Chain Defender

acryl-datahub-cloud 0.3.12.1rc3__py3-none-any.whl → 0.3.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (18) hide show

acryl_datahub_cloud/sdk/assertion_input/assertion_input.py CHANGED Viewed

@@ -33,7 +33,7 @@ from acryl_datahub_cloud.sdk.errors import (
 from datahub.emitter.enum_helpers import get_enum_options
 from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
 from datahub.metadata import schema_classes as models
-from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
+from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
 from datahub.sdk import Dataset
 from datahub.sdk.entity_client import EntityClient
@@ -66,6 +66,15 @@ DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
     ),  # User local timezone, matches the UI default
 )
+TYPE_CLASS_NAME_TO_TYPE_MAP = {
+    "StringTypeClass": "STRING",
+    "NumberTypeClass": "NUMBER",
+    "BooleanTypeClass": "BOOLEAN",
+    "DateTypeClass": "DATE",
+    "TimeTypeClass": "TIME",
+    "NullTypeClass": "NULL",
+}
 class AbstractDetectionMechanism(BaseModel, ABC):
     type: str
@@ -318,8 +327,11 @@ class DetectionMechanism:
     def _try_parse_from_dict(
         detection_mechanism_config: dict[str, str],
     ) -> _DetectionMechanismTypes:
+        # Make a copy of the dictionary to avoid mutating the original
+        config_copy = detection_mechanism_config.copy()
         try:
-            detection_mechanism_type = detection_mechanism_config.pop("type")
+            detection_mechanism_type = config_copy.pop("type")
         except KeyError as e:
             raise SDKUsageErrorWithExamples(
                 msg="Detection mechanism type is required if using a dict to create a DetectionMechanism",
@@ -336,23 +348,23 @@ class DetectionMechanism:
             ) from e
         try:
-            return detection_mechanism_obj(**detection_mechanism_config)
+            return detection_mechanism_obj(**config_copy)
         except TypeError as e:
             if "object is not callable" not in e.args[0]:
                 raise e
-            if detection_mechanism_config:
+            if config_copy:
                 # If we are here in the TypeError case, the detection mechanism is an instance of a class,
                 # not a class itself, so we can't instantiate it with the config dict.
                 # In this case, the config dict should be empty after the type is popped.
                 # If it is not empty, we raise an error.
                 raise SDKUsageErrorWithExamples(
-                    msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {detection_mechanism_config}",
+                    msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {config_copy}",
                     examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
                 ) from e
             return detection_mechanism_obj
         except ValidationError as e:
             raise SDKUsageErrorWithExamples(
-                msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {detection_mechanism_config} {e}",
+                msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {config_copy} {e}",
                 examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
             ) from e
@@ -426,12 +438,14 @@ class InferenceSensitivity(Enum):
 DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
 TIME_WINDOW_SIZE_EXAMPLES = {
-    "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
+    "Recommended: Time window size from objects": "TimeWindowSize(unit=CalendarInterval.MINUTE, multiple=10)",
     "Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
+    "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
+    "Time window size from dict": '{"unit": "MINUTE", "multiple": 10}',
 }
-class CalendarInterval(Enum):
+class CalendarInterval(str, Enum):
     MINUTE = "MINUTE"
     HOUR = "HOUR"
     DAY = "DAY"
@@ -446,9 +460,24 @@ TimeWindowSizeInputTypes: TypeAlias = Union[
     models.TimeWindowSizeClass,
     models.FixedIntervalScheduleClass,
     TimeWindowSize,
+    dict[str, Union[str, int]],  # {"unit": "MINUTE", "multiple": 10}
 ]
+def _try_parse_calendar_interval(
+    config: Union[str, CalendarInterval],
+) -> CalendarInterval:
+    if isinstance(config, CalendarInterval):
+        return config
+    try:
+        return CalendarInterval(config.upper())
+    except ValueError as e:
+        raise SDKUsageErrorWithExamples(
+            msg=f"Invalid calendar interval: {config}",
+            examples=TIME_WINDOW_SIZE_EXAMPLES,
+        ) from e
 def _try_parse_time_window_size(
     config: TimeWindowSizeInputTypes,
 ) -> models.TimeWindowSizeClass:
@@ -471,6 +500,23 @@ def _try_parse_time_window_size(
             ),
             multiple=config.multiple,
         )
+    elif isinstance(config, dict):
+        if "unit" not in config or "multiple" not in config:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid time window size: {config}",
+                examples=TIME_WINDOW_SIZE_EXAMPLES,
+            )
+        try:
+            multiple = int(config["multiple"])
+        except ValueError as e:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid time window size: {config}",
+                examples=TIME_WINDOW_SIZE_EXAMPLES,
+            ) from e
+        return models.TimeWindowSizeClass(
+            unit=_try_parse_calendar_interval(str(config["unit"])),
+            multiple=multiple,
+        )
     else:
         raise SDKUsageErrorWithExamples(
             msg=f"Invalid time window size: {config}",
@@ -1077,6 +1123,10 @@ class _AssertionInput(ABC):
         """
         Convert the tags input into a standardized format.
+        Tag names are automatically converted to tag URNs using TagUrn constructor. For example:
+        - "my_tag" becomes "urn:li:tag:my_tag"
+        - "urn:li:tag:my_tag" remains unchanged
         Returns:
             A list of tags or None if no tags are provided.
@@ -1087,16 +1137,19 @@ class _AssertionInput(ABC):
             return None
         if isinstance(self.tags, str):
-            return [self.tags]
+            return [str(TagUrn(self.tags))]
         elif isinstance(self.tags, list):
-            return self.tags
+            return [
+                str(TagUrn(tag)) if isinstance(tag, str) else tag for tag in self.tags
+            ]
         else:
             raise SDKUsageErrorWithExamples(
                 msg=f"Invalid tags: {self.tags}",
                 examples={
-                    "Tags from string": "urn:li:tag:my_tag_1",
-                    "Tags from list": [
-                        "urn:li:tag:my_tag_1",
+                    "Tags from string (tag name)": "my_tag_1",
+                    "Tags from string (tag URN)": "urn:li:tag:my_tag_1",
+                    "Tags from list (mixed)": [
+                        "my_tag_1",
                         "urn:li:tag:my_tag_2",
                     ],
                 },
@@ -1173,16 +1226,40 @@ class _AssertionInput(ABC):
         schema_fields = self.cached_dataset._schema_dict()
         field = schema_fields.get(column_name)
         if field:
-            return models.SchemaFieldSpecClass(
-                path=field.fieldPath,
-                type=field.type.type.__class__.__name__,
-                nativeType=field.nativeDataType,
-            )
+            return self._convert_schema_field_to_schema_field_spec(field)
         else:
             raise SDKUsageError(
                 msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
             )
+    def _convert_schema_field_to_schema_field_spec(
+        self, field: models.SchemaFieldClass
+    ) -> models.SchemaFieldSpecClass:
+        """
+        Convert a SchemaFieldClass to a SchemaFieldSpecClass.
+        """
+        type_class_name = field.type.type.__class__.__name__
+        try:
+            type = self._convert_schema_field_type_class_name_to_type(type_class_name)
+        except KeyError as e:
+            raise SDKUsageError(
+                msg=f"Invalid type: {type_class_name}. Must be one of {list(TYPE_CLASS_NAME_TO_TYPE_MAP.keys())}",
+            ) from e
+        return models.SchemaFieldSpecClass(
+            path=field.fieldPath,
+            type=type,
+            nativeType=field.nativeDataType,
+        )
+    def _convert_schema_field_type_class_name_to_type(
+        self, type_class_name: str
+    ) -> str:
+        """
+        Convert a type class name to a type.
+        """
+        return TYPE_CLASS_NAME_TO_TYPE_MAP[type_class_name]
     def _validate_field_type(
         self,
         field_spec: models.SchemaFieldSpecClass,
@@ -1202,7 +1279,10 @@ class _AssertionInput(ABC):
         Raises:
             SDKUsageError: If the field has an invalid type
         """
-        allowed_type_names = [t.__class__.__name__ for t in allowed_types]
+        allowed_type_names = [
+            self._convert_schema_field_type_class_name_to_type(t.__class__.__name__)
+            for t in allowed_types
+        ]
         if field_spec.type not in allowed_type_names:
             raise SDKUsageError(
                 msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "