PyPI - acryl-datahub-cloud - Versions diffs - 0.3.12rc4__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl - Mend

acryl-datahub-cloud 0.3.12rc4__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (18) hide show

acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py ADDED Viewed

@@ -0,0 +1,224 @@
+import logging
+from datetime import datetime
+from typing import Optional, Union
+from typing_extensions import Self
+from acryl_datahub_cloud.sdk.assertion.assertion_base import (
+    AssertionMode,
+    _AssertionPublic,
+    _HasColumnMetricFunctionality,
+    _HasSchedule,
+    _HasSmartFunctionality,
+)
+from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
+    ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
+    DEFAULT_DETECTION_MECHANISM,
+    DEFAULT_SCHEDULE,
+    DEFAULT_SENSITIVITY,
+    AssertionIncidentBehavior,
+    DetectionMechanism,
+    ExclusionWindowTypes,
+    InferenceSensitivity,
+    _DetectionMechanismTypes,
+)
+from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
+    MetricInputType,
+    OperatorInputType,
+    RangeInputType,
+    RangeTypeInputType,
+    ValueInputType,
+    ValueTypeInputType,
+)
+from acryl_datahub_cloud.sdk.entities.assertion import Assertion
+from acryl_datahub_cloud.sdk.entities.monitor import Monitor
+from datahub.metadata import schema_classes as models
+from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
+logger = logging.getLogger(__name__)
+class SmartColumnMetricAssertion(
+    _HasColumnMetricFunctionality,
+    _HasSmartFunctionality,
+    _HasSchedule,
+    _AssertionPublic,
+):
+    """
+    A class that represents a smart column metric assertion.
+    This assertion is used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
+    min, max, median, and more. It uses AI to infer the assertion parameters.
+    """
+    def __init__(
+        self,
+        *,
+        urn: AssertionUrn,
+        dataset_urn: DatasetUrn,
+        column_name: str,
+        metric_type: MetricInputType,
+        operator: OperatorInputType,
+        # Depending on the operator, value, range (and corresponding type) or no parameters are required:
+        value: Optional[ValueInputType] = None,
+        value_type: Optional[ValueTypeInputType] = None,
+        range: Optional[RangeInputType] = None,
+        range_type: Optional[RangeTypeInputType] = None,
+        # TODO: Evaluate these params:
+        display_name: str,
+        mode: AssertionMode,
+        schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
+        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
+        exclusion_windows: list[ExclusionWindowTypes],
+        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
+        incident_behavior: list[AssertionIncidentBehavior],
+        detection_mechanism: Optional[
+            _DetectionMechanismTypes
+        ] = DEFAULT_DETECTION_MECHANISM,
+        tags: list[TagUrn],
+        created_by: Optional[CorpUserUrn] = None,
+        created_at: Union[datetime, None] = None,
+        updated_by: Optional[CorpUserUrn] = None,
+        updated_at: Optional[datetime] = None,
+    ):
+        """
+        Initialize a smart column metric assertion.
+        Args:
+            urn: The URN of the assertion.
+            dataset_urn: The URN of the dataset to monitor.
+            display_name: The display name of the assertion.
+            mode: The mode of the assertion (active/inactive).
+            sensitivity: The sensitivity of the assertion (low/medium/high).
+            exclusion_windows: The exclusion windows to apply to the assertion.
+            training_data_lookback_days: The number of days of data to use for training.
+            incident_behavior: The behavior when incidents occur.
+            detection_mechanism: The mechanism used to detect changes.
+            tags: The tags to apply to the assertion.
+            created_by: The URN of the user who created the assertion.
+            created_at: The timestamp when the assertion was created.
+            updated_by: The URN of the user who last updated the assertion.
+            updated_at: The timestamp when the assertion was last updated.
+        """
+        _AssertionPublic.__init__(
+            self,
+            urn=urn,
+            dataset_urn=dataset_urn,
+            display_name=display_name,
+            mode=mode,
+            tags=tags,
+            incident_behavior=incident_behavior,
+            detection_mechanism=detection_mechanism,
+            created_by=created_by,
+            created_at=created_at,
+            updated_by=updated_by,
+            updated_at=updated_at,
+        )
+        _HasSmartFunctionality.__init__(
+            self,
+            sensitivity=sensitivity,
+            exclusion_windows=exclusion_windows,
+            training_data_lookback_days=training_data_lookback_days,
+        )
+        _HasSchedule.__init__(
+            self,
+            schedule=schedule,
+        )
+        _HasColumnMetricFunctionality.__init__(
+            self,
+            column_name=column_name,
+            metric_type=metric_type,
+            operator=operator,
+            value=value,
+            value_type=value_type,
+            range=range,
+            range_type=range_type,
+        )
+    @classmethod
+    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
+        """
+        Create a SmartColumnMetricAssertion from an Assertion and Monitor entity.
+        Args:
+            assertion: The Assertion entity.
+            monitor: The Monitor entity.
+        Returns:
+            A SmartColumnMetricAssertion instance.
+        """
+        return cls(
+            urn=assertion.urn,
+            dataset_urn=assertion.dataset,
+            column_name=cls._get_column_name(assertion),
+            metric_type=cls._get_metric_type(assertion),
+            operator=cls._get_operator(assertion),
+            value=cls._get_value(assertion),
+            value_type=cls._get_value_type(assertion),
+            range=cls._get_range(assertion),
+            range_type=cls._get_range_type(assertion),
+            display_name=assertion.description or "",
+            mode=cls._get_mode(monitor),
+            schedule=cls._get_schedule(monitor),
+            sensitivity=cls._get_sensitivity(monitor),
+            exclusion_windows=cls._get_exclusion_windows(monitor),
+            training_data_lookback_days=cls._get_training_data_lookback_days(monitor),
+            incident_behavior=cls._get_incident_behavior(assertion),
+            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
+            tags=cls._get_tags(assertion),
+            created_by=cls._get_created_by(assertion),
+            created_at=cls._get_created_at(assertion),
+            updated_by=cls._get_updated_by(assertion),
+            updated_at=cls._get_updated_at(assertion),
+        )
+    @staticmethod
+    def _get_detection_mechanism(
+        assertion: Assertion,
+        monitor: Monitor,
+        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
+    ) -> Optional[_DetectionMechanismTypes]:
+        """Get the detection mechanism for column metric assertions."""
+        parameters = _AssertionPublic._get_validated_detection_context(
+            monitor,
+            assertion,
+            models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
+            models.FieldAssertionInfoClass,
+            default,
+        )
+        if parameters is None:
+            return default
+        if parameters.datasetFieldParameters is None:
+            logger.warning(
+                f"Monitor does not have datasetFieldParameters, defaulting detection mechanism to {default}"
+            )
+            return default
+        source_type = parameters.datasetFieldParameters.sourceType
+        if source_type == models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY:
+            additional_filter = _AssertionPublic._get_additional_filter(assertion)
+            return DetectionMechanism.ALL_ROWS_QUERY(
+                additional_filter=additional_filter
+            )
+        elif (
+            source_type
+            == models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY
+        ):
+            if parameters.datasetFieldParameters.changedRowsField is None:
+                logger.warning(
+                    f"Monitor has CHANGED_ROWS_QUERY source type but no changedRowsField, defaulting detection mechanism to {default}"
+                )
+                return default
+            column_name = parameters.datasetFieldParameters.changedRowsField.path
+            additional_filter = _AssertionPublic._get_additional_filter(assertion)
+            return DetectionMechanism.CHANGED_ROWS_QUERY(
+                column_name=column_name, additional_filter=additional_filter
+            )
+        elif (
+            source_type
+            == models.DatasetFieldAssertionSourceTypeClass.DATAHUB_DATASET_PROFILE
+        ):
+            return DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
+        else:
+            logger.warning(
+                f"Unsupported DatasetFieldAssertionSourceType {source_type}, defaulting detection mechanism to {default}"
+            )
+            return default

acryl_datahub_cloud/sdk/assertion/types.py ADDED Viewed

@@ -0,0 +1,20 @@
+from typing import Union
+from acryl_datahub_cloud.sdk.assertion.assertion_base import (
+    FreshnessAssertion,
+    SmartFreshnessAssertion,
+    SmartVolumeAssertion,
+    SqlAssertion,
+)
+from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
+    SmartColumnMetricAssertion,
+)
+AssertionTypes = Union[
+    SmartFreshnessAssertion,
+    SmartVolumeAssertion,
+    FreshnessAssertion,
+    SmartColumnMetricAssertion,
+    SqlAssertion,
+    # TODO: Add other assertion types here as we add them.
+]

acryl_datahub_cloud/sdk/assertion_input/assertion_input.py CHANGED Viewed

@@ -44,7 +44,7 @@ DEFAULT_NAME_PREFIX = "New Assertion"
 DEFAULT_NAME_SUFFIX_LENGTH = 8
-DEFAULT_HOURLY_SCHEDULE = models.CronScheduleClass(
+DEFAULT_HOURLY_SCHEDULE: models.CronScheduleClass = models.CronScheduleClass(
     cron="0 * * * *",  # Every hour, matches the UI default
     timezone=str(
         tzlocal.get_localzone()
@@ -59,6 +59,13 @@ DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
     ),  # User local timezone, matches the UI default
 )
+DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
+    cron="0 */6 * * *",  # Every 6 hours, matches the UI default
+    timezone=str(
+        tzlocal.get_localzone()
+    ),  # User local timezone, matches the UI default
+)
 class AbstractDetectionMechanism(BaseModel, ABC):
     type: str
@@ -135,6 +142,40 @@ class _DatasetProfile(AbstractDetectionMechanism):
     type: Literal["dataset_profile"] = "dataset_profile"
+# Operators that require a single value numeric parameter
+SINGLE_VALUE_NUMERIC_OPERATORS = [
+    models.AssertionStdOperatorClass.EQUAL_TO,
+    models.AssertionStdOperatorClass.NOT_EQUAL_TO,
+    models.AssertionStdOperatorClass.GREATER_THAN,
+    models.AssertionStdOperatorClass.LESS_THAN,
+    models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
+    models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
+]
+# Operators that require a single value parameter
+SINGLE_VALUE_OPERATORS = [
+    models.AssertionStdOperatorClass.CONTAIN,
+    models.AssertionStdOperatorClass.END_WITH,
+    models.AssertionStdOperatorClass.START_WITH,
+    models.AssertionStdOperatorClass.REGEX_MATCH,
+    models.AssertionStdOperatorClass.IN,
+    models.AssertionStdOperatorClass.NOT_IN,
+] + SINGLE_VALUE_NUMERIC_OPERATORS
+# Operators that require a numeric range parameter
+RANGE_OPERATORS = [
+    models.AssertionStdOperatorClass.BETWEEN,
+]
+# Operators that require no parameters
+NO_PARAMETER_OPERATORS = [
+    models.AssertionStdOperatorClass.NULL,
+    models.AssertionStdOperatorClass.NOT_NULL,
+    models.AssertionStdOperatorClass.IS_TRUE,
+    models.AssertionStdOperatorClass.IS_FALSE,
+]
 # Keep these two lists in sync:
 _DETECTION_MECHANISM_CONCRETE_TYPES = (
     _InformationSchema,
@@ -181,7 +222,7 @@ class DetectionMechanism:
     HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
     DATAHUB_OPERATION = _DataHubOperation()
     QUERY = _Query
-    ALL_ROWS_QUERY = _AllRowsQuery()
+    ALL_ROWS_QUERY = _AllRowsQuery
     CHANGED_ROWS_QUERY = _ChangedRowsQuery
     ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
     DATASET_PROFILE = _DatasetProfile()
@@ -710,7 +751,7 @@ def _try_parse_and_validate_schema_classes_enum(
     if isinstance(value, enum_class):
         return value
     assert isinstance(value, str)
-    if value not in get_enum_options(enum_class):
+    if value.upper() not in get_enum_options(enum_class):
         raise SDKUsageError(
             f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
         )
@@ -1089,15 +1130,12 @@ class _AssertionInput(ABC):
         Returns:
             A Monitor entity configured with the assertion input parameters.
         """
-        source_type, field = self._convert_assertion_source_type_and_field()
         return Monitor(
             id=(self.dataset_urn, assertion_urn),
             info=self._create_monitor_info(
                 assertion_urn=assertion_urn,
                 status=self._convert_monitor_status(),
                 schedule=self._convert_schedule(),
-                source_type=source_type,
-                field=field,
             ),
         )
@@ -1175,8 +1213,6 @@ class _AssertionInput(ABC):
         assertion_urn: AssertionUrn,
         status: models.MonitorStatusClass,
         schedule: models.CronScheduleClass,
-        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
-        field: Optional[FieldSpecType],
     ) -> models.MonitorInfoClass:
         """
         Create a MonitorInfoClass with all the necessary components.
@@ -1184,8 +1220,6 @@ class _AssertionInput(ABC):
         Args:
             status: The monitor status.
             schedule: The monitor schedule.
-            source_type: The source type.
-            field: Optional field specification.
         Returns:
             A MonitorInfoClass configured with all the provided components.
         """
@@ -1425,12 +1459,11 @@ class _SmartFreshnessAssertionInput(
         assertion_urn: AssertionUrn,
         status: models.MonitorStatusClass,
         schedule: models.CronScheduleClass,
-        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
-        field: Optional[FieldSpecType],
     ) -> models.MonitorInfoClass:
         """
         Create a MonitorInfoClass with all the necessary components.
         """
+        source_type, field = self._convert_assertion_source_type_and_field()
         return models.MonitorInfoClass(
             type=models.MonitorTypeClass.ASSERTION,
             status=status,
@@ -1584,12 +1617,11 @@ class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
         assertion_urn: AssertionUrn,
         status: models.MonitorStatusClass,
         schedule: models.CronScheduleClass,
-        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
-        field: Optional[FieldSpecType],
     ) -> models.MonitorInfoClass:
         """
         Create a MonitorInfoClass with all the necessary components.
         """
+        source_type, field = self._convert_assertion_source_type_and_field()
         return models.MonitorInfoClass(
             type=models.MonitorTypeClass.ASSERTION,
             status=status,

acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py CHANGED Viewed

@@ -112,12 +112,11 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
         assertion_urn: AssertionUrn,
         status: models.MonitorStatusClass,
         schedule: models.CronScheduleClass,
-        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
-        field: Optional[FieldSpecType],
     ) -> models.MonitorInfoClass:
         """
         Create a MonitorInfoClass with all the necessary components.
         """
+        source_type, field = self._convert_assertion_source_type_and_field()
         return models.MonitorInfoClass(
             type=models.MonitorTypeClass.ASSERTION,
             status=status,

acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py CHANGED Viewed

@@ -3,11 +3,13 @@ from datetime import datetime
 from typing import Optional, Union
 from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
-    DEFAULT_HOURLY_SCHEDULE,
+    DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
     HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
+    NO_PARAMETER_OPERATORS,
+    RANGE_OPERATORS,
+    SINGLE_VALUE_OPERATORS,
     AssertionIncidentBehavior,
     AssertionInfoInputType,
-    DetectionMechanism,
     DetectionMechanismInputTypes,
     ExclusionWindowInputTypes,
     FieldSpecType,
@@ -86,35 +88,6 @@ FIELD_VALUES_OPERATOR_CONFIG = {
     ],
 }
-# Operators that require a single value parameter
-SINGLE_VALUE_OPERATORS = [
-    models.AssertionStdOperatorClass.EQUAL_TO,
-    models.AssertionStdOperatorClass.NOT_EQUAL_TO,
-    models.AssertionStdOperatorClass.GREATER_THAN,
-    models.AssertionStdOperatorClass.LESS_THAN,
-    models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
-    models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
-    models.AssertionStdOperatorClass.CONTAIN,
-    models.AssertionStdOperatorClass.END_WITH,
-    models.AssertionStdOperatorClass.START_WITH,
-    models.AssertionStdOperatorClass.REGEX_MATCH,
-    models.AssertionStdOperatorClass.IN,
-    models.AssertionStdOperatorClass.NOT_IN,
-]
-# Operators that require a range parameter
-RANGE_OPERATORS = [
-    models.AssertionStdOperatorClass.BETWEEN,
-]
-# Operators that require no parameters
-NO_PARAMETER_OPERATORS = [
-    models.AssertionStdOperatorClass.NULL,
-    models.AssertionStdOperatorClass.NOT_NULL,
-    models.AssertionStdOperatorClass.IS_TRUE,
-    models.AssertionStdOperatorClass.IS_FALSE,
-]
 # Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
 # datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
 FIELD_METRIC_TYPE_CONFIG = {
@@ -183,8 +156,8 @@ RangeTypeInputType = Union[
 RangeTypeParsedType = tuple[ValueTypeInputType, ValueTypeInputType]
 OperatorInputType = Union[str, models.AssertionStdOperatorClass]
-DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION = (
-    DetectionMechanism.ALL_ROWS_QUERY
+DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION: _AllRowsQuery = (
+    _AllRowsQuery()
 )
@@ -443,12 +416,11 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
         assertion_urn: AssertionUrn,
         status: models.MonitorStatusClass,
         schedule: models.CronScheduleClass,
-        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
-        field: Optional[FieldSpecType],
     ) -> models.MonitorInfoClass:
         """
         Create a MonitorInfoClass with all the necessary components.
         """
+        source_type, field = self._convert_assertion_source_type_and_field()
         return models.MonitorInfoClass(
             type=models.MonitorTypeClass.ASSERTION,
             status=status,
@@ -512,7 +484,7 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
             A CronScheduleClass with appropriate schedule settings.
         """
         if self.schedule is None:
-            return DEFAULT_HOURLY_SCHEDULE
+            return DEFAULT_EVERY_SIX_HOURS_SCHEDULE
         return models.CronScheduleClass(
             cron=self.schedule.cron,
@@ -815,13 +787,10 @@ def _try_parse_and_validate_value_type(
 ) -> models.AssertionStdParameterTypeClass:
     if value_type is None:
         raise SDKUsageError("Value type is required")
-    if isinstance(value_type, models.AssertionStdParameterTypeClass):
-        return value_type
-    if value_type not in get_enum_options(models.AssertionStdParameterTypeClass):
-        raise SDKUsageError(
-            f"Invalid value type: {value_type}, valid options are {get_enum_options(models.AssertionStdParameterTypeClass)}"
-        )
-    return getattr(models.AssertionStdParameterTypeClass, value_type)
+    return _try_parse_and_validate_schema_classes_enum(
+        value_type, models.AssertionStdParameterTypeClass
+    )
 def _try_parse_and_validate_value(

acryl-datahub-cloud 0.3.12rc4__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl

Potentially problematic release.

acryl-datahub-cloud 0.3.12rc4__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl