PyPI - acryl-datahub-cloud - Versions diffs - 0.3.12rc1__py3-none-any.whl → 0.3.12rc4__py3-none-any.whl - Mend - Supply Chain Defender

acryl-datahub-cloud 0.3.12rc1__py3-none-any.whl → 0.3.12rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (74) hide show

acryl_datahub_cloud/{_sdk_extras → sdk/assertion_input}/assertion_input.py RENAMED Viewed

@@ -6,22 +6,26 @@ validate and represent the input for creating an Assertion in DataHub.
 import random
 import string
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Literal, Optional, TypeAlias, Union
+from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
 import pydantic
+import pytz
+import tzlocal
 from avrogen.dict_wrapper import DictWrapper
+from croniter import croniter
 from pydantic import BaseModel, Extra, ValidationError
-from acryl_datahub_cloud._sdk_extras.entities.assertion import (
+from acryl_datahub_cloud.sdk.entities.assertion import (
     Assertion,
     AssertionActionsInputType,
     AssertionInfoInputType,
     TagsInputType,
 )
-from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
-from acryl_datahub_cloud._sdk_extras.errors import (
+from acryl_datahub_cloud.sdk.entities.monitor import Monitor
+from acryl_datahub_cloud.sdk.errors import (
     SDKNotYetSupportedError,
     SDKUsageError,
     SDKUsageErrorWithExamples,
@@ -40,6 +44,22 @@ DEFAULT_NAME_PREFIX = "New Assertion"
 DEFAULT_NAME_SUFFIX_LENGTH = 8
+# Hourly cron schedule. The timezone string is computed once, when this module is imported.
+DEFAULT_HOURLY_SCHEDULE = models.CronScheduleClass(
+    cron="0 * * * *",  # Every hour, matches the UI default
+    timezone=str(
+        tzlocal.get_localzone()
+    ),  # User local timezone, matches the UI default
+)
+# The general-purpose default schedule is an alias of the hourly schedule above.
+DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_HOURLY_SCHEDULE
+# Daily cron schedule. The timezone string is computed once, when this module is imported.
+DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
+    cron="0 0 * * *",  # Every day at midnight, matches the UI default
+    timezone=str(
+        tzlocal.get_localzone()
+    ),  # User local timezone, matches the UI default
+)
 class AbstractDetectionMechanism(BaseModel, ABC):
     type: str
@@ -85,6 +105,36 @@ class _DataHubOperation(AbstractDetectionMechanism):
     type: Literal["datahub_operation"] = "datahub_operation"
+class _Query(AbstractDetectionMechanism):
+    # COUNT(*) query
+    # `type` is fixed to the literal "query"; `additional_filter` is optional and defaults to None.
+    type: Literal["query"] = "query"
+    additional_filter: Optional[str] = None
+class _AllRowsQuery(AbstractDetectionMechanism):
+    # For column-based assertions, this is the default detection mechanism.
+    # `type` is fixed to the literal "all_rows_query"; `additional_filter` is optional and defaults to None.
+    type: Literal["all_rows_query"] = "all_rows_query"
+    additional_filter: Optional[str] = None
+class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
+    # Used for column-based assertions.
+    # Carries no parameters besides its fixed `type` literal.
+    type: Literal["all_rows_query_datahub_dataset_profile"] = (
+        "all_rows_query_datahub_dataset_profile"
+    )
+class _ChangedRowsQuery(AbstractDetectionMechanism):
+    # Used for column-based assertions.
+    # `column_name` is required (no default); `additional_filter` is optional and defaults to None.
+    type: Literal["changed_rows_query"] = "changed_rows_query"
+    column_name: str
+    additional_filter: Optional[str] = None
+class _DatasetProfile(AbstractDetectionMechanism):
+    # Carries no parameters besides its fixed `type` literal.
+    type: Literal["dataset_profile"] = "dataset_profile"
 # Keep these two lists in sync:
 _DETECTION_MECHANISM_CONCRETE_TYPES = (
     _InformationSchema,
@@ -92,6 +142,11 @@ _DETECTION_MECHANISM_CONCRETE_TYPES = (
     _LastModifiedColumn,
     _HighWatermarkColumn,
     _DataHubOperation,
+    _Query,
+    _DatasetProfile,
+    _AllRowsQuery,
+    _ChangedRowsQuery,
+    _AllRowsQueryDataHubDatasetProfile,
 )
 _DetectionMechanismTypes = Union[
     _InformationSchema,
@@ -99,8 +154,23 @@ _DetectionMechanismTypes = Union[
     _LastModifiedColumn,
     _HighWatermarkColumn,
     _DataHubOperation,
+    _Query,
+    _DatasetProfile,
+    _AllRowsQuery,
+    _ChangedRowsQuery,
+    _AllRowsQueryDataHubDatasetProfile,
 ]
+# Detection mechanism classes grouped as "with additional filter"; used as the isinstance() target
+# when deciding whether an additional filter should be read from a detection mechanism.
+_DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
+    _LastModifiedColumn,
+    _HighWatermarkColumn,
+    _Query,
+    _AllRowsQuery,
+    _ChangedRowsQuery,
+)
+# Default value of the `default_detection_mechanism` parameter of DetectionMechanism.parse,
+# i.e. what parse() returns when no detection mechanism config is given.
+DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
 class DetectionMechanism:
     # To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
@@ -110,6 +180,11 @@ class DetectionMechanism:
     LAST_MODIFIED_COLUMN = _LastModifiedColumn
     HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
     DATAHUB_OPERATION = _DataHubOperation()
+    QUERY = _Query
+    ALL_ROWS_QUERY = _AllRowsQuery()
+    CHANGED_ROWS_QUERY = _ChangedRowsQuery
+    ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
+    DATASET_PROFILE = _DatasetProfile()
     _DETECTION_MECHANISM_EXAMPLES = {
         "Information Schema from string": "information_schema",
@@ -130,6 +205,26 @@ class DetectionMechanism:
         "High Watermark Column from DetectionMechanism": "DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id', additional_filter='id > 1000')",
         "DataHub Operation from string": "datahub_operation",
         "DataHub Operation from DetectionMechanism": "DetectionMechanism.DATAHUB_OPERATION",
+        "Query from string": "query",
+        "Query from dict": {
+            "type": "query",
+            "additional_filter": "id > 1000",
+        },
+        "Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
+        "Dataset Profile from string": "dataset_profile",
+        "Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
+        "All Rows Query from string": "all_rows_query",
+        "All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
+        "All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
+        "Changed Rows Query from dict (with optional additional filter)": {
+            "type": "changed_rows_query",
+            "column_name": "id",
+            "additional_filter": "id > 1000",
+        },
+        "Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
+        "Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
+        "All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
+        "All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
     }
     @staticmethod
@@ -137,9 +232,10 @@ class DetectionMechanism:
         detection_mechanism_config: Optional[
             Union[str, dict[str, str], _DetectionMechanismTypes]
         ] = None,
+        default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
     ) -> _DetectionMechanismTypes:
         if detection_mechanism_config is None:
-            return DEFAULT_DETECTION_MECHANISM
+            return default_detection_mechanism
         if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
             return detection_mechanism_config
         elif isinstance(detection_mechanism_config, str):
@@ -220,8 +316,6 @@ class DetectionMechanism:
             ) from e
-DEFAULT_DETECTION_MECHANISM = DetectionMechanism.INFORMATION_SCHEMA
 DetectionMechanismInputTypes: TypeAlias = Union[
     str, dict[str, str], _DetectionMechanismTypes, None
 ]
@@ -288,7 +382,59 @@ class InferenceSensitivity(Enum):
         }[sensitivity]
-DEFAULT_SENSITIVITY = InferenceSensitivity.MEDIUM
+# Module-level default inference sensitivity (MEDIUM).
+DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
+# Usage examples attached to the SDKUsageErrorWithExamples raised by _try_parse_time_window_size.
+TIME_WINDOW_SIZE_EXAMPLES = {
+    "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
+    "Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
+}
+# SDK-side enum of time window units; each member's value equals its name.
+class CalendarInterval(Enum):
+    MINUTE = "MINUTE"
+    HOUR = "HOUR"
+    DAY = "DAY"
+# SDK-side time window size: a unit (CalendarInterval member or its string form) and an integer multiple.
+class TimeWindowSize(BaseModel):
+    unit: Union[CalendarInterval, str]
+    multiple: int
+# Every input type that _try_parse_time_window_size converts to models.TimeWindowSizeClass.
+TimeWindowSizeInputTypes: TypeAlias = Union[
+    models.TimeWindowSizeClass,
+    models.FixedIntervalScheduleClass,
+    TimeWindowSize,
+]
+def _try_parse_time_window_size(
+    config: TimeWindowSizeInputTypes,
+) -> models.TimeWindowSizeClass:
+    """Normalize a supported time window input into a models.TimeWindowSizeClass.
+    Args:
+        config: A models.TimeWindowSizeClass (returned unchanged), a
+            models.FixedIntervalScheduleClass, or an SDK TimeWindowSize.
+    Returns:
+        The equivalent models.TimeWindowSizeClass.
+    Raises:
+        SDKUsageErrorWithExamples: If config is none of the supported types.
+        SDKUsageError: If the unit is not a valid calendar interval.
+    """
+    # Already in the target representation: hand it back untouched.
+    if isinstance(config, models.TimeWindowSizeClass):
+        return config
+    if isinstance(config, models.FixedIntervalScheduleClass):
+        interval_unit = _try_parse_and_validate_schema_classes_enum(
+            config.unit, models.CalendarIntervalClass
+        )
+        return models.TimeWindowSizeClass(
+            unit=interval_unit,
+            multiple=config.multiple,
+        )
+    if isinstance(config, TimeWindowSize):
+        # Validate against the SDK enum first, then map its value onto the models enum.
+        sdk_unit = _try_parse_and_validate_schema_classes_enum(
+            config.unit, CalendarInterval
+        )
+        model_unit = _try_parse_and_validate_schema_classes_enum(
+            sdk_unit.value, models.CalendarIntervalClass
+        )
+        return models.TimeWindowSizeClass(
+            unit=model_unit,
+            multiple=config.multiple,
+        )
+    raise SDKUsageErrorWithExamples(
+        msg=f"Invalid time window size: {config}",
+        examples=TIME_WINDOW_SIZE_EXAMPLES,
+    )
 class FixedRangeExclusionWindow(BaseModel):
@@ -496,6 +642,219 @@ def _try_parse_training_data_lookback_days(
     return training_data_lookback_days
+def _validate_cron_schedule(schedule: str, timezone: str) -> None:
+    """Validate a cron expression and a timezone name.
+    We are using the POSIX.1-2017 standard for cron expressions.
+    Note: We are using the croniter library for cron parsing which is different from executor, which uses apscheduler, so there is a risk of mismatch here.
+    Args:
+        schedule: The cron expression; it must have exactly 5 whitespace-separated fields.
+        timezone: The timezone name, checked with pytz.timezone(). An empty value skips the check.
+    Raises:
+        SDKUsageError: If the timezone is unknown, the expression does not have 5 fields,
+            the day-of-week field uses 7 for Sunday, or croniter rejects the expression.
+    """
+    import re
+    try:
+        # Validate timezone - pytz.timezone() raises UnknownTimeZoneError for invalid timezones
+        # Skip timezone validation when empty
+        if timezone:
+            pytz.timezone(timezone)
+        # Validate 5-field cron expression only (POSIX.1-2017 standard)
+        fields = schedule.strip().split()
+        if len(fields) != 5:
+            raise ValueError("POSIX.1-2017 requires exactly 5 fields")
+        # POSIX.1-2017 specific validation: Sunday must be 0, not 7
+        # However croniter accepts 7 as Sunday, so custom check is needed here.
+        # The day-of-week field is the 5th field (index 4). Each comma-separated item is
+        # checked on its value/range part only: whatever follows "/" is a step, not a
+        # weekday, so a step of 7 (e.g. "*/7") must not be reported as "Sunday written as 7".
+        for dow_item in fields[4].split(","):
+            day_part = dow_item.split("/", 1)[0]
+            if re.search(r"\b7\b|7-", day_part):
+                raise ValueError(
+                    "POSIX.1-2017 standard: Sunday must be represented as 0, not 7"
+                )
+        # Validate cron expression - croniter constructor validates the expression
+        croniter(schedule)
+    except Exception as e:
+        # Every failure above is reported as one SDK-level error, with the original exception chained.
+        raise SDKUsageError(
+            f"Invalid cron expression or timezone: {schedule} {timezone}, please use a POSIX.1-2017 compatible cron expression and timezone."
+        ) from e
+def _try_parse_schedule(
+    schedule: Optional[Union[str, models.CronScheduleClass]],
+) -> Optional[models.CronScheduleClass]:
+    """Validate a schedule input and normalize it to a models.CronScheduleClass.
+    Args:
+        schedule: None, a cron expression string (interpreted in the "UTC" timezone),
+            or a models.CronScheduleClass (validated and returned as-is).
+    Returns:
+        None when no schedule is given, otherwise the validated models.CronScheduleClass.
+    Raises:
+        SDKUsageError: If the cron expression or timezone is invalid, or if schedule
+            is of an unsupported type.
+    """
+    if schedule is None:
+        return None
+    if isinstance(schedule, str):
+        _validate_cron_schedule(schedule, "UTC")
+        return models.CronScheduleClass(
+            cron=schedule,
+            timezone="UTC",
+        )
+    if isinstance(schedule, models.CronScheduleClass):
+        _validate_cron_schedule(schedule.cron, schedule.timezone)
+        return schedule
+    # Reject unsupported types explicitly instead of falling through and returning None,
+    # which would be indistinguishable from "no schedule was provided".
+    raise SDKUsageError(
+        f"Invalid schedule: {schedule}, please provide a cron expression string or a models.CronScheduleClass."
+    )
+# Field specification types accepted for the `field` argument of _create_monitor_info.
+FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
+# Generic type variable used by _try_parse_and_validate_schema_classes_enum.
+T = TypeVar("T")
+def _try_parse_and_validate_schema_classes_enum(
+    value: Union[str, T],
+    enum_class: Type[T],
+) -> T:
+    """Resolve a value to a member of an enum-like class.
+    Args:
+        value: Either an instance of enum_class (returned unchanged) or a string
+            that must be one of get_enum_options(enum_class).
+        enum_class: The enum-like class to resolve against.
+    Returns:
+        The attribute of enum_class named by the upper-cased value.
+    Raises:
+        SDKUsageError: If value is neither an enum_class instance nor a string
+            listed in get_enum_options(enum_class).
+    """
+    if isinstance(value, enum_class):
+        return value
+    valid_options = get_enum_options(enum_class)
+    # Raise an SDK error rather than asserting on the type: assert statements are
+    # removed when Python runs with -O, and an AssertionError is not an SDK usage error.
+    if not isinstance(value, str) or value not in valid_options:
+        raise SDKUsageError(
+            f"Invalid value for {enum_class.__name__}: {value}, valid options are {valid_options}"
+        )
+    return getattr(enum_class, value.upper())
+@dataclass(frozen=True)
+class DatasetSourceType:
+    """
+    DatasetSourceType represents a combination of source type, platform and assertion type.
+    _is_source_type_valid compares such a combination against the entries of INVALID_SOURCE_TYPES
+    to decide whether a source type may be used for a dataset platform and assertion type.
+    Args:
+        source_type: The source type (e.g. information schema, field value, etc. aka detection mechanism)
+        platform: The platform of the dataset as a string OR "all" for all platforms.
+        assertion_type: The assertion type as a models.AssertionTypeClass string e.g. models.AssertionTypeClass.FRESHNESS OR "all" for all assertion types.
+    Example (an entry of INVALID_SOURCE_TYPES):
+    DatasetSourceType(
+        source_type=_InformationSchema,
+        platform="databricks",
+        assertion_type="all",
+    )
+    As an entry of INVALID_SOURCE_TYPES, this means that the source type _InformationSchema is invalid for the platform "databricks".
+    "all" in this example means that the source type is invalid for all assertion types on that platform.
+    """
+    source_type: Type[_DetectionMechanismTypes]
+    platform: str
+    assertion_type: Union[models.AssertionTypeClass, str]
+# Set of disallowed combinations; it is the default `invalid_source_types` of _is_source_type_valid.
+INVALID_SOURCE_TYPES = {
+    # Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
+    DatasetSourceType(
+        source_type=_InformationSchema,
+        platform="databricks",
+        assertion_type="all",
+    )
+}
+def _is_source_type_valid(
+    dataset_source_type: DatasetSourceType,
+    invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
+) -> bool:
+    """Tell whether a source type / platform / assertion type combination is allowed.
+    Args:
+        dataset_source_type: The combination to check.
+        invalid_source_types: The disallowed combinations; "all" in an entry's platform
+            or assertion_type acts as a wildcard for that dimension.
+    Returns:
+        False if any entry with the same source type covers the given platform and
+        assertion type (exactly or through "all"), True otherwise.
+    """
+    def _covers(rule: DatasetSourceType) -> bool:
+        # An entry only applies to the source type it names.
+        if rule.source_type != dataset_source_type.source_type:
+            return False
+        # Each dimension matches either through the "all" wildcard or by equality.
+        platform_matches = (
+            rule.platform == "all" or rule.platform == dataset_source_type.platform
+        )
+        assertion_type_matches = (
+            rule.assertion_type == "all"
+            or rule.assertion_type == dataset_source_type.assertion_type
+        )
+        return platform_matches and assertion_type_matches
+    return not any(_covers(rule) for rule in invalid_source_types)
+class _HasSmartAssertionInputs:
+    """
+    A class that contains the common inputs for smart assertions.
+    This is used to avoid code duplication in the smart assertion inputs.
+    Args:
+        sensitivity: The sensitivity to be applied to the assertion.
+        exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
+        training_data_lookback_days: The training data lookback days to be applied to the assertion.
+    """
+    def __init__(
+        self,
+        *,
+        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
+        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
+        training_data_lookback_days: Optional[int] = None,
+    ):
+        # Each raw input is normalized by its dedicated parser before being stored.
+        self.sensitivity = InferenceSensitivity.parse(sensitivity)
+        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
+        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
+            training_data_lookback_days
+        )
+    @staticmethod
+    def _format_exclusion_window_time(moment: datetime) -> str:
+        """
+        Format a datetime as abbreviated month, unpadded day, unpadded 12-hour hour, minutes and AM/PM, e.g. "Jan 5, 3:07 PM".
+        """
+        # The '%-d' and '%-I' (no zero padding) strftime directives are platform-specific and
+        # are rejected on some platforms (e.g. Windows), so the unpadded day and the
+        # 12-hour clock hour are computed directly instead.
+        return f"{moment.strftime('%b')} {moment.day}, {moment.hour % 12 or 12}:{moment.strftime('%M %p')}"
+    def _convert_exclusion_windows(
+        self,
+    ) -> list[models.AssertionExclusionWindowClass]:
+        """
+        Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
+        Returns:
+            A list of AssertionExclusionWindowClass objects.
+        Raises:
+            SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
+        """
+        exclusion_windows: list[models.AssertionExclusionWindowClass] = []
+        if self.exclusion_windows:
+            for window in self.exclusion_windows:
+                if not isinstance(window, FixedRangeExclusionWindow):
+                    raise SDKUsageErrorWithExamples(
+                        msg=f"Invalid exclusion window type: {window}",
+                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
+                    )
+                # To match the UI, we generate a display name for the exclusion window.
+                # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
+                # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
+                generated_display_name = f"{self._format_exclusion_window_time(window.start)} - {self._format_exclusion_window_time(window.end)}"
+                exclusion_windows.append(
+                    models.AssertionExclusionWindowClass(
+                        type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
+                        displayName=generated_display_name,
+                        fixedRange=models.AbsoluteTimeWindowClass(
+                            startTimeMillis=make_ts_millis(window.start),
+                            endTimeMillis=make_ts_millis(window.end),
+                        ),
+                    )
+                )
+        return exclusion_windows
+    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
+        """
+        Convert sensitivity into an AssertionMonitorSensitivityClass.
+        Returns:
+            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
+        """
+        return models.AssertionMonitorSensitivityClass(
+            level=InferenceSensitivity.to_int(self.sensitivity),
+        )
 class _AssertionInput(ABC):
     def __init__(
         self,
@@ -509,10 +868,8 @@ class _AssertionInput(ABC):
         ] = None,  # Can be None if the assertion is not yet created
         display_name: Optional[str] = None,
         enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
         detection_mechanism: DetectionMechanismInputTypes = None,
-        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
-        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
-        training_data_lookback_days: Optional[int] = None,
         incident_behavior: Optional[
             Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
         ] = None,
@@ -522,6 +879,7 @@ class _AssertionInput(ABC):
         created_at: datetime,
         updated_by: Union[str, CorpUserUrn],
         updated_at: datetime,
+        default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
     ):
         """
         Create an AssertionInput object.
@@ -533,9 +891,6 @@ class _AssertionInput(ABC):
             display_name: The display name of the assertion. If not provided, a random display name will be generated.
             enabled: Whether the assertion is enabled. Defaults to True.
             detection_mechanism: The detection mechanism to be used for the assertion.
-            sensitivity: The sensitivity to be applied to the assertion.
-            exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
-            training_data_lookback_days: The training data lookback days to be applied to the assertion.
             incident_behavior: The incident behavior to be applied to the assertion.
             tags: The tags to be applied to the assertion.
             source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
@@ -553,13 +908,20 @@ class _AssertionInput(ABC):
             else _generate_default_name(DEFAULT_NAME_PREFIX, DEFAULT_NAME_SUFFIX_LENGTH)
         )
         self.enabled = enabled
-        self.detection_mechanism = DetectionMechanism.parse(detection_mechanism)
-        self.sensitivity = InferenceSensitivity.parse(sensitivity)
-        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
-        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
-            training_data_lookback_days
+        self.schedule = _try_parse_schedule(schedule)
+        self.detection_mechanism = DetectionMechanism.parse(
+            detection_mechanism, default_detection_mechanism
         )
+        if not _is_source_type_valid(
+            DatasetSourceType(
+                source_type=type(self.detection_mechanism),
+                platform=self.dataset_urn.platform,
+                assertion_type=self._assertion_type(),
+            )
+        ):
+            raise SDKUsageError(
+                f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
+            )
         self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
         self.tags = tags
         if source_type not in get_enum_options(models.AssertionSourceTypeClass):
@@ -571,7 +933,6 @@ class _AssertionInput(ABC):
         self.created_at = created_at
         self.updated_by = updated_by
         self.updated_at = updated_at
         self.cached_dataset: Optional[Dataset] = None
     def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
@@ -656,10 +1017,7 @@ class _AssertionInput(ABC):
         """
         if not isinstance(
             self.detection_mechanism,
-            (
-                DetectionMechanism.LAST_MODIFIED_COLUMN,
-                DetectionMechanism.HIGH_WATERMARK_COLUMN,
-            ),
+            _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER,
         ):
             return None
@@ -672,12 +1030,6 @@ class _AssertionInput(ABC):
             sql=additional_filter,
         )
-    @abstractmethod
-    def _create_assertion_info(
-        self, filter: Optional[models.DatasetFilterClass]
-    ) -> AssertionInfoInputType:
-        pass
     def _convert_tags(self) -> Optional[TagsInputType]:
         """
         Convert the tags input into a standardized format.
@@ -746,8 +1098,6 @@ class _AssertionInput(ABC):
                 schedule=self._convert_schedule(),
                 source_type=source_type,
                 field=field,
-                sensitivity=self._convert_sensitivity(),
-                exclusion_windows=self._convert_exclusion_windows(),
             ),
         )
@@ -764,86 +1114,69 @@ class _AssertionInput(ABC):
             else models.MonitorModeClass.INACTIVE,
         )
-    def _convert_exclusion_windows(
-        self,
-    ) -> list[models.AssertionExclusionWindowClass]:
+    def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
         """
-        Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
+        Get the schema field spec for the detection mechanism if needed.
+        """
+        # Only fetch the dataset if it's not already cached.
+        # Also we only fetch the dataset if it's needed for the detection mechanism.
+        if self.cached_dataset is None:
+            self.cached_dataset = self.entity_client.get(self.dataset_urn)
-        Returns:
-            A list of AssertionExclusionWindowClass objects.
+        # Handle case where dataset doesn't exist
+        if self.cached_dataset is None:
+            raise SDKUsageError(
+                f"Dataset {self.dataset_urn} not found. Cannot validate column {column_name}."
+            )
-        Raises:
-            SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
-        """
-        exclusion_windows: list[models.AssertionExclusionWindowClass] = []
-        if self.exclusion_windows:
-            for window in self.exclusion_windows:
-                if not isinstance(window, FixedRangeExclusionWindow):
-                    raise SDKUsageErrorWithExamples(
-                        msg=f"Invalid exclusion window type: {window}",
-                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
-                    )
-                # To match the UI, we generate a display name for the exclusion window.
-                # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
-                # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
-                generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
-                exclusion_windows.append(
-                    models.AssertionExclusionWindowClass(
-                        type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
-                        displayName=generated_display_name,
-                        fixedRange=models.AbsoluteTimeWindowClass(
-                            startTimeMillis=make_ts_millis(window.start),
-                            endTimeMillis=make_ts_millis(window.end),
-                        ),
-                    )
-                )
-        return exclusion_windows
+        # TODO: Make a public accessor for _schema_dict in the SDK
+        schema_fields = self.cached_dataset._schema_dict()
+        field = schema_fields.get(column_name)
+        if field:
+            return models.SchemaFieldSpecClass(
+                path=field.fieldPath,
+                type=field.type.type.__class__.__name__,
+                nativeType=field.nativeDataType,
+            )
+        else:
+            raise SDKUsageError(
+                msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
+            )
-    @abstractmethod
-    def _convert_assertion_source_type_and_field(
+    def _validate_field_type(
         self,
-    ) -> tuple[str, Optional[models.FreshnessFieldSpecClass]]:
+        field_spec: models.SchemaFieldSpecClass,
+        column_name: str,
+        allowed_types: list[DictWrapper],
+        field_type_name: str,
+    ) -> None:
         """
-        Convert detection mechanism into source type and field specification for freshness assertions.
+        Validate that a field has an allowed type.
-        Returns:
-            A tuple of (source_type, field) where field may be None.
-            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass since
-            the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
+        Args:
+            field_spec: The field specification to validate
+            column_name: The name of the column for error messages
+            allowed_types: List of allowed field types
+            field_type_name: Human-readable name of the field type for error messages
         Raises:
-            SDKNotYetSupportedError: If the detection mechanism is not supported.
-            SDKUsageError: If the field (column) is not found in the dataset,
-            and the detection mechanism requires a field. Also if the field
-            is not an allowed type for the detection mechanism.
+            SDKUsageError: If the field has an invalid type
         """
-        pass
+        allowed_type_names = [t.__class__.__name__ for t in allowed_types]
+        if field_spec.type not in allowed_type_names:
+            raise SDKUsageError(
+                msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
+                f"Allowed types are {allowed_type_names}.",
+            )
     @abstractmethod
-    def _convert_schedule(self) -> models.CronScheduleClass:
-        pass
-    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
-        """
-        Convert sensitivity into an AssertionMonitorSensitivityClass.
-        Returns:
-            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
-        """
-        return models.AssertionMonitorSensitivityClass(
-            level=InferenceSensitivity.to_int(self.sensitivity),
-        )
     def _create_monitor_info(
         self,
         assertion_urn: AssertionUrn,
         status: models.MonitorStatusClass,
         schedule: models.CronScheduleClass,
         source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
-        field: Optional[models.FreshnessFieldSpecClass],
-        sensitivity: models.AssertionMonitorSensitivityClass,
-        exclusion_windows: list[models.AssertionExclusionWindowClass],
+        field: Optional[FieldSpecType],
     ) -> models.MonitorInfoClass:
         """
         Create a MonitorInfoClass with all the necessary components.
@@ -851,71 +1184,94 @@ class _AssertionInput(ABC):
         Args:
             status: The monitor status.
             schedule: The monitor schedule.
-            source_type: The freshness source type.
+            source_type: The source type.
             field: Optional field specification.
-            sensitivity: The monitor sensitivity.
-            exclusion_windows: List of exclusion windows.
         Returns:
             A MonitorInfoClass configured with all the provided components.
         """
-        return models.MonitorInfoClass(
-            type=models.MonitorTypeClass.ASSERTION,
-            status=status,
-            assertionMonitor=models.AssertionMonitorClass(
-                assertions=[
-                    models.AssertionEvaluationSpecClass(
-                        assertion=str(assertion_urn),
-                        schedule=schedule,
-                        parameters=models.AssertionEvaluationParametersClass(
-                            type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
-                            datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
-                                sourceType=source_type,
-                                field=field,
-                            ),
-                        ),
-                    )
-                ],
-                settings=models.AssertionMonitorSettingsClass(
-                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
-                        sensitivity=sensitivity,
-                        exclusionWindows=exclusion_windows,
-                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
-                    ),
-                ),
-            ),
-        )
+        pass
-    def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
-        """
-        Get the schema field spec for the detection mechanism if needed.
+    @abstractmethod
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        pass
+    @abstractmethod
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        """Create assertion info specific to the assertion type."""
+        pass
+    @abstractmethod
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        """Convert schedule to appropriate format for the assertion type."""
+        pass
+    @abstractmethod
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        """Get evaluation parameters specific to the assertion type."""
+        pass
+    @abstractmethod
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[FieldSpecType]]:
+        """Convert detection mechanism to source type and field spec."""
+        pass
+class _HasFreshnessFeatures:
+    def _create_field_spec(
+        self,
+        column_name: str,
+        allowed_types: list[DictWrapper],  # TODO: Use the type from the PDL
+        field_type_name: str,
+        kind: str,
+        get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
+        validate_field_type: Callable[
+            [models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
+        ],
+    ) -> models.FreshnessFieldSpecClass:
         """
-        # Only fetch the dataset if it's not already cached.
-        # Also we only fetch the dataset if it's needed for the detection mechanism.
-        if self.cached_dataset is None:
-            self.cached_dataset = self.entity_client.get(self.dataset_urn)
+        Create a field specification for a column, validating its type.
-        # TODO: Make a public accessor for _schema_dict in the SDK
-        schema_fields = self.cached_dataset._schema_dict()
-        field = schema_fields.get(column_name)
-        if field:
-            return models.SchemaFieldSpecClass(
-                path=field.fieldPath,
-                type=field.type.type.__class__.__name__,
-                nativeType=field.nativeDataType,
-            )
-        else:
+        Args:
+            column_name: The name of the column to create a spec for
+            allowed_types: List of allowed field types
+            field_type_name: Human-readable name of the field type for error messages
+            kind: The kind of field to create
+        Returns:
+            A FreshnessFieldSpecClass for the column
+        Raises:
+            SDKUsageError: If the column is not found or has an invalid type
+        """
+        SUPPORTED_KINDS = [
+            models.FreshnessFieldKindClass.LAST_MODIFIED,
+            models.FreshnessFieldKindClass.HIGH_WATERMARK,
+        ]
+        if kind not in SUPPORTED_KINDS:
             raise SDKUsageError(
-                msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
+                msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
             )
+        field_spec = get_schema_field_spec(column_name)
+        validate_field_type(field_spec, column_name, allowed_types, field_type_name)
+        return models.FreshnessFieldSpecClass(
+            path=field_spec.path,
+            type=field_spec.type,
+            nativeType=field_spec.nativeType,
+            kind=kind,
+        )
-class _SmartFreshnessAssertionInput(_AssertionInput):
-    DEFAULT_SCHEDULE = models.CronScheduleClass(
-        cron="0 0 * * *",
-        timezone="UTC",
-    )
+class _SmartFreshnessAssertionInput(
+    _AssertionInput, _HasSmartAssertionInputs, _HasFreshnessFeatures
+):
     def __init__(
         self,
         *,
@@ -926,6 +1282,7 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
         urn: Optional[Union[str, AssertionUrn]] = None,
         display_name: Optional[str] = None,
         enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
         detection_mechanism: DetectionMechanismInputTypes = None,
         sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
         exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
@@ -939,16 +1296,17 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
         updated_by: Union[str, CorpUserUrn],
         updated_at: datetime,
     ):
-        super().__init__(
+        _AssertionInput.__init__(
+            self,
             dataset_urn=dataset_urn,
             entity_client=entity_client,
             urn=urn,
             display_name=display_name,
             enabled=enabled,
+            schedule=schedule
+            if schedule is not None
+            else DEFAULT_HOURLY_SCHEDULE,  # Use provided schedule or default for create case
             detection_mechanism=detection_mechanism,
-            sensitivity=sensitivity,
-            exclusion_windows=exclusion_windows,
-            training_data_lookback_days=training_data_lookback_days,
             incident_behavior=incident_behavior,
             tags=tags,
             source_type=models.AssertionSourceTypeClass.INFERRED,  # Smart assertions are of type inferred, not native
@@ -957,6 +1315,16 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
             updated_by=updated_by,
             updated_at=updated_at,
         )
+        _HasSmartAssertionInputs.__init__(
+            self,
+            sensitivity=sensitivity,
+            exclusion_windows=exclusion_windows,
+            training_data_lookback_days=training_data_lookback_days,
+        )
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        return models.AssertionTypeClass.FRESHNESS
     def _create_assertion_info(
         self, filter: Optional[models.DatasetFilterClass]
@@ -973,29 +1341,51 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
         return models.FreshnessAssertionInfoClass(
             type=models.FreshnessAssertionTypeClass.DATASET_CHANGE,  # Currently only dataset change is supported
             entity=str(self.dataset_urn),
-            # schedule (optional, not used for smart freshness assertions)
+            # schedule (optional, must be left empty for smart freshness assertions - managed by the AI inference engine)
             filter=filter,
         )
     def _convert_schedule(self) -> models.CronScheduleClass:
         """Create a schedule for a smart freshness assertion.
-        Since the schedule is not used for smart freshness assertions, we return a default schedule.
+        For create case, uses DEFAULT_HOURLY_SCHEDULE. For update case, preserves existing schedule.
         Returns:
             A CronScheduleClass with appropriate schedule settings.
         """
-        return self.DEFAULT_SCHEDULE
+        assert self.schedule is not None, (
+            "Schedule should never be None due to constructor logic"
+        )
+        return self.schedule
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        # Ensure field is either None or FreshnessFieldSpecClass
+        freshness_field = None
+        if field is not None:
+            if not isinstance(field, models.FreshnessFieldSpecClass):
+                raise SDKUsageError(
+                    f"Expected FreshnessFieldSpecClass for freshness assertion, got {type(field).__name__}"
+                )
+            freshness_field = field
+        return models.AssertionEvaluationParametersClass(
+            type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
+            datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
+                sourceType=source_type, field=freshness_field
+            ),
+        )
     def _convert_assertion_source_type_and_field(
         self,
-    ) -> tuple[str, Optional[models.FreshnessFieldSpecClass]]:
+    ) -> tuple[str, Optional[FieldSpecType]]:
         """
         Convert detection mechanism into source type and field specification for freshness assertions.
         Returns:
             A tuple of (source_type, field) where field may be None.
-            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass since
+            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
             the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
         Raises:
@@ -1014,6 +1404,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
                 LAST_MODIFIED_ALLOWED_FIELD_TYPES,
                 "last modified column",
                 models.FreshnessFieldKindClass.LAST_MODIFIED,
+                self._get_schema_field_spec,
+                self._validate_field_type,
             )
         elif isinstance(self.detection_mechanism, _InformationSchema):
             source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
@@ -1028,47 +1420,199 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
         return source_type, field
-    def _create_field_spec(
+    def _create_monitor_info(
         self,
-        column_name: str,
-        allowed_types: list[DictWrapper],  # TODO: Use the type from the PDL
-        field_type_name: str,
-        kind: str,
-    ) -> models.FreshnessFieldSpecClass:
+        assertion_urn: AssertionUrn,
+        status: models.MonitorStatusClass,
+        schedule: models.CronScheduleClass,
+        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
+        field: Optional[FieldSpecType],
+    ) -> models.MonitorInfoClass:
         """
-        Create a field specification for a column, validating its type.
+        Create a MonitorInfoClass with all the necessary components.
+        """
+        return models.MonitorInfoClass(
+            type=models.MonitorTypeClass.ASSERTION,
+            status=status,
+            assertionMonitor=models.AssertionMonitorClass(
+                assertions=[
+                    models.AssertionEvaluationSpecClass(
+                        assertion=str(assertion_urn),
+                        schedule=schedule,
+                        parameters=self._get_assertion_evaluation_parameters(
+                            str(source_type), field
+                        ),
+                    ),
+                ],
+                settings=models.AssertionMonitorSettingsClass(
+                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
+                        sensitivity=self._convert_sensitivity(),
+                        exclusionWindows=self._convert_exclusion_windows(),
+                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
+                    ),
+                ),
+            ),
+        )
+class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
+    def __init__(
+        self,
+        *,
+        # Required fields
+        dataset_urn: Union[str, DatasetUrn],
+        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
+        # Optional fields
+        urn: Optional[Union[str, AssertionUrn]] = None,
+        display_name: Optional[str] = None,
+        enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
+        detection_mechanism: DetectionMechanismInputTypes = None,
+        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
+        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
+        training_data_lookback_days: Optional[int] = None,
+        incident_behavior: Optional[
+            Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
+        ] = None,
+        tags: Optional[TagsInputType] = None,
+        created_by: Union[str, CorpUserUrn],
+        created_at: datetime,
+        updated_by: Union[str, CorpUserUrn],
+        updated_at: datetime,
+    ):
+        _AssertionInput.__init__(
+            self,
+            dataset_urn=dataset_urn,
+            entity_client=entity_client,
+            urn=urn,
+            display_name=display_name,
+            enabled=enabled,
+            schedule=schedule,
+            detection_mechanism=detection_mechanism,
+            incident_behavior=incident_behavior,
+            tags=tags,
+            source_type=models.AssertionSourceTypeClass.INFERRED,  # Smart assertions are of type inferred, not native
+            created_by=created_by,
+            created_at=created_at,
+            updated_by=updated_by,
+            updated_at=updated_at,
+        )
+        _HasSmartAssertionInputs.__init__(
+            self,
+            sensitivity=sensitivity,
+            exclusion_windows=exclusion_windows,
+            training_data_lookback_days=training_data_lookback_days,
+        )
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        """
+        Create a VolumeAssertionInfoClass for a smart volume assertion.
         Args:
-            column_name: The name of the column to create a spec for
-            allowed_types: List of allowed field types
-            field_type_name: Human-readable name of the field type for error messages
-            kind: The kind of field to create
+            filter: Optional filter to apply to the assertion.
         Returns:
-            A FreshnessFieldSpecClass for the column
+            A VolumeAssertionInfoClass configured for smart volume.
+        """
+        return models.VolumeAssertionInfoClass(
+            type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL,  # Currently only ROW_COUNT_TOTAL is supported for smart volume
+            entity=str(self.dataset_urn),
+            filter=filter,
+        )
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        """Create a schedule for a smart volume assertion.
+        Returns:
+            A CronScheduleClass with appropriate schedule settings.
+        """
+        if self.schedule is None:
+            return DEFAULT_HOURLY_SCHEDULE
+        return models.CronScheduleClass(
+            cron=self.schedule.cron,
+            timezone=self.schedule.timezone,
+        )
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        return models.AssertionEvaluationParametersClass(
+            type=models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
+            datasetVolumeParameters=models.DatasetVolumeAssertionParametersClass(
+                sourceType=source_type,
+            ),
+        )
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[FieldSpecType]]:
+        """
+        Convert detection mechanism into source type and field specification for volume assertions.
+        Returns:
+            A tuple of (source_type, field) where field may be None.
+            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
+            the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
         Raises:
-            SDKUsageError: If the column is not found or has an invalid type
+            SDKNotYetSupportedError: If the detection mechanism is not supported.
+            SDKUsageError: If the field (column) is not found in the dataset,
+            and the detection mechanism requires a field. Also if the field
+            is not an allowed type for the detection mechanism.
         """
-        SUPPORTED_KINDS = [
-            models.FreshnessFieldKindClass.LAST_MODIFIED,
-            models.FreshnessFieldKindClass.HIGH_WATERMARK,
-        ]
-        if kind not in SUPPORTED_KINDS:
-            raise SDKUsageError(
-                msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
-            )
+        source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
+        field = None
-        field_spec = self._get_schema_field_spec(column_name)
-        allowed_type_names = [t.__class__.__name__ for t in allowed_types]
-        if field_spec.type not in allowed_type_names:
-            raise SDKUsageError(
-                msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
-                f"Allowed types are {allowed_type_names}.",
+        if isinstance(self.detection_mechanism, _Query):
+            source_type = models.DatasetVolumeSourceTypeClass.QUERY
+        elif isinstance(self.detection_mechanism, _InformationSchema):
+            source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
+        elif isinstance(self.detection_mechanism, _DatasetProfile):
+            source_type = models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE
+        else:
+            raise SDKNotYetSupportedError(
+                f"Detection mechanism {self.detection_mechanism} not yet supported for smart volume assertions"
             )
-        return models.FreshnessFieldSpecClass(
-            path=field_spec.path,
-            type=field_spec.type,
-            nativeType=field_spec.nativeType,
-            kind=kind,
+        return source_type, field
+    def _create_monitor_info(
+        self,
+        assertion_urn: AssertionUrn,
+        status: models.MonitorStatusClass,
+        schedule: models.CronScheduleClass,
+        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
+        field: Optional[FieldSpecType],
+    ) -> models.MonitorInfoClass:
+        """
+        Create a MonitorInfoClass with all the necessary components.
+        """
+        return models.MonitorInfoClass(
+            type=models.MonitorTypeClass.ASSERTION,
+            status=status,
+            assertionMonitor=models.AssertionMonitorClass(
+                assertions=[
+                    models.AssertionEvaluationSpecClass(
+                        assertion=str(assertion_urn),
+                        schedule=schedule,
+                        parameters=self._get_assertion_evaluation_parameters(
+                            str(source_type), field
+                        ),
+                    ),
+                ],
+                settings=models.AssertionMonitorSettingsClass(
+                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
+                        sensitivity=self._convert_sensitivity(),
+                        exclusionWindows=self._convert_exclusion_windows(),
+                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
+                    ),
+                ),
+            ),
         )
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        return models.AssertionTypeClass.VOLUME