acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -6,9 +6,10 @@ validate and represent the input for creating an Assertion in DataHub.
6
6
  import random
7
7
  import string
8
8
  from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
9
10
  from datetime import datetime
10
11
  from enum import Enum
11
- from typing import Literal, Optional, TypeAlias, Union
12
+ from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
12
13
 
13
14
  import pydantic
14
15
  import pytz
@@ -42,12 +43,21 @@ ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60
42
43
  DEFAULT_NAME_PREFIX = "New Assertion"
43
44
  DEFAULT_NAME_SUFFIX_LENGTH = 8
44
45
 
45
- DEFAULT_SCHEDULE = models.CronScheduleClass(
46
+
47
+ DEFAULT_HOURLY_SCHEDULE = models.CronScheduleClass(
46
48
  cron="0 * * * *", # Every hour, matches the UI default
47
49
  timezone=str(
48
50
  tzlocal.get_localzone()
49
51
  ), # User local timezone, matches the UI default
50
52
  )
53
+ DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_HOURLY_SCHEDULE
54
+
55
+ DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
56
+ cron="0 0 * * *", # Every day at midnight, matches the UI default
57
+ timezone=str(
58
+ tzlocal.get_localzone()
59
+ ), # User local timezone, matches the UI default
60
+ )
51
61
 
52
62
 
53
63
  class AbstractDetectionMechanism(BaseModel, ABC):
@@ -101,6 +111,26 @@ class _Query(AbstractDetectionMechanism):
101
111
  additional_filter: Optional[str] = None
102
112
 
103
113
 
114
+ class _AllRowsQuery(AbstractDetectionMechanism):
115
+ # For column-based assertions, this is the default detection mechanism.
116
+ type: Literal["all_rows_query"] = "all_rows_query"
117
+ additional_filter: Optional[str] = None
118
+
119
+
120
+ class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
121
+ # Used for column-based assertions.
122
+ type: Literal["all_rows_query_datahub_dataset_profile"] = (
123
+ "all_rows_query_datahub_dataset_profile"
124
+ )
125
+
126
+
127
+ class _ChangedRowsQuery(AbstractDetectionMechanism):
128
+ # Used for column-based assertions.
129
+ type: Literal["changed_rows_query"] = "changed_rows_query"
130
+ column_name: str
131
+ additional_filter: Optional[str] = None
132
+
133
+
104
134
  class _DatasetProfile(AbstractDetectionMechanism):
105
135
  type: Literal["dataset_profile"] = "dataset_profile"
106
136
 
@@ -114,6 +144,9 @@ _DETECTION_MECHANISM_CONCRETE_TYPES = (
114
144
  _DataHubOperation,
115
145
  _Query,
116
146
  _DatasetProfile,
147
+ _AllRowsQuery,
148
+ _ChangedRowsQuery,
149
+ _AllRowsQueryDataHubDatasetProfile,
117
150
  )
118
151
  _DetectionMechanismTypes = Union[
119
152
  _InformationSchema,
@@ -123,14 +156,21 @@ _DetectionMechanismTypes = Union[
123
156
  _DataHubOperation,
124
157
  _Query,
125
158
  _DatasetProfile,
159
+ _AllRowsQuery,
160
+ _ChangedRowsQuery,
161
+ _AllRowsQueryDataHubDatasetProfile,
126
162
  ]
127
163
 
128
164
  _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
129
165
  _LastModifiedColumn,
130
166
  _HighWatermarkColumn,
131
167
  _Query,
168
+ _AllRowsQuery,
169
+ _ChangedRowsQuery,
132
170
  )
133
171
 
172
+ DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
173
+
134
174
 
135
175
  class DetectionMechanism:
136
176
  # To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
@@ -141,6 +181,9 @@ class DetectionMechanism:
141
181
  HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
142
182
  DATAHUB_OPERATION = _DataHubOperation()
143
183
  QUERY = _Query
184
+ ALL_ROWS_QUERY = _AllRowsQuery()
185
+ CHANGED_ROWS_QUERY = _ChangedRowsQuery
186
+ ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
144
187
  DATASET_PROFILE = _DatasetProfile()
145
188
 
146
189
  _DETECTION_MECHANISM_EXAMPLES = {
@@ -170,6 +213,18 @@ class DetectionMechanism:
170
213
  "Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
171
214
  "Dataset Profile from string": "dataset_profile",
172
215
  "Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
216
+ "All Rows Query from string": "all_rows_query",
217
+ "All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
218
+ "All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
219
+ "Changed Rows Query from dict (with optional additional filter)": {
220
+ "type": "changed_rows_query",
221
+ "column_name": "id",
222
+ "additional_filter": "id > 1000",
223
+ },
224
+ "Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
225
+ "Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
226
+ "All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
227
+ "All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
173
228
  }
174
229
 
175
230
  @staticmethod
@@ -177,9 +232,10 @@ class DetectionMechanism:
177
232
  detection_mechanism_config: Optional[
178
233
  Union[str, dict[str, str], _DetectionMechanismTypes]
179
234
  ] = None,
235
+ default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
180
236
  ) -> _DetectionMechanismTypes:
181
237
  if detection_mechanism_config is None:
182
- return DEFAULT_DETECTION_MECHANISM
238
+ return default_detection_mechanism
183
239
  if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
184
240
  return detection_mechanism_config
185
241
  elif isinstance(detection_mechanism_config, str):
@@ -260,8 +316,6 @@ class DetectionMechanism:
260
316
  ) from e
261
317
 
262
318
 
263
- DEFAULT_DETECTION_MECHANISM = DetectionMechanism.INFORMATION_SCHEMA
264
-
265
319
  DetectionMechanismInputTypes: TypeAlias = Union[
266
320
  str, dict[str, str], _DetectionMechanismTypes, None
267
321
  ]
@@ -328,7 +382,59 @@ class InferenceSensitivity(Enum):
328
382
  }[sensitivity]
329
383
 
330
384
 
331
- DEFAULT_SENSITIVITY = InferenceSensitivity.MEDIUM
385
+ DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
386
+
387
+ TIME_WINDOW_SIZE_EXAMPLES = {
388
+ "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
389
+ "Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
390
+ }
391
+
392
+
393
+ class CalendarInterval(Enum):
394
+ MINUTE = "MINUTE"
395
+ HOUR = "HOUR"
396
+ DAY = "DAY"
397
+
398
+
399
+ class TimeWindowSize(BaseModel):
400
+ unit: Union[CalendarInterval, str]
401
+ multiple: int
402
+
403
+
404
+ TimeWindowSizeInputTypes: TypeAlias = Union[
405
+ models.TimeWindowSizeClass,
406
+ models.FixedIntervalScheduleClass,
407
+ TimeWindowSize,
408
+ ]
409
+
410
+
411
+ def _try_parse_time_window_size(
412
+ config: TimeWindowSizeInputTypes,
413
+ ) -> models.TimeWindowSizeClass:
414
+ if isinstance(config, models.TimeWindowSizeClass):
415
+ return config
416
+ elif isinstance(config, models.FixedIntervalScheduleClass):
417
+ return models.TimeWindowSizeClass(
418
+ unit=_try_parse_and_validate_schema_classes_enum(
419
+ config.unit, models.CalendarIntervalClass
420
+ ),
421
+ multiple=config.multiple,
422
+ )
423
+ elif isinstance(config, TimeWindowSize):
424
+ return models.TimeWindowSizeClass(
425
+ unit=_try_parse_and_validate_schema_classes_enum(
426
+ _try_parse_and_validate_schema_classes_enum(
427
+ config.unit, CalendarInterval
428
+ ).value,
429
+ models.CalendarIntervalClass,
430
+ ),
431
+ multiple=config.multiple,
432
+ )
433
+ else:
434
+ raise SDKUsageErrorWithExamples(
435
+ msg=f"Invalid time window size: {config}",
436
+ examples=TIME_WINDOW_SIZE_EXAMPLES,
437
+ )
332
438
 
333
439
 
334
440
  class FixedRangeExclusionWindow(BaseModel):
@@ -594,6 +700,161 @@ def _try_parse_schedule(
594
700
  FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
595
701
 
596
702
 
703
+ T = TypeVar("T")
704
+
705
+
706
+ def _try_parse_and_validate_schema_classes_enum(
707
+ value: Union[str, T],
708
+ enum_class: Type[T],
709
+ ) -> T:
710
+ if isinstance(value, enum_class):
711
+ return value
712
+ assert isinstance(value, str)
713
+ if value not in get_enum_options(enum_class):
714
+ raise SDKUsageError(
715
+ f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
716
+ )
717
+ return getattr(enum_class, value.upper())
718
+
719
+
720
+ @dataclass(frozen=True)
721
+ class DatasetSourceType:
722
+ """
723
+ DatasetSourceType is used to represent a dataset source type.
724
+ It is used to check if a source type is valid for a dataset type and assertion type.
725
+
726
+ Args:
727
+ source_type: The source type (e.g. information schema, field value, etc. aka detection mechanism)
728
+ platform: The platform of the dataset as a string OR "all" for all platforms.
729
+ assertion_type: The assertion type as a models.AssertionTypeClass string e.g. models.AssertionTypeClass.FRESHNESS OR "all" for all assertion types.
730
+
731
+ Example:
732
+ DatasetSourceType(
733
+ source_type=_InformationSchema,
734
+ platform="databricks",
735
+ assertion_type="all",
736
+ )
737
+ This means that the source type _InformationSchema is invalid for the dataset type "databricks" and assertion type "all".
738
+ "all" in this example means that the source type is invalid for all assertion types.
739
+ """
740
+
741
+ source_type: Type[_DetectionMechanismTypes]
742
+ platform: str
743
+ assertion_type: Union[models.AssertionTypeClass, str]
744
+
745
+
746
+ INVALID_SOURCE_TYPES = {
747
+ # Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
748
+ DatasetSourceType(
749
+ source_type=_InformationSchema,
750
+ platform="databricks",
751
+ assertion_type="all",
752
+ )
753
+ }
754
+
755
+
756
+ def _is_source_type_valid(
757
+ dataset_source_type: DatasetSourceType,
758
+ invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
759
+ ) -> bool:
760
+ for invalid in invalid_source_types:
761
+ if invalid.source_type == dataset_source_type.source_type:
762
+ # If both platform and assertion type are "all", the source type is invalid for all combinations
763
+ if invalid.platform == "all" and invalid.assertion_type == "all":
764
+ return False
765
+ # If platform matches and assertion type is "all", the source type is invalid for all assertion types on that platform
766
+ if (
767
+ invalid.platform == dataset_source_type.platform
768
+ and invalid.assertion_type == "all"
769
+ ):
770
+ return False
771
+ # If platform is "all" and assertion type matches, the source type is invalid for all platforms for that assertion type
772
+ if (
773
+ invalid.platform == "all"
774
+ and invalid.assertion_type == dataset_source_type.assertion_type
775
+ ):
776
+ return False
777
+ # If both platform and assertion type match exactly, the source type is invalid
778
+ if (
779
+ invalid.platform == dataset_source_type.platform
780
+ and invalid.assertion_type == dataset_source_type.assertion_type
781
+ ):
782
+ return False
783
+ return True
784
+
785
+
786
+ class _HasSmartAssertionInputs:
787
+ """
788
+ A class that contains the common inputs for smart assertions.
789
+ This is used to avoid code duplication in the smart assertion inputs.
790
+
791
+ Args:
792
+ sensitivity: The sensitivity to be applied to the assertion.
793
+ exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
794
+ training_data_lookback_days: The training data lookback days to be applied to the assertion.
795
+ """
796
+
797
+ def __init__(
798
+ self,
799
+ *,
800
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
801
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
802
+ training_data_lookback_days: Optional[int] = None,
803
+ ):
804
+ self.sensitivity = InferenceSensitivity.parse(sensitivity)
805
+ self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
806
+ self.training_data_lookback_days = _try_parse_training_data_lookback_days(
807
+ training_data_lookback_days
808
+ )
809
+
810
+ def _convert_exclusion_windows(
811
+ self,
812
+ ) -> list[models.AssertionExclusionWindowClass]:
813
+ """
814
+ Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
815
+
816
+ Returns:
817
+ A list of AssertionExclusionWindowClass objects.
818
+
819
+ Raises:
820
+ SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
821
+ """
822
+ exclusion_windows: list[models.AssertionExclusionWindowClass] = []
823
+ if self.exclusion_windows:
824
+ for window in self.exclusion_windows:
825
+ if not isinstance(window, FixedRangeExclusionWindow):
826
+ raise SDKUsageErrorWithExamples(
827
+ msg=f"Invalid exclusion window type: {window}",
828
+ examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
829
+ )
830
+ # To match the UI, we generate a display name for the exclusion window.
831
+ # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
832
+ # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
833
+ generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
834
+ exclusion_windows.append(
835
+ models.AssertionExclusionWindowClass(
836
+ type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE, # Currently only fixed range is supported
837
+ displayName=generated_display_name,
838
+ fixedRange=models.AbsoluteTimeWindowClass(
839
+ startTimeMillis=make_ts_millis(window.start),
840
+ endTimeMillis=make_ts_millis(window.end),
841
+ ),
842
+ )
843
+ )
844
+ return exclusion_windows
845
+
846
+ def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
847
+ """
848
+ Convert sensitivity into an AssertionMonitorSensitivityClass.
849
+
850
+ Returns:
851
+ An AssertionMonitorSensitivityClass with the appropriate sensitivity.
852
+ """
853
+ return models.AssertionMonitorSensitivityClass(
854
+ level=InferenceSensitivity.to_int(self.sensitivity),
855
+ )
856
+
857
+
597
858
  class _AssertionInput(ABC):
598
859
  def __init__(
599
860
  self,
@@ -609,9 +870,6 @@ class _AssertionInput(ABC):
609
870
  enabled: bool = True,
610
871
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
611
872
  detection_mechanism: DetectionMechanismInputTypes = None,
612
- sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
613
- exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
614
- training_data_lookback_days: Optional[int] = None,
615
873
  incident_behavior: Optional[
616
874
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
617
875
  ] = None,
@@ -621,6 +879,7 @@ class _AssertionInput(ABC):
621
879
  created_at: datetime,
622
880
  updated_by: Union[str, CorpUserUrn],
623
881
  updated_at: datetime,
882
+ default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
624
883
  ):
625
884
  """
626
885
  Create an AssertionInput object.
@@ -632,9 +891,6 @@ class _AssertionInput(ABC):
632
891
  display_name: The display name of the assertion. If not provided, a random display name will be generated.
633
892
  enabled: Whether the assertion is enabled. Defaults to True.
634
893
  detection_mechanism: The detection mechanism to be used for the assertion.
635
- sensitivity: The sensitivity to be applied to the assertion.
636
- exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
637
- training_data_lookback_days: The training data lookback days to be applied to the assertion.
638
894
  incident_behavior: The incident behavior to be applied to the assertion.
639
895
  tags: The tags to be applied to the assertion.
640
896
  source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
@@ -653,12 +909,19 @@ class _AssertionInput(ABC):
653
909
  )
654
910
  self.enabled = enabled
655
911
  self.schedule = _try_parse_schedule(schedule)
656
- self.detection_mechanism = DetectionMechanism.parse(detection_mechanism)
657
- self.sensitivity = InferenceSensitivity.parse(sensitivity)
658
- self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
659
- self.training_data_lookback_days = _try_parse_training_data_lookback_days(
660
- training_data_lookback_days
912
+ self.detection_mechanism = DetectionMechanism.parse(
913
+ detection_mechanism, default_detection_mechanism
661
914
  )
915
+ if not _is_source_type_valid(
916
+ DatasetSourceType(
917
+ source_type=type(self.detection_mechanism),
918
+ platform=self.dataset_urn.platform,
919
+ assertion_type=self._assertion_type(),
920
+ )
921
+ ):
922
+ raise SDKUsageError(
923
+ f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
924
+ )
662
925
  self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
663
926
  self.tags = tags
664
927
  if source_type not in get_enum_options(models.AssertionSourceTypeClass):
@@ -670,7 +933,6 @@ class _AssertionInput(ABC):
670
933
  self.created_at = created_at
671
934
  self.updated_by = updated_by
672
935
  self.updated_at = updated_at
673
-
674
936
  self.cached_dataset: Optional[Dataset] = None
675
937
 
676
938
  def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
@@ -836,8 +1098,6 @@ class _AssertionInput(ABC):
836
1098
  schedule=self._convert_schedule(),
837
1099
  source_type=source_type,
838
1100
  field=field,
839
- sensitivity=self._convert_sensitivity(),
840
- exclusion_windows=self._convert_exclusion_windows(),
841
1101
  ),
842
1102
  )
843
1103
 
@@ -854,53 +1114,6 @@ class _AssertionInput(ABC):
854
1114
  else models.MonitorModeClass.INACTIVE,
855
1115
  )
856
1116
 
857
- def _convert_exclusion_windows(
858
- self,
859
- ) -> list[models.AssertionExclusionWindowClass]:
860
- """
861
- Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
862
-
863
- Returns:
864
- A list of AssertionExclusionWindowClass objects.
865
-
866
- Raises:
867
- SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
868
- """
869
- exclusion_windows: list[models.AssertionExclusionWindowClass] = []
870
- if self.exclusion_windows:
871
- for window in self.exclusion_windows:
872
- if not isinstance(window, FixedRangeExclusionWindow):
873
- raise SDKUsageErrorWithExamples(
874
- msg=f"Invalid exclusion window type: {window}",
875
- examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
876
- )
877
- # To match the UI, we generate a display name for the exclusion window.
878
- # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
879
- # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
880
- generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
881
- exclusion_windows.append(
882
- models.AssertionExclusionWindowClass(
883
- type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE, # Currently only fixed range is supported
884
- displayName=generated_display_name,
885
- fixedRange=models.AbsoluteTimeWindowClass(
886
- startTimeMillis=make_ts_millis(window.start),
887
- endTimeMillis=make_ts_millis(window.end),
888
- ),
889
- )
890
- )
891
- return exclusion_windows
892
-
893
- def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
894
- """
895
- Convert sensitivity into an AssertionMonitorSensitivityClass.
896
-
897
- Returns:
898
- An AssertionMonitorSensitivityClass with the appropriate sensitivity.
899
- """
900
- return models.AssertionMonitorSensitivityClass(
901
- level=InferenceSensitivity.to_int(self.sensitivity),
902
- )
903
-
904
1117
  def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
905
1118
  """
906
1119
  Get the schema field spec for the detection mechanism if needed.
@@ -956,6 +1169,7 @@ class _AssertionInput(ABC):
956
1169
  f"Allowed types are {allowed_type_names}.",
957
1170
  )
958
1171
 
1172
+ @abstractmethod
959
1173
  def _create_monitor_info(
960
1174
  self,
961
1175
  assertion_urn: AssertionUrn,
@@ -963,8 +1177,6 @@ class _AssertionInput(ABC):
963
1177
  schedule: models.CronScheduleClass,
964
1178
  source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
965
1179
  field: Optional[FieldSpecType],
966
- sensitivity: models.AssertionMonitorSensitivityClass,
967
- exclusion_windows: list[models.AssertionExclusionWindowClass],
968
1180
  ) -> models.MonitorInfoClass:
969
1181
  """
970
1182
  Create a MonitorInfoClass with all the necessary components.
@@ -974,34 +1186,15 @@ class _AssertionInput(ABC):
974
1186
  schedule: The monitor schedule.
975
1187
  source_type: The source type.
976
1188
  field: Optional field specification.
977
- sensitivity: The monitor sensitivity.
978
- exclusion_windows: List of exclusion windows.
979
-
980
1189
  Returns:
981
1190
  A MonitorInfoClass configured with all the provided components.
982
1191
  """
983
- return models.MonitorInfoClass(
984
- type=models.MonitorTypeClass.ASSERTION,
985
- status=status,
986
- assertionMonitor=models.AssertionMonitorClass(
987
- assertions=[
988
- models.AssertionEvaluationSpecClass(
989
- assertion=str(assertion_urn),
990
- schedule=schedule,
991
- parameters=self._get_assertion_evaluation_parameters(
992
- str(source_type), field
993
- ),
994
- )
995
- ],
996
- settings=models.AssertionMonitorSettingsClass(
997
- adjustmentSettings=models.AssertionAdjustmentSettingsClass(
998
- sensitivity=sensitivity,
999
- exclusionWindows=exclusion_windows,
1000
- trainingDataLookbackWindowDays=self.training_data_lookback_days,
1001
- ),
1002
- ),
1003
- ),
1004
- )
1192
+ pass
1193
+
1194
+ @abstractmethod
1195
+ def _assertion_type(self) -> str:
1196
+ """Get the assertion type."""
1197
+ pass
1005
1198
 
1006
1199
  @abstractmethod
1007
1200
  def _create_assertion_info(
@@ -1030,7 +1223,55 @@ class _AssertionInput(ABC):
1030
1223
  pass
1031
1224
 
1032
1225
 
1033
- class _SmartFreshnessAssertionInput(_AssertionInput):
1226
+ class _HasFreshnessFeatures:
1227
+ def _create_field_spec(
1228
+ self,
1229
+ column_name: str,
1230
+ allowed_types: list[DictWrapper], # TODO: Use the type from the PDL
1231
+ field_type_name: str,
1232
+ kind: str,
1233
+ get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
1234
+ validate_field_type: Callable[
1235
+ [models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
1236
+ ],
1237
+ ) -> models.FreshnessFieldSpecClass:
1238
+ """
1239
+ Create a field specification for a column, validating its type.
1240
+
1241
+ Args:
1242
+ column_name: The name of the column to create a spec for
1243
+ allowed_types: List of allowed field types
1244
+ field_type_name: Human-readable name of the field type for error messages
1245
+ kind: The kind of field to create
1246
+
1247
+ Returns:
1248
+ A FreshnessFieldSpecClass for the column
1249
+
1250
+ Raises:
1251
+ SDKUsageError: If the column is not found or has an invalid type
1252
+ """
1253
+ SUPPORTED_KINDS = [
1254
+ models.FreshnessFieldKindClass.LAST_MODIFIED,
1255
+ models.FreshnessFieldKindClass.HIGH_WATERMARK,
1256
+ ]
1257
+ if kind not in SUPPORTED_KINDS:
1258
+ raise SDKUsageError(
1259
+ msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
1260
+ )
1261
+
1262
+ field_spec = get_schema_field_spec(column_name)
1263
+ validate_field_type(field_spec, column_name, allowed_types, field_type_name)
1264
+ return models.FreshnessFieldSpecClass(
1265
+ path=field_spec.path,
1266
+ type=field_spec.type,
1267
+ nativeType=field_spec.nativeType,
1268
+ kind=kind,
1269
+ )
1270
+
1271
+
1272
+ class _SmartFreshnessAssertionInput(
1273
+ _AssertionInput, _HasSmartAssertionInputs, _HasFreshnessFeatures
1274
+ ):
1034
1275
  def __init__(
1035
1276
  self,
1036
1277
  *,
@@ -1055,7 +1296,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1055
1296
  updated_by: Union[str, CorpUserUrn],
1056
1297
  updated_at: datetime,
1057
1298
  ):
1058
- super().__init__(
1299
+ _AssertionInput.__init__(
1300
+ self,
1059
1301
  dataset_urn=dataset_urn,
1060
1302
  entity_client=entity_client,
1061
1303
  urn=urn,
@@ -1063,11 +1305,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1063
1305
  enabled=enabled,
1064
1306
  schedule=schedule
1065
1307
  if schedule is not None
1066
- else DEFAULT_SCHEDULE, # Use provided schedule or default for create case
1308
+ else DEFAULT_HOURLY_SCHEDULE, # Use provided schedule or default for create case
1067
1309
  detection_mechanism=detection_mechanism,
1068
- sensitivity=sensitivity,
1069
- exclusion_windows=exclusion_windows,
1070
- training_data_lookback_days=training_data_lookback_days,
1071
1310
  incident_behavior=incident_behavior,
1072
1311
  tags=tags,
1073
1312
  source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
@@ -1076,6 +1315,16 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1076
1315
  updated_by=updated_by,
1077
1316
  updated_at=updated_at,
1078
1317
  )
1318
+ _HasSmartAssertionInputs.__init__(
1319
+ self,
1320
+ sensitivity=sensitivity,
1321
+ exclusion_windows=exclusion_windows,
1322
+ training_data_lookback_days=training_data_lookback_days,
1323
+ )
1324
+
1325
+ def _assertion_type(self) -> str:
1326
+ """Get the assertion type."""
1327
+ return models.AssertionTypeClass.FRESHNESS
1079
1328
 
1080
1329
  def _create_assertion_info(
1081
1330
  self, filter: Optional[models.DatasetFilterClass]
@@ -1099,7 +1348,7 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1099
1348
  def _convert_schedule(self) -> models.CronScheduleClass:
1100
1349
  """Create a schedule for a smart freshness assertion.
1101
1350
 
1102
- For create case, uses DEFAULT_SCHEDULE. For update case, preserves existing schedule.
1351
+ For create case, uses DEFAULT_HOURLY_SCHEDULE. For update case, preserves existing schedule.
1103
1352
 
1104
1353
  Returns:
1105
1354
  A CronScheduleClass with appropriate schedule settings.
@@ -1155,6 +1404,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1155
1404
  LAST_MODIFIED_ALLOWED_FIELD_TYPES,
1156
1405
  "last modified column",
1157
1406
  models.FreshnessFieldKindClass.LAST_MODIFIED,
1407
+ self._get_schema_field_spec,
1408
+ self._validate_field_type,
1158
1409
  )
1159
1410
  elif isinstance(self.detection_mechanism, _InformationSchema):
1160
1411
  source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
@@ -1169,50 +1420,42 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1169
1420
 
1170
1421
  return source_type, field
1171
1422
 
1172
- def _create_field_spec(
1423
+ def _create_monitor_info(
1173
1424
  self,
1174
- column_name: str,
1175
- allowed_types: list[DictWrapper], # TODO: Use the type from the PDL
1176
- field_type_name: str,
1177
- kind: str,
1178
- ) -> models.FreshnessFieldSpecClass:
1425
+ assertion_urn: AssertionUrn,
1426
+ status: models.MonitorStatusClass,
1427
+ schedule: models.CronScheduleClass,
1428
+ source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
1429
+ field: Optional[FieldSpecType],
1430
+ ) -> models.MonitorInfoClass:
1179
1431
  """
1180
- Create a field specification for a column, validating its type.
1181
-
1182
- Args:
1183
- column_name: The name of the column to create a spec for
1184
- allowed_types: List of allowed field types
1185
- field_type_name: Human-readable name of the field type for error messages
1186
- kind: The kind of field to create
1187
-
1188
- Returns:
1189
- A FreshnessFieldSpecClass for the column
1190
-
1191
- Raises:
1192
- SDKUsageError: If the column is not found or has an invalid type
1432
+ Create a MonitorInfoClass with all the necessary components.
1193
1433
  """
1194
- SUPPORTED_KINDS = [
1195
- models.FreshnessFieldKindClass.LAST_MODIFIED,
1196
- models.FreshnessFieldKindClass.HIGH_WATERMARK,
1197
- ]
1198
- if kind not in SUPPORTED_KINDS:
1199
- raise SDKUsageError(
1200
- msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
1201
- )
1202
-
1203
- field_spec = self._get_schema_field_spec(column_name)
1204
- self._validate_field_type(
1205
- field_spec, column_name, allowed_types, field_type_name
1206
- )
1207
- return models.FreshnessFieldSpecClass(
1208
- path=field_spec.path,
1209
- type=field_spec.type,
1210
- nativeType=field_spec.nativeType,
1211
- kind=kind,
1434
+ return models.MonitorInfoClass(
1435
+ type=models.MonitorTypeClass.ASSERTION,
1436
+ status=status,
1437
+ assertionMonitor=models.AssertionMonitorClass(
1438
+ assertions=[
1439
+ models.AssertionEvaluationSpecClass(
1440
+ assertion=str(assertion_urn),
1441
+ schedule=schedule,
1442
+ parameters=self._get_assertion_evaluation_parameters(
1443
+ str(source_type), field
1444
+ ),
1445
+ ),
1446
+ ],
1447
+ settings=models.AssertionMonitorSettingsClass(
1448
+ adjustmentSettings=models.AssertionAdjustmentSettingsClass(
1449
+ sensitivity=self._convert_sensitivity(),
1450
+ exclusionWindows=self._convert_exclusion_windows(),
1451
+ trainingDataLookbackWindowDays=self.training_data_lookback_days,
1452
+ ),
1453
+ ),
1454
+ ),
1212
1455
  )
1213
1456
 
1214
1457
 
1215
- class _SmartVolumeAssertionInput(_AssertionInput):
1458
+ class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
1216
1459
  def __init__(
1217
1460
  self,
1218
1461
  *,
@@ -1237,7 +1480,8 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1237
1480
  updated_by: Union[str, CorpUserUrn],
1238
1481
  updated_at: datetime,
1239
1482
  ):
1240
- super().__init__(
1483
+ _AssertionInput.__init__(
1484
+ self,
1241
1485
  dataset_urn=dataset_urn,
1242
1486
  entity_client=entity_client,
1243
1487
  urn=urn,
@@ -1245,9 +1489,6 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1245
1489
  enabled=enabled,
1246
1490
  schedule=schedule,
1247
1491
  detection_mechanism=detection_mechanism,
1248
- sensitivity=sensitivity,
1249
- exclusion_windows=exclusion_windows,
1250
- training_data_lookback_days=training_data_lookback_days,
1251
1492
  incident_behavior=incident_behavior,
1252
1493
  tags=tags,
1253
1494
  source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
@@ -1256,6 +1497,12 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1256
1497
  updated_by=updated_by,
1257
1498
  updated_at=updated_at,
1258
1499
  )
1500
+ _HasSmartAssertionInputs.__init__(
1501
+ self,
1502
+ sensitivity=sensitivity,
1503
+ exclusion_windows=exclusion_windows,
1504
+ training_data_lookback_days=training_data_lookback_days,
1505
+ )
1259
1506
 
1260
1507
  def _create_assertion_info(
1261
1508
  self, filter: Optional[models.DatasetFilterClass]
@@ -1276,15 +1523,13 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1276
1523
  )
1277
1524
 
1278
1525
  def _convert_schedule(self) -> models.CronScheduleClass:
1279
- """Create a schedule for a smart freshness assertion.
1280
-
1281
- Since the schedule is not used for smart freshness assertions, we return a default schedule.
1526
+ """Create a schedule for a smart volume assertion.
1282
1527
 
1283
1528
  Returns:
1284
1529
  A CronScheduleClass with appropriate schedule settings.
1285
1530
  """
1286
1531
  if self.schedule is None:
1287
- return DEFAULT_SCHEDULE
1532
+ return DEFAULT_HOURLY_SCHEDULE
1288
1533
 
1289
1534
  return models.CronScheduleClass(
1290
1535
  cron=self.schedule.cron,
@@ -1333,3 +1578,41 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1333
1578
  )
1334
1579
 
1335
1580
  return source_type, field
1581
+
1582
+ def _create_monitor_info(
1583
+ self,
1584
+ assertion_urn: AssertionUrn,
1585
+ status: models.MonitorStatusClass,
1586
+ schedule: models.CronScheduleClass,
1587
+ source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
1588
+ field: Optional[FieldSpecType],
1589
+ ) -> models.MonitorInfoClass:
1590
+ """
1591
+ Create a MonitorInfoClass with all the necessary components.
1592
+ """
1593
+ return models.MonitorInfoClass(
1594
+ type=models.MonitorTypeClass.ASSERTION,
1595
+ status=status,
1596
+ assertionMonitor=models.AssertionMonitorClass(
1597
+ assertions=[
1598
+ models.AssertionEvaluationSpecClass(
1599
+ assertion=str(assertion_urn),
1600
+ schedule=schedule,
1601
+ parameters=self._get_assertion_evaluation_parameters(
1602
+ str(source_type), field
1603
+ ),
1604
+ ),
1605
+ ],
1606
+ settings=models.AssertionMonitorSettingsClass(
1607
+ adjustmentSettings=models.AssertionAdjustmentSettingsClass(
1608
+ sensitivity=self._convert_sensitivity(),
1609
+ exclusionWindows=self._convert_exclusion_windows(),
1610
+ trainingDataLookbackWindowDays=self.training_data_lookback_days,
1611
+ ),
1612
+ ),
1613
+ ),
1614
+ )
1615
+
1616
+ def _assertion_type(self) -> str:
1617
+ """Get the assertion type."""
1618
+ return models.AssertionTypeClass.VOLUME