acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (20)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
  3. acryl_datahub_cloud/metadata/schema.avsc +9 -0
  4. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
  5. acryl_datahub_cloud/sdk/__init__.py +10 -2
  6. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  7. acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
  8. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  9. acryl_datahub_cloud/sdk/assertion/types.py +18 -0
  10. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  11. acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
  12. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
  13. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
  14. acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
  15. acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
  16. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
  17. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
  18. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
  19. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
  20. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
@@ -6,9 +6,10 @@ validate and represent the input for creating an Assertion in DataHub.
6
6
  import random
7
7
  import string
8
8
  from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
9
10
  from datetime import datetime
10
11
  from enum import Enum
11
- from typing import Literal, Optional, TypeAlias, Union
12
+ from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
12
13
 
13
14
  import pydantic
14
15
  import pytz
@@ -42,12 +43,28 @@ ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60
42
43
  DEFAULT_NAME_PREFIX = "New Assertion"
43
44
  DEFAULT_NAME_SUFFIX_LENGTH = 8
44
45
 
45
- DEFAULT_SCHEDULE = models.CronScheduleClass(
46
+
47
+ DEFAULT_HOURLY_SCHEDULE: models.CronScheduleClass = models.CronScheduleClass(
46
48
  cron="0 * * * *", # Every hour, matches the UI default
47
49
  timezone=str(
48
50
  tzlocal.get_localzone()
49
51
  ), # User local timezone, matches the UI default
50
52
  )
53
+ DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_HOURLY_SCHEDULE
54
+
55
+ DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
56
+ cron="0 0 * * *", # Every day at midnight, matches the UI default
57
+ timezone=str(
58
+ tzlocal.get_localzone()
59
+ ), # User local timezone, matches the UI default
60
+ )
61
+
62
+ DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
63
+ cron="0 */6 * * *", # Every 6 hours, matches the UI default
64
+ timezone=str(
65
+ tzlocal.get_localzone()
66
+ ), # User local timezone, matches the UI default
67
+ )
51
68
 
52
69
 
53
70
  class AbstractDetectionMechanism(BaseModel, ABC):
@@ -101,6 +118,26 @@ class _Query(AbstractDetectionMechanism):
101
118
  additional_filter: Optional[str] = None
102
119
 
103
120
 
121
+ class _AllRowsQuery(AbstractDetectionMechanism):
122
+ # For column-based assertions, this is the default detection mechanism.
123
+ type: Literal["all_rows_query"] = "all_rows_query"
124
+ additional_filter: Optional[str] = None
125
+
126
+
127
+ class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
128
+ # Used for column-based assertions.
129
+ type: Literal["all_rows_query_datahub_dataset_profile"] = (
130
+ "all_rows_query_datahub_dataset_profile"
131
+ )
132
+
133
+
134
+ class _ChangedRowsQuery(AbstractDetectionMechanism):
135
+ # Used for column-based assertions.
136
+ type: Literal["changed_rows_query"] = "changed_rows_query"
137
+ column_name: str
138
+ additional_filter: Optional[str] = None
139
+
140
+
104
141
  class _DatasetProfile(AbstractDetectionMechanism):
105
142
  type: Literal["dataset_profile"] = "dataset_profile"
106
143
 
@@ -114,6 +151,9 @@ _DETECTION_MECHANISM_CONCRETE_TYPES = (
114
151
  _DataHubOperation,
115
152
  _Query,
116
153
  _DatasetProfile,
154
+ _AllRowsQuery,
155
+ _ChangedRowsQuery,
156
+ _AllRowsQueryDataHubDatasetProfile,
117
157
  )
118
158
  _DetectionMechanismTypes = Union[
119
159
  _InformationSchema,
@@ -123,14 +163,21 @@ _DetectionMechanismTypes = Union[
123
163
  _DataHubOperation,
124
164
  _Query,
125
165
  _DatasetProfile,
166
+ _AllRowsQuery,
167
+ _ChangedRowsQuery,
168
+ _AllRowsQueryDataHubDatasetProfile,
126
169
  ]
127
170
 
128
171
  _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
129
172
  _LastModifiedColumn,
130
173
  _HighWatermarkColumn,
131
174
  _Query,
175
+ _AllRowsQuery,
176
+ _ChangedRowsQuery,
132
177
  )
133
178
 
179
+ DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
180
+
134
181
 
135
182
  class DetectionMechanism:
136
183
  # To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
@@ -141,6 +188,9 @@ class DetectionMechanism:
141
188
  HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
142
189
  DATAHUB_OPERATION = _DataHubOperation()
143
190
  QUERY = _Query
191
+ ALL_ROWS_QUERY = _AllRowsQuery
192
+ CHANGED_ROWS_QUERY = _ChangedRowsQuery
193
+ ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
144
194
  DATASET_PROFILE = _DatasetProfile()
145
195
 
146
196
  _DETECTION_MECHANISM_EXAMPLES = {
@@ -170,6 +220,18 @@ class DetectionMechanism:
170
220
  "Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
171
221
  "Dataset Profile from string": "dataset_profile",
172
222
  "Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
223
+ "All Rows Query from string": "all_rows_query",
224
+ "All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
225
+ "All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
226
+ "Changed Rows Query from dict (with optional additional filter)": {
227
+ "type": "changed_rows_query",
228
+ "column_name": "id",
229
+ "additional_filter": "id > 1000",
230
+ },
231
+ "Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
232
+ "Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
233
+ "All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
234
+ "All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
173
235
  }
174
236
 
175
237
  @staticmethod
@@ -177,9 +239,10 @@ class DetectionMechanism:
177
239
  detection_mechanism_config: Optional[
178
240
  Union[str, dict[str, str], _DetectionMechanismTypes]
179
241
  ] = None,
242
+ default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
180
243
  ) -> _DetectionMechanismTypes:
181
244
  if detection_mechanism_config is None:
182
- return DEFAULT_DETECTION_MECHANISM
245
+ return default_detection_mechanism
183
246
  if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
184
247
  return detection_mechanism_config
185
248
  elif isinstance(detection_mechanism_config, str):
@@ -260,8 +323,6 @@ class DetectionMechanism:
260
323
  ) from e
261
324
 
262
325
 
263
- DEFAULT_DETECTION_MECHANISM = DetectionMechanism.INFORMATION_SCHEMA
264
-
265
326
  DetectionMechanismInputTypes: TypeAlias = Union[
266
327
  str, dict[str, str], _DetectionMechanismTypes, None
267
328
  ]
@@ -328,7 +389,59 @@ class InferenceSensitivity(Enum):
328
389
  }[sensitivity]
329
390
 
330
391
 
331
- DEFAULT_SENSITIVITY = InferenceSensitivity.MEDIUM
392
+ DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
393
+
394
+ TIME_WINDOW_SIZE_EXAMPLES = {
395
+ "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
396
+ "Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
397
+ }
398
+
399
+
400
+ class CalendarInterval(Enum):
401
+ MINUTE = "MINUTE"
402
+ HOUR = "HOUR"
403
+ DAY = "DAY"
404
+
405
+
406
+ class TimeWindowSize(BaseModel):
407
+ unit: Union[CalendarInterval, str]
408
+ multiple: int
409
+
410
+
411
+ TimeWindowSizeInputTypes: TypeAlias = Union[
412
+ models.TimeWindowSizeClass,
413
+ models.FixedIntervalScheduleClass,
414
+ TimeWindowSize,
415
+ ]
416
+
417
+
418
+ def _try_parse_time_window_size(
419
+ config: TimeWindowSizeInputTypes,
420
+ ) -> models.TimeWindowSizeClass:
421
+ if isinstance(config, models.TimeWindowSizeClass):
422
+ return config
423
+ elif isinstance(config, models.FixedIntervalScheduleClass):
424
+ return models.TimeWindowSizeClass(
425
+ unit=_try_parse_and_validate_schema_classes_enum(
426
+ config.unit, models.CalendarIntervalClass
427
+ ),
428
+ multiple=config.multiple,
429
+ )
430
+ elif isinstance(config, TimeWindowSize):
431
+ return models.TimeWindowSizeClass(
432
+ unit=_try_parse_and_validate_schema_classes_enum(
433
+ _try_parse_and_validate_schema_classes_enum(
434
+ config.unit, CalendarInterval
435
+ ).value,
436
+ models.CalendarIntervalClass,
437
+ ),
438
+ multiple=config.multiple,
439
+ )
440
+ else:
441
+ raise SDKUsageErrorWithExamples(
442
+ msg=f"Invalid time window size: {config}",
443
+ examples=TIME_WINDOW_SIZE_EXAMPLES,
444
+ )
332
445
 
333
446
 
334
447
  class FixedRangeExclusionWindow(BaseModel):
@@ -594,6 +707,161 @@ def _try_parse_schedule(
594
707
  FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
595
708
 
596
709
 
710
+ T = TypeVar("T")
711
+
712
+
713
+ def _try_parse_and_validate_schema_classes_enum(
714
+ value: Union[str, T],
715
+ enum_class: Type[T],
716
+ ) -> T:
717
+ if isinstance(value, enum_class):
718
+ return value
719
+ assert isinstance(value, str)
720
+ if value.upper() not in get_enum_options(enum_class):
721
+ raise SDKUsageError(
722
+ f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
723
+ )
724
+ return getattr(enum_class, value.upper())
725
+
726
+
727
+ @dataclass(frozen=True)
728
+ class DatasetSourceType:
729
+ """
730
+ DatasetSourceType is used to represent a dataset source type.
731
+ It is used to check if a source type is valid for a dataset type and assertion type.
732
+
733
+ Args:
734
+ source_type: The source type (e.g. information schema, field value, etc. aka detection mechanism)
735
+ platform: The platform of the dataset as a string OR "all" for all platforms.
736
+ assertion_type: The assertion type as a models.AssertionTypeClass string e.g. models.AssertionTypeClass.FRESHNESS OR "all" for all assertion types.
737
+
738
+ Example:
739
+ DatasetSourceType(
740
+ source_type=_InformationSchema,
741
+ platform="databricks",
742
+ assertion_type="all",
743
+ )
744
+ This means that the source type _InformationSchema is invalid for the dataset type "databricks" and assertion type "all".
745
+ "all" in this example means that the source type is invalid for all assertion types.
746
+ """
747
+
748
+ source_type: Type[_DetectionMechanismTypes]
749
+ platform: str
750
+ assertion_type: Union[models.AssertionTypeClass, str]
751
+
752
+
753
+ INVALID_SOURCE_TYPES = {
754
+ # Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
755
+ DatasetSourceType(
756
+ source_type=_InformationSchema,
757
+ platform="databricks",
758
+ assertion_type="all",
759
+ )
760
+ }
761
+
762
+
763
+ def _is_source_type_valid(
764
+ dataset_source_type: DatasetSourceType,
765
+ invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
766
+ ) -> bool:
767
+ for invalid in invalid_source_types:
768
+ if invalid.source_type == dataset_source_type.source_type:
769
+ # If both platform and assertion type are "all", the source type is invalid for all combinations
770
+ if invalid.platform == "all" and invalid.assertion_type == "all":
771
+ return False
772
+ # If platform matches and assertion type is "all", the source type is invalid for all assertion types on that platform
773
+ if (
774
+ invalid.platform == dataset_source_type.platform
775
+ and invalid.assertion_type == "all"
776
+ ):
777
+ return False
778
+ # If platform is "all" and assertion type matches, the source type is invalid for all platforms for that assertion type
779
+ if (
780
+ invalid.platform == "all"
781
+ and invalid.assertion_type == dataset_source_type.assertion_type
782
+ ):
783
+ return False
784
+ # If both platform and assertion type match exactly, the source type is invalid
785
+ if (
786
+ invalid.platform == dataset_source_type.platform
787
+ and invalid.assertion_type == dataset_source_type.assertion_type
788
+ ):
789
+ return False
790
+ return True
791
+
792
+
793
+ class _HasSmartAssertionInputs:
794
+ """
795
+ A class that contains the common inputs for smart assertions.
796
+ This is used to avoid code duplication in the smart assertion inputs.
797
+
798
+ Args:
799
+ sensitivity: The sensitivity to be applied to the assertion.
800
+ exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
801
+ training_data_lookback_days: The training data lookback days to be applied to the assertion.
802
+ """
803
+
804
+ def __init__(
805
+ self,
806
+ *,
807
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
808
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
809
+ training_data_lookback_days: Optional[int] = None,
810
+ ):
811
+ self.sensitivity = InferenceSensitivity.parse(sensitivity)
812
+ self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
813
+ self.training_data_lookback_days = _try_parse_training_data_lookback_days(
814
+ training_data_lookback_days
815
+ )
816
+
817
+ def _convert_exclusion_windows(
818
+ self,
819
+ ) -> list[models.AssertionExclusionWindowClass]:
820
+ """
821
+ Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
822
+
823
+ Returns:
824
+ A list of AssertionExclusionWindowClass objects.
825
+
826
+ Raises:
827
+ SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
828
+ """
829
+ exclusion_windows: list[models.AssertionExclusionWindowClass] = []
830
+ if self.exclusion_windows:
831
+ for window in self.exclusion_windows:
832
+ if not isinstance(window, FixedRangeExclusionWindow):
833
+ raise SDKUsageErrorWithExamples(
834
+ msg=f"Invalid exclusion window type: {window}",
835
+ examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
836
+ )
837
+ # To match the UI, we generate a display name for the exclusion window.
838
+ # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
839
+ # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
840
+ generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
841
+ exclusion_windows.append(
842
+ models.AssertionExclusionWindowClass(
843
+ type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE, # Currently only fixed range is supported
844
+ displayName=generated_display_name,
845
+ fixedRange=models.AbsoluteTimeWindowClass(
846
+ startTimeMillis=make_ts_millis(window.start),
847
+ endTimeMillis=make_ts_millis(window.end),
848
+ ),
849
+ )
850
+ )
851
+ return exclusion_windows
852
+
853
+ def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
854
+ """
855
+ Convert sensitivity into an AssertionMonitorSensitivityClass.
856
+
857
+ Returns:
858
+ An AssertionMonitorSensitivityClass with the appropriate sensitivity.
859
+ """
860
+ return models.AssertionMonitorSensitivityClass(
861
+ level=InferenceSensitivity.to_int(self.sensitivity),
862
+ )
863
+
864
+
597
865
  class _AssertionInput(ABC):
598
866
  def __init__(
599
867
  self,
@@ -609,9 +877,6 @@ class _AssertionInput(ABC):
609
877
  enabled: bool = True,
610
878
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
611
879
  detection_mechanism: DetectionMechanismInputTypes = None,
612
- sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
613
- exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
614
- training_data_lookback_days: Optional[int] = None,
615
880
  incident_behavior: Optional[
616
881
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
617
882
  ] = None,
@@ -621,6 +886,7 @@ class _AssertionInput(ABC):
621
886
  created_at: datetime,
622
887
  updated_by: Union[str, CorpUserUrn],
623
888
  updated_at: datetime,
889
+ default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
624
890
  ):
625
891
  """
626
892
  Create an AssertionInput object.
@@ -632,9 +898,6 @@ class _AssertionInput(ABC):
632
898
  display_name: The display name of the assertion. If not provided, a random display name will be generated.
633
899
  enabled: Whether the assertion is enabled. Defaults to True.
634
900
  detection_mechanism: The detection mechanism to be used for the assertion.
635
- sensitivity: The sensitivity to be applied to the assertion.
636
- exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
637
- training_data_lookback_days: The training data lookback days to be applied to the assertion.
638
901
  incident_behavior: The incident behavior to be applied to the assertion.
639
902
  tags: The tags to be applied to the assertion.
640
903
  source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
@@ -653,12 +916,19 @@ class _AssertionInput(ABC):
653
916
  )
654
917
  self.enabled = enabled
655
918
  self.schedule = _try_parse_schedule(schedule)
656
- self.detection_mechanism = DetectionMechanism.parse(detection_mechanism)
657
- self.sensitivity = InferenceSensitivity.parse(sensitivity)
658
- self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
659
- self.training_data_lookback_days = _try_parse_training_data_lookback_days(
660
- training_data_lookback_days
919
+ self.detection_mechanism = DetectionMechanism.parse(
920
+ detection_mechanism, default_detection_mechanism
661
921
  )
922
+ if not _is_source_type_valid(
923
+ DatasetSourceType(
924
+ source_type=type(self.detection_mechanism),
925
+ platform=self.dataset_urn.platform,
926
+ assertion_type=self._assertion_type(),
927
+ )
928
+ ):
929
+ raise SDKUsageError(
930
+ f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
931
+ )
662
932
  self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
663
933
  self.tags = tags
664
934
  if source_type not in get_enum_options(models.AssertionSourceTypeClass):
@@ -670,7 +940,6 @@ class _AssertionInput(ABC):
670
940
  self.created_at = created_at
671
941
  self.updated_by = updated_by
672
942
  self.updated_at = updated_at
673
-
674
943
  self.cached_dataset: Optional[Dataset] = None
675
944
 
676
945
  def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
@@ -836,8 +1105,6 @@ class _AssertionInput(ABC):
836
1105
  schedule=self._convert_schedule(),
837
1106
  source_type=source_type,
838
1107
  field=field,
839
- sensitivity=self._convert_sensitivity(),
840
- exclusion_windows=self._convert_exclusion_windows(),
841
1108
  ),
842
1109
  )
843
1110
 
@@ -854,53 +1121,6 @@ class _AssertionInput(ABC):
854
1121
  else models.MonitorModeClass.INACTIVE,
855
1122
  )
856
1123
 
857
- def _convert_exclusion_windows(
858
- self,
859
- ) -> list[models.AssertionExclusionWindowClass]:
860
- """
861
- Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
862
-
863
- Returns:
864
- A list of AssertionExclusionWindowClass objects.
865
-
866
- Raises:
867
- SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
868
- """
869
- exclusion_windows: list[models.AssertionExclusionWindowClass] = []
870
- if self.exclusion_windows:
871
- for window in self.exclusion_windows:
872
- if not isinstance(window, FixedRangeExclusionWindow):
873
- raise SDKUsageErrorWithExamples(
874
- msg=f"Invalid exclusion window type: {window}",
875
- examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
876
- )
877
- # To match the UI, we generate a display name for the exclusion window.
878
- # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
879
- # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
880
- generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
881
- exclusion_windows.append(
882
- models.AssertionExclusionWindowClass(
883
- type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE, # Currently only fixed range is supported
884
- displayName=generated_display_name,
885
- fixedRange=models.AbsoluteTimeWindowClass(
886
- startTimeMillis=make_ts_millis(window.start),
887
- endTimeMillis=make_ts_millis(window.end),
888
- ),
889
- )
890
- )
891
- return exclusion_windows
892
-
893
- def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
894
- """
895
- Convert sensitivity into an AssertionMonitorSensitivityClass.
896
-
897
- Returns:
898
- An AssertionMonitorSensitivityClass with the appropriate sensitivity.
899
- """
900
- return models.AssertionMonitorSensitivityClass(
901
- level=InferenceSensitivity.to_int(self.sensitivity),
902
- )
903
-
904
1124
  def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
905
1125
  """
906
1126
  Get the schema field spec for the detection mechanism if needed.
@@ -956,6 +1176,7 @@ class _AssertionInput(ABC):
956
1176
  f"Allowed types are {allowed_type_names}.",
957
1177
  )
958
1178
 
1179
+ @abstractmethod
959
1180
  def _create_monitor_info(
960
1181
  self,
961
1182
  assertion_urn: AssertionUrn,
@@ -963,8 +1184,6 @@ class _AssertionInput(ABC):
963
1184
  schedule: models.CronScheduleClass,
964
1185
  source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
965
1186
  field: Optional[FieldSpecType],
966
- sensitivity: models.AssertionMonitorSensitivityClass,
967
- exclusion_windows: list[models.AssertionExclusionWindowClass],
968
1187
  ) -> models.MonitorInfoClass:
969
1188
  """
970
1189
  Create a MonitorInfoClass with all the necessary components.
@@ -974,34 +1193,15 @@ class _AssertionInput(ABC):
974
1193
  schedule: The monitor schedule.
975
1194
  source_type: The source type.
976
1195
  field: Optional field specification.
977
- sensitivity: The monitor sensitivity.
978
- exclusion_windows: List of exclusion windows.
979
-
980
1196
  Returns:
981
1197
  A MonitorInfoClass configured with all the provided components.
982
1198
  """
983
- return models.MonitorInfoClass(
984
- type=models.MonitorTypeClass.ASSERTION,
985
- status=status,
986
- assertionMonitor=models.AssertionMonitorClass(
987
- assertions=[
988
- models.AssertionEvaluationSpecClass(
989
- assertion=str(assertion_urn),
990
- schedule=schedule,
991
- parameters=self._get_assertion_evaluation_parameters(
992
- str(source_type), field
993
- ),
994
- )
995
- ],
996
- settings=models.AssertionMonitorSettingsClass(
997
- adjustmentSettings=models.AssertionAdjustmentSettingsClass(
998
- sensitivity=sensitivity,
999
- exclusionWindows=exclusion_windows,
1000
- trainingDataLookbackWindowDays=self.training_data_lookback_days,
1001
- ),
1002
- ),
1003
- ),
1004
- )
1199
+ pass
1200
+
1201
+ @abstractmethod
1202
+ def _assertion_type(self) -> str:
1203
+ """Get the assertion type."""
1204
+ pass
1005
1205
 
1006
1206
  @abstractmethod
1007
1207
  def _create_assertion_info(
@@ -1030,7 +1230,55 @@ class _AssertionInput(ABC):
1030
1230
  pass
1031
1231
 
1032
1232
 
1033
- class _SmartFreshnessAssertionInput(_AssertionInput):
1233
+ class _HasFreshnessFeatures:
1234
+ def _create_field_spec(
1235
+ self,
1236
+ column_name: str,
1237
+ allowed_types: list[DictWrapper], # TODO: Use the type from the PDL
1238
+ field_type_name: str,
1239
+ kind: str,
1240
+ get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
1241
+ validate_field_type: Callable[
1242
+ [models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
1243
+ ],
1244
+ ) -> models.FreshnessFieldSpecClass:
1245
+ """
1246
+ Create a field specification for a column, validating its type.
1247
+
1248
+ Args:
1249
+ column_name: The name of the column to create a spec for
1250
+ allowed_types: List of allowed field types
1251
+ field_type_name: Human-readable name of the field type for error messages
1252
+ kind: The kind of field to create
1253
+
1254
+ Returns:
1255
+ A FreshnessFieldSpecClass for the column
1256
+
1257
+ Raises:
1258
+ SDKUsageError: If the column is not found or has an invalid type
1259
+ """
1260
+ SUPPORTED_KINDS = [
1261
+ models.FreshnessFieldKindClass.LAST_MODIFIED,
1262
+ models.FreshnessFieldKindClass.HIGH_WATERMARK,
1263
+ ]
1264
+ if kind not in SUPPORTED_KINDS:
1265
+ raise SDKUsageError(
1266
+ msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
1267
+ )
1268
+
1269
+ field_spec = get_schema_field_spec(column_name)
1270
+ validate_field_type(field_spec, column_name, allowed_types, field_type_name)
1271
+ return models.FreshnessFieldSpecClass(
1272
+ path=field_spec.path,
1273
+ type=field_spec.type,
1274
+ nativeType=field_spec.nativeType,
1275
+ kind=kind,
1276
+ )
1277
+
1278
+
1279
+ class _SmartFreshnessAssertionInput(
1280
+ _AssertionInput, _HasSmartAssertionInputs, _HasFreshnessFeatures
1281
+ ):
1034
1282
  def __init__(
1035
1283
  self,
1036
1284
  *,
@@ -1055,7 +1303,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1055
1303
  updated_by: Union[str, CorpUserUrn],
1056
1304
  updated_at: datetime,
1057
1305
  ):
1058
- super().__init__(
1306
+ _AssertionInput.__init__(
1307
+ self,
1059
1308
  dataset_urn=dataset_urn,
1060
1309
  entity_client=entity_client,
1061
1310
  urn=urn,
@@ -1063,11 +1312,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1063
1312
  enabled=enabled,
1064
1313
  schedule=schedule
1065
1314
  if schedule is not None
1066
- else DEFAULT_SCHEDULE, # Use provided schedule or default for create case
1315
+ else DEFAULT_HOURLY_SCHEDULE, # Use provided schedule or default for create case
1067
1316
  detection_mechanism=detection_mechanism,
1068
- sensitivity=sensitivity,
1069
- exclusion_windows=exclusion_windows,
1070
- training_data_lookback_days=training_data_lookback_days,
1071
1317
  incident_behavior=incident_behavior,
1072
1318
  tags=tags,
1073
1319
  source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
@@ -1076,6 +1322,16 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1076
1322
  updated_by=updated_by,
1077
1323
  updated_at=updated_at,
1078
1324
  )
1325
+ _HasSmartAssertionInputs.__init__(
1326
+ self,
1327
+ sensitivity=sensitivity,
1328
+ exclusion_windows=exclusion_windows,
1329
+ training_data_lookback_days=training_data_lookback_days,
1330
+ )
1331
+
1332
+ def _assertion_type(self) -> str:
1333
+ """Get the assertion type."""
1334
+ return models.AssertionTypeClass.FRESHNESS
1079
1335
 
1080
1336
  def _create_assertion_info(
1081
1337
  self, filter: Optional[models.DatasetFilterClass]
@@ -1099,7 +1355,7 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1099
1355
  def _convert_schedule(self) -> models.CronScheduleClass:
1100
1356
  """Create a schedule for a smart freshness assertion.
1101
1357
 
1102
- For create case, uses DEFAULT_SCHEDULE. For update case, preserves existing schedule.
1358
+ For create case, uses DEFAULT_HOURLY_SCHEDULE. For update case, preserves existing schedule.
1103
1359
 
1104
1360
  Returns:
1105
1361
  A CronScheduleClass with appropriate schedule settings.
@@ -1155,6 +1411,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1155
1411
  LAST_MODIFIED_ALLOWED_FIELD_TYPES,
1156
1412
  "last modified column",
1157
1413
  models.FreshnessFieldKindClass.LAST_MODIFIED,
1414
+ self._get_schema_field_spec,
1415
+ self._validate_field_type,
1158
1416
  )
1159
1417
  elif isinstance(self.detection_mechanism, _InformationSchema):
1160
1418
  source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
@@ -1169,50 +1427,42 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
1169
1427
 
1170
1428
  return source_type, field
1171
1429
 
1172
- def _create_field_spec(
1430
+ def _create_monitor_info(
1173
1431
  self,
1174
- column_name: str,
1175
- allowed_types: list[DictWrapper], # TODO: Use the type from the PDL
1176
- field_type_name: str,
1177
- kind: str,
1178
- ) -> models.FreshnessFieldSpecClass:
1432
+ assertion_urn: AssertionUrn,
1433
+ status: models.MonitorStatusClass,
1434
+ schedule: models.CronScheduleClass,
1435
+ source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
1436
+ field: Optional[FieldSpecType],
1437
+ ) -> models.MonitorInfoClass:
1179
1438
  """
1180
- Create a field specification for a column, validating its type.
1181
-
1182
- Args:
1183
- column_name: The name of the column to create a spec for
1184
- allowed_types: List of allowed field types
1185
- field_type_name: Human-readable name of the field type for error messages
1186
- kind: The kind of field to create
1187
-
1188
- Returns:
1189
- A FreshnessFieldSpecClass for the column
1190
-
1191
- Raises:
1192
- SDKUsageError: If the column is not found or has an invalid type
1439
+ Create a MonitorInfoClass with all the necessary components.
1193
1440
  """
1194
- SUPPORTED_KINDS = [
1195
- models.FreshnessFieldKindClass.LAST_MODIFIED,
1196
- models.FreshnessFieldKindClass.HIGH_WATERMARK,
1197
- ]
1198
- if kind not in SUPPORTED_KINDS:
1199
- raise SDKUsageError(
1200
- msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
1201
- )
1202
-
1203
- field_spec = self._get_schema_field_spec(column_name)
1204
- self._validate_field_type(
1205
- field_spec, column_name, allowed_types, field_type_name
1206
- )
1207
- return models.FreshnessFieldSpecClass(
1208
- path=field_spec.path,
1209
- type=field_spec.type,
1210
- nativeType=field_spec.nativeType,
1211
- kind=kind,
1441
+ return models.MonitorInfoClass(
1442
+ type=models.MonitorTypeClass.ASSERTION,
1443
+ status=status,
1444
+ assertionMonitor=models.AssertionMonitorClass(
1445
+ assertions=[
1446
+ models.AssertionEvaluationSpecClass(
1447
+ assertion=str(assertion_urn),
1448
+ schedule=schedule,
1449
+ parameters=self._get_assertion_evaluation_parameters(
1450
+ str(source_type), field
1451
+ ),
1452
+ ),
1453
+ ],
1454
+ settings=models.AssertionMonitorSettingsClass(
1455
+ adjustmentSettings=models.AssertionAdjustmentSettingsClass(
1456
+ sensitivity=self._convert_sensitivity(),
1457
+ exclusionWindows=self._convert_exclusion_windows(),
1458
+ trainingDataLookbackWindowDays=self.training_data_lookback_days,
1459
+ ),
1460
+ ),
1461
+ ),
1212
1462
  )
1213
1463
 
1214
1464
 
1215
- class _SmartVolumeAssertionInput(_AssertionInput):
1465
+ class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
1216
1466
  def __init__(
1217
1467
  self,
1218
1468
  *,
@@ -1237,7 +1487,8 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1237
1487
  updated_by: Union[str, CorpUserUrn],
1238
1488
  updated_at: datetime,
1239
1489
  ):
1240
- super().__init__(
1490
+ _AssertionInput.__init__(
1491
+ self,
1241
1492
  dataset_urn=dataset_urn,
1242
1493
  entity_client=entity_client,
1243
1494
  urn=urn,
@@ -1245,9 +1496,6 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1245
1496
  enabled=enabled,
1246
1497
  schedule=schedule,
1247
1498
  detection_mechanism=detection_mechanism,
1248
- sensitivity=sensitivity,
1249
- exclusion_windows=exclusion_windows,
1250
- training_data_lookback_days=training_data_lookback_days,
1251
1499
  incident_behavior=incident_behavior,
1252
1500
  tags=tags,
1253
1501
  source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
@@ -1256,6 +1504,12 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1256
1504
  updated_by=updated_by,
1257
1505
  updated_at=updated_at,
1258
1506
  )
1507
+ _HasSmartAssertionInputs.__init__(
1508
+ self,
1509
+ sensitivity=sensitivity,
1510
+ exclusion_windows=exclusion_windows,
1511
+ training_data_lookback_days=training_data_lookback_days,
1512
+ )
1259
1513
 
1260
1514
  def _create_assertion_info(
1261
1515
  self, filter: Optional[models.DatasetFilterClass]
@@ -1276,15 +1530,13 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1276
1530
  )
1277
1531
 
1278
1532
  def _convert_schedule(self) -> models.CronScheduleClass:
1279
- """Create a schedule for a smart freshness assertion.
1280
-
1281
- Since the schedule is not used for smart freshness assertions, we return a default schedule.
1533
+ """Create a schedule for a smart volume assertion.
1282
1534
 
1283
1535
  Returns:
1284
1536
  A CronScheduleClass with appropriate schedule settings.
1285
1537
  """
1286
1538
  if self.schedule is None:
1287
- return DEFAULT_SCHEDULE
1539
+ return DEFAULT_HOURLY_SCHEDULE
1288
1540
 
1289
1541
  return models.CronScheduleClass(
1290
1542
  cron=self.schedule.cron,
@@ -1333,3 +1585,41 @@ class _SmartVolumeAssertionInput(_AssertionInput):
1333
1585
  )
1334
1586
 
1335
1587
  return source_type, field
1588
+
1589
+ def _create_monitor_info(
1590
+ self,
1591
+ assertion_urn: AssertionUrn,
1592
+ status: models.MonitorStatusClass,
1593
+ schedule: models.CronScheduleClass,
1594
+ source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
1595
+ field: Optional[FieldSpecType],
1596
+ ) -> models.MonitorInfoClass:
1597
+ """
1598
+ Create a MonitorInfoClass with all the necessary components.
1599
+ """
1600
+ return models.MonitorInfoClass(
1601
+ type=models.MonitorTypeClass.ASSERTION,
1602
+ status=status,
1603
+ assertionMonitor=models.AssertionMonitorClass(
1604
+ assertions=[
1605
+ models.AssertionEvaluationSpecClass(
1606
+ assertion=str(assertion_urn),
1607
+ schedule=schedule,
1608
+ parameters=self._get_assertion_evaluation_parameters(
1609
+ str(source_type), field
1610
+ ),
1611
+ ),
1612
+ ],
1613
+ settings=models.AssertionMonitorSettingsClass(
1614
+ adjustmentSettings=models.AssertionAdjustmentSettingsClass(
1615
+ sensitivity=self._convert_sensitivity(),
1616
+ exclusionWindows=self._convert_exclusion_windows(),
1617
+ trainingDataLookbackWindowDays=self.training_data_lookback_days,
1618
+ ),
1619
+ ),
1620
+ ),
1621
+ )
1622
+
1623
+ def _assertion_type(self) -> str:
1624
+ """Get the assertion type."""
1625
+ return models.AssertionTypeClass.VOLUME