acryl-datahub-cloud 0.3.12rc4__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -17,6 +17,7 @@ from typing_extensions import Self
17
17
  from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
18
18
  ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
19
19
  DEFAULT_DETECTION_MECHANISM,
20
+ DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
20
21
  DEFAULT_SCHEDULE,
21
22
  DEFAULT_SENSITIVITY,
22
23
  AssertionIncidentBehavior,
@@ -27,12 +28,23 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
27
28
  TimeWindowSizeInputTypes,
28
29
  _DetectionMechanismTypes,
29
30
  )
31
+ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
32
+ MetricInputType,
33
+ OperatorInputType,
34
+ RangeInputType,
35
+ RangeTypeInputType,
36
+ ValueInputType,
37
+ ValueTypeInputType,
38
+ )
39
+ from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
40
+ SqlAssertionCriteria,
41
+ )
30
42
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion
31
43
  from acryl_datahub_cloud.sdk.entities.monitor import (
32
44
  Monitor,
33
45
  _get_nested_field_for_entity_with_default,
34
46
  )
35
- from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError
47
+ from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
36
48
  from datahub.emitter.mce_builder import parse_ts_millis
37
49
  from datahub.metadata import schema_classes as models
38
50
  from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
@@ -65,7 +77,9 @@ class _HasSchedule:
65
77
  return self._schedule
66
78
 
67
79
  @staticmethod
68
- def _get_schedule(monitor: Monitor) -> models.CronScheduleClass:
80
+ def _get_schedule(
81
+ monitor: Monitor, default: models.CronScheduleClass = DEFAULT_SCHEDULE
82
+ ) -> models.CronScheduleClass:
69
83
  """Get the schedule from the monitor."""
70
84
  assertion_evaluation_specs = _get_nested_field_for_entity_with_default(
71
85
  monitor,
@@ -73,11 +87,11 @@ class _HasSchedule:
73
87
  [],
74
88
  )
75
89
  if len(assertion_evaluation_specs) == 0:
76
- return DEFAULT_SCHEDULE
90
+ return default
77
91
  assertion_evaluation_spec = assertion_evaluation_specs[0]
78
92
  schedule = assertion_evaluation_spec.schedule
79
93
  if schedule is None:
80
- return DEFAULT_SCHEDULE
94
+ return default
81
95
  return schedule
82
96
 
83
97
 
@@ -169,14 +183,169 @@ class _HasSmartFunctionality:
169
183
  return retrieved
170
184
 
171
185
 
186
+ class _HasColumnMetricFunctionality:
187
+ """
188
+ Mixin class that provides column metric functionality for assertions.
189
+ """
190
+
191
+ def __init__(
192
+ self,
193
+ column_name: str,
194
+ metric_type: MetricInputType,
195
+ operator: OperatorInputType,
196
+ value: Optional[ValueInputType] = None,
197
+ value_type: Optional[ValueTypeInputType] = None,
198
+ range: Optional[RangeInputType] = None,
199
+ range_type: Optional[RangeTypeInputType] = None,
200
+ ):
201
+ self._column_name = column_name
202
+ self._metric_type = metric_type
203
+ self._operator = operator
204
+ self._value = value
205
+ self._value_type = value_type
206
+ self._range = range
207
+ self._range_type = range_type
208
+
209
+ @property
210
+ def column_name(self) -> str:
211
+ return self._column_name
212
+
213
+ @property
214
+ def metric_type(self) -> MetricInputType:
215
+ return self._metric_type
216
+
217
+ @property
218
+ def operator(self) -> OperatorInputType:
219
+ return self._operator
220
+
221
+ @property
222
+ def value(self) -> Optional[ValueInputType]:
223
+ return self._value
224
+
225
+ @property
226
+ def value_type(self) -> Optional[ValueTypeInputType]:
227
+ return self._value_type
228
+
229
+ @property
230
+ def range(self) -> Optional[RangeInputType]:
231
+ return self._range
232
+
233
+ @property
234
+ def range_type(self) -> Optional[RangeTypeInputType]:
235
+ return self._range_type
236
+
237
+ @staticmethod
238
+ def _get_column_name(assertion: Assertion) -> str:
239
+ column_name = _get_nested_field_for_entity_with_default(
240
+ assertion,
241
+ field_path="info.fieldMetricAssertion.field.path",
242
+ default=None,
243
+ )
244
+ if column_name is None:
245
+ raise SDKUsageError(
246
+ f"Column name is required for column metric assertions. Assertion {assertion.urn} does not have a column name"
247
+ )
248
+ return column_name
249
+
250
+ @staticmethod
251
+ def _get_metric_type(assertion: Assertion) -> MetricInputType:
252
+ metric_type = _get_nested_field_for_entity_with_default(
253
+ assertion,
254
+ field_path="info.fieldMetricAssertion.metric",
255
+ default=None,
256
+ )
257
+ if metric_type is None:
258
+ raise SDKUsageError(
259
+ f"Metric type is required for column metric assertions. Assertion {assertion.urn} does not have a metric type"
260
+ )
261
+ return metric_type
262
+
263
+ @staticmethod
264
+ def _get_operator(assertion: Assertion) -> OperatorInputType:
265
+ operator = _get_nested_field_for_entity_with_default(
266
+ assertion,
267
+ field_path="info.fieldMetricAssertion.operator",
268
+ default=None,
269
+ )
270
+ if operator is None:
271
+ raise SDKUsageError(
272
+ f"Operator is required for column metric assertions. Assertion {assertion.urn} does not have an operator"
273
+ )
274
+ return operator
275
+
276
+ @staticmethod
277
+ def _get_value(assertion: Assertion) -> Optional[ValueInputType]:
278
+ value = _get_nested_field_for_entity_with_default(
279
+ assertion,
280
+ field_path="info.fieldMetricAssertion.parameters.value.value",
281
+ default=None,
282
+ )
283
+ return value
284
+
285
+ @staticmethod
286
+ def _get_value_type(assertion: Assertion) -> Optional[ValueTypeInputType]:
287
+ value_type = _get_nested_field_for_entity_with_default(
288
+ assertion,
289
+ field_path="info.fieldMetricAssertion.parameters.value.type",
290
+ default=None,
291
+ )
292
+ return value_type
293
+
294
+ @staticmethod
295
+ def _get_range(assertion: Assertion) -> Optional[RangeInputType]:
296
+ min_value = _get_nested_field_for_entity_with_default(
297
+ assertion,
298
+ field_path="info.fieldMetricAssertion.parameters.minValue",
299
+ default=None,
300
+ )
301
+ max_value = _get_nested_field_for_entity_with_default(
302
+ assertion,
303
+ field_path="info.fieldMetricAssertion.parameters.maxValue",
304
+ default=None,
305
+ )
306
+
307
+ # If both are None, return None
308
+ if min_value is None and max_value is None:
309
+ return None
310
+
311
+ # Extract the value from the parameter objects if they exist
312
+ if min_value is not None and hasattr(min_value, "value"):
313
+ min_value = min_value.value
314
+ if max_value is not None and hasattr(max_value, "value"):
315
+ max_value = max_value.value
316
+
317
+ return (min_value, max_value)
318
+
319
+ @staticmethod
320
+ def _get_range_type(assertion: Assertion) -> Optional[RangeTypeInputType]:
321
+ min_value_range_type = _get_nested_field_for_entity_with_default(
322
+ assertion,
323
+ field_path="info.fieldMetricAssertion.parameters.minValue.type",
324
+ default=None,
325
+ )
326
+ max_value_range_type = _get_nested_field_for_entity_with_default(
327
+ assertion,
328
+ field_path="info.fieldMetricAssertion.parameters.maxValue.type",
329
+ default=None,
330
+ )
331
+
332
+ # If both are None, return None instead of a tuple of Nones
333
+ if min_value_range_type is None and max_value_range_type is None:
334
+ return None
335
+
336
+ return (min_value_range_type, max_value_range_type)
337
+
338
+
172
339
  class _AssertionPublic(ABC):
173
340
  """
174
341
  Abstract base class that represents a public facing assertion and contains the common properties of all assertions.
175
342
  """
176
343
 
344
+ # TODO: have the individual classes self-declare this
177
345
  _SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
178
346
  models.FreshnessAssertionInfoClass,
179
347
  models.VolumeAssertionInfoClass,
348
+ models.FieldAssertionInfoClass,
180
349
  )
181
350
 
182
351
  def __init__(
@@ -278,165 +447,26 @@ class _AssertionPublic(ABC):
278
447
  return incident_behaviors
279
448
 
280
449
  @staticmethod
450
+ @abstractmethod
281
451
  def _get_detection_mechanism(
282
452
  assertion: Assertion,
283
453
  monitor: Monitor,
284
454
  default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
285
455
  ) -> Optional[_DetectionMechanismTypes]:
286
- """Get the detection mechanism from the monitor and assertion."""
287
- if not _AssertionPublic._has_valid_monitor_info(monitor):
288
- return default
289
-
290
- # 1. Check if the assertion has a parameters field
291
- def _warn_and_return_default_detection_mechanism(
292
- field_name: str,
293
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
294
- ) -> Optional[_DetectionMechanismTypes]:
295
- logger.warning(
296
- f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
297
- )
298
- return default
299
-
300
- parameters = _AssertionPublic._get_assertion_parameters(monitor, default)
301
- if parameters is None:
302
- return _warn_and_return_default_detection_mechanism("parameters", default)
303
-
304
- # 2. Convert the raw detection mechanism to the SDK detection mechanism
305
- if parameters.type in [
306
- models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
307
- models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
308
- ]:
309
- if assertion.info is None:
310
- return _warn_and_return_default_detection_mechanism("info", default)
311
- if isinstance(assertion.info, models.VolumeAssertionInfoClass):
312
- return _AssertionPublic._get_volume_detection_mechanism(
313
- assertion, parameters, default
314
- )
315
- elif isinstance(assertion.info, models.FreshnessAssertionInfoClass):
316
- return _AssertionPublic._get_freshness_detection_mechanism(
317
- assertion, parameters, default
318
- )
319
- # TODO: Consider moving the detection mechanism logic to the assertion classes themselves e.g. _get_assertion_specific_detection_mechanism as an abstract method
320
- # TODO: Add support here for other detection mechanisms when other assertion types are supported
321
- else:
322
- raise SDKNotYetSupportedError(
323
- f"AssertionType {type(assertion.info).__name__}"
324
- )
325
- else:
326
- raise SDKNotYetSupportedError(
327
- f"AssertionEvaluationParametersType {parameters.type} not supported"
328
- )
329
-
330
- @staticmethod
331
- def _get_freshness_detection_mechanism(
332
- assertion: Assertion,
333
- parameters: models.AssertionEvaluationParametersClass,
334
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
335
- ) -> Optional[_DetectionMechanismTypes]:
336
- """Get the detection mechanism for freshness assertions."""
337
- if parameters.datasetFreshnessParameters is None:
338
- logger.warning(
339
- f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
340
- )
341
- return default
342
-
343
- source_type = parameters.datasetFreshnessParameters.sourceType
344
- if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
345
- return DetectionMechanism.INFORMATION_SCHEMA
346
- elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
347
- return DetectionMechanism.AUDIT_LOG
348
- elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
349
- return _AssertionPublic._get_field_value_detection_mechanism(
350
- assertion, parameters
351
- )
352
- elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
353
- return DetectionMechanism.DATAHUB_OPERATION
354
- elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
355
- raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
356
- else:
357
- raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
358
-
359
- @staticmethod
360
- def _get_volume_detection_mechanism(
361
- assertion: Assertion,
362
- parameters: models.AssertionEvaluationParametersClass,
363
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
364
- ) -> _DetectionMechanismTypes:
365
- """Get the detection mechanism for volume assertions."""
366
- if parameters.datasetVolumeParameters is None:
367
- logger.warning(
368
- f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
369
- )
370
- if default is None:
371
- return DEFAULT_DETECTION_MECHANISM
372
- else:
373
- return default
374
-
375
- source_type = parameters.datasetVolumeParameters.sourceType
376
- if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
377
- return DetectionMechanism.INFORMATION_SCHEMA
378
- elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
379
- additional_filter = _AssertionPublic._get_additional_filter(assertion)
380
- return DetectionMechanism.QUERY(additional_filter=additional_filter)
381
- elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
382
- return DetectionMechanism.DATASET_PROFILE
383
- else:
384
- raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
385
-
386
- @staticmethod
387
- def _get_field_value_detection_mechanism(
388
- assertion: Assertion,
389
- parameters: models.AssertionEvaluationParametersClass,
390
- ) -> _DetectionMechanismTypes:
391
- """Get the detection mechanism for field value based freshness."""
392
- # We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
393
- assert parameters.datasetFreshnessParameters is not None
394
- field = parameters.datasetFreshnessParameters.field
395
-
396
- if field is None or field.kind is None:
397
- logger.warning(
398
- f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
399
- )
400
- return DEFAULT_DETECTION_MECHANISM
456
+ """Get the detection mechanism from the monitor and assertion.
401
457
 
402
- column_name = field.path
403
- additional_filter = _AssertionPublic._get_additional_filter(assertion)
458
+ This method should be implemented by each assertion class to handle
459
+ its specific detection mechanism logic.
404
460
 
405
- if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
406
- return DetectionMechanism.LAST_MODIFIED_COLUMN(
407
- column_name=column_name, additional_filter=additional_filter
408
- )
409
- elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
410
- return DetectionMechanism.HIGH_WATERMARK_COLUMN(
411
- column_name=column_name, additional_filter=additional_filter
412
- )
413
- else:
414
- raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
461
+ Args:
462
+ assertion: The assertion entity
463
+ monitor: The monitor entity
464
+ default: Default detection mechanism to return if none is found
415
465
 
416
- @staticmethod
417
- def _get_additional_filter(assertion: Assertion) -> Optional[str]:
418
- """Get the additional filter SQL from the assertion."""
419
- if assertion.info is None:
420
- logger.warning(
421
- f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
422
- )
423
- return None
424
- if (
425
- not isinstance(
426
- assertion.info,
427
- _AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES,
428
- )
429
- or assertion.info.filter is None
430
- ):
431
- logger.warning(
432
- f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
433
- )
434
- return None
435
- if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
436
- raise SDKNotYetSupportedError(
437
- f"DatasetFilterType {assertion.info.filter.type}"
438
- )
439
- return assertion.info.filter.sql
466
+ Returns:
467
+ The detection mechanism or default if none is found
468
+ """
469
+ pass
440
470
 
441
471
  @staticmethod
442
472
  def _has_valid_monitor_info(monitor: Monitor) -> bool:
@@ -564,6 +594,124 @@ class _AssertionPublic(ABC):
564
594
  """
565
595
  pass
566
596
 
597
+ @staticmethod
598
+ def _get_additional_filter(assertion: Assertion) -> Optional[str]:
599
+ """Get the additional filter SQL from the assertion."""
600
+ if assertion.info is None:
601
+ logger.warning(
602
+ f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
603
+ )
604
+ return None
605
+ if (
606
+ not isinstance(
607
+ assertion.info,
608
+ _AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES,
609
+ )
610
+ or assertion.info.filter is None
611
+ ):
612
+ logger.warning(
613
+ f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
614
+ )
615
+ return None
616
+ if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
617
+ raise SDKNotYetSupportedError(
618
+ f"DatasetFilterType {assertion.info.filter.type}"
619
+ )
620
+ return assertion.info.filter.sql
621
+
622
+ @staticmethod
623
+ def _get_field_value_detection_mechanism(
624
+ assertion: Assertion,
625
+ parameters: models.AssertionEvaluationParametersClass,
626
+ ) -> _DetectionMechanismTypes:
627
+ """Get the detection mechanism for field value based freshness."""
628
+ # We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
629
+ assert parameters.datasetFreshnessParameters is not None
630
+ field = parameters.datasetFreshnessParameters.field
631
+
632
+ if field is None or field.kind is None:
633
+ logger.warning(
634
+ f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
635
+ )
636
+ return DEFAULT_DETECTION_MECHANISM
637
+
638
+ column_name = field.path
639
+ additional_filter = _AssertionPublic._get_additional_filter(assertion)
640
+
641
+ if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
642
+ return DetectionMechanism.LAST_MODIFIED_COLUMN(
643
+ column_name=column_name, additional_filter=additional_filter
644
+ )
645
+ elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
646
+ return DetectionMechanism.HIGH_WATERMARK_COLUMN(
647
+ column_name=column_name, additional_filter=additional_filter
648
+ )
649
+ else:
650
+ raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
651
+
652
+ @staticmethod
653
+ def _warn_and_return_default_detection_mechanism(
654
+ monitor: Monitor,
655
+ field_name: str,
656
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
657
+ ) -> Optional[_DetectionMechanismTypes]:
658
+ """Helper method to log a warning and return default detection mechanism."""
659
+ logger.warning(
660
+ f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
661
+ )
662
+ return default
663
+
664
+ @staticmethod
665
+ def _check_valid_monitor_info(
666
+ monitor: Monitor,
667
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
668
+ ) -> Optional[models.AssertionEvaluationParametersClass]:
669
+ """Check if monitor has valid info and get assertion parameters.
670
+
671
+ Returns:
672
+ The assertion parameters if monitor info is valid, None otherwise.
673
+ """
674
+ if not _AssertionPublic._has_valid_monitor_info(monitor):
675
+ return None
676
+
677
+ parameters = _AssertionPublic._get_assertion_parameters(monitor)
678
+ if parameters is None:
679
+ return None
680
+
681
+ return parameters
682
+
683
+ @staticmethod
684
+ def _get_validated_detection_context(
685
+ monitor: Monitor,
686
+ assertion: Assertion,
687
+ expected_parameters_type: str,
688
+ expected_info_class: type,
689
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
690
+ ) -> Optional[models.AssertionEvaluationParametersClass]:
691
+ """
692
+ Validate and extract the detection context (parameters) for detection mechanism logic.
693
+ Returns the parameters if all checks pass, otherwise None.
694
+ """
695
+ parameters = _AssertionPublic._check_valid_monitor_info(monitor, default)
696
+ if parameters is None:
697
+ return None
698
+ if parameters.type != expected_parameters_type:
699
+ logger.warning(
700
+ f"Expected {expected_parameters_type} parameters type, got {parameters.type}, defaulting detection mechanism to {default}"
701
+ )
702
+ return None
703
+ if assertion.info is None:
704
+ _AssertionPublic._warn_and_return_default_detection_mechanism(
705
+ monitor, "info", default
706
+ )
707
+ return None
708
+ if not isinstance(assertion.info, expected_info_class):
709
+ logger.warning(
710
+ f"Expected {expected_info_class.__name__}, got {type(assertion.info).__name__}, defaulting detection mechanism to {default}"
711
+ )
712
+ return None
713
+ return parameters
714
+
567
715
 
568
716
  class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
569
717
  """
@@ -662,6 +810,43 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
662
810
  tags=cls._get_tags(assertion),
663
811
  )
664
812
 
813
+ @staticmethod
814
+ def _get_detection_mechanism(
815
+ assertion: Assertion,
816
+ monitor: Monitor,
817
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
818
+ ) -> Optional[_DetectionMechanismTypes]:
819
+ """Get the detection mechanism for freshness assertions."""
820
+ parameters = _AssertionPublic._get_validated_detection_context(
821
+ monitor,
822
+ assertion,
823
+ models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
824
+ models.FreshnessAssertionInfoClass,
825
+ default,
826
+ )
827
+ if parameters is None:
828
+ return default
829
+ if parameters.datasetFreshnessParameters is None:
830
+ logger.warning(
831
+ f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
832
+ )
833
+ return default
834
+ source_type = parameters.datasetFreshnessParameters.sourceType
835
+ if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
836
+ return DetectionMechanism.INFORMATION_SCHEMA
837
+ elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
838
+ return DetectionMechanism.AUDIT_LOG
839
+ elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
840
+ return _AssertionPublic._get_field_value_detection_mechanism(
841
+ assertion, parameters
842
+ )
843
+ elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
844
+ return DetectionMechanism.DATAHUB_OPERATION
845
+ elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
846
+ raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
847
+ else:
848
+ raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
849
+
665
850
 
666
851
  class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
667
852
  """
@@ -760,6 +945,41 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
760
945
  tags=cls._get_tags(assertion),
761
946
  )
762
947
 
948
+ @staticmethod
949
+ def _get_detection_mechanism(
950
+ assertion: Assertion,
951
+ monitor: Monitor,
952
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
953
+ ) -> Optional[_DetectionMechanismTypes]:
954
+ """Get the detection mechanism for volume assertions."""
955
+ parameters = _AssertionPublic._get_validated_detection_context(
956
+ monitor,
957
+ assertion,
958
+ models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
959
+ models.VolumeAssertionInfoClass,
960
+ default,
961
+ )
962
+ if parameters is None:
963
+ return default
964
+ if parameters.datasetVolumeParameters is None:
965
+ logger.warning(
966
+ f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
967
+ )
968
+ if default is None:
969
+ return DEFAULT_DETECTION_MECHANISM
970
+ else:
971
+ return default
972
+ source_type = parameters.datasetVolumeParameters.sourceType
973
+ if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
974
+ return DetectionMechanism.INFORMATION_SCHEMA
975
+ elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
976
+ additional_filter = _AssertionPublic._get_additional_filter(assertion)
977
+ return DetectionMechanism.QUERY(additional_filter=additional_filter)
978
+ elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
979
+ return DetectionMechanism.DATASET_PROFILE
980
+ else:
981
+ raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
982
+
763
983
 
764
984
  class FreshnessAssertion(_HasSchedule, _AssertionPublic):
765
985
  """
@@ -899,20 +1119,199 @@ class FreshnessAssertion(_HasSchedule, _AssertionPublic):
899
1119
  tags=cls._get_tags(assertion),
900
1120
  )
901
1121
 
1122
+ @staticmethod
1123
+ def _get_detection_mechanism(
1124
+ assertion: Assertion,
1125
+ monitor: Monitor,
1126
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
1127
+ ) -> Optional[_DetectionMechanismTypes]:
1128
+ """Get the detection mechanism for freshness assertions."""
1129
+ parameters = _AssertionPublic._get_validated_detection_context(
1130
+ monitor,
1131
+ assertion,
1132
+ models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
1133
+ models.FreshnessAssertionInfoClass,
1134
+ default,
1135
+ )
1136
+ if parameters is None:
1137
+ return default
1138
+ if parameters.datasetFreshnessParameters is None:
1139
+ logger.warning(
1140
+ f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
1141
+ )
1142
+ return default
1143
+ source_type = parameters.datasetFreshnessParameters.sourceType
1144
+ if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
1145
+ return DetectionMechanism.INFORMATION_SCHEMA
1146
+ elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
1147
+ return DetectionMechanism.AUDIT_LOG
1148
+ elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
1149
+ return _AssertionPublic._get_field_value_detection_mechanism(
1150
+ assertion, parameters
1151
+ )
1152
+ elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
1153
+ return DetectionMechanism.DATAHUB_OPERATION
1154
+ elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
1155
+ raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
1156
+ else:
1157
+ raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
1158
+
902
1159
 
903
- class SmartColumnMetricAssertion(_HasSmartFunctionality, _AssertionPublic):
1160
+ class SqlAssertion(_AssertionPublic, _HasSchedule):
904
1161
  """
905
- A class that represents a smart column metric assertion.
1162
+ A class that represents a SQL assertion.
906
1163
  """
907
1164
 
908
- def __init__(self) -> None:
909
- raise NotImplementedError("SmartColumnMetricAssertion is not implemented yet")
1165
+ def __init__(
1166
+ self,
1167
+ *,
1168
+ urn: AssertionUrn,
1169
+ dataset_urn: DatasetUrn,
1170
+ display_name: str,
1171
+ mode: AssertionMode,
1172
+ statement: str,
1173
+ criteria: SqlAssertionCriteria,
1174
+ schedule: models.CronScheduleClass,
1175
+ tags: list[TagUrn],
1176
+ incident_behavior: list[AssertionIncidentBehavior],
1177
+ created_by: Optional[CorpUserUrn] = None,
1178
+ created_at: Union[datetime, None] = None,
1179
+ updated_by: Optional[CorpUserUrn] = None,
1180
+ updated_at: Optional[datetime] = None,
1181
+ ):
1182
+ """
1183
+ Initialize a SQL assertion.
910
1184
 
1185
+ Note: Values can be accessed, but not set on the assertion object.
1186
+ To update an assertion, use the `upsert_*` method.
1187
+ Args:
1188
+ urn: The urn of the assertion.
1189
+ dataset_urn: The urn of the dataset that the assertion is for.
1190
+ display_name: The display name of the assertion.
1191
+ mode: The mode of the assertion (active, inactive).
1192
+ statement: The SQL statement to be used for the assertion.
1193
+ criteria: The criteria to be used for the assertion.
1194
+ schedule: The schedule of the assertion.
1195
+ tags: The tags applied to the assertion.
1196
+ incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
1197
+ created_by: The urn of the user that created the assertion.
1198
+ created_at: The timestamp of when the assertion was created.
1199
+ updated_by: The urn of the user that updated the assertion.
1200
+ updated_at: The timestamp of when the assertion was updated.
1201
+ """
1202
+ # Initialize the mixins first
1203
+ _AssertionPublic.__init__(
1204
+ self,
1205
+ urn=urn,
1206
+ dataset_urn=dataset_urn,
1207
+ display_name=display_name,
1208
+ mode=mode,
1209
+ tags=tags,
1210
+ incident_behavior=incident_behavior,
1211
+ created_by=created_by,
1212
+ created_at=created_at,
1213
+ updated_by=updated_by,
1214
+ updated_at=updated_at,
1215
+ )
1216
+ _HasSchedule.__init__(self, schedule=schedule)
1217
+ # Then initialize the parent class
1218
+ self._statement = statement
1219
+ self._criteria = criteria
1220
+
1221
+ @property
1222
+ def statement(self) -> str:
1223
+ return self._statement
1224
+
1225
+ @property
1226
+ def criteria(self) -> SqlAssertionCriteria:
1227
+ return self._criteria
1228
+
1229
+ @staticmethod
1230
+ def _get_detection_mechanism(
1231
+ assertion: Assertion,
1232
+ monitor: Monitor,
1233
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
1234
+ ) -> Optional[_DetectionMechanismTypes]:
1235
+ """Sql assertions do not have a detection mechanism."""
1236
+ return None
911
1237
 
912
- AssertionTypes = Union[
913
- SmartFreshnessAssertion,
914
- SmartVolumeAssertion,
915
- FreshnessAssertion,
916
- SmartColumnMetricAssertion,
917
- # TODO: Add other assertion types here as we add them.
918
- ]
1238
+ @staticmethod
1239
+ def _get_statement(assertion: Assertion) -> str:
1240
+ if assertion.info is None:
1241
+ raise SDKNotYetSupportedError(
1242
+ f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
1243
+ )
1244
+ if isinstance(assertion.info, models.SqlAssertionInfoClass):
1245
+ return assertion.info.statement
1246
+ else:
1247
+ raise SDKNotYetSupportedError(
1248
+ f"Assertion {assertion.urn} is not a SQL assertion"
1249
+ )
1250
+
1251
+ @staticmethod
1252
+ def _get_criteria(assertion: Assertion) -> SqlAssertionCriteria:
1253
+ if assertion.info is None:
1254
+ raise SDKNotYetSupportedError(
1255
+ f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
1256
+ )
1257
+ if isinstance(assertion.info, models.SqlAssertionInfoClass):
1258
+ parameters: Union[float, tuple[float, float]]
1259
+ if assertion.info.parameters.value is not None:
1260
+ parameters = float(assertion.info.parameters.value.value)
1261
+ elif (
1262
+ assertion.info.parameters.maxValue is not None
1263
+ and assertion.info.parameters.minValue is not None
1264
+ ):
1265
+ # min and max values are in the order of min, max
1266
+ parameters = (
1267
+ float(assertion.info.parameters.minValue.value),
1268
+ float(assertion.info.parameters.maxValue.value),
1269
+ )
1270
+ else:
1271
+ raise SDKNotYetSupportedError(
1272
+ f"Assertion {assertion.urn} does not have a valid parameters for the SQL assertion"
1273
+ )
1274
+
1275
+ return SqlAssertionCriteria(
1276
+ type=assertion.info.type
1277
+ if isinstance(assertion.info.type, str)
1278
+ else str(assertion.info.type),
1279
+ change_type=assertion.info.changeType
1280
+ if assertion.info.changeType is None
1281
+ else (
1282
+ assertion.info.changeType
1283
+ if isinstance(assertion.info.changeType, str)
1284
+ else str(assertion.info.changeType)
1285
+ ),
1286
+ operator=assertion.info.operator
1287
+ if isinstance(assertion.info.operator, str)
1288
+ else str(assertion.info.operator),
1289
+ parameters=parameters,
1290
+ )
1291
+ else:
1292
+ raise SDKNotYetSupportedError(
1293
+ f"Assertion {assertion.urn} is not a SQL assertion"
1294
+ )
1295
+
1296
+ @classmethod
1297
+ def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
1298
+ """
1299
+ Create a SQL assertion from the assertion and monitor entities.
1300
+ """
1301
+ return cls(
1302
+ urn=assertion.urn,
1303
+ dataset_urn=assertion.dataset,
1304
+ display_name=assertion.description or "",
1305
+ mode=cls._get_mode(monitor),
1306
+ statement=cls._get_statement(assertion),
1307
+ criteria=cls._get_criteria(assertion),
1308
+ schedule=cls._get_schedule(
1309
+ monitor, default=DEFAULT_EVERY_SIX_HOURS_SCHEDULE
1310
+ ),
1311
+ tags=cls._get_tags(assertion),
1312
+ incident_behavior=cls._get_incident_behavior(assertion),
1313
+ created_by=cls._get_created_by(assertion),
1314
+ created_at=cls._get_created_at(assertion),
1315
+ updated_by=cls._get_updated_by(assertion),
1316
+ updated_at=cls._get_updated_at(assertion),
1317
+ )