acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (20)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
  3. acryl_datahub_cloud/metadata/schema.avsc +9 -0
  4. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
  5. acryl_datahub_cloud/sdk/__init__.py +10 -2
  6. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  7. acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
  8. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  9. acryl_datahub_cloud/sdk/assertion/types.py +18 -0
  10. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  11. acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
  12. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
  13. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
  14. acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
  15. acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
  16. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
  17. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
  18. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
  19. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
  20. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,7 @@ from typing import Optional, Union
14
14
 
15
15
  from typing_extensions import Self
16
16
 
17
- from acryl_datahub_cloud.sdk.assertion_input import (
17
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
18
18
  ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
19
19
  DEFAULT_DETECTION_MECHANISM,
20
20
  DEFAULT_SCHEDULE,
@@ -24,14 +24,23 @@ from acryl_datahub_cloud.sdk.assertion_input import (
24
24
  ExclusionWindowTypes,
25
25
  FixedRangeExclusionWindow,
26
26
  InferenceSensitivity,
27
+ TimeWindowSizeInputTypes,
27
28
  _DetectionMechanismTypes,
28
29
  )
30
+ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
31
+ MetricInputType,
32
+ OperatorInputType,
33
+ RangeInputType,
34
+ RangeTypeInputType,
35
+ ValueInputType,
36
+ ValueTypeInputType,
37
+ )
29
38
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion
30
39
  from acryl_datahub_cloud.sdk.entities.monitor import (
31
40
  Monitor,
32
41
  _get_nested_field_for_entity_with_default,
33
42
  )
34
- from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError
43
+ from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
35
44
  from datahub.emitter.mce_builder import parse_ts_millis
36
45
  from datahub.metadata import schema_classes as models
37
46
  from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
@@ -85,21 +94,12 @@ class _HasSmartFunctionality:
85
94
  Mixin class that provides smart functionality for assertions.
86
95
  """
87
96
 
88
- _SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
89
- models.FreshnessAssertionInfoClass,
90
- models.VolumeAssertionInfoClass,
91
- )
92
-
93
97
  def __init__(
94
98
  self,
95
99
  *,
96
100
  sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
97
101
  exclusion_windows: list[ExclusionWindowTypes],
98
102
  training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
99
- incident_behavior: list[AssertionIncidentBehavior],
100
- detection_mechanism: Optional[
101
- _DetectionMechanismTypes
102
- ] = DEFAULT_DETECTION_MECHANISM,
103
103
  ) -> None:
104
104
  """
105
105
  Initialize the smart functionality mixin.
@@ -115,8 +115,6 @@ class _HasSmartFunctionality:
115
115
  self._sensitivity = sensitivity
116
116
  self._exclusion_windows = exclusion_windows
117
117
  self._training_data_lookback_days = training_data_lookback_days
118
- self._incident_behavior = incident_behavior
119
- self._detection_mechanism = detection_mechanism
120
118
 
121
119
  @property
122
120
  def sensitivity(self) -> InferenceSensitivity:
@@ -130,14 +128,6 @@ class _HasSmartFunctionality:
130
128
  def training_data_lookback_days(self) -> int:
131
129
  return self._training_data_lookback_days
132
130
 
133
- @property
134
- def incident_behavior(self) -> list[AssertionIncidentBehavior]:
135
- return self._incident_behavior
136
-
137
- @property
138
- def detection_mechanism(self) -> Optional[_DetectionMechanismTypes]:
139
- return self._detection_mechanism
140
-
141
131
  @staticmethod
142
132
  def _get_sensitivity(monitor: Monitor) -> InferenceSensitivity:
143
133
  # 1. Check if the monitor has a sensitivity field
@@ -186,213 +176,158 @@ class _HasSmartFunctionality:
186
176
  assert isinstance(retrieved, int)
187
177
  return retrieved
188
178
 
189
- @staticmethod
190
- def _get_detection_mechanism(
191
- assertion: Assertion,
192
- monitor: Monitor,
193
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
194
- ) -> Optional[_DetectionMechanismTypes]:
195
- """Get the detection mechanism from the monitor and assertion."""
196
- if not _HasSmartFunctionality._has_valid_monitor_info(monitor):
197
- return default
198
179
 
199
- # 1. Check if the assertion has a parameters field
200
- def _warn_and_return_default_detection_mechanism(
201
- field_name: str,
202
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
203
- ) -> Optional[_DetectionMechanismTypes]:
204
- logger.warning(
205
- f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
206
- )
207
- return default
180
+ class _HasColumnMetricFunctionality:
181
+ """
182
+ Mixin class that provides column metric functionality for assertions.
183
+ """
208
184
 
209
- parameters = _HasSmartFunctionality._get_assertion_parameters(monitor, default)
210
- if parameters is None:
211
- return _warn_and_return_default_detection_mechanism("parameters", default)
185
+ def __init__(
186
+ self,
187
+ column_name: str,
188
+ metric_type: MetricInputType,
189
+ operator: OperatorInputType,
190
+ value: Optional[ValueInputType] = None,
191
+ value_type: Optional[ValueTypeInputType] = None,
192
+ range: Optional[RangeInputType] = None,
193
+ range_type: Optional[RangeTypeInputType] = None,
194
+ ):
195
+ self._column_name = column_name
196
+ self._metric_type = metric_type
197
+ self._operator = operator
198
+ self._value = value
199
+ self._value_type = value_type
200
+ self._range = range
201
+ self._range_type = range_type
212
202
 
213
- # 2. Convert the raw detection mechanism to the SDK detection mechanism
214
- if parameters.type in [
215
- models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
216
- models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
217
- ]:
218
- if assertion.info is None:
219
- return _warn_and_return_default_detection_mechanism("info", default)
220
- if isinstance(assertion.info, models.VolumeAssertionInfoClass):
221
- return _HasSmartFunctionality._get_volume_detection_mechanism(
222
- assertion, parameters, default
223
- )
224
- elif isinstance(assertion.info, models.FreshnessAssertionInfoClass):
225
- return _HasSmartFunctionality._get_freshness_detection_mechanism(
226
- assertion, parameters, default
227
- )
228
- # TODO: Consider moving the detection mechanism logic to the assertion classes themselves e.g. _get_assertion_specific_detection_mechanism as an abstract method
229
- # TODO: Add support here for other detection mechanisms when other assertion types are supported
230
- else:
231
- raise SDKNotYetSupportedError(
232
- f"AssertionType {type(assertion.info).__name__}"
233
- )
234
- else:
235
- raise SDKNotYetSupportedError(
236
- f"AssertionEvaluationParametersType {parameters.type} not supported"
237
- )
203
+ @property
204
+ def column_name(self) -> str:
205
+ return self._column_name
238
206
 
239
- @staticmethod
240
- def _has_valid_monitor_info(monitor: Monitor) -> bool:
241
- """Check if monitor has valid info and assertion monitor."""
207
+ @property
208
+ def metric_type(self) -> MetricInputType:
209
+ return self._metric_type
242
210
 
243
- def _warn_and_return_false(field_name: str) -> bool:
244
- logger.warning(
245
- f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
246
- )
247
- return False
211
+ @property
212
+ def operator(self) -> OperatorInputType:
213
+ return self._operator
248
214
 
249
- if monitor.info is None:
250
- return _warn_and_return_false("info")
251
- if monitor.info.assertionMonitor is None:
252
- return _warn_and_return_false("assertionMonitor")
253
- if (
254
- monitor.info.assertionMonitor.assertions is None
255
- or len(monitor.info.assertionMonitor.assertions) == 0
256
- ):
257
- return _warn_and_return_false("assertionMonitor.assertions")
215
+ @property
216
+ def value(self) -> Optional[ValueInputType]:
217
+ return self._value
258
218
 
259
- return True
219
+ @property
220
+ def value_type(self) -> Optional[ValueTypeInputType]:
221
+ return self._value_type
222
+
223
+ @property
224
+ def range(self) -> Optional[RangeInputType]:
225
+ return self._range
226
+
227
+ @property
228
+ def range_type(self) -> Optional[RangeTypeInputType]:
229
+ return self._range_type
260
230
 
261
231
  @staticmethod
262
- def _get_assertion_parameters(
263
- monitor: Monitor,
264
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
265
- ) -> Optional[models.AssertionEvaluationParametersClass]:
266
- """Get the assertion parameters from the monitor."""
267
- # We know these are not None from _has_valid_monitor_info check
268
- assert (
269
- monitor is not None
270
- and monitor.info is not None
271
- and monitor.info.assertionMonitor is not None
272
- )
273
- assertion_monitor = monitor.info.assertionMonitor
274
- assert (
275
- assertion_monitor is not None and assertion_monitor.assertions is not None
232
+ def _get_column_name(assertion: Assertion) -> str:
233
+ column_name = _get_nested_field_for_entity_with_default(
234
+ assertion,
235
+ field_path="info.fieldMetricAssertion.field.path",
236
+ default=None,
276
237
  )
277
- assertions = assertion_monitor.assertions
278
-
279
- if assertions[0].parameters is None:
280
- logger.warning(
281
- f"Monitor {monitor.urn} does not have a assertionMonitor.assertions[0].parameters, defaulting detection mechanism to {default}"
238
+ if column_name is None:
239
+ raise SDKUsageError(
240
+ f"Column name is required for column metric assertions. Assertion {assertion.urn} does not have a column name"
282
241
  )
283
- return None
284
- return assertions[0].parameters
242
+ return column_name
285
243
 
286
244
  @staticmethod
287
- def _get_freshness_detection_mechanism(
288
- assertion: Assertion,
289
- parameters: models.AssertionEvaluationParametersClass,
290
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
291
- ) -> Optional[_DetectionMechanismTypes]:
292
- """Get the detection mechanism for freshness assertions."""
293
- if parameters.datasetFreshnessParameters is None:
294
- logger.warning(
295
- f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
245
+ def _get_metric_type(assertion: Assertion) -> MetricInputType:
246
+ metric_type = _get_nested_field_for_entity_with_default(
247
+ assertion,
248
+ field_path="info.fieldMetricAssertion.metric",
249
+ default=None,
250
+ )
251
+ if metric_type is None:
252
+ raise SDKUsageError(
253
+ f"Metric type is required for column metric assertions. Assertion {assertion.urn} does not have a metric type"
296
254
  )
297
- return default
255
+ return metric_type
298
256
 
299
- source_type = parameters.datasetFreshnessParameters.sourceType
300
- if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
301
- return DetectionMechanism.INFORMATION_SCHEMA
302
- elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
303
- return DetectionMechanism.AUDIT_LOG
304
- elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
305
- return _HasSmartFunctionality._get_field_value_detection_mechanism(
306
- assertion, parameters
257
+ @staticmethod
258
+ def _get_operator(assertion: Assertion) -> OperatorInputType:
259
+ operator = _get_nested_field_for_entity_with_default(
260
+ assertion,
261
+ field_path="info.fieldMetricAssertion.operator",
262
+ default=None,
263
+ )
264
+ if operator is None:
265
+ raise SDKUsageError(
266
+ f"Operator is required for column metric assertions. Assertion {assertion.urn} does not have an operator"
307
267
  )
308
- elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
309
- return DetectionMechanism.DATAHUB_OPERATION
310
- elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
311
- raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
312
- else:
313
- raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
268
+ return operator
314
269
 
315
270
  @staticmethod
316
- def _get_volume_detection_mechanism(
317
- assertion: Assertion,
318
- parameters: models.AssertionEvaluationParametersClass,
319
- default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
320
- ) -> _DetectionMechanismTypes:
321
- """Get the detection mechanism for volume assertions."""
322
- if parameters.datasetVolumeParameters is None:
323
- logger.warning(
324
- f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
325
- )
326
- if default is None:
327
- return DEFAULT_DETECTION_MECHANISM
328
- else:
329
- return default
271
+ def _get_value(assertion: Assertion) -> Optional[ValueInputType]:
272
+ value = _get_nested_field_for_entity_with_default(
273
+ assertion,
274
+ field_path="info.fieldMetricAssertion.parameters.value.value",
275
+ default=None,
276
+ )
277
+ return value
330
278
 
331
- source_type = parameters.datasetVolumeParameters.sourceType
332
- if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
333
- return DetectionMechanism.INFORMATION_SCHEMA
334
- elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
335
- additional_filter = _HasSmartFunctionality._get_additional_filter(assertion)
336
- return DetectionMechanism.QUERY(additional_filter=additional_filter)
337
- elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
338
- return DetectionMechanism.DATASET_PROFILE
339
- else:
340
- raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
279
+ @staticmethod
280
+ def _get_value_type(assertion: Assertion) -> Optional[ValueTypeInputType]:
281
+ value_type = _get_nested_field_for_entity_with_default(
282
+ assertion,
283
+ field_path="info.fieldMetricAssertion.parameters.value.type",
284
+ default=None,
285
+ )
286
+ return value_type
341
287
 
342
288
  @staticmethod
343
- def _get_field_value_detection_mechanism(
344
- assertion: Assertion,
345
- parameters: models.AssertionEvaluationParametersClass,
346
- ) -> _DetectionMechanismTypes:
347
- """Get the detection mechanism for field value based freshness."""
348
- # We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
349
- assert parameters.datasetFreshnessParameters is not None
350
- field = parameters.datasetFreshnessParameters.field
289
+ def _get_range(assertion: Assertion) -> Optional[RangeInputType]:
290
+ min_value = _get_nested_field_for_entity_with_default(
291
+ assertion,
292
+ field_path="info.fieldMetricAssertion.parameters.minValue",
293
+ default=None,
294
+ )
295
+ max_value = _get_nested_field_for_entity_with_default(
296
+ assertion,
297
+ field_path="info.fieldMetricAssertion.parameters.maxValue",
298
+ default=None,
299
+ )
351
300
 
352
- if field is None or field.kind is None:
353
- logger.warning(
354
- f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
355
- )
356
- return DEFAULT_DETECTION_MECHANISM
301
+ # If both are None, return None
302
+ if min_value is None and max_value is None:
303
+ return None
357
304
 
358
- column_name = field.path
359
- additional_filter = _HasSmartFunctionality._get_additional_filter(assertion)
305
+ # Extract the value from the parameter objects if they exist
306
+ if min_value is not None and hasattr(min_value, "value"):
307
+ min_value = min_value.value
308
+ if max_value is not None and hasattr(max_value, "value"):
309
+ max_value = max_value.value
360
310
 
361
- if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
362
- return DetectionMechanism.LAST_MODIFIED_COLUMN(
363
- column_name=column_name, additional_filter=additional_filter
364
- )
365
- elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
366
- return DetectionMechanism.HIGH_WATERMARK_COLUMN(
367
- column_name=column_name, additional_filter=additional_filter
368
- )
369
- else:
370
- raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
311
+ return (min_value, max_value)
371
312
 
372
313
  @staticmethod
373
- def _get_additional_filter(assertion: Assertion) -> Optional[str]:
374
- """Get the additional filter SQL from the assertion."""
375
- if assertion.info is None:
376
- logger.warning(
377
- f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
378
- )
379
- return None
380
- if (
381
- not isinstance(
382
- assertion.info,
383
- _HasSmartFunctionality._SUPPORTED_WITH_FILTER_ASSERTION_TYPES,
384
- )
385
- or assertion.info.filter is None
386
- ):
387
- logger.warning(
388
- f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
389
- )
314
+ def _get_range_type(assertion: Assertion) -> Optional[RangeTypeInputType]:
315
+ min_value_range_type = _get_nested_field_for_entity_with_default(
316
+ assertion,
317
+ field_path="info.fieldMetricAssertion.parameters.minValue.type",
318
+ default=None,
319
+ )
320
+ max_value_range_type = _get_nested_field_for_entity_with_default(
321
+ assertion,
322
+ field_path="info.fieldMetricAssertion.parameters.maxValue.type",
323
+ default=None,
324
+ )
325
+
326
+ # If both are None, return None instead of a tuple of Nones
327
+ if min_value_range_type is None and max_value_range_type is None:
390
328
  return None
391
- if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
392
- raise SDKNotYetSupportedError(
393
- f"DatasetFilterType {assertion.info.filter.type}"
394
- )
395
- return assertion.info.filter.sql
329
+
330
+ return (min_value_range_type, max_value_range_type)
396
331
 
397
332
 
398
333
  class _AssertionPublic(ABC):
@@ -400,6 +335,12 @@ class _AssertionPublic(ABC):
400
335
  Abstract base class that represents a public facing assertion and contains the common properties of all assertions.
401
336
  """
402
337
 
338
+ _SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
339
+ models.FreshnessAssertionInfoClass,
340
+ models.VolumeAssertionInfoClass,
341
+ models.FieldAssertionInfoClass,
342
+ )
343
+
403
344
  def __init__(
404
345
  self,
405
346
  *,
@@ -408,6 +349,10 @@ class _AssertionPublic(ABC):
408
349
  display_name: str,
409
350
  mode: AssertionMode,
410
351
  tags: list[TagUrn],
352
+ incident_behavior: list[AssertionIncidentBehavior],
353
+ detection_mechanism: Optional[
354
+ _DetectionMechanismTypes
355
+ ] = DEFAULT_DETECTION_MECHANISM,
411
356
  created_by: Optional[CorpUserUrn] = None,
412
357
  created_at: Union[datetime, None] = None,
413
358
  updated_by: Optional[CorpUserUrn] = None,
@@ -431,6 +376,8 @@ class _AssertionPublic(ABC):
431
376
  self._dataset_urn = dataset_urn
432
377
  self._display_name = display_name
433
378
  self._mode = mode
379
+ self._incident_behavior = incident_behavior
380
+ self._detection_mechanism = detection_mechanism
434
381
  self._created_by = created_by
435
382
  self._created_at = created_at
436
383
  self._updated_by = updated_by
@@ -453,6 +400,14 @@ class _AssertionPublic(ABC):
453
400
  def mode(self) -> AssertionMode:
454
401
  return self._mode
455
402
 
403
+ @property
404
+ def incident_behavior(self) -> list[AssertionIncidentBehavior]:
405
+ return self._incident_behavior
406
+
407
+ @property
408
+ def detection_mechanism(self) -> Optional[_DetectionMechanismTypes]:
409
+ return self._detection_mechanism
410
+
456
411
  @property
457
412
  def created_by(self) -> Optional[CorpUserUrn]:
458
413
  return self._created_by
@@ -485,30 +440,99 @@ class _AssertionPublic(ABC):
485
440
  return incident_behaviors
486
441
 
487
442
  @staticmethod
488
- def _get_created_by(assertion: Assertion) -> Optional[CorpUserUrn]:
489
- if assertion.source is None:
490
- logger.warning(f"Assertion {assertion.urn} does not have a source")
491
- return None
492
- if isinstance(assertion.source, models.AssertionSourceClass):
493
- if assertion.source.created is None:
494
- logger.warning(
495
- f"Assertion {assertion.urn} does not have a created by in the source"
496
- )
497
- return None
498
- return CorpUserUrn.from_string(assertion.source.created.actor)
499
- elif isinstance(assertion.source, models.AssertionSourceTypeClass):
443
+ @abstractmethod
444
+ def _get_detection_mechanism(
445
+ assertion: Assertion,
446
+ monitor: Monitor,
447
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
448
+ ) -> Optional[_DetectionMechanismTypes]:
449
+ """Get the detection mechanism from the monitor and assertion.
450
+
451
+ This method should be implemented by each assertion class to handle
452
+ its specific detection mechanism logic.
453
+
454
+ Args:
455
+ assertion: The assertion entity
456
+ monitor: The monitor entity
457
+ default: Default detection mechanism to return if none is found
458
+
459
+ Returns:
460
+ The detection mechanism or default if none is found
461
+ """
462
+ pass
463
+
464
+ @staticmethod
465
+ def _has_valid_monitor_info(monitor: Monitor) -> bool:
466
+ """Check if monitor has valid info and assertion monitor."""
467
+
468
+ def _warn_and_return_false(field_name: str) -> bool:
500
469
  logger.warning(
501
- f"Assertion {assertion.urn} has a source type with no created by"
470
+ f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
502
471
  )
503
- return None
504
- return None
472
+ return False
505
473
 
506
- @staticmethod
507
- def _get_created_at(assertion: Assertion) -> Union[datetime, None]:
508
- if assertion.source is None:
509
- logger.warning(f"Assertion {assertion.urn} does not have a source")
510
- return None
511
- if isinstance(assertion.source, models.AssertionSourceClass):
474
+ if monitor.info is None:
475
+ return _warn_and_return_false("info")
476
+ if monitor.info.assertionMonitor is None:
477
+ return _warn_and_return_false("assertionMonitor")
478
+ if (
479
+ monitor.info.assertionMonitor.assertions is None
480
+ or len(monitor.info.assertionMonitor.assertions) == 0
481
+ ):
482
+ return _warn_and_return_false("assertionMonitor.assertions")
483
+
484
+ return True
485
+
486
+ @staticmethod
487
+ def _get_assertion_parameters(
488
+ monitor: Monitor,
489
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
490
+ ) -> Optional[models.AssertionEvaluationParametersClass]:
491
+ """Get the assertion parameters from the monitor."""
492
+ # We know these are not None from _has_valid_monitor_info check
493
+ assert (
494
+ monitor is not None
495
+ and monitor.info is not None
496
+ and monitor.info.assertionMonitor is not None
497
+ )
498
+ assertion_monitor = monitor.info.assertionMonitor
499
+ assert (
500
+ assertion_monitor is not None and assertion_monitor.assertions is not None
501
+ )
502
+ assertions = assertion_monitor.assertions
503
+
504
+ if assertions[0].parameters is None:
505
+ logger.warning(
506
+ f"Monitor {monitor.urn} does not have a assertionMonitor.assertions[0].parameters, defaulting detection mechanism to {default}"
507
+ )
508
+ return None
509
+ return assertions[0].parameters
510
+
511
+ @staticmethod
512
+ def _get_created_by(assertion: Assertion) -> Optional[CorpUserUrn]:
513
+ if assertion.source is None:
514
+ logger.warning(f"Assertion {assertion.urn} does not have a source")
515
+ return None
516
+ if isinstance(assertion.source, models.AssertionSourceClass):
517
+ if assertion.source.created is None:
518
+ logger.warning(
519
+ f"Assertion {assertion.urn} does not have a created by in the source"
520
+ )
521
+ return None
522
+ return CorpUserUrn.from_string(assertion.source.created.actor)
523
+ elif isinstance(assertion.source, models.AssertionSourceTypeClass):
524
+ logger.warning(
525
+ f"Assertion {assertion.urn} has a source type with no created by"
526
+ )
527
+ return None
528
+ return None
529
+
530
+ @staticmethod
531
+ def _get_created_at(assertion: Assertion) -> Union[datetime, None]:
532
+ if assertion.source is None:
533
+ logger.warning(f"Assertion {assertion.urn} does not have a source")
534
+ return None
535
+ if isinstance(assertion.source, models.AssertionSourceClass):
512
536
  if assertion.source.created is None:
513
537
  logger.warning(
514
538
  f"Assertion {assertion.urn} does not have a created by in the source"
@@ -563,6 +587,124 @@ class _AssertionPublic(ABC):
563
587
  """
564
588
  pass
565
589
 
590
+ @staticmethod
591
+ def _get_additional_filter(assertion: Assertion) -> Optional[str]:
592
+ """Get the additional filter SQL from the assertion."""
593
+ if assertion.info is None:
594
+ logger.warning(
595
+ f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
596
+ )
597
+ return None
598
+ if (
599
+ not isinstance(
600
+ assertion.info,
601
+ _AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES,
602
+ )
603
+ or assertion.info.filter is None
604
+ ):
605
+ logger.warning(
606
+ f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
607
+ )
608
+ return None
609
+ if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
610
+ raise SDKNotYetSupportedError(
611
+ f"DatasetFilterType {assertion.info.filter.type}"
612
+ )
613
+ return assertion.info.filter.sql
614
+
615
+ @staticmethod
616
+ def _get_field_value_detection_mechanism(
617
+ assertion: Assertion,
618
+ parameters: models.AssertionEvaluationParametersClass,
619
+ ) -> _DetectionMechanismTypes:
620
+ """Get the detection mechanism for field value based freshness."""
621
+ # We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
622
+ assert parameters.datasetFreshnessParameters is not None
623
+ field = parameters.datasetFreshnessParameters.field
624
+
625
+ if field is None or field.kind is None:
626
+ logger.warning(
627
+ f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
628
+ )
629
+ return DEFAULT_DETECTION_MECHANISM
630
+
631
+ column_name = field.path
632
+ additional_filter = _AssertionPublic._get_additional_filter(assertion)
633
+
634
+ if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
635
+ return DetectionMechanism.LAST_MODIFIED_COLUMN(
636
+ column_name=column_name, additional_filter=additional_filter
637
+ )
638
+ elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
639
+ return DetectionMechanism.HIGH_WATERMARK_COLUMN(
640
+ column_name=column_name, additional_filter=additional_filter
641
+ )
642
+ else:
643
+ raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
644
+
645
+ @staticmethod
646
+ def _warn_and_return_default_detection_mechanism(
647
+ monitor: Monitor,
648
+ field_name: str,
649
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
650
+ ) -> Optional[_DetectionMechanismTypes]:
651
+ """Helper method to log a warning and return default detection mechanism."""
652
+ logger.warning(
653
+ f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
654
+ )
655
+ return default
656
+
657
+ @staticmethod
658
+ def _check_valid_monitor_info(
659
+ monitor: Monitor,
660
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
661
+ ) -> Optional[models.AssertionEvaluationParametersClass]:
662
+ """Check if monitor has valid info and get assertion parameters.
663
+
664
+ Returns:
665
+ The assertion parameters if monitor info is valid, None otherwise.
666
+ """
667
+ if not _AssertionPublic._has_valid_monitor_info(monitor):
668
+ return None
669
+
670
+ parameters = _AssertionPublic._get_assertion_parameters(monitor)
671
+ if parameters is None:
672
+ return None
673
+
674
+ return parameters
675
+
676
+ @staticmethod
677
+ def _get_validated_detection_context(
678
+ monitor: Monitor,
679
+ assertion: Assertion,
680
+ expected_parameters_type: str,
681
+ expected_info_class: type,
682
+ default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
683
+ ) -> Optional[models.AssertionEvaluationParametersClass]:
684
+ """
685
+ Validate and extract the detection context (parameters) for detection mechanism logic.
686
+ Returns the parameters if all checks pass, otherwise None.
687
+ """
688
+ parameters = _AssertionPublic._check_valid_monitor_info(monitor, default)
689
+ if parameters is None:
690
+ return None
691
+ if parameters.type != expected_parameters_type:
692
+ logger.warning(
693
+ f"Expected {expected_parameters_type} parameters type, got {parameters.type}, defaulting detection mechanism to {default}"
694
+ )
695
+ return None
696
+ if assertion.info is None:
697
+ _AssertionPublic._warn_and_return_default_detection_mechanism(
698
+ monitor, "info", default
699
+ )
700
+ return None
701
+ if not isinstance(assertion.info, expected_info_class):
702
+ logger.warning(
703
+ f"Expected {expected_info_class.__name__}, got {type(assertion.info).__name__}, defaulting detection mechanism to {default}"
704
+ )
705
+ return None
706
+ return parameters
707
+
566
708
 
567
709
  class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
568
710
  """
@@ -619,8 +761,6 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
619
761
  sensitivity=sensitivity,
620
762
  exclusion_windows=exclusion_windows,
621
763
  training_data_lookback_days=training_data_lookback_days,
622
- incident_behavior=incident_behavior,
623
- detection_mechanism=detection_mechanism,
624
764
  )
625
765
  # Then initialize the parent class
626
766
  _AssertionPublic.__init__(
@@ -629,6 +769,8 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
629
769
  dataset_urn=dataset_urn,
630
770
  display_name=display_name,
631
771
  mode=mode,
772
+ incident_behavior=incident_behavior,
773
+ detection_mechanism=detection_mechanism,
632
774
  created_by=created_by,
633
775
  created_at=created_at,
634
776
  updated_by=updated_by,
@@ -661,6 +803,43 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
661
803
  tags=cls._get_tags(assertion),
662
804
  )
663
805
 
806
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """Get the detection mechanism for freshness assertions.

    Args:
        assertion: The assertion entity whose info is validated.
        monitor: The monitor entity carrying the evaluation parameters.
        default: Value returned whenever the detection mechanism cannot be
            derived from the monitor (validation failed or the freshness
            parameters are missing).

    Returns:
        The detection mechanism matching the monitor's freshness source
        type, or ``default`` when it cannot be determined.

    Raises:
        SDKNotYetSupportedError: If the source type is FILE_METADATA or any
            other source type not handled below.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
        models.FreshnessAssertionInfoClass,
        default,
    )
    if parameters is None:
        # Validation already logged the reason; fall back to the caller's default.
        return default
    if parameters.datasetFreshnessParameters is None:
        # Log the value that is actually returned: callers may pass a
        # `default` that differs from DEFAULT_DETECTION_MECHANISM.
        logger.warning(
            f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
        )
        return default
    source_type = parameters.datasetFreshnessParameters.sourceType
    if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
        return DetectionMechanism.AUDIT_LOG
    elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
        # Field-value mechanisms need extra details resolved by the shared helper.
        return _AssertionPublic._get_field_value_detection_mechanism(
            assertion, parameters
        )
    elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
        return DetectionMechanism.DATAHUB_OPERATION
    elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
        raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
    else:
        raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
842
+
664
843
 
665
844
  class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
666
845
  """
@@ -717,8 +896,6 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
717
896
  sensitivity=sensitivity,
718
897
  exclusion_windows=exclusion_windows,
719
898
  training_data_lookback_days=training_data_lookback_days,
720
- incident_behavior=incident_behavior,
721
- detection_mechanism=detection_mechanism,
722
899
  )
723
900
  # Then initialize the parent class
724
901
  _AssertionPublic.__init__(
@@ -727,6 +904,8 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
727
904
  dataset_urn=dataset_urn,
728
905
  display_name=display_name,
729
906
  mode=mode,
907
+ incident_behavior=incident_behavior,
908
+ detection_mechanism=detection_mechanism,
730
909
  created_by=created_by,
731
910
  created_at=created_at,
732
911
  updated_by=updated_by,
@@ -759,9 +938,213 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
759
938
  tags=cls._get_tags(assertion),
760
939
  )
761
940
 
941
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """Get the detection mechanism for volume assertions.

    Args:
        assertion: The assertion entity whose info is validated.
        monitor: The monitor entity carrying the evaluation parameters.
        default: Value returned when the detection context fails validation.
            When the volume parameters are missing and ``default`` is None,
            DEFAULT_DETECTION_MECHANISM is returned instead.

    Returns:
        The detection mechanism matching the monitor's volume source type,
        or the fallback described above when it cannot be determined.

    Raises:
        SDKNotYetSupportedError: If the source type is not one of the
            source types handled below.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
        models.VolumeAssertionInfoClass,
        default,
    )
    if parameters is None:
        # NOTE(review): this path may return None, while the missing-parameters
        # path below never does -- confirm the asymmetry is intended.
        return default
    if parameters.datasetVolumeParameters is None:
        # Resolve the fallback first so the warning names the value that is
        # actually returned rather than always DEFAULT_DETECTION_MECHANISM.
        fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
        logger.warning(
            f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {fallback}"
        )
        return fallback
    source_type = parameters.datasetVolumeParameters.sourceType
    if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
        # Query-based detection carries the assertion's additional filter.
        additional_filter = _AssertionPublic._get_additional_filter(assertion)
        return DetectionMechanism.QUERY(additional_filter=additional_filter)
    elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
        return DetectionMechanism.DATASET_PROFILE
    else:
        raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
975
+
762
976
 
763
- AssertionTypes = Union[
764
- SmartFreshnessAssertion,
765
- SmartVolumeAssertion,
766
- # TODO: Add other assertion types here as we add them.
767
- ]
977
+ class FreshnessAssertion(_HasSchedule, _AssertionPublic):
978
+ """
979
+ A class that represents a freshness assertion.
980
+ """
981
+
982
def __init__(
    self,
    *,
    urn: AssertionUrn,
    dataset_urn: DatasetUrn,
    display_name: str,
    mode: AssertionMode,
    schedule: models.CronScheduleClass,
    freshness_schedule_check_type: Union[
        str, models.FreshnessAssertionScheduleTypeClass
    ],
    lookback_window: Optional[TimeWindowSizeInputTypes],
    tags: list[TagUrn],
    incident_behavior: list[AssertionIncidentBehavior],
    detection_mechanism: Optional[
        _DetectionMechanismTypes
    ] = DEFAULT_DETECTION_MECHANISM,
    created_by: Optional[CorpUserUrn] = None,
    created_at: Union[datetime, None] = None,
    updated_by: Optional[CorpUserUrn] = None,
    updated_at: Optional[datetime] = None,
):
    """
    Build a freshness assertion from already-resolved values.

    Note: the values are meant to be read from the assertion object, not
    assigned on it. To change an assertion, use the `upsert_*` method.

    Args:
        urn: Urn identifying the assertion.
        dataset_urn: Urn of the dataset the assertion applies to.
        display_name: Display name of the assertion.
        mode: Mode of the assertion (active, inactive).
        schedule: Schedule of the assertion.
        freshness_schedule_check_type: Type of freshness schedule check used by the assertion.
        lookback_window: Lookback window used by the assertion.
        tags: Tags applied to the assertion.
        incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
        detection_mechanism: Detection mechanism of the assertion.
        created_by: Urn of the user that created the assertion.
        created_at: Timestamp of when the assertion was created.
        updated_by: Urn of the user that last updated the assertion.
        updated_at: Timestamp of when the assertion was last updated.
    """
    # Both parents are initialized explicitly: the schedule mixin first,
    # then the public assertion base with the shared fields.
    _HasSchedule.__init__(self, schedule=schedule)
    _AssertionPublic.__init__(
        self,
        urn=urn,
        dataset_urn=dataset_urn,
        display_name=display_name,
        mode=mode,
        tags=tags,
        incident_behavior=incident_behavior,
        detection_mechanism=detection_mechanism,
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
    )
    # Freshness-specific state, exposed through the properties of the same name.
    self._lookback_window = lookback_window
    self._freshness_schedule_check_type = freshness_schedule_check_type
1042
+
1043
@property
def freshness_schedule_check_type(
    self,
) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
    """Return the freshness schedule check type stored on this assertion at construction."""
    return self._freshness_schedule_check_type
1048
+
1049
@property
def lookback_window(self) -> Optional[TimeWindowSizeInputTypes]:
    """Return the lookback window stored on this assertion at construction (may be None)."""
    return self._lookback_window
1052
+
1053
@staticmethod
def _get_freshness_schedule_check_type(
    assertion: Assertion,
) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
    """Read the freshness schedule type from the assertion's info.

    Args:
        assertion: The assertion entity to inspect.

    Returns:
        The ``type`` of the schedule found on the freshness assertion info.

    Raises:
        SDKNotYetSupportedError: If the assertion has no info, its info is
            not a freshness assertion info, or the info has no schedule.
    """
    info = assertion.info
    # Guard clauses: reject every unsupported shape before reading the type.
    if info is None:
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
        )
    if not isinstance(info, models.FreshnessAssertionInfoClass):
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} is not a freshness assertion"
        )
    freshness_schedule = info.schedule
    if freshness_schedule is None:
        raise SDKNotYetSupportedError(
            f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
        )
    return freshness_schedule.type
1071
+
1072
@staticmethod
def _get_lookback_window(
    assertion: Assertion,
) -> Optional[models.FixedIntervalScheduleClass]:
    """Read the fixed-interval lookback window from the assertion's info.

    Args:
        assertion: The assertion entity to inspect.

    Returns:
        The ``fixedInterval`` of the schedule found on the freshness
        assertion info, as stored there.

    Raises:
        SDKNotYetSupportedError: If the assertion has no info, its info is
            not a freshness assertion info, or the info has no schedule.
    """
    info = assertion.info
    # Guard clauses: reject every unsupported shape before reading the interval.
    if info is None:
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
        )
    if not isinstance(info, models.FreshnessAssertionInfoClass):
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} is not a freshness assertion"
        )
    freshness_schedule = info.schedule
    if freshness_schedule is None:
        raise SDKNotYetSupportedError(
            f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
        )
    return freshness_schedule.fixedInterval
1090
+
1091
@classmethod
def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
    """
    Build a freshness assertion out of its assertion and monitor entities.

    Every field is resolved up front, one at a time and in a fixed order,
    so an extraction failure surfaces before the instance is constructed.
    """
    urn = assertion.urn
    dataset_urn = assertion.dataset
    # A missing (or empty) description is represented as an empty display name.
    display_name = assertion.description or ""
    mode = cls._get_mode(monitor)
    schedule = cls._get_schedule(monitor)
    check_type = cls._get_freshness_schedule_check_type(assertion)
    lookback_window = cls._get_lookback_window(assertion)
    incident_behavior = cls._get_incident_behavior(assertion)
    detection_mechanism = cls._get_detection_mechanism(assertion, monitor)
    created_by = cls._get_created_by(assertion)
    created_at = cls._get_created_at(assertion)
    updated_by = cls._get_updated_by(assertion)
    updated_at = cls._get_updated_at(assertion)
    tags = cls._get_tags(assertion)
    return cls(
        urn=urn,
        dataset_urn=dataset_urn,
        display_name=display_name,
        mode=mode,
        schedule=schedule,
        freshness_schedule_check_type=check_type,
        lookback_window=lookback_window,
        incident_behavior=incident_behavior,
        detection_mechanism=detection_mechanism,
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
        tags=tags,
    )
1114
+
1115
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """Get the detection mechanism for freshness assertions.

    Args:
        assertion: The assertion entity whose info is validated.
        monitor: The monitor entity carrying the evaluation parameters.
        default: Value returned whenever the detection mechanism cannot be
            derived from the monitor (validation failed or the freshness
            parameters are missing).

    Returns:
        The detection mechanism matching the monitor's freshness source
        type, or ``default`` when it cannot be determined.

    Raises:
        SDKNotYetSupportedError: If the source type is FILE_METADATA or any
            other source type not handled below.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
        models.FreshnessAssertionInfoClass,
        default,
    )
    if parameters is None:
        # Validation already logged the reason; fall back to the caller's default.
        return default
    if parameters.datasetFreshnessParameters is None:
        # Log the value that is actually returned: callers may pass a
        # `default` that differs from DEFAULT_DETECTION_MECHANISM.
        logger.warning(
            f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
        )
        return default
    source_type = parameters.datasetFreshnessParameters.sourceType
    if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
        return DetectionMechanism.AUDIT_LOG
    elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
        # Field-value mechanisms need extra details resolved by the shared helper.
        return _AssertionPublic._get_field_value_detection_mechanism(
            assertion, parameters
        )
    elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
        return DetectionMechanism.DATAHUB_OPERATION
    elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
        raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
    else:
        raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")