acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
- acryl_datahub_cloud/metadata/schema.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
- acryl_datahub_cloud/sdk/__init__.py +10 -2
- acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
- acryl_datahub_cloud/sdk/assertion/types.py +18 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
- acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
|
@@ -14,7 +14,7 @@ from typing import Optional, Union
|
|
|
14
14
|
|
|
15
15
|
from typing_extensions import Self
|
|
16
16
|
|
|
17
|
-
from acryl_datahub_cloud.sdk.assertion_input import (
|
|
17
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
18
18
|
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
19
19
|
DEFAULT_DETECTION_MECHANISM,
|
|
20
20
|
DEFAULT_SCHEDULE,
|
|
@@ -24,14 +24,23 @@ from acryl_datahub_cloud.sdk.assertion_input import (
|
|
|
24
24
|
ExclusionWindowTypes,
|
|
25
25
|
FixedRangeExclusionWindow,
|
|
26
26
|
InferenceSensitivity,
|
|
27
|
+
TimeWindowSizeInputTypes,
|
|
27
28
|
_DetectionMechanismTypes,
|
|
28
29
|
)
|
|
30
|
+
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
31
|
+
MetricInputType,
|
|
32
|
+
OperatorInputType,
|
|
33
|
+
RangeInputType,
|
|
34
|
+
RangeTypeInputType,
|
|
35
|
+
ValueInputType,
|
|
36
|
+
ValueTypeInputType,
|
|
37
|
+
)
|
|
29
38
|
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
30
39
|
from acryl_datahub_cloud.sdk.entities.monitor import (
|
|
31
40
|
Monitor,
|
|
32
41
|
_get_nested_field_for_entity_with_default,
|
|
33
42
|
)
|
|
34
|
-
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError
|
|
43
|
+
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
|
|
35
44
|
from datahub.emitter.mce_builder import parse_ts_millis
|
|
36
45
|
from datahub.metadata import schema_classes as models
|
|
37
46
|
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
@@ -85,21 +94,12 @@ class _HasSmartFunctionality:
|
|
|
85
94
|
Mixin class that provides smart functionality for assertions.
|
|
86
95
|
"""
|
|
87
96
|
|
|
88
|
-
_SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
|
|
89
|
-
models.FreshnessAssertionInfoClass,
|
|
90
|
-
models.VolumeAssertionInfoClass,
|
|
91
|
-
)
|
|
92
|
-
|
|
93
97
|
def __init__(
|
|
94
98
|
self,
|
|
95
99
|
*,
|
|
96
100
|
sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
|
|
97
101
|
exclusion_windows: list[ExclusionWindowTypes],
|
|
98
102
|
training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
99
|
-
incident_behavior: list[AssertionIncidentBehavior],
|
|
100
|
-
detection_mechanism: Optional[
|
|
101
|
-
_DetectionMechanismTypes
|
|
102
|
-
] = DEFAULT_DETECTION_MECHANISM,
|
|
103
103
|
) -> None:
|
|
104
104
|
"""
|
|
105
105
|
Initialize the smart functionality mixin.
|
|
@@ -115,8 +115,6 @@ class _HasSmartFunctionality:
|
|
|
115
115
|
self._sensitivity = sensitivity
|
|
116
116
|
self._exclusion_windows = exclusion_windows
|
|
117
117
|
self._training_data_lookback_days = training_data_lookback_days
|
|
118
|
-
self._incident_behavior = incident_behavior
|
|
119
|
-
self._detection_mechanism = detection_mechanism
|
|
120
118
|
|
|
121
119
|
@property
|
|
122
120
|
def sensitivity(self) -> InferenceSensitivity:
|
|
@@ -130,14 +128,6 @@ class _HasSmartFunctionality:
|
|
|
130
128
|
def training_data_lookback_days(self) -> int:
|
|
131
129
|
return self._training_data_lookback_days
|
|
132
130
|
|
|
133
|
-
@property
|
|
134
|
-
def incident_behavior(self) -> list[AssertionIncidentBehavior]:
|
|
135
|
-
return self._incident_behavior
|
|
136
|
-
|
|
137
|
-
@property
|
|
138
|
-
def detection_mechanism(self) -> Optional[_DetectionMechanismTypes]:
|
|
139
|
-
return self._detection_mechanism
|
|
140
|
-
|
|
141
131
|
@staticmethod
|
|
142
132
|
def _get_sensitivity(monitor: Monitor) -> InferenceSensitivity:
|
|
143
133
|
# 1. Check if the monitor has a sensitivity field
|
|
@@ -186,213 +176,158 @@ class _HasSmartFunctionality:
|
|
|
186
176
|
assert isinstance(retrieved, int)
|
|
187
177
|
return retrieved
|
|
188
178
|
|
|
189
|
-
@staticmethod
|
|
190
|
-
def _get_detection_mechanism(
|
|
191
|
-
assertion: Assertion,
|
|
192
|
-
monitor: Monitor,
|
|
193
|
-
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
194
|
-
) -> Optional[_DetectionMechanismTypes]:
|
|
195
|
-
"""Get the detection mechanism from the monitor and assertion."""
|
|
196
|
-
if not _HasSmartFunctionality._has_valid_monitor_info(monitor):
|
|
197
|
-
return default
|
|
198
179
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
) -> Optional[_DetectionMechanismTypes]:
|
|
204
|
-
logger.warning(
|
|
205
|
-
f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
|
|
206
|
-
)
|
|
207
|
-
return default
|
|
180
|
+
class _HasColumnMetricFunctionality:
|
|
181
|
+
"""
|
|
182
|
+
Mixin class that provides column metric functionality for assertions.
|
|
183
|
+
"""
|
|
208
184
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
185
|
+
def __init__(
|
|
186
|
+
self,
|
|
187
|
+
column_name: str,
|
|
188
|
+
metric_type: MetricInputType,
|
|
189
|
+
operator: OperatorInputType,
|
|
190
|
+
value: Optional[ValueInputType] = None,
|
|
191
|
+
value_type: Optional[ValueTypeInputType] = None,
|
|
192
|
+
range: Optional[RangeInputType] = None,
|
|
193
|
+
range_type: Optional[RangeTypeInputType] = None,
|
|
194
|
+
):
|
|
195
|
+
self._column_name = column_name
|
|
196
|
+
self._metric_type = metric_type
|
|
197
|
+
self._operator = operator
|
|
198
|
+
self._value = value
|
|
199
|
+
self._value_type = value_type
|
|
200
|
+
self._range = range
|
|
201
|
+
self._range_type = range_type
|
|
212
202
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
|
|
217
|
-
]:
|
|
218
|
-
if assertion.info is None:
|
|
219
|
-
return _warn_and_return_default_detection_mechanism("info", default)
|
|
220
|
-
if isinstance(assertion.info, models.VolumeAssertionInfoClass):
|
|
221
|
-
return _HasSmartFunctionality._get_volume_detection_mechanism(
|
|
222
|
-
assertion, parameters, default
|
|
223
|
-
)
|
|
224
|
-
elif isinstance(assertion.info, models.FreshnessAssertionInfoClass):
|
|
225
|
-
return _HasSmartFunctionality._get_freshness_detection_mechanism(
|
|
226
|
-
assertion, parameters, default
|
|
227
|
-
)
|
|
228
|
-
# TODO: Consider moving the detection mechanism logic to the assertion classes themselves e.g. _get_assertion_specific_detection_mechanism as an abstract method
|
|
229
|
-
# TODO: Add support here for other detection mechanisms when other assertion types are supported
|
|
230
|
-
else:
|
|
231
|
-
raise SDKNotYetSupportedError(
|
|
232
|
-
f"AssertionType {type(assertion.info).__name__}"
|
|
233
|
-
)
|
|
234
|
-
else:
|
|
235
|
-
raise SDKNotYetSupportedError(
|
|
236
|
-
f"AssertionEvaluationParametersType {parameters.type} not supported"
|
|
237
|
-
)
|
|
203
|
+
@property
|
|
204
|
+
def column_name(self) -> str:
|
|
205
|
+
return self._column_name
|
|
238
206
|
|
|
239
|
-
@
|
|
240
|
-
def
|
|
241
|
-
|
|
207
|
+
@property
|
|
208
|
+
def metric_type(self) -> MetricInputType:
|
|
209
|
+
return self._metric_type
|
|
242
210
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
)
|
|
247
|
-
return False
|
|
211
|
+
@property
|
|
212
|
+
def operator(self) -> OperatorInputType:
|
|
213
|
+
return self._operator
|
|
248
214
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
return _warn_and_return_false("assertionMonitor")
|
|
253
|
-
if (
|
|
254
|
-
monitor.info.assertionMonitor.assertions is None
|
|
255
|
-
or len(monitor.info.assertionMonitor.assertions) == 0
|
|
256
|
-
):
|
|
257
|
-
return _warn_and_return_false("assertionMonitor.assertions")
|
|
215
|
+
@property
|
|
216
|
+
def value(self) -> Optional[ValueInputType]:
|
|
217
|
+
return self._value
|
|
258
218
|
|
|
259
|
-
|
|
219
|
+
@property
|
|
220
|
+
def value_type(self) -> Optional[ValueTypeInputType]:
|
|
221
|
+
return self._value_type
|
|
222
|
+
|
|
223
|
+
@property
|
|
224
|
+
def range(self) -> Optional[RangeInputType]:
|
|
225
|
+
return self._range
|
|
226
|
+
|
|
227
|
+
@property
|
|
228
|
+
def range_type(self) -> Optional[RangeTypeInputType]:
|
|
229
|
+
return self._range_type
|
|
260
230
|
|
|
261
231
|
@staticmethod
|
|
262
|
-
def
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
# We know these are not None from _has_valid_monitor_info check
|
|
268
|
-
assert (
|
|
269
|
-
monitor is not None
|
|
270
|
-
and monitor.info is not None
|
|
271
|
-
and monitor.info.assertionMonitor is not None
|
|
272
|
-
)
|
|
273
|
-
assertion_monitor = monitor.info.assertionMonitor
|
|
274
|
-
assert (
|
|
275
|
-
assertion_monitor is not None and assertion_monitor.assertions is not None
|
|
232
|
+
def _get_column_name(assertion: Assertion) -> str:
|
|
233
|
+
column_name = _get_nested_field_for_entity_with_default(
|
|
234
|
+
assertion,
|
|
235
|
+
field_path="info.fieldMetricAssertion.field.path",
|
|
236
|
+
default=None,
|
|
276
237
|
)
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
logger.warning(
|
|
281
|
-
f"Monitor {monitor.urn} does not have a assertionMonitor.assertions[0].parameters, defaulting detection mechanism to {default}"
|
|
238
|
+
if column_name is None:
|
|
239
|
+
raise SDKUsageError(
|
|
240
|
+
f"Column name is required for column metric assertions. Assertion {assertion.urn} does not have a column name"
|
|
282
241
|
)
|
|
283
|
-
|
|
284
|
-
return assertions[0].parameters
|
|
242
|
+
return column_name
|
|
285
243
|
|
|
286
244
|
@staticmethod
|
|
287
|
-
def
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
if
|
|
294
|
-
|
|
295
|
-
f"
|
|
245
|
+
def _get_metric_type(assertion: Assertion) -> MetricInputType:
|
|
246
|
+
metric_type = _get_nested_field_for_entity_with_default(
|
|
247
|
+
assertion,
|
|
248
|
+
field_path="info.fieldMetricAssertion.metric",
|
|
249
|
+
default=None,
|
|
250
|
+
)
|
|
251
|
+
if metric_type is None:
|
|
252
|
+
raise SDKUsageError(
|
|
253
|
+
f"Metric type is required for column metric assertions. Assertion {assertion.urn} does not have a metric type"
|
|
296
254
|
)
|
|
297
|
-
|
|
255
|
+
return metric_type
|
|
298
256
|
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
257
|
+
@staticmethod
|
|
258
|
+
def _get_operator(assertion: Assertion) -> OperatorInputType:
|
|
259
|
+
operator = _get_nested_field_for_entity_with_default(
|
|
260
|
+
assertion,
|
|
261
|
+
field_path="info.fieldMetricAssertion.operator",
|
|
262
|
+
default=None,
|
|
263
|
+
)
|
|
264
|
+
if operator is None:
|
|
265
|
+
raise SDKUsageError(
|
|
266
|
+
f"Operator is required for column metric assertions. Assertion {assertion.urn} does not have an operator"
|
|
307
267
|
)
|
|
308
|
-
|
|
309
|
-
return DetectionMechanism.DATAHUB_OPERATION
|
|
310
|
-
elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
|
|
311
|
-
raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
|
|
312
|
-
else:
|
|
313
|
-
raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|
|
268
|
+
return operator
|
|
314
269
|
|
|
315
270
|
@staticmethod
|
|
316
|
-
def
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
logger.warning(
|
|
324
|
-
f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
|
|
325
|
-
)
|
|
326
|
-
if default is None:
|
|
327
|
-
return DEFAULT_DETECTION_MECHANISM
|
|
328
|
-
else:
|
|
329
|
-
return default
|
|
271
|
+
def _get_value(assertion: Assertion) -> Optional[ValueInputType]:
|
|
272
|
+
value = _get_nested_field_for_entity_with_default(
|
|
273
|
+
assertion,
|
|
274
|
+
field_path="info.fieldMetricAssertion.parameters.value.value",
|
|
275
|
+
default=None,
|
|
276
|
+
)
|
|
277
|
+
return value
|
|
330
278
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
else:
|
|
340
|
-
raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
|
|
279
|
+
@staticmethod
|
|
280
|
+
def _get_value_type(assertion: Assertion) -> Optional[ValueTypeInputType]:
|
|
281
|
+
value_type = _get_nested_field_for_entity_with_default(
|
|
282
|
+
assertion,
|
|
283
|
+
field_path="info.fieldMetricAssertion.parameters.value.type",
|
|
284
|
+
default=None,
|
|
285
|
+
)
|
|
286
|
+
return value_type
|
|
341
287
|
|
|
342
288
|
@staticmethod
|
|
343
|
-
def
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
289
|
+
def _get_range(assertion: Assertion) -> Optional[RangeInputType]:
|
|
290
|
+
min_value = _get_nested_field_for_entity_with_default(
|
|
291
|
+
assertion,
|
|
292
|
+
field_path="info.fieldMetricAssertion.parameters.minValue",
|
|
293
|
+
default=None,
|
|
294
|
+
)
|
|
295
|
+
max_value = _get_nested_field_for_entity_with_default(
|
|
296
|
+
assertion,
|
|
297
|
+
field_path="info.fieldMetricAssertion.parameters.maxValue",
|
|
298
|
+
default=None,
|
|
299
|
+
)
|
|
351
300
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
)
|
|
356
|
-
return DEFAULT_DETECTION_MECHANISM
|
|
301
|
+
# If both are None, return None
|
|
302
|
+
if min_value is None and max_value is None:
|
|
303
|
+
return None
|
|
357
304
|
|
|
358
|
-
|
|
359
|
-
|
|
305
|
+
# Extract the value from the parameter objects if they exist
|
|
306
|
+
if min_value is not None and hasattr(min_value, "value"):
|
|
307
|
+
min_value = min_value.value
|
|
308
|
+
if max_value is not None and hasattr(max_value, "value"):
|
|
309
|
+
max_value = max_value.value
|
|
360
310
|
|
|
361
|
-
|
|
362
|
-
return DetectionMechanism.LAST_MODIFIED_COLUMN(
|
|
363
|
-
column_name=column_name, additional_filter=additional_filter
|
|
364
|
-
)
|
|
365
|
-
elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
|
|
366
|
-
return DetectionMechanism.HIGH_WATERMARK_COLUMN(
|
|
367
|
-
column_name=column_name, additional_filter=additional_filter
|
|
368
|
-
)
|
|
369
|
-
else:
|
|
370
|
-
raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
|
|
311
|
+
return (min_value, max_value)
|
|
371
312
|
|
|
372
313
|
@staticmethod
|
|
373
|
-
def
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
logger.warning(
|
|
388
|
-
f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
|
|
389
|
-
)
|
|
314
|
+
def _get_range_type(assertion: Assertion) -> Optional[RangeTypeInputType]:
|
|
315
|
+
min_value_range_type = _get_nested_field_for_entity_with_default(
|
|
316
|
+
assertion,
|
|
317
|
+
field_path="info.fieldMetricAssertion.parameters.minValue.type",
|
|
318
|
+
default=None,
|
|
319
|
+
)
|
|
320
|
+
max_value_range_type = _get_nested_field_for_entity_with_default(
|
|
321
|
+
assertion,
|
|
322
|
+
field_path="info.fieldMetricAssertion.parameters.maxValue.type",
|
|
323
|
+
default=None,
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
# If both are None, return None instead of a tuple of Nones
|
|
327
|
+
if min_value_range_type is None and max_value_range_type is None:
|
|
390
328
|
return None
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
f"DatasetFilterType {assertion.info.filter.type}"
|
|
394
|
-
)
|
|
395
|
-
return assertion.info.filter.sql
|
|
329
|
+
|
|
330
|
+
return (min_value_range_type, max_value_range_type)
|
|
396
331
|
|
|
397
332
|
|
|
398
333
|
class _AssertionPublic(ABC):
|
|
@@ -400,6 +335,12 @@ class _AssertionPublic(ABC):
|
|
|
400
335
|
Abstract base class that represents a public facing assertion and contains the common properties of all assertions.
|
|
401
336
|
"""
|
|
402
337
|
|
|
338
|
+
_SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
|
|
339
|
+
models.FreshnessAssertionInfoClass,
|
|
340
|
+
models.VolumeAssertionInfoClass,
|
|
341
|
+
models.FieldAssertionInfoClass,
|
|
342
|
+
)
|
|
343
|
+
|
|
403
344
|
def __init__(
|
|
404
345
|
self,
|
|
405
346
|
*,
|
|
@@ -408,6 +349,10 @@ class _AssertionPublic(ABC):
|
|
|
408
349
|
display_name: str,
|
|
409
350
|
mode: AssertionMode,
|
|
410
351
|
tags: list[TagUrn],
|
|
352
|
+
incident_behavior: list[AssertionIncidentBehavior],
|
|
353
|
+
detection_mechanism: Optional[
|
|
354
|
+
_DetectionMechanismTypes
|
|
355
|
+
] = DEFAULT_DETECTION_MECHANISM,
|
|
411
356
|
created_by: Optional[CorpUserUrn] = None,
|
|
412
357
|
created_at: Union[datetime, None] = None,
|
|
413
358
|
updated_by: Optional[CorpUserUrn] = None,
|
|
@@ -431,6 +376,8 @@ class _AssertionPublic(ABC):
|
|
|
431
376
|
self._dataset_urn = dataset_urn
|
|
432
377
|
self._display_name = display_name
|
|
433
378
|
self._mode = mode
|
|
379
|
+
self._incident_behavior = incident_behavior
|
|
380
|
+
self._detection_mechanism = detection_mechanism
|
|
434
381
|
self._created_by = created_by
|
|
435
382
|
self._created_at = created_at
|
|
436
383
|
self._updated_by = updated_by
|
|
@@ -453,6 +400,14 @@ class _AssertionPublic(ABC):
|
|
|
453
400
|
def mode(self) -> AssertionMode:
|
|
454
401
|
return self._mode
|
|
455
402
|
|
|
403
|
+
@property
|
|
404
|
+
def incident_behavior(self) -> list[AssertionIncidentBehavior]:
|
|
405
|
+
return self._incident_behavior
|
|
406
|
+
|
|
407
|
+
@property
|
|
408
|
+
def detection_mechanism(self) -> Optional[_DetectionMechanismTypes]:
|
|
409
|
+
return self._detection_mechanism
|
|
410
|
+
|
|
456
411
|
@property
|
|
457
412
|
def created_by(self) -> Optional[CorpUserUrn]:
|
|
458
413
|
return self._created_by
|
|
@@ -485,30 +440,99 @@ class _AssertionPublic(ABC):
|
|
|
485
440
|
return incident_behaviors
|
|
486
441
|
|
|
487
442
|
@staticmethod
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
443
|
+
@abstractmethod
|
|
444
|
+
def _get_detection_mechanism(
|
|
445
|
+
assertion: Assertion,
|
|
446
|
+
monitor: Monitor,
|
|
447
|
+
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
448
|
+
) -> Optional[_DetectionMechanismTypes]:
|
|
449
|
+
"""Get the detection mechanism from the monitor and assertion.
|
|
450
|
+
|
|
451
|
+
This method should be implemented by each assertion class to handle
|
|
452
|
+
its specific detection mechanism logic.
|
|
453
|
+
|
|
454
|
+
Args:
|
|
455
|
+
assertion: The assertion entity
|
|
456
|
+
monitor: The monitor entity
|
|
457
|
+
default: Default detection mechanism to return if none is found
|
|
458
|
+
|
|
459
|
+
Returns:
|
|
460
|
+
The detection mechanism or default if none is found
|
|
461
|
+
"""
|
|
462
|
+
pass
|
|
463
|
+
|
|
464
|
+
@staticmethod
|
|
465
|
+
def _has_valid_monitor_info(monitor: Monitor) -> bool:
|
|
466
|
+
"""Check if monitor has valid info and assertion monitor."""
|
|
467
|
+
|
|
468
|
+
def _warn_and_return_false(field_name: str) -> bool:
|
|
500
469
|
logger.warning(
|
|
501
|
-
f"
|
|
470
|
+
f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
|
|
502
471
|
)
|
|
503
|
-
return
|
|
504
|
-
return None
|
|
472
|
+
return False
|
|
505
473
|
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
if
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
474
|
+
if monitor.info is None:
|
|
475
|
+
return _warn_and_return_false("info")
|
|
476
|
+
if monitor.info.assertionMonitor is None:
|
|
477
|
+
return _warn_and_return_false("assertionMonitor")
|
|
478
|
+
if (
|
|
479
|
+
monitor.info.assertionMonitor.assertions is None
|
|
480
|
+
or len(monitor.info.assertionMonitor.assertions) == 0
|
|
481
|
+
):
|
|
482
|
+
return _warn_and_return_false("assertionMonitor.assertions")
|
|
483
|
+
|
|
484
|
+
return True
|
|
485
|
+
|
|
486
|
+
@staticmethod
|
|
487
|
+
def _get_assertion_parameters(
|
|
488
|
+
monitor: Monitor,
|
|
489
|
+
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
490
|
+
) -> Optional[models.AssertionEvaluationParametersClass]:
|
|
491
|
+
"""Get the assertion parameters from the monitor."""
|
|
492
|
+
# We know these are not None from _has_valid_monitor_info check
|
|
493
|
+
assert (
|
|
494
|
+
monitor is not None
|
|
495
|
+
and monitor.info is not None
|
|
496
|
+
and monitor.info.assertionMonitor is not None
|
|
497
|
+
)
|
|
498
|
+
assertion_monitor = monitor.info.assertionMonitor
|
|
499
|
+
assert (
|
|
500
|
+
assertion_monitor is not None and assertion_monitor.assertions is not None
|
|
501
|
+
)
|
|
502
|
+
assertions = assertion_monitor.assertions
|
|
503
|
+
|
|
504
|
+
if assertions[0].parameters is None:
|
|
505
|
+
logger.warning(
|
|
506
|
+
f"Monitor {monitor.urn} does not have a assertionMonitor.assertions[0].parameters, defaulting detection mechanism to {default}"
|
|
507
|
+
)
|
|
508
|
+
return None
|
|
509
|
+
return assertions[0].parameters
|
|
510
|
+
|
|
511
|
+
@staticmethod
|
|
512
|
+
def _get_created_by(assertion: Assertion) -> Optional[CorpUserUrn]:
|
|
513
|
+
if assertion.source is None:
|
|
514
|
+
logger.warning(f"Assertion {assertion.urn} does not have a source")
|
|
515
|
+
return None
|
|
516
|
+
if isinstance(assertion.source, models.AssertionSourceClass):
|
|
517
|
+
if assertion.source.created is None:
|
|
518
|
+
logger.warning(
|
|
519
|
+
f"Assertion {assertion.urn} does not have a created by in the source"
|
|
520
|
+
)
|
|
521
|
+
return None
|
|
522
|
+
return CorpUserUrn.from_string(assertion.source.created.actor)
|
|
523
|
+
elif isinstance(assertion.source, models.AssertionSourceTypeClass):
|
|
524
|
+
logger.warning(
|
|
525
|
+
f"Assertion {assertion.urn} has a source type with no created by"
|
|
526
|
+
)
|
|
527
|
+
return None
|
|
528
|
+
return None
|
|
529
|
+
|
|
530
|
+
@staticmethod
|
|
531
|
+
def _get_created_at(assertion: Assertion) -> Union[datetime, None]:
|
|
532
|
+
if assertion.source is None:
|
|
533
|
+
logger.warning(f"Assertion {assertion.urn} does not have a source")
|
|
534
|
+
return None
|
|
535
|
+
if isinstance(assertion.source, models.AssertionSourceClass):
|
|
512
536
|
if assertion.source.created is None:
|
|
513
537
|
logger.warning(
|
|
514
538
|
f"Assertion {assertion.urn} does not have a created by in the source"
|
|
@@ -563,6 +587,124 @@ class _AssertionPublic(ABC):
|
|
|
563
587
|
"""
|
|
564
588
|
pass
|
|
565
589
|
|
|
590
|
+
@staticmethod
|
|
591
|
+
def _get_additional_filter(assertion: Assertion) -> Optional[str]:
|
|
592
|
+
"""Get the additional filter SQL from the assertion."""
|
|
593
|
+
if assertion.info is None:
|
|
594
|
+
logger.warning(
|
|
595
|
+
f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
|
|
596
|
+
)
|
|
597
|
+
return None
|
|
598
|
+
if (
|
|
599
|
+
not isinstance(
|
|
600
|
+
assertion.info,
|
|
601
|
+
_AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES,
|
|
602
|
+
)
|
|
603
|
+
or assertion.info.filter is None
|
|
604
|
+
):
|
|
605
|
+
logger.warning(
|
|
606
|
+
f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
|
|
607
|
+
)
|
|
608
|
+
return None
|
|
609
|
+
if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
|
|
610
|
+
raise SDKNotYetSupportedError(
|
|
611
|
+
f"DatasetFilterType {assertion.info.filter.type}"
|
|
612
|
+
)
|
|
613
|
+
return assertion.info.filter.sql
|
|
614
|
+
|
|
615
|
+
@staticmethod
|
|
616
|
+
def _get_field_value_detection_mechanism(
|
|
617
|
+
assertion: Assertion,
|
|
618
|
+
parameters: models.AssertionEvaluationParametersClass,
|
|
619
|
+
) -> _DetectionMechanismTypes:
|
|
620
|
+
"""Get the detection mechanism for field value based freshness."""
|
|
621
|
+
# We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
|
|
622
|
+
assert parameters.datasetFreshnessParameters is not None
|
|
623
|
+
field = parameters.datasetFreshnessParameters.field
|
|
624
|
+
|
|
625
|
+
if field is None or field.kind is None:
|
|
626
|
+
logger.warning(
|
|
627
|
+
f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
|
|
628
|
+
)
|
|
629
|
+
return DEFAULT_DETECTION_MECHANISM
|
|
630
|
+
|
|
631
|
+
column_name = field.path
|
|
632
|
+
additional_filter = _AssertionPublic._get_additional_filter(assertion)
|
|
633
|
+
|
|
634
|
+
if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
|
|
635
|
+
return DetectionMechanism.LAST_MODIFIED_COLUMN(
|
|
636
|
+
column_name=column_name, additional_filter=additional_filter
|
|
637
|
+
)
|
|
638
|
+
elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
|
|
639
|
+
return DetectionMechanism.HIGH_WATERMARK_COLUMN(
|
|
640
|
+
column_name=column_name, additional_filter=additional_filter
|
|
641
|
+
)
|
|
642
|
+
else:
|
|
643
|
+
raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
|
|
644
|
+
|
|
645
|
+
@staticmethod
|
|
646
|
+
def _warn_and_return_default_detection_mechanism(
|
|
647
|
+
monitor: Monitor,
|
|
648
|
+
field_name: str,
|
|
649
|
+
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
650
|
+
) -> Optional[_DetectionMechanismTypes]:
|
|
651
|
+
"""Helper method to log a warning and return default detection mechanism."""
|
|
652
|
+
logger.warning(
|
|
653
|
+
f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
|
|
654
|
+
)
|
|
655
|
+
return default
|
|
656
|
+
|
|
657
|
+
@staticmethod
|
|
658
|
+
def _check_valid_monitor_info(
|
|
659
|
+
monitor: Monitor,
|
|
660
|
+
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
661
|
+
) -> Optional[models.AssertionEvaluationParametersClass]:
|
|
662
|
+
"""Check if monitor has valid info and get assertion parameters.
|
|
663
|
+
|
|
664
|
+
Returns:
|
|
665
|
+
The assertion parameters if monitor info is valid, None otherwise.
|
|
666
|
+
"""
|
|
667
|
+
if not _AssertionPublic._has_valid_monitor_info(monitor):
|
|
668
|
+
return None
|
|
669
|
+
|
|
670
|
+
parameters = _AssertionPublic._get_assertion_parameters(monitor)
|
|
671
|
+
if parameters is None:
|
|
672
|
+
return None
|
|
673
|
+
|
|
674
|
+
return parameters
|
|
675
|
+
|
|
676
|
+
@staticmethod
|
|
677
|
+
def _get_validated_detection_context(
|
|
678
|
+
monitor: Monitor,
|
|
679
|
+
assertion: Assertion,
|
|
680
|
+
expected_parameters_type: str,
|
|
681
|
+
expected_info_class: type,
|
|
682
|
+
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
683
|
+
) -> Optional[models.AssertionEvaluationParametersClass]:
|
|
684
|
+
"""
|
|
685
|
+
Validate and extract the detection context (parameters) for detection mechanism logic.
|
|
686
|
+
Returns the parameters if all checks pass, otherwise None.
|
|
687
|
+
"""
|
|
688
|
+
parameters = _AssertionPublic._check_valid_monitor_info(monitor, default)
|
|
689
|
+
if parameters is None:
|
|
690
|
+
return None
|
|
691
|
+
if parameters.type != expected_parameters_type:
|
|
692
|
+
logger.warning(
|
|
693
|
+
f"Expected {expected_parameters_type} parameters type, got {parameters.type}, defaulting detection mechanism to {default}"
|
|
694
|
+
)
|
|
695
|
+
return None
|
|
696
|
+
if assertion.info is None:
|
|
697
|
+
_AssertionPublic._warn_and_return_default_detection_mechanism(
|
|
698
|
+
monitor, "info", default
|
|
699
|
+
)
|
|
700
|
+
return None
|
|
701
|
+
if not isinstance(assertion.info, expected_info_class):
|
|
702
|
+
logger.warning(
|
|
703
|
+
f"Expected {expected_info_class.__name__}, got {type(assertion.info).__name__}, defaulting detection mechanism to {default}"
|
|
704
|
+
)
|
|
705
|
+
return None
|
|
706
|
+
return parameters
|
|
707
|
+
|
|
566
708
|
|
|
567
709
|
class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
|
|
568
710
|
"""
|
|
@@ -619,8 +761,6 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
|
|
|
619
761
|
sensitivity=sensitivity,
|
|
620
762
|
exclusion_windows=exclusion_windows,
|
|
621
763
|
training_data_lookback_days=training_data_lookback_days,
|
|
622
|
-
incident_behavior=incident_behavior,
|
|
623
|
-
detection_mechanism=detection_mechanism,
|
|
624
764
|
)
|
|
625
765
|
# Then initialize the parent class
|
|
626
766
|
_AssertionPublic.__init__(
|
|
@@ -629,6 +769,8 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
|
|
|
629
769
|
dataset_urn=dataset_urn,
|
|
630
770
|
display_name=display_name,
|
|
631
771
|
mode=mode,
|
|
772
|
+
incident_behavior=incident_behavior,
|
|
773
|
+
detection_mechanism=detection_mechanism,
|
|
632
774
|
created_by=created_by,
|
|
633
775
|
created_at=created_at,
|
|
634
776
|
updated_by=updated_by,
|
|
@@ -661,6 +803,43 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
|
|
|
661
803
|
tags=cls._get_tags(assertion),
|
|
662
804
|
)
|
|
663
805
|
|
|
806
|
+
@staticmethod
|
|
807
|
+
def _get_detection_mechanism(
|
|
808
|
+
assertion: Assertion,
|
|
809
|
+
monitor: Monitor,
|
|
810
|
+
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
811
|
+
) -> Optional[_DetectionMechanismTypes]:
|
|
812
|
+
"""Get the detection mechanism for freshness assertions."""
|
|
813
|
+
parameters = _AssertionPublic._get_validated_detection_context(
|
|
814
|
+
monitor,
|
|
815
|
+
assertion,
|
|
816
|
+
models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
|
|
817
|
+
models.FreshnessAssertionInfoClass,
|
|
818
|
+
default,
|
|
819
|
+
)
|
|
820
|
+
if parameters is None:
|
|
821
|
+
return default
|
|
822
|
+
if parameters.datasetFreshnessParameters is None:
|
|
823
|
+
logger.warning(
|
|
824
|
+
f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
|
|
825
|
+
)
|
|
826
|
+
return default
|
|
827
|
+
source_type = parameters.datasetFreshnessParameters.sourceType
|
|
828
|
+
if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
|
|
829
|
+
return DetectionMechanism.INFORMATION_SCHEMA
|
|
830
|
+
elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
|
|
831
|
+
return DetectionMechanism.AUDIT_LOG
|
|
832
|
+
elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
|
|
833
|
+
return _AssertionPublic._get_field_value_detection_mechanism(
|
|
834
|
+
assertion, parameters
|
|
835
|
+
)
|
|
836
|
+
elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
|
|
837
|
+
return DetectionMechanism.DATAHUB_OPERATION
|
|
838
|
+
elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
|
|
839
|
+
raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
|
|
840
|
+
else:
|
|
841
|
+
raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|
|
842
|
+
|
|
664
843
|
|
|
665
844
|
class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
|
|
666
845
|
"""
|
|
@@ -717,8 +896,6 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
|
|
|
717
896
|
sensitivity=sensitivity,
|
|
718
897
|
exclusion_windows=exclusion_windows,
|
|
719
898
|
training_data_lookback_days=training_data_lookback_days,
|
|
720
|
-
incident_behavior=incident_behavior,
|
|
721
|
-
detection_mechanism=detection_mechanism,
|
|
722
899
|
)
|
|
723
900
|
# Then initialize the parent class
|
|
724
901
|
_AssertionPublic.__init__(
|
|
@@ -727,6 +904,8 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
|
|
|
727
904
|
dataset_urn=dataset_urn,
|
|
728
905
|
display_name=display_name,
|
|
729
906
|
mode=mode,
|
|
907
|
+
incident_behavior=incident_behavior,
|
|
908
|
+
detection_mechanism=detection_mechanism,
|
|
730
909
|
created_by=created_by,
|
|
731
910
|
created_at=created_at,
|
|
732
911
|
updated_by=updated_by,
|
|
@@ -759,9 +938,213 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
|
|
|
759
938
|
tags=cls._get_tags(assertion),
|
|
760
939
|
)
|
|
761
940
|
|
|
941
|
+
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """Get the detection mechanism for volume assertions.

    Args:
        assertion: The assertion entity; passed to the validation helper and,
            for QUERY sources, used to look up the additional filter.
        monitor: The monitor entity; passed to the validation helper.
        default: Fallback value used when no volume parameters are available.

    Returns:
        The detection mechanism matching the volume source type. When the
        validation helper returns ``None``, ``default`` is returned as-is.
        When ``datasetVolumeParameters`` is missing, ``default`` is returned,
        or ``DEFAULT_DETECTION_MECHANISM`` if ``default`` is ``None``.

    Raises:
        SDKNotYetSupportedError: If the source type is not mapped here.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
        models.VolumeAssertionInfoClass,
        default,
    )
    if parameters is None:
        return default

    volume_parameters = parameters.datasetVolumeParameters
    if volume_parameters is None:
        # Resolve the fallback once so the log reports exactly what is
        # returned; a caller-supplied `default` may differ from
        # DEFAULT_DETECTION_MECHANISM.
        fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
        logger.warning(
            "Monitor does not have datasetVolumeParameters, "
            "defaulting detection mechanism to %s",
            fallback,
        )
        return fallback

    source_type = volume_parameters.sourceType
    if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    if source_type == models.DatasetVolumeSourceTypeClass.QUERY:
        additional_filter = _AssertionPublic._get_additional_filter(assertion)
        return DetectionMechanism.QUERY(additional_filter=additional_filter)
    if source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
        return DetectionMechanism.DATASET_PROFILE
    raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
|
|
975
|
+
|
|
762
976
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
977
|
+
class FreshnessAssertion(_HasSchedule, _AssertionPublic):
|
|
978
|
+
"""
|
|
979
|
+
A class that represents a freshness assertion.
|
|
980
|
+
"""
|
|
981
|
+
|
|
982
|
+
def __init__(
    self,
    *,
    urn: AssertionUrn,
    dataset_urn: DatasetUrn,
    display_name: str,
    mode: AssertionMode,
    schedule: models.CronScheduleClass,
    freshness_schedule_check_type: Union[
        str, models.FreshnessAssertionScheduleTypeClass
    ],
    lookback_window: Optional[TimeWindowSizeInputTypes],
    tags: list[TagUrn],
    incident_behavior: list[AssertionIncidentBehavior],
    detection_mechanism: Optional[
        _DetectionMechanismTypes
    ] = DEFAULT_DETECTION_MECHANISM,
    created_by: Optional[CorpUserUrn] = None,
    created_at: Union[datetime, None] = None,
    updated_by: Optional[CorpUserUrn] = None,
    updated_at: Optional[datetime] = None,
):
    """
    Build a freshness assertion object from already-resolved values.

    The instance is intended to be read-only: values are exposed through
    properties and should be changed via the `upsert_*` methods rather than
    by assigning to the object.

    Args:
        urn: Urn identifying the assertion.
        dataset_urn: Urn of the dataset the assertion targets.
        display_name: Human-readable name of the assertion.
        mode: Assertion mode (active, inactive).
        schedule: Schedule on which the assertion is evaluated.
        freshness_schedule_check_type: Kind of freshness schedule check used.
        lookback_window: Lookback window used by the assertion, if any.
        tags: Tags attached to the assertion.
        incident_behavior: Whether incidents are raised / resolved when the
            assertion fails / passes.
        detection_mechanism: Detection mechanism of the assertion.
        created_by: Urn of the user who created the assertion.
        created_at: Creation timestamp.
        updated_by: Urn of the user who last updated the assertion.
        updated_at: Last-update timestamp.
    """
    # Each base class is initialised explicitly, schedule mixin first.
    _HasSchedule.__init__(self, schedule=schedule)
    _AssertionPublic.__init__(
        self,
        # identity
        urn=urn,
        dataset_urn=dataset_urn,
        display_name=display_name,
        # behaviour
        mode=mode,
        incident_behavior=incident_behavior,
        detection_mechanism=detection_mechanism,
        tags=tags,
        # audit information
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
    )

    # Freshness-specific state, surfaced through read-only properties.
    self._lookback_window = lookback_window
    self._freshness_schedule_check_type = freshness_schedule_check_type
|
|
1042
|
+
|
|
1043
|
+
@property
def freshness_schedule_check_type(
    self,
) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
    """Return the freshness schedule check type stored on this assertion."""
    check_type = self._freshness_schedule_check_type
    return check_type
|
|
1048
|
+
|
|
1049
|
+
@property
def lookback_window(self) -> Optional[TimeWindowSizeInputTypes]:
    """Return the lookback window stored on this assertion (may be None)."""
    window = self._lookback_window
    return window
|
|
1052
|
+
|
|
1053
|
+
@staticmethod
def _get_freshness_schedule_check_type(
    assertion: Assertion,
) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
    """Read the schedule type from a freshness assertion's info.

    Raises:
        SDKNotYetSupportedError: If the assertion has no info, the info is
            not a freshness assertion info, or the info has no schedule.
    """
    info = assertion.info
    if info is None:
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
        )
    if not isinstance(info, models.FreshnessAssertionInfoClass):
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} is not a freshness assertion"
        )

    freshness_schedule = info.schedule
    if freshness_schedule is None:
        raise SDKNotYetSupportedError(
            f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
        )
    return freshness_schedule.type
|
|
1071
|
+
|
|
1072
|
+
@staticmethod
def _get_lookback_window(
    assertion: Assertion,
) -> Optional[models.FixedIntervalScheduleClass]:
    """Read the fixed-interval setting from a freshness assertion's schedule.

    Returns:
        The schedule's ``fixedInterval`` value, which may be ``None``.

    Raises:
        SDKNotYetSupportedError: If the assertion has no info, the info is
            not a freshness assertion info, or the info has no schedule.
    """
    info = assertion.info
    if info is None:
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
        )
    if not isinstance(info, models.FreshnessAssertionInfoClass):
        raise SDKNotYetSupportedError(
            f"Assertion {assertion.urn} is not a freshness assertion"
        )

    freshness_schedule = info.schedule
    if freshness_schedule is None:
        raise SDKNotYetSupportedError(
            f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
        )
    return freshness_schedule.fixedInterval
|
|
1090
|
+
|
|
1091
|
+
@classmethod
def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
    """
    Build a freshness assertion from its assertion and monitor entities.

    Every constructor argument is derived from one of the two entities via
    the class's ``_get_*`` helpers or direct attribute reads.
    """
    # Values are resolved in the same order as the constructor keywords, so
    # the first failing lookup is the one that surfaces.
    urn = assertion.urn
    dataset_urn = assertion.dataset
    display_name = assertion.description or ""
    mode = cls._get_mode(monitor)
    schedule = cls._get_schedule(monitor)
    check_type = cls._get_freshness_schedule_check_type(assertion)
    lookback_window = cls._get_lookback_window(assertion)
    incident_behavior = cls._get_incident_behavior(assertion)
    detection_mechanism = cls._get_detection_mechanism(assertion, monitor)

    return cls(
        urn=urn,
        dataset_urn=dataset_urn,
        display_name=display_name,
        mode=mode,
        schedule=schedule,
        freshness_schedule_check_type=check_type,
        lookback_window=lookback_window,
        incident_behavior=incident_behavior,
        detection_mechanism=detection_mechanism,
        created_by=cls._get_created_by(assertion),
        created_at=cls._get_created_at(assertion),
        updated_by=cls._get_updated_by(assertion),
        updated_at=cls._get_updated_at(assertion),
        tags=cls._get_tags(assertion),
    )
|
|
1114
|
+
|
|
1115
|
+
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """Get the detection mechanism for freshness assertions.

    Args:
        assertion: The assertion entity; passed to the validation helper and,
            for FIELD_VALUE sources, to the field-value helper.
        monitor: The monitor entity; passed to the validation helper.
        default: Value returned when no freshness parameters are available.

    Returns:
        The detection mechanism matching the freshness source type, or
        ``default`` when the validated parameters (or their
        ``datasetFreshnessParameters``) are missing.

    Raises:
        SDKNotYetSupportedError: If the source type is FILE_METADATA or any
            other source type that is not mapped here.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
        models.FreshnessAssertionInfoClass,
        default,
    )
    if parameters is None:
        return default

    freshness_parameters = parameters.datasetFreshnessParameters
    if freshness_parameters is None:
        # Log the value that is actually returned: `default` can differ from
        # DEFAULT_DETECTION_MECHANISM when the caller overrides it.
        logger.warning(
            "Monitor does not have datasetFreshnessParameters, "
            "defaulting detection mechanism to %s",
            default,
        )
        return default

    source_type = freshness_parameters.sourceType
    if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    if source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
        return DetectionMechanism.AUDIT_LOG
    if source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
        return _AssertionPublic._get_field_value_detection_mechanism(
            assertion, parameters
        )
    if source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
        return DetectionMechanism.DATAHUB_OPERATION
    if source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
        # Known source type, deliberately rejected with its own message.
        raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
    raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|