acryl-datahub-cloud 0.3.12rc4__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/sdk/__init__.py +5 -1
- acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +401 -169
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
- acryl_datahub_cloud/sdk/assertion/types.py +18 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +10 -3
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +8 -12
- acryl_datahub_cloud/sdk/assertions_client.py +726 -1
- acryl_datahub_cloud/sdk/entities/assertion.py +4 -0
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +45 -45
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +15 -12
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
from acryl_datahub_cloud.sdk.assertion import (
|
|
1
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
2
2
|
FreshnessAssertion,
|
|
3
3
|
SmartFreshnessAssertion,
|
|
4
4
|
SmartVolumeAssertion,
|
|
5
5
|
)
|
|
6
|
+
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
7
|
+
SmartColumnMetricAssertion,
|
|
8
|
+
)
|
|
6
9
|
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
7
10
|
AssertionIncidentBehavior,
|
|
8
11
|
DetectionMechanism,
|
|
@@ -17,6 +20,7 @@ from acryl_datahub_cloud.sdk.subscription_client import SubscriptionClient
|
|
|
17
20
|
__all__ = [
|
|
18
21
|
"SmartFreshnessAssertion",
|
|
19
22
|
"SmartVolumeAssertion",
|
|
23
|
+
"SmartColumnMetricAssertion",
|
|
20
24
|
"TimeWindowSize",
|
|
21
25
|
"FreshnessAssertion",
|
|
22
26
|
"DetectionMechanism",
|
|
File without changes
|
|
@@ -27,12 +27,20 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
|
27
27
|
TimeWindowSizeInputTypes,
|
|
28
28
|
_DetectionMechanismTypes,
|
|
29
29
|
)
|
|
30
|
+
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
31
|
+
MetricInputType,
|
|
32
|
+
OperatorInputType,
|
|
33
|
+
RangeInputType,
|
|
34
|
+
RangeTypeInputType,
|
|
35
|
+
ValueInputType,
|
|
36
|
+
ValueTypeInputType,
|
|
37
|
+
)
|
|
30
38
|
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
31
39
|
from acryl_datahub_cloud.sdk.entities.monitor import (
|
|
32
40
|
Monitor,
|
|
33
41
|
_get_nested_field_for_entity_with_default,
|
|
34
42
|
)
|
|
35
|
-
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError
|
|
43
|
+
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
|
|
36
44
|
from datahub.emitter.mce_builder import parse_ts_millis
|
|
37
45
|
from datahub.metadata import schema_classes as models
|
|
38
46
|
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
@@ -169,6 +177,159 @@ class _HasSmartFunctionality:
|
|
|
169
177
|
return retrieved
|
|
170
178
|
|
|
171
179
|
|
|
180
|
+
class _HasColumnMetricFunctionality:
    """
    Mixin that carries the column metric settings of an assertion.

    The constructor stores the column name, metric type, operator and the
    optional value / range settings on private attributes, each exposed
    through a read-only property. The static ``_get_*`` helpers read the same
    pieces back from an ``Assertion`` entity under ``info.fieldMetricAssertion``.
    """

    def __init__(
        self,
        column_name: str,
        metric_type: MetricInputType,
        operator: OperatorInputType,
        value: Optional[ValueInputType] = None,
        value_type: Optional[ValueTypeInputType] = None,
        range: Optional[RangeInputType] = None,
        range_type: Optional[RangeTypeInputType] = None,
    ):
        # Always-present settings.
        self._column_name, self._metric_type, self._operator = (
            column_name,
            metric_type,
            operator,
        )
        # Optional single-value settings.
        self._value, self._value_type = value, value_type
        # Optional range settings.
        self._range, self._range_type = range, range_type

    @property
    def column_name(self) -> str:
        """Column name given at construction."""
        return self._column_name

    @property
    def metric_type(self) -> MetricInputType:
        """Metric type given at construction."""
        return self._metric_type

    @property
    def operator(self) -> OperatorInputType:
        """Operator given at construction."""
        return self._operator

    @property
    def value(self) -> Optional[ValueInputType]:
        """Value given at construction, or None."""
        return self._value

    @property
    def value_type(self) -> Optional[ValueTypeInputType]:
        """Value type given at construction, or None."""
        return self._value_type

    @property
    def range(self) -> Optional[RangeInputType]:
        """Range given at construction, or None."""
        return self._range

    @property
    def range_type(self) -> Optional[RangeTypeInputType]:
        """Range type given at construction, or None."""
        return self._range_type

    @staticmethod
    def _get_column_name(assertion: Assertion) -> str:
        """Return ``info.fieldMetricAssertion.field.path``; raise SDKUsageError when it is missing."""
        found = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.field.path",
            default=None,
        )
        if found is not None:
            return found
        raise SDKUsageError(
            f"Column name is required for column metric assertions. Assertion {assertion.urn} does not have a column name"
        )

    @staticmethod
    def _get_metric_type(assertion: Assertion) -> MetricInputType:
        """Return ``info.fieldMetricAssertion.metric``; raise SDKUsageError when it is missing."""
        found = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.metric",
            default=None,
        )
        if found is not None:
            return found
        raise SDKUsageError(
            f"Metric type is required for column metric assertions. Assertion {assertion.urn} does not have a metric type"
        )

    @staticmethod
    def _get_operator(assertion: Assertion) -> OperatorInputType:
        """Return ``info.fieldMetricAssertion.operator``; raise SDKUsageError when it is missing."""
        found = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.operator",
            default=None,
        )
        if found is not None:
            return found
        raise SDKUsageError(
            f"Operator is required for column metric assertions. Assertion {assertion.urn} does not have an operator"
        )

    @staticmethod
    def _get_value(assertion: Assertion) -> Optional[ValueInputType]:
        """Return ``info.fieldMetricAssertion.parameters.value.value``, or None when absent."""
        return _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.value.value",
            default=None,
        )

    @staticmethod
    def _get_value_type(assertion: Assertion) -> Optional[ValueTypeInputType]:
        """Return ``info.fieldMetricAssertion.parameters.value.type``, or None when absent."""
        return _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.value.type",
            default=None,
        )

    @staticmethod
    def _get_range(assertion: Assertion) -> Optional[RangeInputType]:
        """
        Return the ``(min, max)`` pair stored under the assertion parameters.

        Returns None when neither bound is present. A bound that exposes a
        ``value`` attribute is replaced by that attribute; otherwise it is
        passed through unchanged (including a single missing bound as None).
        """

        def _unwrap(bound):
            # Only dereference `.value` on bounds that exist and expose it.
            if bound is not None and hasattr(bound, "value"):
                return bound.value
            return bound

        lower = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.minValue",
            default=None,
        )
        upper = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.maxValue",
            default=None,
        )

        if lower is None and upper is None:
            return None

        lower = _unwrap(lower)
        upper = _unwrap(upper)
        return (lower, upper)

    @staticmethod
    def _get_range_type(assertion: Assertion) -> Optional[RangeTypeInputType]:
        """
        Return the ``(min type, max type)`` pair stored under the assertion parameters.

        Returns None, rather than a tuple of Nones, when neither type is present.
        """
        lower_type = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.minValue.type",
            default=None,
        )
        upper_type = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.maxValue.type",
            default=None,
        )

        if lower_type is None and upper_type is None:
            return None
        return (lower_type, upper_type)
|
|
331
|
+
|
|
332
|
+
|
|
172
333
|
class _AssertionPublic(ABC):
|
|
173
334
|
"""
|
|
174
335
|
Abstract base class that represents a public facing assertion and contains the common properties of all assertions.
|
|
@@ -177,6 +338,7 @@ class _AssertionPublic(ABC):
|
|
|
177
338
|
_SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
|
|
178
339
|
models.FreshnessAssertionInfoClass,
|
|
179
340
|
models.VolumeAssertionInfoClass,
|
|
341
|
+
models.FieldAssertionInfoClass,
|
|
180
342
|
)
|
|
181
343
|
|
|
182
344
|
def __init__(
|
|
@@ -278,165 +440,26 @@ class _AssertionPublic(ABC):
|
|
|
278
440
|
return incident_behaviors
|
|
279
441
|
|
|
280
442
|
@staticmethod
|
|
443
|
+
@abstractmethod
|
|
281
444
|
def _get_detection_mechanism(
|
|
282
445
|
assertion: Assertion,
|
|
283
446
|
monitor: Monitor,
|
|
284
447
|
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
285
448
|
) -> Optional[_DetectionMechanismTypes]:
|
|
286
|
-
"""Get the detection mechanism from the monitor and assertion.
|
|
287
|
-
if not _AssertionPublic._has_valid_monitor_info(monitor):
|
|
288
|
-
return default
|
|
289
|
-
|
|
290
|
-
# 1. Check if the assertion has a parameters field
|
|
291
|
-
def _warn_and_return_default_detection_mechanism(
|
|
292
|
-
field_name: str,
|
|
293
|
-
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
294
|
-
) -> Optional[_DetectionMechanismTypes]:
|
|
295
|
-
logger.warning(
|
|
296
|
-
f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
|
|
297
|
-
)
|
|
298
|
-
return default
|
|
299
|
-
|
|
300
|
-
parameters = _AssertionPublic._get_assertion_parameters(monitor, default)
|
|
301
|
-
if parameters is None:
|
|
302
|
-
return _warn_and_return_default_detection_mechanism("parameters", default)
|
|
303
|
-
|
|
304
|
-
# 2. Convert the raw detection mechanism to the SDK detection mechanism
|
|
305
|
-
if parameters.type in [
|
|
306
|
-
models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
|
|
307
|
-
models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
|
|
308
|
-
]:
|
|
309
|
-
if assertion.info is None:
|
|
310
|
-
return _warn_and_return_default_detection_mechanism("info", default)
|
|
311
|
-
if isinstance(assertion.info, models.VolumeAssertionInfoClass):
|
|
312
|
-
return _AssertionPublic._get_volume_detection_mechanism(
|
|
313
|
-
assertion, parameters, default
|
|
314
|
-
)
|
|
315
|
-
elif isinstance(assertion.info, models.FreshnessAssertionInfoClass):
|
|
316
|
-
return _AssertionPublic._get_freshness_detection_mechanism(
|
|
317
|
-
assertion, parameters, default
|
|
318
|
-
)
|
|
319
|
-
# TODO: Consider moving the detection mechanism logic to the assertion classes themselves e.g. _get_assertion_specific_detection_mechanism as an abstract method
|
|
320
|
-
# TODO: Add support here for other detection mechanisms when other assertion types are supported
|
|
321
|
-
else:
|
|
322
|
-
raise SDKNotYetSupportedError(
|
|
323
|
-
f"AssertionType {type(assertion.info).__name__}"
|
|
324
|
-
)
|
|
325
|
-
else:
|
|
326
|
-
raise SDKNotYetSupportedError(
|
|
327
|
-
f"AssertionEvaluationParametersType {parameters.type} not supported"
|
|
328
|
-
)
|
|
329
|
-
|
|
330
|
-
@staticmethod
|
|
331
|
-
def _get_freshness_detection_mechanism(
|
|
332
|
-
assertion: Assertion,
|
|
333
|
-
parameters: models.AssertionEvaluationParametersClass,
|
|
334
|
-
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
335
|
-
) -> Optional[_DetectionMechanismTypes]:
|
|
336
|
-
"""Get the detection mechanism for freshness assertions."""
|
|
337
|
-
if parameters.datasetFreshnessParameters is None:
|
|
338
|
-
logger.warning(
|
|
339
|
-
f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
|
|
340
|
-
)
|
|
341
|
-
return default
|
|
342
|
-
|
|
343
|
-
source_type = parameters.datasetFreshnessParameters.sourceType
|
|
344
|
-
if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
|
|
345
|
-
return DetectionMechanism.INFORMATION_SCHEMA
|
|
346
|
-
elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
|
|
347
|
-
return DetectionMechanism.AUDIT_LOG
|
|
348
|
-
elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
|
|
349
|
-
return _AssertionPublic._get_field_value_detection_mechanism(
|
|
350
|
-
assertion, parameters
|
|
351
|
-
)
|
|
352
|
-
elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
|
|
353
|
-
return DetectionMechanism.DATAHUB_OPERATION
|
|
354
|
-
elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
|
|
355
|
-
raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
|
|
356
|
-
else:
|
|
357
|
-
raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|
|
358
|
-
|
|
359
|
-
@staticmethod
|
|
360
|
-
def _get_volume_detection_mechanism(
|
|
361
|
-
assertion: Assertion,
|
|
362
|
-
parameters: models.AssertionEvaluationParametersClass,
|
|
363
|
-
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
364
|
-
) -> _DetectionMechanismTypes:
|
|
365
|
-
"""Get the detection mechanism for volume assertions."""
|
|
366
|
-
if parameters.datasetVolumeParameters is None:
|
|
367
|
-
logger.warning(
|
|
368
|
-
f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
|
|
369
|
-
)
|
|
370
|
-
if default is None:
|
|
371
|
-
return DEFAULT_DETECTION_MECHANISM
|
|
372
|
-
else:
|
|
373
|
-
return default
|
|
449
|
+
"""Get the detection mechanism from the monitor and assertion.
|
|
374
450
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
return DetectionMechanism.INFORMATION_SCHEMA
|
|
378
|
-
elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
|
|
379
|
-
additional_filter = _AssertionPublic._get_additional_filter(assertion)
|
|
380
|
-
return DetectionMechanism.QUERY(additional_filter=additional_filter)
|
|
381
|
-
elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
|
|
382
|
-
return DetectionMechanism.DATASET_PROFILE
|
|
383
|
-
else:
|
|
384
|
-
raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
|
|
385
|
-
|
|
386
|
-
@staticmethod
|
|
387
|
-
def _get_field_value_detection_mechanism(
|
|
388
|
-
assertion: Assertion,
|
|
389
|
-
parameters: models.AssertionEvaluationParametersClass,
|
|
390
|
-
) -> _DetectionMechanismTypes:
|
|
391
|
-
"""Get the detection mechanism for field value based freshness."""
|
|
392
|
-
# We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
|
|
393
|
-
assert parameters.datasetFreshnessParameters is not None
|
|
394
|
-
field = parameters.datasetFreshnessParameters.field
|
|
451
|
+
This method should be implemented by each assertion class to handle
|
|
452
|
+
its specific detection mechanism logic.
|
|
395
453
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
return DEFAULT_DETECTION_MECHANISM
|
|
401
|
-
|
|
402
|
-
column_name = field.path
|
|
403
|
-
additional_filter = _AssertionPublic._get_additional_filter(assertion)
|
|
404
|
-
|
|
405
|
-
if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
|
|
406
|
-
return DetectionMechanism.LAST_MODIFIED_COLUMN(
|
|
407
|
-
column_name=column_name, additional_filter=additional_filter
|
|
408
|
-
)
|
|
409
|
-
elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
|
|
410
|
-
return DetectionMechanism.HIGH_WATERMARK_COLUMN(
|
|
411
|
-
column_name=column_name, additional_filter=additional_filter
|
|
412
|
-
)
|
|
413
|
-
else:
|
|
414
|
-
raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
|
|
454
|
+
Args:
|
|
455
|
+
assertion: The assertion entity
|
|
456
|
+
monitor: The monitor entity
|
|
457
|
+
default: Default detection mechanism to return if none is found
|
|
415
458
|
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
"""
|
|
419
|
-
|
|
420
|
-
logger.warning(
|
|
421
|
-
f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
|
|
422
|
-
)
|
|
423
|
-
return None
|
|
424
|
-
if (
|
|
425
|
-
not isinstance(
|
|
426
|
-
assertion.info,
|
|
427
|
-
_AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES,
|
|
428
|
-
)
|
|
429
|
-
or assertion.info.filter is None
|
|
430
|
-
):
|
|
431
|
-
logger.warning(
|
|
432
|
-
f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
|
|
433
|
-
)
|
|
434
|
-
return None
|
|
435
|
-
if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
|
|
436
|
-
raise SDKNotYetSupportedError(
|
|
437
|
-
f"DatasetFilterType {assertion.info.filter.type}"
|
|
438
|
-
)
|
|
439
|
-
return assertion.info.filter.sql
|
|
459
|
+
Returns:
|
|
460
|
+
The detection mechanism or default if none is found
|
|
461
|
+
"""
|
|
462
|
+
pass
|
|
440
463
|
|
|
441
464
|
@staticmethod
|
|
442
465
|
def _has_valid_monitor_info(monitor: Monitor) -> bool:
|
|
@@ -564,6 +587,124 @@ class _AssertionPublic(ABC):
|
|
|
564
587
|
"""
|
|
565
588
|
pass
|
|
566
589
|
|
|
590
|
+
@staticmethod
def _get_additional_filter(assertion: Assertion) -> Optional[str]:
    """
    Return the SQL text of the assertion's filter, if it has one.

    Returns None (after logging a warning) when the assertion has no info,
    when its info is not one of ``_SUPPORTED_WITH_FILTER_ASSERTION_TYPES``,
    or when the info has no filter.

    Raises:
        SDKNotYetSupportedError: if the filter type is not SQL.
    """
    info = assertion.info
    if info is None:
        logger.warning(
            f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
        )
        return None

    # A filter is only read from supported info classes that actually carry one.
    if not (
        isinstance(info, _AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES)
        and info.filter is not None
    ):
        logger.warning(
            f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
        )
        return None

    dataset_filter = info.filter
    if dataset_filter.type != models.DatasetFilterTypeClass.SQL:
        raise SDKNotYetSupportedError(f"DatasetFilterType {dataset_filter.type}")
    return dataset_filter.sql
|
|
614
|
+
|
|
615
|
+
@staticmethod
def _get_field_value_detection_mechanism(
    assertion: Assertion,
    parameters: models.AssertionEvaluationParametersClass,
) -> _DetectionMechanismTypes:
    """
    Map a field-value freshness source onto a column based detection mechanism.

    Falls back to ``DEFAULT_DETECTION_MECHANISM`` (with a warning) when the
    freshness parameters carry no field or the field has no kind. Otherwise the
    field path becomes the column name and the assertion's additional filter is
    attached to the returned mechanism.

    Raises:
        SDKNotYetSupportedError: if the field kind is neither LAST_MODIFIED
            nor HIGH_WATERMARK.
    """
    freshness_parameters = parameters.datasetFreshnessParameters
    # The caller is expected to have checked this already; the assert narrows the type.
    assert freshness_parameters is not None
    field = freshness_parameters.field

    if field is None or field.kind is None:
        logger.warning(
            f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
        )
        return DEFAULT_DETECTION_MECHANISM

    # Both are resolved before the kind is inspected, as in the original order.
    column_name = field.path
    additional_filter = _AssertionPublic._get_additional_filter(assertion)

    if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
        return DetectionMechanism.LAST_MODIFIED_COLUMN(
            column_name=column_name, additional_filter=additional_filter
        )
    if field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
        return DetectionMechanism.HIGH_WATERMARK_COLUMN(
            column_name=column_name, additional_filter=additional_filter
        )
    raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")
|
|
644
|
+
|
|
645
|
+
@staticmethod
def _warn_and_return_default_detection_mechanism(
    monitor: Monitor,
    field_name: str,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """
    Log that ``field_name`` is missing on the monitor and hand back ``default``.

    Args:
        monitor: Monitor whose URN is included in the warning.
        field_name: Name of the missing field, quoted in the warning.
        default: Value returned unchanged to the caller.

    Returns:
        The ``default`` argument.
    """
    message = f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
    logger.warning(message)
    return default
|
|
656
|
+
|
|
657
|
+
@staticmethod
def _check_valid_monitor_info(
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[models.AssertionEvaluationParametersClass]:
    """
    Return the monitor's assertion parameters when its info is valid.

    Args:
        monitor: Monitor to inspect.
        default: Accepted for signature parity with the other detection
            helpers; this helper does not read it.

    Returns:
        The assertion parameters, or None when the monitor info is not valid
        or no parameters are found.
    """
    if _AssertionPublic._has_valid_monitor_info(monitor):
        # The lookup result is passed through as-is, including None.
        return _AssertionPublic._get_assertion_parameters(monitor)
    return None
|
|
675
|
+
|
|
676
|
+
@staticmethod
def _get_validated_detection_context(
    monitor: Monitor,
    assertion: Assertion,
    expected_parameters_type: str,
    expected_info_class: type,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[models.AssertionEvaluationParametersClass]:
    """
    Validate the monitor/assertion pair and return the evaluation parameters.

    Args:
        monitor: Monitor the parameters are read from.
        assertion: Assertion whose ``info`` must be an ``expected_info_class``.
        expected_parameters_type: Value ``parameters.type`` must equal.
        expected_info_class: Class ``assertion.info`` must be an instance of.
        default: Detection mechanism the caller will fall back to; only used
            in warning messages here.

    Returns:
        The parameters when every check passes, otherwise None. Callers treat
        None as "use the default detection mechanism".
    """
    parameters = _AssertionPublic._check_valid_monitor_info(monitor, default)
    if parameters is None:
        return None

    if parameters.type != expected_parameters_type:
        logger.warning(
            f"Expected {expected_parameters_type} parameters type, got {parameters.type}, defaulting detection mechanism to {default}"
        )
        return None

    if assertion.info is None:
        # The missing field belongs to the assertion, so report the assertion
        # URN rather than routing through the monitor-oriented warning helper.
        logger.warning(
            f"Assertion {assertion.urn} does not have an `info` field, defaulting detection mechanism to {default}"
        )
        return None

    if not isinstance(assertion.info, expected_info_class):
        logger.warning(
            f"Expected {expected_info_class.__name__}, got {type(assertion.info).__name__}, defaulting detection mechanism to {default}"
        )
        return None

    return parameters
|
|
707
|
+
|
|
567
708
|
|
|
568
709
|
class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
|
|
569
710
|
"""
|
|
@@ -662,6 +803,43 @@ class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPu
|
|
|
662
803
|
tags=cls._get_tags(assertion),
|
|
663
804
|
)
|
|
664
805
|
|
|
806
|
+
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """
    Get the detection mechanism for freshness assertions.

    Args:
        assertion: The assertion entity.
        monitor: The monitor entity holding the evaluation parameters.
        default: Returned when the monitor/assertion pair fails validation or
            the monitor has no dataset freshness parameters.

    Returns:
        The detection mechanism matching the freshness source type, or
        ``default`` when it cannot be determined.

    Raises:
        SDKNotYetSupportedError: for FILE_METADATA or any unrecognized
            freshness source type.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
        models.FreshnessAssertionInfoClass,
        default,
    )
    if parameters is None:
        return default

    freshness_parameters = parameters.datasetFreshnessParameters
    if freshness_parameters is None:
        # Report the value actually returned, which may differ from the
        # module-level default when the caller supplied its own.
        logger.warning(
            f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
        )
        return default

    source_type = freshness_parameters.sourceType
    if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    if source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
        return DetectionMechanism.AUDIT_LOG
    if source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
        return _AssertionPublic._get_field_value_detection_mechanism(
            assertion, parameters
        )
    if source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
        return DetectionMechanism.DATAHUB_OPERATION
    if source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
        raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
    raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|
|
842
|
+
|
|
665
843
|
|
|
666
844
|
class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
|
|
667
845
|
"""
|
|
@@ -760,6 +938,41 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
|
|
|
760
938
|
tags=cls._get_tags(assertion),
|
|
761
939
|
)
|
|
762
940
|
|
|
941
|
+
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """
    Get the detection mechanism for volume assertions.

    Args:
        assertion: The assertion entity.
        monitor: The monitor entity holding the evaluation parameters.
        default: Returned when the monitor/assertion pair fails validation.
            When the monitor has no dataset volume parameters, ``default`` is
            returned unless it is None, in which case
            ``DEFAULT_DETECTION_MECHANISM`` is returned instead.

    Returns:
        The detection mechanism matching the volume source type, or the
        fallback described above.

    Raises:
        SDKNotYetSupportedError: for an unrecognized volume source type.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
        models.VolumeAssertionInfoClass,
        default,
    )
    if parameters is None:
        return default

    volume_parameters = parameters.datasetVolumeParameters
    if volume_parameters is None:
        # Resolve the fallback once so the warning names the value returned.
        fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
        logger.warning(
            f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {fallback}"
        )
        return fallback

    source_type = volume_parameters.sourceType
    if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    if source_type == models.DatasetVolumeSourceTypeClass.QUERY:
        additional_filter = _AssertionPublic._get_additional_filter(assertion)
        return DetectionMechanism.QUERY(additional_filter=additional_filter)
    if source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
        return DetectionMechanism.DATASET_PROFILE
    raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
|
|
975
|
+
|
|
763
976
|
|
|
764
977
|
class FreshnessAssertion(_HasSchedule, _AssertionPublic):
|
|
765
978
|
"""
|
|
@@ -899,20 +1112,39 @@ class FreshnessAssertion(_HasSchedule, _AssertionPublic):
|
|
|
899
1112
|
tags=cls._get_tags(assertion),
|
|
900
1113
|
)
|
|
901
1114
|
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
1115
|
+
@staticmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """
    Get the detection mechanism for freshness assertions.

    Args:
        assertion: The assertion entity.
        monitor: The monitor entity holding the evaluation parameters.
        default: Returned when the monitor/assertion pair fails validation or
            the monitor has no dataset freshness parameters.

    Returns:
        The detection mechanism matching the freshness source type, or
        ``default`` when it cannot be determined.

    Raises:
        SDKNotYetSupportedError: for FILE_METADATA or any unrecognized
            freshness source type.
    """
    parameters = _AssertionPublic._get_validated_detection_context(
        monitor,
        assertion,
        models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
        models.FreshnessAssertionInfoClass,
        default,
    )
    if parameters is None:
        return default

    freshness_parameters = parameters.datasetFreshnessParameters
    if freshness_parameters is None:
        # Report the value actually returned, which may differ from the
        # module-level default when the caller supplied its own.
        logger.warning(
            f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
        )
        return default

    source_type = freshness_parameters.sourceType
    if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
        return DetectionMechanism.INFORMATION_SCHEMA
    if source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
        return DetectionMechanism.AUDIT_LOG
    if source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
        return _AssertionPublic._get_field_value_detection_mechanism(
            assertion, parameters
        )
    if source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
        return DetectionMechanism.DATAHUB_OPERATION
    if source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
        raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
    raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|