acryl-datahub-cloud: acryl_datahub_cloud-0.3.12.1rc2-py3-none-any.whl → acryl_datahub_cloud-0.3.12.2-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/sdk/__init__.py +20 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +146 -97
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +10 -22
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +99 -19
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +965 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +191 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +60 -11
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +438 -347
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +105 -61
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +381 -392
- acryl_datahub_cloud/sdk/assertions_client.py +993 -314
- {acryl_datahub_cloud-0.3.12.1rc2.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/METADATA +49 -49
- {acryl_datahub_cloud-0.3.12.1rc2.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/RECORD +18 -15
- {acryl_datahub_cloud-0.3.12.1rc2.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12.1rc2.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12.1rc2.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/top_level.txt +0 -0
|
@@ -9,14 +9,27 @@ from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
|
9
9
|
)
|
|
10
10
|
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
11
11
|
AssertionIncidentBehavior,
|
|
12
|
+
CalendarInterval,
|
|
12
13
|
DetectionMechanism,
|
|
13
14
|
FixedRangeExclusionWindow,
|
|
14
15
|
InferenceSensitivity,
|
|
15
16
|
TimeWindowSize,
|
|
16
17
|
)
|
|
18
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
|
|
19
|
+
MetricType,
|
|
20
|
+
OperatorType,
|
|
21
|
+
ValueType,
|
|
22
|
+
)
|
|
23
|
+
from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
|
|
24
|
+
FreshnessAssertionScheduleCheckType,
|
|
25
|
+
)
|
|
17
26
|
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
27
|
+
SqlAssertionCondition,
|
|
18
28
|
SqlAssertionCriteria,
|
|
19
29
|
)
|
|
30
|
+
from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
|
|
31
|
+
VolumeAssertionCondition,
|
|
32
|
+
)
|
|
20
33
|
from acryl_datahub_cloud.sdk.assertions_client import AssertionsClient
|
|
21
34
|
from acryl_datahub_cloud.sdk.resolver_client import ResolverClient
|
|
22
35
|
from acryl_datahub_cloud.sdk.subscription_client import SubscriptionClient
|
|
@@ -31,9 +44,16 @@ __all__ = [
|
|
|
31
44
|
"InferenceSensitivity",
|
|
32
45
|
"FixedRangeExclusionWindow",
|
|
33
46
|
"AssertionIncidentBehavior",
|
|
47
|
+
"MetricType",
|
|
48
|
+
"OperatorType",
|
|
49
|
+
"ValueType",
|
|
34
50
|
"AssertionsClient",
|
|
35
51
|
"ResolverClient",
|
|
36
52
|
"SubscriptionClient",
|
|
37
53
|
"SqlAssertion",
|
|
38
54
|
"SqlAssertionCriteria",
|
|
55
|
+
"VolumeAssertionCondition",
|
|
56
|
+
"SqlAssertionCondition",
|
|
57
|
+
"FreshnessAssertionScheduleCheckType",
|
|
58
|
+
"CalendarInterval",
|
|
39
59
|
]
|
|
@@ -28,23 +28,19 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
|
28
28
|
TimeWindowSizeInputTypes,
|
|
29
29
|
_DetectionMechanismTypes,
|
|
30
30
|
)
|
|
31
|
-
from acryl_datahub_cloud.sdk.assertion_input.
|
|
31
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
|
|
32
32
|
MetricInputType,
|
|
33
33
|
OperatorInputType,
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
ValueTypeInputType,
|
|
34
|
+
)
|
|
35
|
+
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
36
|
+
SmartColumnMetricAssertionParameters,
|
|
38
37
|
)
|
|
39
38
|
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
40
|
-
|
|
39
|
+
SqlAssertionCondition,
|
|
41
40
|
SqlAssertionCriteria,
|
|
42
|
-
SqlAssertionOperator,
|
|
43
|
-
SqlAssertionType,
|
|
44
41
|
)
|
|
45
42
|
from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
|
|
46
|
-
|
|
47
|
-
_VolumeAssertionDefinitionTypes,
|
|
43
|
+
VolumeAssertionCriteria,
|
|
48
44
|
)
|
|
49
45
|
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
50
46
|
from acryl_datahub_cloud.sdk.entities.monitor import (
|
|
@@ -200,18 +196,12 @@ class _HasColumnMetricFunctionality:
|
|
|
200
196
|
column_name: str,
|
|
201
197
|
metric_type: MetricInputType,
|
|
202
198
|
operator: OperatorInputType,
|
|
203
|
-
|
|
204
|
-
value_type: Optional[ValueTypeInputType] = None,
|
|
205
|
-
range: Optional[RangeInputType] = None,
|
|
206
|
-
range_type: Optional[RangeTypeInputType] = None,
|
|
199
|
+
criteria_parameters: Optional[SmartColumnMetricAssertionParameters] = None,
|
|
207
200
|
):
|
|
208
201
|
self._column_name = column_name
|
|
209
202
|
self._metric_type = metric_type
|
|
210
203
|
self._operator = operator
|
|
211
|
-
self.
|
|
212
|
-
self._value_type = value_type
|
|
213
|
-
self._range = range
|
|
214
|
-
self._range_type = range_type
|
|
204
|
+
self._criteria_parameters = criteria_parameters
|
|
215
205
|
|
|
216
206
|
@property
|
|
217
207
|
def column_name(self) -> str:
|
|
@@ -226,20 +216,8 @@ class _HasColumnMetricFunctionality:
|
|
|
226
216
|
return self._operator
|
|
227
217
|
|
|
228
218
|
@property
|
|
229
|
-
def
|
|
230
|
-
return self.
|
|
231
|
-
|
|
232
|
-
@property
|
|
233
|
-
def value_type(self) -> Optional[ValueTypeInputType]:
|
|
234
|
-
return self._value_type
|
|
235
|
-
|
|
236
|
-
@property
|
|
237
|
-
def range(self) -> Optional[RangeInputType]:
|
|
238
|
-
return self._range
|
|
239
|
-
|
|
240
|
-
@property
|
|
241
|
-
def range_type(self) -> Optional[RangeTypeInputType]:
|
|
242
|
-
return self._range_type
|
|
219
|
+
def criteria_parameters(self) -> Optional[SmartColumnMetricAssertionParameters]:
|
|
220
|
+
return self._criteria_parameters
|
|
243
221
|
|
|
244
222
|
@staticmethod
|
|
245
223
|
def _get_column_name(assertion: Assertion) -> str:
|
|
@@ -281,25 +259,19 @@ class _HasColumnMetricFunctionality:
|
|
|
281
259
|
return operator
|
|
282
260
|
|
|
283
261
|
@staticmethod
|
|
284
|
-
def
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
)
|
|
290
|
-
return value
|
|
291
|
-
|
|
292
|
-
@staticmethod
|
|
293
|
-
def _get_value_type(assertion: Assertion) -> Optional[ValueTypeInputType]:
|
|
294
|
-
value_type = _get_nested_field_for_entity_with_default(
|
|
262
|
+
def _get_criteria_parameters(
|
|
263
|
+
assertion: Assertion,
|
|
264
|
+
) -> Optional[SmartColumnMetricAssertionParameters]:
|
|
265
|
+
# First check if there's a single value parameter
|
|
266
|
+
value_param = _get_nested_field_for_entity_with_default(
|
|
295
267
|
assertion,
|
|
296
|
-
field_path="info.fieldMetricAssertion.parameters.value
|
|
268
|
+
field_path="info.fieldMetricAssertion.parameters.value",
|
|
297
269
|
default=None,
|
|
298
270
|
)
|
|
299
|
-
|
|
271
|
+
if value_param is not None:
|
|
272
|
+
return value_param.value
|
|
300
273
|
|
|
301
|
-
|
|
302
|
-
def _get_range(assertion: Assertion) -> Optional[RangeInputType]:
|
|
274
|
+
# Then check for range parameters
|
|
303
275
|
min_value = _get_nested_field_for_entity_with_default(
|
|
304
276
|
assertion,
|
|
305
277
|
field_path="info.fieldMetricAssertion.parameters.minValue",
|
|
@@ -311,36 +283,59 @@ class _HasColumnMetricFunctionality:
|
|
|
311
283
|
default=None,
|
|
312
284
|
)
|
|
313
285
|
|
|
314
|
-
# If both
|
|
315
|
-
if min_value is None and max_value is None:
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
min_value
|
|
321
|
-
if max_value is not None and hasattr(max_value, "value"):
|
|
322
|
-
max_value = max_value.value
|
|
286
|
+
# If both range values exist, extract their values and return as tuple
|
|
287
|
+
if min_value is not None and max_value is not None:
|
|
288
|
+
if hasattr(min_value, "value"):
|
|
289
|
+
min_value = min_value.value
|
|
290
|
+
if hasattr(max_value, "value"):
|
|
291
|
+
max_value = max_value.value
|
|
292
|
+
return (min_value, max_value)
|
|
323
293
|
|
|
324
|
-
|
|
294
|
+
# If no parameters found, return None
|
|
295
|
+
return None
|
|
325
296
|
|
|
326
297
|
@staticmethod
|
|
327
|
-
def
|
|
328
|
-
|
|
298
|
+
def _get_criteria_parameters_with_type(
|
|
299
|
+
assertion: Assertion,
|
|
300
|
+
) -> Optional[tuple]:
|
|
301
|
+
"""
|
|
302
|
+
Get criteria parameters along with their type information from the backend.
|
|
303
|
+
|
|
304
|
+
Returns:
|
|
305
|
+
For single values: (value, type)
|
|
306
|
+
For ranges: ((min_value, max_value), (min_type, max_type))
|
|
307
|
+
None if no parameters found
|
|
308
|
+
"""
|
|
309
|
+
# First check if there's a single value parameter
|
|
310
|
+
value_param = _get_nested_field_for_entity_with_default(
|
|
311
|
+
assertion,
|
|
312
|
+
field_path="info.fieldMetricAssertion.parameters.value",
|
|
313
|
+
default=None,
|
|
314
|
+
)
|
|
315
|
+
if value_param is not None:
|
|
316
|
+
return (value_param.value, value_param.type)
|
|
317
|
+
|
|
318
|
+
# Then check for range parameters
|
|
319
|
+
min_param = _get_nested_field_for_entity_with_default(
|
|
329
320
|
assertion,
|
|
330
|
-
field_path="info.fieldMetricAssertion.parameters.minValue
|
|
321
|
+
field_path="info.fieldMetricAssertion.parameters.minValue",
|
|
331
322
|
default=None,
|
|
332
323
|
)
|
|
333
|
-
|
|
324
|
+
max_param = _get_nested_field_for_entity_with_default(
|
|
334
325
|
assertion,
|
|
335
|
-
field_path="info.fieldMetricAssertion.parameters.maxValue
|
|
326
|
+
field_path="info.fieldMetricAssertion.parameters.maxValue",
|
|
336
327
|
default=None,
|
|
337
328
|
)
|
|
338
329
|
|
|
339
|
-
# If both
|
|
340
|
-
if
|
|
341
|
-
return
|
|
330
|
+
# If both range parameters exist, return values and types
|
|
331
|
+
if min_param is not None and max_param is not None:
|
|
332
|
+
return (
|
|
333
|
+
(min_param.value, max_param.value),
|
|
334
|
+
(min_param.type, max_param.type),
|
|
335
|
+
)
|
|
342
336
|
|
|
343
|
-
|
|
337
|
+
# If no parameters found, return None
|
|
338
|
+
return None
|
|
344
339
|
|
|
345
340
|
|
|
346
341
|
class _AssertionPublic(ABC):
|
|
@@ -1001,7 +996,7 @@ class VolumeAssertion(_HasSchedule, _AssertionPublic):
|
|
|
1001
996
|
display_name: str,
|
|
1002
997
|
mode: AssertionMode,
|
|
1003
998
|
schedule: models.CronScheduleClass,
|
|
1004
|
-
|
|
999
|
+
criteria: VolumeAssertionCriteria,
|
|
1005
1000
|
tags: list[TagUrn],
|
|
1006
1001
|
incident_behavior: list[AssertionIncidentBehavior],
|
|
1007
1002
|
detection_mechanism: Optional[
|
|
@@ -1023,7 +1018,7 @@ class VolumeAssertion(_HasSchedule, _AssertionPublic):
|
|
|
1023
1018
|
display_name: The display name of the assertion.
|
|
1024
1019
|
mode: The mode of the assertion (active, inactive).
|
|
1025
1020
|
schedule: The schedule of the assertion.
|
|
1026
|
-
|
|
1021
|
+
criteria: The volume assertion criteria.
|
|
1027
1022
|
tags: The tags applied to the assertion.
|
|
1028
1023
|
incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
|
|
1029
1024
|
detection_mechanism: The detection mechanism of the assertion.
|
|
@@ -1047,18 +1042,18 @@ class VolumeAssertion(_HasSchedule, _AssertionPublic):
|
|
|
1047
1042
|
updated_at=updated_at,
|
|
1048
1043
|
tags=tags,
|
|
1049
1044
|
)
|
|
1050
|
-
self.
|
|
1045
|
+
self._criteria = criteria
|
|
1051
1046
|
|
|
1052
1047
|
@property
|
|
1053
|
-
def
|
|
1054
|
-
return self.
|
|
1048
|
+
def criteria(self) -> VolumeAssertionCriteria:
|
|
1049
|
+
return self._criteria
|
|
1055
1050
|
|
|
1056
1051
|
@staticmethod
|
|
1057
1052
|
def _get_volume_definition(
|
|
1058
1053
|
assertion: Assertion,
|
|
1059
|
-
) ->
|
|
1054
|
+
) -> VolumeAssertionCriteria:
|
|
1060
1055
|
"""Get volume assertion definition from a DataHub assertion entity."""
|
|
1061
|
-
return
|
|
1056
|
+
return VolumeAssertionCriteria.from_assertion(assertion)
|
|
1062
1057
|
|
|
1063
1058
|
@staticmethod
|
|
1064
1059
|
def _get_detection_mechanism(
|
|
@@ -1106,7 +1101,7 @@ class VolumeAssertion(_HasSchedule, _AssertionPublic):
|
|
|
1106
1101
|
display_name=assertion.description or "",
|
|
1107
1102
|
mode=cls._get_mode(monitor),
|
|
1108
1103
|
schedule=cls._get_schedule(monitor),
|
|
1109
|
-
|
|
1104
|
+
criteria=cls._get_volume_definition(assertion),
|
|
1110
1105
|
incident_behavior=cls._get_incident_behavior(assertion),
|
|
1111
1106
|
detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
|
|
1112
1107
|
created_by=cls._get_created_by(assertion),
|
|
@@ -1359,16 +1354,8 @@ class SqlAssertion(_AssertionPublic, _HasSchedule):
|
|
|
1359
1354
|
return self._statement
|
|
1360
1355
|
|
|
1361
1356
|
@property
|
|
1362
|
-
def
|
|
1363
|
-
return self._criteria.
|
|
1364
|
-
|
|
1365
|
-
@property
|
|
1366
|
-
def criteria_change_type(self) -> Optional[Union[SqlAssertionChangeType, str]]:
|
|
1367
|
-
return self._criteria.change_type
|
|
1368
|
-
|
|
1369
|
-
@property
|
|
1370
|
-
def criteria_operator(self) -> Union[SqlAssertionOperator, str]:
|
|
1371
|
-
return self._criteria.operator
|
|
1357
|
+
def criteria_condition(self) -> Union[SqlAssertionCondition, str]:
|
|
1358
|
+
return self._criteria.condition
|
|
1372
1359
|
|
|
1373
1360
|
@property
|
|
1374
1361
|
def criteria_parameters(
|
|
@@ -1398,6 +1385,76 @@ class SqlAssertion(_AssertionPublic, _HasSchedule):
|
|
|
1398
1385
|
f"Assertion {assertion.urn} is not a SQL assertion"
|
|
1399
1386
|
)
|
|
1400
1387
|
|
|
1388
|
+
@staticmethod
|
|
1389
|
+
def _get_condition_from_model_assertion_info(
|
|
1390
|
+
assertion_info: models.SqlAssertionInfoClass,
|
|
1391
|
+
) -> SqlAssertionCondition:
|
|
1392
|
+
"""Convert stored assertion info to condition enum."""
|
|
1393
|
+
# Handle value-based conditions (no change type)
|
|
1394
|
+
if str(assertion_info.type) == str(models.SqlAssertionTypeClass.METRIC):
|
|
1395
|
+
value_conditions = {
|
|
1396
|
+
str(
|
|
1397
|
+
models.AssertionStdOperatorClass.EQUAL_TO
|
|
1398
|
+
): SqlAssertionCondition.IS_EQUAL_TO,
|
|
1399
|
+
str(
|
|
1400
|
+
models.AssertionStdOperatorClass.NOT_EQUAL_TO
|
|
1401
|
+
): SqlAssertionCondition.IS_NOT_EQUAL_TO,
|
|
1402
|
+
str(
|
|
1403
|
+
models.AssertionStdOperatorClass.GREATER_THAN
|
|
1404
|
+
): SqlAssertionCondition.IS_GREATER_THAN,
|
|
1405
|
+
str(
|
|
1406
|
+
models.AssertionStdOperatorClass.LESS_THAN
|
|
1407
|
+
): SqlAssertionCondition.IS_LESS_THAN,
|
|
1408
|
+
str(
|
|
1409
|
+
models.AssertionStdOperatorClass.BETWEEN
|
|
1410
|
+
): SqlAssertionCondition.IS_WITHIN_A_RANGE,
|
|
1411
|
+
}
|
|
1412
|
+
if str(assertion_info.operator) in value_conditions:
|
|
1413
|
+
return value_conditions[str(assertion_info.operator)]
|
|
1414
|
+
|
|
1415
|
+
# Handle growth-based conditions (with change type)
|
|
1416
|
+
elif str(assertion_info.type) == str(
|
|
1417
|
+
models.SqlAssertionTypeClass.METRIC_CHANGE
|
|
1418
|
+
):
|
|
1419
|
+
assert assertion_info.changeType is not None, (
|
|
1420
|
+
"changeType must be present for METRIC_CHANGE assertions"
|
|
1421
|
+
)
|
|
1422
|
+
|
|
1423
|
+
growth_conditions = {
|
|
1424
|
+
(
|
|
1425
|
+
str(models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO),
|
|
1426
|
+
str(models.AssertionValueChangeTypeClass.ABSOLUTE),
|
|
1427
|
+
): SqlAssertionCondition.GROWS_AT_MOST_ABSOLUTE,
|
|
1428
|
+
(
|
|
1429
|
+
str(models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO),
|
|
1430
|
+
str(models.AssertionValueChangeTypeClass.PERCENTAGE),
|
|
1431
|
+
): SqlAssertionCondition.GROWS_AT_MOST_PERCENTAGE,
|
|
1432
|
+
(
|
|
1433
|
+
str(models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO),
|
|
1434
|
+
str(models.AssertionValueChangeTypeClass.ABSOLUTE),
|
|
1435
|
+
): SqlAssertionCondition.GROWS_AT_LEAST_ABSOLUTE,
|
|
1436
|
+
(
|
|
1437
|
+
str(models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO),
|
|
1438
|
+
str(models.AssertionValueChangeTypeClass.PERCENTAGE),
|
|
1439
|
+
): SqlAssertionCondition.GROWS_AT_LEAST_PERCENTAGE,
|
|
1440
|
+
(
|
|
1441
|
+
str(models.AssertionStdOperatorClass.BETWEEN),
|
|
1442
|
+
str(models.AssertionValueChangeTypeClass.ABSOLUTE),
|
|
1443
|
+
): SqlAssertionCondition.GROWS_WITHIN_A_RANGE_ABSOLUTE,
|
|
1444
|
+
(
|
|
1445
|
+
str(models.AssertionStdOperatorClass.BETWEEN),
|
|
1446
|
+
str(models.AssertionValueChangeTypeClass.PERCENTAGE),
|
|
1447
|
+
): SqlAssertionCondition.GROWS_WITHIN_A_RANGE_PERCENTAGE,
|
|
1448
|
+
}
|
|
1449
|
+
|
|
1450
|
+
key = (str(assertion_info.operator), str(assertion_info.changeType))
|
|
1451
|
+
if key in growth_conditions:
|
|
1452
|
+
return growth_conditions[key]
|
|
1453
|
+
|
|
1454
|
+
raise ValueError(
|
|
1455
|
+
f"Unsupported combination: type={assertion_info.type}, operator={assertion_info.operator}, changeType={assertion_info.changeType}"
|
|
1456
|
+
)
|
|
1457
|
+
|
|
1401
1458
|
@staticmethod
|
|
1402
1459
|
def _get_criteria(assertion: Assertion) -> SqlAssertionCriteria:
|
|
1403
1460
|
if assertion.info is None:
|
|
@@ -1422,20 +1479,12 @@ class SqlAssertion(_AssertionPublic, _HasSchedule):
|
|
|
1422
1479
|
f"Assertion {assertion.urn} does not have a valid parameters for the SQL assertion"
|
|
1423
1480
|
)
|
|
1424
1481
|
|
|
1482
|
+
condition = SqlAssertion._get_condition_from_model_assertion_info(
|
|
1483
|
+
assertion.info
|
|
1484
|
+
)
|
|
1485
|
+
|
|
1425
1486
|
return SqlAssertionCriteria(
|
|
1426
|
-
|
|
1427
|
-
if isinstance(assertion.info.type, str)
|
|
1428
|
-
else str(assertion.info.type),
|
|
1429
|
-
change_type=assertion.info.changeType
|
|
1430
|
-
if assertion.info.changeType is None
|
|
1431
|
-
else (
|
|
1432
|
-
assertion.info.changeType
|
|
1433
|
-
if isinstance(assertion.info.changeType, str)
|
|
1434
|
-
else str(assertion.info.changeType)
|
|
1435
|
-
),
|
|
1436
|
-
operator=assertion.info.operator
|
|
1437
|
-
if isinstance(assertion.info.operator, str)
|
|
1438
|
-
else str(assertion.info.operator),
|
|
1487
|
+
condition=condition,
|
|
1439
1488
|
parameters=parameters,
|
|
1440
1489
|
)
|
|
1441
1490
|
else:
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
8
|
+
AssertionMode,
|
|
9
|
+
_AssertionPublic,
|
|
10
|
+
_HasColumnMetricFunctionality,
|
|
11
|
+
_HasSchedule,
|
|
12
|
+
)
|
|
13
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
14
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
15
|
+
DEFAULT_SCHEDULE,
|
|
16
|
+
AssertionIncidentBehavior,
|
|
17
|
+
DetectionMechanism,
|
|
18
|
+
_DetectionMechanismTypes,
|
|
19
|
+
)
|
|
20
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_assertion_input import (
|
|
21
|
+
ColumnMetricAssertionParameters,
|
|
22
|
+
MetricInputType,
|
|
23
|
+
OperatorInputType,
|
|
24
|
+
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
26
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
27
|
+
from datahub.metadata import schema_classes as models
|
|
28
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ColumnMetricAssertion(
|
|
34
|
+
_HasColumnMetricFunctionality,
|
|
35
|
+
_HasSchedule,
|
|
36
|
+
_AssertionPublic,
|
|
37
|
+
):
|
|
38
|
+
"""
|
|
39
|
+
A class that represents a column metric assertion.
|
|
40
|
+
This assertion is used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
|
|
41
|
+
min, max, median, and more. It uses native source types without AI inference.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
def __init__(
|
|
45
|
+
self,
|
|
46
|
+
*,
|
|
47
|
+
urn: AssertionUrn,
|
|
48
|
+
dataset_urn: DatasetUrn,
|
|
49
|
+
column_name: str,
|
|
50
|
+
metric_type: MetricInputType,
|
|
51
|
+
operator: OperatorInputType,
|
|
52
|
+
# Consolidated criteria parameters
|
|
53
|
+
criteria_parameters: Optional[ColumnMetricAssertionParameters] = None,
|
|
54
|
+
# Standard assertion parameters:
|
|
55
|
+
display_name: str,
|
|
56
|
+
mode: AssertionMode,
|
|
57
|
+
schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
|
|
58
|
+
incident_behavior: list[AssertionIncidentBehavior],
|
|
59
|
+
detection_mechanism: Optional[
|
|
60
|
+
_DetectionMechanismTypes
|
|
61
|
+
] = DEFAULT_DETECTION_MECHANISM,
|
|
62
|
+
tags: list[TagUrn],
|
|
63
|
+
created_by: Optional[CorpUserUrn] = None,
|
|
64
|
+
created_at: Union[datetime, None] = None,
|
|
65
|
+
updated_by: Optional[CorpUserUrn] = None,
|
|
66
|
+
updated_at: Optional[datetime] = None,
|
|
67
|
+
):
|
|
68
|
+
"""
|
|
69
|
+
Initialize a column metric assertion.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
urn: The URN of the assertion.
|
|
73
|
+
dataset_urn: The URN of the dataset to monitor.
|
|
74
|
+
display_name: The display name of the assertion.
|
|
75
|
+
mode: The mode of the assertion (active/inactive).
|
|
76
|
+
incident_behavior: The behavior when incidents occur.
|
|
77
|
+
detection_mechanism: The mechanism used to detect changes.
|
|
78
|
+
tags: The tags to apply to the assertion.
|
|
79
|
+
created_by: The URN of the user who created the assertion.
|
|
80
|
+
created_at: The timestamp when the assertion was created.
|
|
81
|
+
updated_by: The URN of the user who last updated the assertion.
|
|
82
|
+
updated_at: The timestamp when the assertion was last updated.
|
|
83
|
+
"""
|
|
84
|
+
_AssertionPublic.__init__(
|
|
85
|
+
self,
|
|
86
|
+
urn=urn,
|
|
87
|
+
dataset_urn=dataset_urn,
|
|
88
|
+
display_name=display_name,
|
|
89
|
+
mode=mode,
|
|
90
|
+
tags=tags,
|
|
91
|
+
incident_behavior=incident_behavior,
|
|
92
|
+
detection_mechanism=detection_mechanism,
|
|
93
|
+
created_by=created_by,
|
|
94
|
+
created_at=created_at,
|
|
95
|
+
updated_by=updated_by,
|
|
96
|
+
updated_at=updated_at,
|
|
97
|
+
)
|
|
98
|
+
_HasSchedule.__init__(
|
|
99
|
+
self,
|
|
100
|
+
schedule=schedule,
|
|
101
|
+
)
|
|
102
|
+
_HasColumnMetricFunctionality.__init__(
|
|
103
|
+
self,
|
|
104
|
+
column_name=column_name,
|
|
105
|
+
metric_type=metric_type,
|
|
106
|
+
operator=operator,
|
|
107
|
+
criteria_parameters=criteria_parameters,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
@classmethod
|
|
111
|
+
def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
|
|
112
|
+
"""
|
|
113
|
+
Create a ColumnMetricAssertion from an Assertion and Monitor entity.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
assertion: The Assertion entity.
|
|
117
|
+
monitor: The Monitor entity.
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
A ColumnMetricAssertion instance.
|
|
121
|
+
"""
|
|
122
|
+
return cls(
|
|
123
|
+
urn=assertion.urn,
|
|
124
|
+
dataset_urn=assertion.dataset,
|
|
125
|
+
column_name=cls._get_column_name(assertion),
|
|
126
|
+
metric_type=cls._get_metric_type(assertion),
|
|
127
|
+
operator=cls._get_operator(assertion),
|
|
128
|
+
criteria_parameters=cls._get_criteria_parameters(assertion),
|
|
129
|
+
display_name=assertion.description or "",
|
|
130
|
+
mode=cls._get_mode(monitor),
|
|
131
|
+
schedule=cls._get_schedule(monitor),
|
|
132
|
+
incident_behavior=cls._get_incident_behavior(assertion),
|
|
133
|
+
detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
|
|
134
|
+
tags=cls._get_tags(assertion),
|
|
135
|
+
created_by=cls._get_created_by(assertion),
|
|
136
|
+
created_at=cls._get_created_at(assertion),
|
|
137
|
+
updated_by=cls._get_updated_by(assertion),
|
|
138
|
+
updated_at=cls._get_updated_at(assertion),
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
@staticmethod
|
|
142
|
+
def _get_detection_mechanism(
|
|
143
|
+
assertion: Assertion,
|
|
144
|
+
monitor: Monitor,
|
|
145
|
+
default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
|
|
146
|
+
) -> Optional[_DetectionMechanismTypes]:
|
|
147
|
+
"""Get the detection mechanism for column metric assertions."""
|
|
148
|
+
parameters = _AssertionPublic._get_validated_detection_context(
|
|
149
|
+
monitor,
|
|
150
|
+
assertion,
|
|
151
|
+
models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
|
|
152
|
+
models.FieldAssertionInfoClass,
|
|
153
|
+
default,
|
|
154
|
+
)
|
|
155
|
+
if parameters is None:
|
|
156
|
+
return default
|
|
157
|
+
if parameters.datasetFieldParameters is None:
|
|
158
|
+
logger.warning(
|
|
159
|
+
f"Monitor does not have datasetFieldParameters, defaulting detection mechanism to {default}"
|
|
160
|
+
)
|
|
161
|
+
return default
|
|
162
|
+
source_type = parameters.datasetFieldParameters.sourceType
|
|
163
|
+
if source_type == models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY:
|
|
164
|
+
additional_filter = _AssertionPublic._get_additional_filter(assertion)
|
|
165
|
+
return DetectionMechanism.ALL_ROWS_QUERY(
|
|
166
|
+
additional_filter=additional_filter
|
|
167
|
+
)
|
|
168
|
+
elif (
|
|
169
|
+
source_type
|
|
170
|
+
== models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY
|
|
171
|
+
):
|
|
172
|
+
if parameters.datasetFieldParameters.changedRowsField is None:
|
|
173
|
+
logger.warning(
|
|
174
|
+
f"Monitor has CHANGED_ROWS_QUERY source type but no changedRowsField, defaulting detection mechanism to {default}"
|
|
175
|
+
)
|
|
176
|
+
return default
|
|
177
|
+
column_name = parameters.datasetFieldParameters.changedRowsField.path
|
|
178
|
+
additional_filter = _AssertionPublic._get_additional_filter(assertion)
|
|
179
|
+
return DetectionMechanism.CHANGED_ROWS_QUERY(
|
|
180
|
+
column_name=column_name, additional_filter=additional_filter
|
|
181
|
+
)
|
|
182
|
+
elif (
|
|
183
|
+
source_type
|
|
184
|
+
== models.DatasetFieldAssertionSourceTypeClass.DATAHUB_DATASET_PROFILE
|
|
185
|
+
):
|
|
186
|
+
return DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
|
|
187
|
+
else:
|
|
188
|
+
logger.warning(
|
|
189
|
+
f"Unsupported DatasetFieldAssertionSourceType {source_type}, defaulting detection mechanism to {default}"
|
|
190
|
+
)
|
|
191
|
+
return default
|
|
@@ -22,13 +22,11 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
|
22
22
|
InferenceSensitivity,
|
|
23
23
|
_DetectionMechanismTypes,
|
|
24
24
|
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
|
|
26
|
+
OperatorType,
|
|
27
|
+
)
|
|
25
28
|
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
26
29
|
MetricInputType,
|
|
27
|
-
OperatorInputType,
|
|
28
|
-
RangeInputType,
|
|
29
|
-
RangeTypeInputType,
|
|
30
|
-
ValueInputType,
|
|
31
|
-
ValueTypeInputType,
|
|
32
30
|
)
|
|
33
31
|
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
34
32
|
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
@@ -47,7 +45,8 @@ class SmartColumnMetricAssertion(
|
|
|
47
45
|
"""
|
|
48
46
|
A class that represents a smart column metric assertion.
|
|
49
47
|
This assertion is used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
|
|
50
|
-
min, max, median, and more. It uses AI to infer the assertion parameters.
|
|
48
|
+
min, max, median, and more. It uses AI to infer the assertion parameters. The operator is automatically set to BETWEEN with
|
|
49
|
+
criteria_parameters of (0, 0) since the actual values will be inferred by AI.
|
|
51
50
|
"""
|
|
52
51
|
|
|
53
52
|
def __init__(
|
|
@@ -57,12 +56,6 @@ class SmartColumnMetricAssertion(
|
|
|
57
56
|
dataset_urn: DatasetUrn,
|
|
58
57
|
column_name: str,
|
|
59
58
|
metric_type: MetricInputType,
|
|
60
|
-
operator: OperatorInputType,
|
|
61
|
-
# Depending on the operator, value, range (and corresponding type) or no parameters are required:
|
|
62
|
-
value: Optional[ValueInputType] = None,
|
|
63
|
-
value_type: Optional[ValueTypeInputType] = None,
|
|
64
|
-
range: Optional[RangeInputType] = None,
|
|
65
|
-
range_type: Optional[RangeTypeInputType] = None,
|
|
66
59
|
# TODO: Evaluate these params:
|
|
67
60
|
display_name: str,
|
|
68
61
|
mode: AssertionMode,
|
|
@@ -127,11 +120,11 @@ class SmartColumnMetricAssertion(
|
|
|
127
120
|
self,
|
|
128
121
|
column_name=column_name,
|
|
129
122
|
metric_type=metric_type,
|
|
130
|
-
operator=operator
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
123
|
+
operator=OperatorType.BETWEEN, # Fixed operator for smart assertions
|
|
124
|
+
criteria_parameters=(
|
|
125
|
+
0,
|
|
126
|
+
0,
|
|
127
|
+
), # Fixed criteria_parameters for smart assertions
|
|
135
128
|
)
|
|
136
129
|
|
|
137
130
|
@classmethod
|
|
@@ -151,11 +144,6 @@ class SmartColumnMetricAssertion(
|
|
|
151
144
|
dataset_urn=assertion.dataset,
|
|
152
145
|
column_name=cls._get_column_name(assertion),
|
|
153
146
|
metric_type=cls._get_metric_type(assertion),
|
|
154
|
-
operator=cls._get_operator(assertion),
|
|
155
|
-
value=cls._get_value(assertion),
|
|
156
|
-
value_type=cls._get_value_type(assertion),
|
|
157
|
-
range=cls._get_range(assertion),
|
|
158
|
-
range_type=cls._get_range_type(assertion),
|
|
159
147
|
display_name=assertion.description or "",
|
|
160
148
|
mode=cls._get_mode(monitor),
|
|
161
149
|
schedule=cls._get_schedule(monitor),
|