acryl-datahub-cloud 0.3.12rc5__py3-none-any.whl → 0.3.12rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1934 -1934
- acryl_datahub_cloud/metadata/schema.avsc +23968 -23968
- acryl_datahub_cloud/metadata/schema_classes.py +658 -658
- acryl_datahub_cloud/sdk/__init__.py +6 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +327 -27
- acryl_datahub_cloud/sdk/assertion/types.py +2 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +36 -11
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +1 -2
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +4 -31
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +274 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +630 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1114 -137
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/METADATA +43 -43
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/RECORD +19 -16
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/top_level.txt +0 -0
|
@@ -2,6 +2,7 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
|
2
2
|
FreshnessAssertion,
|
|
3
3
|
SmartFreshnessAssertion,
|
|
4
4
|
SmartVolumeAssertion,
|
|
5
|
+
SqlAssertion,
|
|
5
6
|
)
|
|
6
7
|
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
7
8
|
SmartColumnMetricAssertion,
|
|
@@ -13,6 +14,9 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
|
13
14
|
InferenceSensitivity,
|
|
14
15
|
TimeWindowSize,
|
|
15
16
|
)
|
|
17
|
+
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
18
|
+
SqlAssertionCriteria,
|
|
19
|
+
)
|
|
16
20
|
from acryl_datahub_cloud.sdk.assertions_client import AssertionsClient
|
|
17
21
|
from acryl_datahub_cloud.sdk.resolver_client import ResolverClient
|
|
18
22
|
from acryl_datahub_cloud.sdk.subscription_client import SubscriptionClient
|
|
@@ -30,4 +34,6 @@ __all__ = [
|
|
|
30
34
|
"AssertionsClient",
|
|
31
35
|
"ResolverClient",
|
|
32
36
|
"SubscriptionClient",
|
|
37
|
+
"SqlAssertion",
|
|
38
|
+
"SqlAssertionCriteria",
|
|
33
39
|
]
|
|
@@ -17,6 +17,7 @@ from typing_extensions import Self
|
|
|
17
17
|
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
18
18
|
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
19
19
|
DEFAULT_DETECTION_MECHANISM,
|
|
20
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
|
|
20
21
|
DEFAULT_SCHEDULE,
|
|
21
22
|
DEFAULT_SENSITIVITY,
|
|
22
23
|
AssertionIncidentBehavior,
|
|
@@ -35,6 +36,13 @@ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input
|
|
|
35
36
|
ValueInputType,
|
|
36
37
|
ValueTypeInputType,
|
|
37
38
|
)
|
|
39
|
+
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
40
|
+
SqlAssertionCriteria,
|
|
41
|
+
)
|
|
42
|
+
from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
|
|
43
|
+
VolumeAssertionDefinition,
|
|
44
|
+
_VolumeAssertionDefinitionTypes,
|
|
45
|
+
)
|
|
38
46
|
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
39
47
|
from acryl_datahub_cloud.sdk.entities.monitor import (
|
|
40
48
|
Monitor,
|
|
@@ -73,7 +81,9 @@ class _HasSchedule:
|
|
|
73
81
|
return self._schedule
|
|
74
82
|
|
|
75
83
|
@staticmethod
|
|
76
|
-
def _get_schedule(
|
|
84
|
+
def _get_schedule(
|
|
85
|
+
monitor: Monitor, default: models.CronScheduleClass = DEFAULT_SCHEDULE
|
|
86
|
+
) -> models.CronScheduleClass:
|
|
77
87
|
"""Get the schedule from the monitor."""
|
|
78
88
|
assertion_evaluation_specs = _get_nested_field_for_entity_with_default(
|
|
79
89
|
monitor,
|
|
@@ -81,11 +91,11 @@ class _HasSchedule:
|
|
|
81
91
|
[],
|
|
82
92
|
)
|
|
83
93
|
if len(assertion_evaluation_specs) == 0:
|
|
84
|
-
return
|
|
94
|
+
return default
|
|
85
95
|
assertion_evaluation_spec = assertion_evaluation_specs[0]
|
|
86
96
|
schedule = assertion_evaluation_spec.schedule
|
|
87
97
|
if schedule is None:
|
|
88
|
-
return
|
|
98
|
+
return default
|
|
89
99
|
return schedule
|
|
90
100
|
|
|
91
101
|
|
|
@@ -335,6 +345,7 @@ class _AssertionPublic(ABC):
|
|
|
335
345
|
Abstract base class that represents a public facing assertion and contains the common properties of all assertions.
|
|
336
346
|
"""
|
|
337
347
|
|
|
348
|
+
# TODO: have the individual classes self-declare this
|
|
338
349
|
_SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
|
|
339
350
|
models.FreshnessAssertionInfoClass,
|
|
340
351
|
models.VolumeAssertionInfoClass,
|
|
@@ -974,6 +985,135 @@ class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPubli
|
|
|
974
985
|
raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
|
|
975
986
|
|
|
976
987
|
|
|
988
|
+
class VolumeAssertion(_HasSchedule, _AssertionPublic):
    """
    A class that represents a volume assertion.

    Carries the common public assertion properties, a schedule, and the
    volume assertion definition it was created with.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        definition: _VolumeAssertionDefinitionTypes,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        created_by: Optional[CorpUserUrn] = None,
        created_at: Optional[datetime] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a volume assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            definition: The volume assertion definition (RowCountTotal or RowCountChange).
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Both bases are initialized explicitly; each receives only the
        # arguments it owns.
        _HasSchedule.__init__(self, schedule=schedule)
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
        self._definition = definition

    @property
    def definition(self) -> _VolumeAssertionDefinitionTypes:
        """The volume assertion definition supplied at construction time."""
        return self._definition

    @staticmethod
    def _get_volume_definition(
        assertion: Assertion,
    ) -> _VolumeAssertionDefinitionTypes:
        """Get volume assertion definition from a DataHub assertion entity."""
        return VolumeAssertionDefinition.from_assertion(assertion)

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for volume assertions.

        Args:
            assertion: The assertion entity; also consulted for the additional
                filter when the source type is QUERY.
            monitor: The monitor entity whose evaluation parameters are read.
            default: Value returned when the detection context yields no
                parameters. May be None.

        Returns:
            The detection mechanism matching the monitor's dataset volume
            source type. If the validated context has no parameters, `default`
            is returned as-is. If the parameters lack
            `datasetVolumeParameters`, `default` is returned, or
            `DEFAULT_DETECTION_MECHANISM` when `default` is None.

        Raises:
            SDKNotYetSupportedError: If the source type is not
                INFORMATION_SCHEMA, QUERY, or DATAHUB_DATASET_PROFILE.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            models.VolumeAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default

        volume_parameters = parameters.datasetVolumeParameters
        if volume_parameters is None:
            # Resolve the fallback before logging so the message reports the
            # mechanism that is actually returned (a caller-supplied default
            # takes precedence over the module-level default).
            fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
            logger.warning(
                "Monitor does not have datasetVolumeParameters, "
                "defaulting detection mechanism to %s",
                fallback,
            )
            return fallback

        source_type = volume_parameters.sourceType
        if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
            additional_filter = _AssertionPublic._get_additional_filter(assertion)
            return DetectionMechanism.QUERY(additional_filter=additional_filter)
        elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.DATASET_PROFILE
        else:
            raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a volume assertion from the assertion and monitor entities.

        The display name falls back to an empty string when the assertion has
        no description.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            definition=cls._get_volume_definition(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )
|
|
1115
|
+
|
|
1116
|
+
|
|
977
1117
|
class FreshnessAssertion(_HasSchedule, _AssertionPublic):
|
|
978
1118
|
"""
|
|
979
1119
|
A class that represents a freshness assertion.
|
|
@@ -1088,30 +1228,6 @@ class FreshnessAssertion(_HasSchedule, _AssertionPublic):
|
|
|
1088
1228
|
f"Assertion {assertion.urn} is not a freshness assertion"
|
|
1089
1229
|
)
|
|
1090
1230
|
|
|
1091
|
-
@classmethod
|
|
1092
|
-
def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
|
|
1093
|
-
"""
|
|
1094
|
-
Create a freshness assertion from the assertion and monitor entities.
|
|
1095
|
-
"""
|
|
1096
|
-
return cls(
|
|
1097
|
-
urn=assertion.urn,
|
|
1098
|
-
dataset_urn=assertion.dataset,
|
|
1099
|
-
display_name=assertion.description or "",
|
|
1100
|
-
mode=cls._get_mode(monitor),
|
|
1101
|
-
schedule=cls._get_schedule(monitor),
|
|
1102
|
-
freshness_schedule_check_type=cls._get_freshness_schedule_check_type(
|
|
1103
|
-
assertion
|
|
1104
|
-
),
|
|
1105
|
-
lookback_window=cls._get_lookback_window(assertion),
|
|
1106
|
-
incident_behavior=cls._get_incident_behavior(assertion),
|
|
1107
|
-
detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
|
|
1108
|
-
created_by=cls._get_created_by(assertion),
|
|
1109
|
-
created_at=cls._get_created_at(assertion),
|
|
1110
|
-
updated_by=cls._get_updated_by(assertion),
|
|
1111
|
-
updated_at=cls._get_updated_at(assertion),
|
|
1112
|
-
tags=cls._get_tags(assertion),
|
|
1113
|
-
)
|
|
1114
|
-
|
|
1115
1231
|
@staticmethod
|
|
1116
1232
|
def _get_detection_mechanism(
|
|
1117
1233
|
assertion: Assertion,
|
|
@@ -1148,3 +1264,187 @@ class FreshnessAssertion(_HasSchedule, _AssertionPublic):
|
|
|
1148
1264
|
raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
|
|
1149
1265
|
else:
|
|
1150
1266
|
raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|
|
1267
|
+
|
|
1268
|
+
@classmethod
def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
    """
    Build a freshness assertion from its assertion and monitor entities.

    The display name falls back to an empty string when the assertion has
    no description.
    """
    # Values are resolved one by one, in the same order the constructor
    # arguments are listed, before being handed to the constructor.
    assertion_urn = assertion.urn
    dataset_urn = assertion.dataset
    display_name = assertion.description or ""
    mode = cls._get_mode(monitor)
    schedule = cls._get_schedule(monitor)
    check_type = cls._get_freshness_schedule_check_type(assertion)
    lookback_window = cls._get_lookback_window(assertion)
    incident_behavior = cls._get_incident_behavior(assertion)
    detection_mechanism = cls._get_detection_mechanism(assertion, monitor)
    created_by = cls._get_created_by(assertion)
    created_at = cls._get_created_at(assertion)
    updated_by = cls._get_updated_by(assertion)
    updated_at = cls._get_updated_at(assertion)
    tags = cls._get_tags(assertion)

    return cls(
        urn=assertion_urn,
        dataset_urn=dataset_urn,
        display_name=display_name,
        mode=mode,
        schedule=schedule,
        freshness_schedule_check_type=check_type,
        lookback_window=lookback_window,
        incident_behavior=incident_behavior,
        detection_mechanism=detection_mechanism,
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
        tags=tags,
    )
|
|
1291
|
+
|
|
1292
|
+
|
|
1293
|
+
class SqlAssertion(_AssertionPublic, _HasSchedule):
    """
    A class that represents a SQL assertion.

    Carries the common public assertion properties, a schedule, the SQL
    statement, and the criteria its result is compared against.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        statement: str,
        criteria: SqlAssertionCriteria,
        schedule: models.CronScheduleClass,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Optional[datetime] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a SQL assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            statement: The SQL statement to be used for the assertion.
            criteria: The criteria to be used for the assertion.
            schedule: The schedule of the assertion.
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Initialize both bases explicitly: the common assertion state first,
        # then the schedule. No detection mechanism is passed here.
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        _HasSchedule.__init__(self, schedule=schedule)
        # SQL-specific state.
        self._statement = statement
        self._criteria = criteria

    @property
    def statement(self) -> str:
        """The SQL statement supplied at construction time."""
        return self._statement

    @property
    def criteria(self) -> SqlAssertionCriteria:
        """The criteria supplied at construction time."""
        return self._criteria

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Sql assertions do not have a detection mechanism.

        Always returns None; all arguments are ignored.
        """
        return None

    @staticmethod
    def _get_sql_info(assertion: Assertion) -> models.SqlAssertionInfoClass:
        """Return the assertion's info after checking it is SQL assertion info.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, or the info
                is not a `SqlAssertionInfoClass`.
        """
        info = assertion.info
        if info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
            )
        if not isinstance(info, models.SqlAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a SQL assertion"
            )
        return info

    @staticmethod
    def _coerce_to_str(value: object) -> str:
        """Return `value` unchanged if it is already a str, else `str(value)`.

        The isinstance check is kept on purpose: a str instance is passed
        through as-is rather than being re-rendered via `str()`.
        """
        return value if isinstance(value, str) else str(value)

    @staticmethod
    def _get_statement(assertion: Assertion) -> str:
        """Get the SQL statement from a DataHub assertion entity.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, or is not
                a SQL assertion.
        """
        return SqlAssertion._get_sql_info(assertion).statement

    @staticmethod
    def _get_criteria(assertion: Assertion) -> SqlAssertionCriteria:
        """Get the SQL assertion criteria from a DataHub assertion entity.

        The criteria parameters are a single float when the info's parameters
        carry `value`; otherwise a `(min, max)` tuple of floats when both
        `minValue` and `maxValue` are present.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, is not a
                SQL assertion, or has neither a single value nor a complete
                min/max pair.
        """
        info = SqlAssertion._get_sql_info(assertion)
        raw_parameters = info.parameters

        parameters: Union[float, tuple[float, float]]
        if raw_parameters.value is not None:
            parameters = float(raw_parameters.value.value)
        elif (
            raw_parameters.maxValue is not None
            and raw_parameters.minValue is not None
        ):
            # min and max values are in the order of min, max
            parameters = (
                float(raw_parameters.minValue.value),
                float(raw_parameters.maxValue.value),
            )
        else:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a valid parameters for the SQL assertion"
            )

        change_type = info.changeType
        return SqlAssertionCriteria(
            type=SqlAssertion._coerce_to_str(info.type),
            # A missing change type stays None; anything else is coerced.
            change_type=(
                None
                if change_type is None
                else SqlAssertion._coerce_to_str(change_type)
            ),
            operator=SqlAssertion._coerce_to_str(info.operator),
            parameters=parameters,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a SQL assertion from the assertion and monitor entities.

        The display name falls back to an empty string when the assertion has
        no description, and `DEFAULT_EVERY_SIX_HOURS_SCHEDULE` is passed as
        the default to `_get_schedule`.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            statement=cls._get_statement(assertion),
            criteria=cls._get_criteria(assertion),
            schedule=cls._get_schedule(
                monitor, default=DEFAULT_EVERY_SIX_HOURS_SCHEDULE
            ),
            tags=cls._get_tags(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
        )
|
|
@@ -4,6 +4,7 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
|
4
4
|
FreshnessAssertion,
|
|
5
5
|
SmartFreshnessAssertion,
|
|
6
6
|
SmartVolumeAssertion,
|
|
7
|
+
SqlAssertion,
|
|
7
8
|
)
|
|
8
9
|
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
9
10
|
SmartColumnMetricAssertion,
|
|
@@ -14,5 +15,6 @@ AssertionTypes = Union[
|
|
|
14
15
|
SmartVolumeAssertion,
|
|
15
16
|
FreshnessAssertion,
|
|
16
17
|
SmartColumnMetricAssertion,
|
|
18
|
+
SqlAssertion,
|
|
17
19
|
# TODO: Add other assertion types here as we add them.
|
|
18
20
|
]
|
|
@@ -142,6 +142,40 @@ class _DatasetProfile(AbstractDetectionMechanism):
|
|
|
142
142
|
type: Literal["dataset_profile"] = "dataset_profile"
|
|
143
143
|
|
|
144
144
|
|
|
145
|
+
# Comparison operators whose parameter is one numeric value.
SINGLE_VALUE_NUMERIC_OPERATORS = [
    models.AssertionStdOperatorClass.EQUAL_TO,
    models.AssertionStdOperatorClass.NOT_EQUAL_TO,
    models.AssertionStdOperatorClass.GREATER_THAN,
    models.AssertionStdOperatorClass.LESS_THAN,
    models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
    models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
]

# Every operator that takes exactly one value parameter: the operators listed
# here first, followed by all of the single-value numeric operators.
SINGLE_VALUE_OPERATORS = [
    models.AssertionStdOperatorClass.CONTAIN,
    models.AssertionStdOperatorClass.END_WITH,
    models.AssertionStdOperatorClass.START_WITH,
    models.AssertionStdOperatorClass.REGEX_MATCH,
    models.AssertionStdOperatorClass.IN,
    models.AssertionStdOperatorClass.NOT_IN,
    *SINGLE_VALUE_NUMERIC_OPERATORS,
]

# Operators whose parameter is a numeric range.
RANGE_OPERATORS = [models.AssertionStdOperatorClass.BETWEEN]

# Operators that take no parameter at all.
NO_PARAMETER_OPERATORS = [
    models.AssertionStdOperatorClass.NULL,
    models.AssertionStdOperatorClass.NOT_NULL,
    models.AssertionStdOperatorClass.IS_TRUE,
    models.AssertionStdOperatorClass.IS_FALSE,
]
|
|
177
|
+
|
|
178
|
+
|
|
145
179
|
# Keep these two lists in sync:
|
|
146
180
|
_DETECTION_MECHANISM_CONCRETE_TYPES = (
|
|
147
181
|
_InformationSchema,
|
|
@@ -1096,15 +1130,12 @@ class _AssertionInput(ABC):
|
|
|
1096
1130
|
Returns:
|
|
1097
1131
|
A Monitor entity configured with the assertion input parameters.
|
|
1098
1132
|
"""
|
|
1099
|
-
source_type, field = self._convert_assertion_source_type_and_field()
|
|
1100
1133
|
return Monitor(
|
|
1101
1134
|
id=(self.dataset_urn, assertion_urn),
|
|
1102
1135
|
info=self._create_monitor_info(
|
|
1103
1136
|
assertion_urn=assertion_urn,
|
|
1104
1137
|
status=self._convert_monitor_status(),
|
|
1105
1138
|
schedule=self._convert_schedule(),
|
|
1106
|
-
source_type=source_type,
|
|
1107
|
-
field=field,
|
|
1108
1139
|
),
|
|
1109
1140
|
)
|
|
1110
1141
|
|
|
@@ -1182,8 +1213,6 @@ class _AssertionInput(ABC):
|
|
|
1182
1213
|
assertion_urn: AssertionUrn,
|
|
1183
1214
|
status: models.MonitorStatusClass,
|
|
1184
1215
|
schedule: models.CronScheduleClass,
|
|
1185
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1186
|
-
field: Optional[FieldSpecType],
|
|
1187
1216
|
) -> models.MonitorInfoClass:
|
|
1188
1217
|
"""
|
|
1189
1218
|
Create a MonitorInfoClass with all the necessary components.
|
|
@@ -1191,8 +1220,6 @@ class _AssertionInput(ABC):
|
|
|
1191
1220
|
Args:
|
|
1192
1221
|
status: The monitor status.
|
|
1193
1222
|
schedule: The monitor schedule.
|
|
1194
|
-
source_type: The source type.
|
|
1195
|
-
field: Optional field specification.
|
|
1196
1223
|
Returns:
|
|
1197
1224
|
A MonitorInfoClass configured with all the provided components.
|
|
1198
1225
|
"""
|
|
@@ -1432,12 +1459,11 @@ class _SmartFreshnessAssertionInput(
|
|
|
1432
1459
|
assertion_urn: AssertionUrn,
|
|
1433
1460
|
status: models.MonitorStatusClass,
|
|
1434
1461
|
schedule: models.CronScheduleClass,
|
|
1435
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1436
|
-
field: Optional[FieldSpecType],
|
|
1437
1462
|
) -> models.MonitorInfoClass:
|
|
1438
1463
|
"""
|
|
1439
1464
|
Create a MonitorInfoClass with all the necessary components.
|
|
1440
1465
|
"""
|
|
1466
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
1441
1467
|
return models.MonitorInfoClass(
|
|
1442
1468
|
type=models.MonitorTypeClass.ASSERTION,
|
|
1443
1469
|
status=status,
|
|
@@ -1591,12 +1617,11 @@ class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
|
|
|
1591
1617
|
assertion_urn: AssertionUrn,
|
|
1592
1618
|
status: models.MonitorStatusClass,
|
|
1593
1619
|
schedule: models.CronScheduleClass,
|
|
1594
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1595
|
-
field: Optional[FieldSpecType],
|
|
1596
1620
|
) -> models.MonitorInfoClass:
|
|
1597
1621
|
"""
|
|
1598
1622
|
Create a MonitorInfoClass with all the necessary components.
|
|
1599
1623
|
"""
|
|
1624
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
1600
1625
|
return models.MonitorInfoClass(
|
|
1601
1626
|
type=models.MonitorTypeClass.ASSERTION,
|
|
1602
1627
|
status=status,
|
|
@@ -112,12 +112,11 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
|
|
|
112
112
|
assertion_urn: AssertionUrn,
|
|
113
113
|
status: models.MonitorStatusClass,
|
|
114
114
|
schedule: models.CronScheduleClass,
|
|
115
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
116
|
-
field: Optional[FieldSpecType],
|
|
117
115
|
) -> models.MonitorInfoClass:
|
|
118
116
|
"""
|
|
119
117
|
Create a MonitorInfoClass with all the necessary components.
|
|
120
118
|
"""
|
|
119
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
121
120
|
return models.MonitorInfoClass(
|
|
122
121
|
type=models.MonitorTypeClass.ASSERTION,
|
|
123
122
|
status=status,
|
|
@@ -5,6 +5,9 @@ from typing import Optional, Union
|
|
|
5
5
|
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
6
6
|
DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
|
|
7
7
|
HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
|
|
8
|
+
NO_PARAMETER_OPERATORS,
|
|
9
|
+
RANGE_OPERATORS,
|
|
10
|
+
SINGLE_VALUE_OPERATORS,
|
|
8
11
|
AssertionIncidentBehavior,
|
|
9
12
|
AssertionInfoInputType,
|
|
10
13
|
DetectionMechanismInputTypes,
|
|
@@ -85,35 +88,6 @@ FIELD_VALUES_OPERATOR_CONFIG = {
|
|
|
85
88
|
],
|
|
86
89
|
}
|
|
87
90
|
|
|
88
|
-
# Operators that require a single value parameter
|
|
89
|
-
SINGLE_VALUE_OPERATORS = [
|
|
90
|
-
models.AssertionStdOperatorClass.EQUAL_TO,
|
|
91
|
-
models.AssertionStdOperatorClass.NOT_EQUAL_TO,
|
|
92
|
-
models.AssertionStdOperatorClass.GREATER_THAN,
|
|
93
|
-
models.AssertionStdOperatorClass.LESS_THAN,
|
|
94
|
-
models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
|
|
95
|
-
models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
|
|
96
|
-
models.AssertionStdOperatorClass.CONTAIN,
|
|
97
|
-
models.AssertionStdOperatorClass.END_WITH,
|
|
98
|
-
models.AssertionStdOperatorClass.START_WITH,
|
|
99
|
-
models.AssertionStdOperatorClass.REGEX_MATCH,
|
|
100
|
-
models.AssertionStdOperatorClass.IN,
|
|
101
|
-
models.AssertionStdOperatorClass.NOT_IN,
|
|
102
|
-
]
|
|
103
|
-
|
|
104
|
-
# Operators that require a range parameter
|
|
105
|
-
RANGE_OPERATORS = [
|
|
106
|
-
models.AssertionStdOperatorClass.BETWEEN,
|
|
107
|
-
]
|
|
108
|
-
|
|
109
|
-
# Operators that require no parameters
|
|
110
|
-
NO_PARAMETER_OPERATORS = [
|
|
111
|
-
models.AssertionStdOperatorClass.NULL,
|
|
112
|
-
models.AssertionStdOperatorClass.NOT_NULL,
|
|
113
|
-
models.AssertionStdOperatorClass.IS_TRUE,
|
|
114
|
-
models.AssertionStdOperatorClass.IS_FALSE,
|
|
115
|
-
]
|
|
116
|
-
|
|
117
91
|
# Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
|
|
118
92
|
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
|
|
119
93
|
FIELD_METRIC_TYPE_CONFIG = {
|
|
@@ -442,12 +416,11 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
|
|
|
442
416
|
assertion_urn: AssertionUrn,
|
|
443
417
|
status: models.MonitorStatusClass,
|
|
444
418
|
schedule: models.CronScheduleClass,
|
|
445
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
446
|
-
field: Optional[FieldSpecType],
|
|
447
419
|
) -> models.MonitorInfoClass:
|
|
448
420
|
"""
|
|
449
421
|
Create a MonitorInfoClass with all the necessary components.
|
|
450
422
|
"""
|
|
423
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
451
424
|
return models.MonitorInfoClass(
|
|
452
425
|
type=models.MonitorTypeClass.ASSERTION,
|
|
453
426
|
status=status,
|