acryl-datahub-cloud 0.3.12rc4__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/sdk/__init__.py +11 -1
- acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +565 -166
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +46 -14
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +1 -2
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +12 -43
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +274 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1202 -27
- acryl_datahub_cloud/sdk/entities/assertion.py +4 -0
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc6.dist-info}/METADATA +44 -44
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc6.dist-info}/RECORD +18 -13
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc6.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc6.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc4.dist-info → acryl_datahub_cloud-0.3.12rc6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
8
|
+
AssertionMode,
|
|
9
|
+
_AssertionPublic,
|
|
10
|
+
_HasColumnMetricFunctionality,
|
|
11
|
+
_HasSchedule,
|
|
12
|
+
_HasSmartFunctionality,
|
|
13
|
+
)
|
|
14
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
15
|
+
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
16
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
17
|
+
DEFAULT_SCHEDULE,
|
|
18
|
+
DEFAULT_SENSITIVITY,
|
|
19
|
+
AssertionIncidentBehavior,
|
|
20
|
+
DetectionMechanism,
|
|
21
|
+
ExclusionWindowTypes,
|
|
22
|
+
InferenceSensitivity,
|
|
23
|
+
_DetectionMechanismTypes,
|
|
24
|
+
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
26
|
+
MetricInputType,
|
|
27
|
+
OperatorInputType,
|
|
28
|
+
RangeInputType,
|
|
29
|
+
RangeTypeInputType,
|
|
30
|
+
ValueInputType,
|
|
31
|
+
ValueTypeInputType,
|
|
32
|
+
)
|
|
33
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
34
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
35
|
+
from datahub.metadata import schema_classes as models
|
|
36
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SmartColumnMetricAssertion(
    _HasColumnMetricFunctionality,
    _HasSmartFunctionality,
    _HasSchedule,
    _AssertionPublic,
):
    """
    Public representation of a smart column metric assertion.

    The assertion validates the value of a common field / column metric
    (e.g. an aggregation) such as null count + percentage, min, max, median,
    and more. The assertion parameters are inferred using AI.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        column_name: str,
        metric_type: MetricInputType,
        operator: OperatorInputType,
        # Depending on the operator, value, range (and corresponding type) or no parameters are required:
        value: Optional[ValueInputType] = None,
        value_type: Optional[ValueTypeInputType] = None,
        range: Optional[RangeInputType] = None,
        range_type: Optional[RangeTypeInputType] = None,
        # TODO: Evaluate these params:
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Build a smart column metric assertion from already-resolved values.

        All arguments are keyword-only. Each group of arguments is handed to
        the initializer of the base class that owns it.

        Args:
            urn: The URN of the assertion.
            dataset_urn: The URN of the dataset to monitor.
            column_name: The column the metric is computed on.
            metric_type: The column metric to evaluate.
            operator: The operator applied to the metric.
            value: Single value parameter; only needed for some operators.
            value_type: The type of ``value``.
            range: Range parameter; only needed for some operators.
            range_type: The type of ``range``.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active/inactive).
            schedule: The cron schedule of the assertion; defaults to
                ``DEFAULT_SCHEDULE``.
            sensitivity: The sensitivity of the assertion (low/medium/high).
            exclusion_windows: The exclusion windows to apply to the assertion.
            training_data_lookback_days: The number of days of data to use for training.
            incident_behavior: The behavior when incidents occur.
            detection_mechanism: The mechanism used to detect changes.
            tags: The tags to apply to the assertion.
            created_by: The URN of the user who created the assertion.
            created_at: The timestamp when the assertion was created.
            updated_by: The URN of the user who last updated the assertion.
            updated_at: The timestamp when the assertion was last updated.
        """
        # Common assertion state (identity, display, audit stamps, ...).
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        # Smart (inferred) assertion settings.
        _HasSmartFunctionality.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )
        # Evaluation schedule.
        _HasSchedule.__init__(
            self,
            schedule=schedule,
        )
        # Column metric definition (column, metric, operator and its parameters).
        _HasColumnMetricFunctionality.__init__(
            self,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            value=value,
            value_type=value_type,
            range=range,
            range_type=range_type,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Build a SmartColumnMetricAssertion out of its backing entities.

        Every constructor argument is read from the entities through the
        ``_get_*`` helpers available on the class.

        Args:
            assertion: The Assertion entity.
            monitor: The Monitor entity.

        Returns:
            A SmartColumnMetricAssertion instance.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            column_name=cls._get_column_name(assertion),
            metric_type=cls._get_metric_type(assertion),
            operator=cls._get_operator(assertion),
            value=cls._get_value(assertion),
            value_type=cls._get_value_type(assertion),
            range=cls._get_range(assertion),
            range_type=cls._get_range_type(assertion),
            # A missing description is surfaced as an empty display name.
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            sensitivity=cls._get_sensitivity(monitor),
            exclusion_windows=cls._get_exclusion_windows(monitor),
            training_data_lookback_days=cls._get_training_data_lookback_days(monitor),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            tags=cls._get_tags(assertion),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
        )

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Resolve the detection mechanism of a column metric assertion.

        The monitor's ``datasetFieldParameters.sourceType`` selects the result:
        ``ALL_ROWS_QUERY`` and ``CHANGED_ROWS_QUERY`` map to the matching
        ``DetectionMechanism`` factory (carrying the assertion's additional
        filter), and ``DATAHUB_DATASET_PROFILE`` maps to
        ``DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE``.

        Args:
            assertion: The Assertion entity.
            monitor: The Monitor entity.
            default: Value returned whenever the mechanism cannot be resolved.

        Returns:
            The resolved detection mechanism, or ``default`` when the detection
            context is unavailable, the field parameters are missing, the
            changed-rows field is missing, or the source type is unsupported.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
            models.FieldAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default

        field_parameters = parameters.datasetFieldParameters
        if field_parameters is None:
            logger.warning(
                f"Monitor does not have datasetFieldParameters, defaulting detection mechanism to {default}"
            )
            return default

        source_type = field_parameters.sourceType
        known_source_types = models.DatasetFieldAssertionSourceTypeClass

        if source_type == known_source_types.ALL_ROWS_QUERY:
            return DetectionMechanism.ALL_ROWS_QUERY(
                additional_filter=_AssertionPublic._get_additional_filter(assertion)
            )

        if source_type == known_source_types.CHANGED_ROWS_QUERY:
            changed_rows_field = field_parameters.changedRowsField
            if changed_rows_field is None:
                logger.warning(
                    f"Monitor has CHANGED_ROWS_QUERY source type but no changedRowsField, defaulting detection mechanism to {default}"
                )
                return default
            return DetectionMechanism.CHANGED_ROWS_QUERY(
                column_name=changed_rows_field.path,
                additional_filter=_AssertionPublic._get_additional_filter(assertion),
            )

        if source_type == known_source_types.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE

        logger.warning(
            f"Unsupported DatasetFieldAssertionSourceType {source_type}, defaulting detection mechanism to {default}"
        )
        return default
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
4
|
+
FreshnessAssertion,
|
|
5
|
+
SmartFreshnessAssertion,
|
|
6
|
+
SmartVolumeAssertion,
|
|
7
|
+
SqlAssertion,
|
|
8
|
+
)
|
|
9
|
+
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
10
|
+
SmartColumnMetricAssertion,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# Type alias covering the assertion classes currently supported.
AssertionTypes = Union[
    SmartFreshnessAssertion,
    SmartVolumeAssertion,
    FreshnessAssertion,
    SmartColumnMetricAssertion,
    SqlAssertion,
    # TODO: Add other assertion types here as we add them.
]
|
|
@@ -44,7 +44,7 @@ DEFAULT_NAME_PREFIX = "New Assertion"
|
|
|
44
44
|
DEFAULT_NAME_SUFFIX_LENGTH = 8
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
DEFAULT_HOURLY_SCHEDULE = models.CronScheduleClass(
|
|
47
|
+
DEFAULT_HOURLY_SCHEDULE: models.CronScheduleClass = models.CronScheduleClass(
|
|
48
48
|
cron="0 * * * *", # Every hour, matches the UI default
|
|
49
49
|
timezone=str(
|
|
50
50
|
tzlocal.get_localzone()
|
|
@@ -59,6 +59,13 @@ DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
|
|
|
59
59
|
), # User local timezone, matches the UI default
|
|
60
60
|
)
|
|
61
61
|
|
|
62
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
|
|
63
|
+
cron="0 */6 * * *", # Every 6 hours, matches the UI default
|
|
64
|
+
timezone=str(
|
|
65
|
+
tzlocal.get_localzone()
|
|
66
|
+
), # User local timezone, matches the UI default
|
|
67
|
+
)
|
|
68
|
+
|
|
62
69
|
|
|
63
70
|
class AbstractDetectionMechanism(BaseModel, ABC):
|
|
64
71
|
type: str
|
|
@@ -135,6 +142,40 @@ class _DatasetProfile(AbstractDetectionMechanism):
|
|
|
135
142
|
type: Literal["dataset_profile"] = "dataset_profile"
|
|
136
143
|
|
|
137
144
|
|
|
145
|
+
# Operators that require a single value numeric parameter
|
|
146
|
+
SINGLE_VALUE_NUMERIC_OPERATORS = [
|
|
147
|
+
models.AssertionStdOperatorClass.EQUAL_TO,
|
|
148
|
+
models.AssertionStdOperatorClass.NOT_EQUAL_TO,
|
|
149
|
+
models.AssertionStdOperatorClass.GREATER_THAN,
|
|
150
|
+
models.AssertionStdOperatorClass.LESS_THAN,
|
|
151
|
+
models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
|
|
152
|
+
models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
|
|
153
|
+
]
|
|
154
|
+
|
|
155
|
+
# Operators that require a single value parameter
|
|
156
|
+
SINGLE_VALUE_OPERATORS = [
|
|
157
|
+
models.AssertionStdOperatorClass.CONTAIN,
|
|
158
|
+
models.AssertionStdOperatorClass.END_WITH,
|
|
159
|
+
models.AssertionStdOperatorClass.START_WITH,
|
|
160
|
+
models.AssertionStdOperatorClass.REGEX_MATCH,
|
|
161
|
+
models.AssertionStdOperatorClass.IN,
|
|
162
|
+
models.AssertionStdOperatorClass.NOT_IN,
|
|
163
|
+
] + SINGLE_VALUE_NUMERIC_OPERATORS
|
|
164
|
+
|
|
165
|
+
# Operators that require a numeric range parameter
|
|
166
|
+
RANGE_OPERATORS = [
|
|
167
|
+
models.AssertionStdOperatorClass.BETWEEN,
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
# Operators that require no parameters
|
|
171
|
+
NO_PARAMETER_OPERATORS = [
|
|
172
|
+
models.AssertionStdOperatorClass.NULL,
|
|
173
|
+
models.AssertionStdOperatorClass.NOT_NULL,
|
|
174
|
+
models.AssertionStdOperatorClass.IS_TRUE,
|
|
175
|
+
models.AssertionStdOperatorClass.IS_FALSE,
|
|
176
|
+
]
|
|
177
|
+
|
|
178
|
+
|
|
138
179
|
# Keep these two lists in sync:
|
|
139
180
|
_DETECTION_MECHANISM_CONCRETE_TYPES = (
|
|
140
181
|
_InformationSchema,
|
|
@@ -181,7 +222,7 @@ class DetectionMechanism:
|
|
|
181
222
|
HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
|
|
182
223
|
DATAHUB_OPERATION = _DataHubOperation()
|
|
183
224
|
QUERY = _Query
|
|
184
|
-
ALL_ROWS_QUERY = _AllRowsQuery
|
|
225
|
+
ALL_ROWS_QUERY = _AllRowsQuery
|
|
185
226
|
CHANGED_ROWS_QUERY = _ChangedRowsQuery
|
|
186
227
|
ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
|
|
187
228
|
DATASET_PROFILE = _DatasetProfile()
|
|
@@ -710,7 +751,7 @@ def _try_parse_and_validate_schema_classes_enum(
|
|
|
710
751
|
if isinstance(value, enum_class):
|
|
711
752
|
return value
|
|
712
753
|
assert isinstance(value, str)
|
|
713
|
-
if value not in get_enum_options(enum_class):
|
|
754
|
+
if value.upper() not in get_enum_options(enum_class):
|
|
714
755
|
raise SDKUsageError(
|
|
715
756
|
f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
|
|
716
757
|
)
|
|
@@ -1089,15 +1130,12 @@ class _AssertionInput(ABC):
|
|
|
1089
1130
|
Returns:
|
|
1090
1131
|
A Monitor entity configured with the assertion input parameters.
|
|
1091
1132
|
"""
|
|
1092
|
-
source_type, field = self._convert_assertion_source_type_and_field()
|
|
1093
1133
|
return Monitor(
|
|
1094
1134
|
id=(self.dataset_urn, assertion_urn),
|
|
1095
1135
|
info=self._create_monitor_info(
|
|
1096
1136
|
assertion_urn=assertion_urn,
|
|
1097
1137
|
status=self._convert_monitor_status(),
|
|
1098
1138
|
schedule=self._convert_schedule(),
|
|
1099
|
-
source_type=source_type,
|
|
1100
|
-
field=field,
|
|
1101
1139
|
),
|
|
1102
1140
|
)
|
|
1103
1141
|
|
|
@@ -1175,8 +1213,6 @@ class _AssertionInput(ABC):
|
|
|
1175
1213
|
assertion_urn: AssertionUrn,
|
|
1176
1214
|
status: models.MonitorStatusClass,
|
|
1177
1215
|
schedule: models.CronScheduleClass,
|
|
1178
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1179
|
-
field: Optional[FieldSpecType],
|
|
1180
1216
|
) -> models.MonitorInfoClass:
|
|
1181
1217
|
"""
|
|
1182
1218
|
Create a MonitorInfoClass with all the necessary components.
|
|
@@ -1184,8 +1220,6 @@ class _AssertionInput(ABC):
|
|
|
1184
1220
|
Args:
|
|
1185
1221
|
status: The monitor status.
|
|
1186
1222
|
schedule: The monitor schedule.
|
|
1187
|
-
source_type: The source type.
|
|
1188
|
-
field: Optional field specification.
|
|
1189
1223
|
Returns:
|
|
1190
1224
|
A MonitorInfoClass configured with all the provided components.
|
|
1191
1225
|
"""
|
|
@@ -1425,12 +1459,11 @@ class _SmartFreshnessAssertionInput(
|
|
|
1425
1459
|
assertion_urn: AssertionUrn,
|
|
1426
1460
|
status: models.MonitorStatusClass,
|
|
1427
1461
|
schedule: models.CronScheduleClass,
|
|
1428
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1429
|
-
field: Optional[FieldSpecType],
|
|
1430
1462
|
) -> models.MonitorInfoClass:
|
|
1431
1463
|
"""
|
|
1432
1464
|
Create a MonitorInfoClass with all the necessary components.
|
|
1433
1465
|
"""
|
|
1466
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
1434
1467
|
return models.MonitorInfoClass(
|
|
1435
1468
|
type=models.MonitorTypeClass.ASSERTION,
|
|
1436
1469
|
status=status,
|
|
@@ -1584,12 +1617,11 @@ class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
|
|
|
1584
1617
|
assertion_urn: AssertionUrn,
|
|
1585
1618
|
status: models.MonitorStatusClass,
|
|
1586
1619
|
schedule: models.CronScheduleClass,
|
|
1587
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1588
|
-
field: Optional[FieldSpecType],
|
|
1589
1620
|
) -> models.MonitorInfoClass:
|
|
1590
1621
|
"""
|
|
1591
1622
|
Create a MonitorInfoClass with all the necessary components.
|
|
1592
1623
|
"""
|
|
1624
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
1593
1625
|
return models.MonitorInfoClass(
|
|
1594
1626
|
type=models.MonitorTypeClass.ASSERTION,
|
|
1595
1627
|
status=status,
|
|
@@ -112,12 +112,11 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
|
|
|
112
112
|
assertion_urn: AssertionUrn,
|
|
113
113
|
status: models.MonitorStatusClass,
|
|
114
114
|
schedule: models.CronScheduleClass,
|
|
115
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
116
|
-
field: Optional[FieldSpecType],
|
|
117
115
|
) -> models.MonitorInfoClass:
|
|
118
116
|
"""
|
|
119
117
|
Create a MonitorInfoClass with all the necessary components.
|
|
120
118
|
"""
|
|
119
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
121
120
|
return models.MonitorInfoClass(
|
|
122
121
|
type=models.MonitorTypeClass.ASSERTION,
|
|
123
122
|
status=status,
|
|
@@ -3,11 +3,13 @@ from datetime import datetime
|
|
|
3
3
|
from typing import Optional, Union
|
|
4
4
|
|
|
5
5
|
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
6
|
-
|
|
6
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
|
|
7
7
|
HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
|
|
8
|
+
NO_PARAMETER_OPERATORS,
|
|
9
|
+
RANGE_OPERATORS,
|
|
10
|
+
SINGLE_VALUE_OPERATORS,
|
|
8
11
|
AssertionIncidentBehavior,
|
|
9
12
|
AssertionInfoInputType,
|
|
10
|
-
DetectionMechanism,
|
|
11
13
|
DetectionMechanismInputTypes,
|
|
12
14
|
ExclusionWindowInputTypes,
|
|
13
15
|
FieldSpecType,
|
|
@@ -86,35 +88,6 @@ FIELD_VALUES_OPERATOR_CONFIG = {
|
|
|
86
88
|
],
|
|
87
89
|
}
|
|
88
90
|
|
|
89
|
-
# Operators that require a single value parameter
|
|
90
|
-
SINGLE_VALUE_OPERATORS = [
|
|
91
|
-
models.AssertionStdOperatorClass.EQUAL_TO,
|
|
92
|
-
models.AssertionStdOperatorClass.NOT_EQUAL_TO,
|
|
93
|
-
models.AssertionStdOperatorClass.GREATER_THAN,
|
|
94
|
-
models.AssertionStdOperatorClass.LESS_THAN,
|
|
95
|
-
models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
|
|
96
|
-
models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
|
|
97
|
-
models.AssertionStdOperatorClass.CONTAIN,
|
|
98
|
-
models.AssertionStdOperatorClass.END_WITH,
|
|
99
|
-
models.AssertionStdOperatorClass.START_WITH,
|
|
100
|
-
models.AssertionStdOperatorClass.REGEX_MATCH,
|
|
101
|
-
models.AssertionStdOperatorClass.IN,
|
|
102
|
-
models.AssertionStdOperatorClass.NOT_IN,
|
|
103
|
-
]
|
|
104
|
-
|
|
105
|
-
# Operators that require a range parameter
|
|
106
|
-
RANGE_OPERATORS = [
|
|
107
|
-
models.AssertionStdOperatorClass.BETWEEN,
|
|
108
|
-
]
|
|
109
|
-
|
|
110
|
-
# Operators that require no parameters
|
|
111
|
-
NO_PARAMETER_OPERATORS = [
|
|
112
|
-
models.AssertionStdOperatorClass.NULL,
|
|
113
|
-
models.AssertionStdOperatorClass.NOT_NULL,
|
|
114
|
-
models.AssertionStdOperatorClass.IS_TRUE,
|
|
115
|
-
models.AssertionStdOperatorClass.IS_FALSE,
|
|
116
|
-
]
|
|
117
|
-
|
|
118
91
|
# Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
|
|
119
92
|
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
|
|
120
93
|
FIELD_METRIC_TYPE_CONFIG = {
|
|
@@ -183,8 +156,8 @@ RangeTypeInputType = Union[
|
|
|
183
156
|
RangeTypeParsedType = tuple[ValueTypeInputType, ValueTypeInputType]
|
|
184
157
|
OperatorInputType = Union[str, models.AssertionStdOperatorClass]
|
|
185
158
|
|
|
186
|
-
DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION = (
|
|
187
|
-
|
|
159
|
+
DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION: _AllRowsQuery = (
|
|
160
|
+
_AllRowsQuery()
|
|
188
161
|
)
|
|
189
162
|
|
|
190
163
|
|
|
@@ -443,12 +416,11 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
|
|
|
443
416
|
assertion_urn: AssertionUrn,
|
|
444
417
|
status: models.MonitorStatusClass,
|
|
445
418
|
schedule: models.CronScheduleClass,
|
|
446
|
-
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
447
|
-
field: Optional[FieldSpecType],
|
|
448
419
|
) -> models.MonitorInfoClass:
|
|
449
420
|
"""
|
|
450
421
|
Create a MonitorInfoClass with all the necessary components.
|
|
451
422
|
"""
|
|
423
|
+
source_type, field = self._convert_assertion_source_type_and_field()
|
|
452
424
|
return models.MonitorInfoClass(
|
|
453
425
|
type=models.MonitorTypeClass.ASSERTION,
|
|
454
426
|
status=status,
|
|
@@ -512,7 +484,7 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
|
|
|
512
484
|
A CronScheduleClass with appropriate schedule settings.
|
|
513
485
|
"""
|
|
514
486
|
if self.schedule is None:
|
|
515
|
-
return
|
|
487
|
+
return DEFAULT_EVERY_SIX_HOURS_SCHEDULE
|
|
516
488
|
|
|
517
489
|
return models.CronScheduleClass(
|
|
518
490
|
cron=self.schedule.cron,
|
|
@@ -815,13 +787,10 @@ def _try_parse_and_validate_value_type(
|
|
|
815
787
|
) -> models.AssertionStdParameterTypeClass:
|
|
816
788
|
if value_type is None:
|
|
817
789
|
raise SDKUsageError("Value type is required")
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
f"Invalid value type: {value_type}, valid options are {get_enum_options(models.AssertionStdParameterTypeClass)}"
|
|
823
|
-
)
|
|
824
|
-
return getattr(models.AssertionStdParameterTypeClass, value_type)
|
|
790
|
+
|
|
791
|
+
return _try_parse_and_validate_schema_classes_enum(
|
|
792
|
+
value_type, models.AssertionStdParameterTypeClass
|
|
793
|
+
)
|
|
825
794
|
|
|
826
795
|
|
|
827
796
|
def _try_parse_and_validate_value(
|