acryl-datahub-cloud 0.3.11.1rc8__py3-none-any.whl → 0.3.12rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/_sdk_extras/__init__.py +17 -2
- acryl_datahub_cloud/_sdk_extras/assertion.py +603 -8
- acryl_datahub_cloud/_sdk_extras/assertion_input.py +1074 -0
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +705 -11
- acryl_datahub_cloud/_sdk_extras/entities/__init__.py +0 -0
- acryl_datahub_cloud/_sdk_extras/entities/assertion.py +425 -0
- acryl_datahub_cloud/_sdk_extras/entities/monitor.py +291 -0
- acryl_datahub_cloud/_sdk_extras/entities/subscription.py +84 -0
- acryl_datahub_cloud/_sdk_extras/errors.py +34 -0
- acryl_datahub_cloud/_sdk_extras/resolver_client.py +39 -0
- acryl_datahub_cloud/_sdk_extras/subscription_client.py +565 -0
- acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2023 -2023
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
- acryl_datahub_cloud/metadata/schema.avsc +24889 -25252
- acryl_datahub_cloud/metadata/schema_classes.py +1133 -1008
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +189 -201
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +9 -1
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +27 -0
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +12 -4
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/METADATA +46 -46
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/RECORD +37 -28
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,19 @@
|
|
|
1
|
-
from acryl_datahub_cloud._sdk_extras.assertion import
|
|
1
|
+
from acryl_datahub_cloud._sdk_extras.assertion import SmartFreshnessAssertion
|
|
2
|
+
from acryl_datahub_cloud._sdk_extras.assertion_input import (
|
|
3
|
+
AssertionIncidentBehavior,
|
|
4
|
+
DetectionMechanism,
|
|
5
|
+
FixedRangeExclusionWindow,
|
|
6
|
+
InferenceSensitivity,
|
|
7
|
+
)
|
|
2
8
|
from acryl_datahub_cloud._sdk_extras.assertions_client import AssertionsClient
|
|
9
|
+
from acryl_datahub_cloud._sdk_extras.resolver_client import ResolverClient
|
|
3
10
|
|
|
4
|
-
__all__ = [
|
|
11
|
+
__all__ = [
|
|
12
|
+
"SmartFreshnessAssertion",
|
|
13
|
+
"DetectionMechanism",
|
|
14
|
+
"InferenceSensitivity",
|
|
15
|
+
"FixedRangeExclusionWindow",
|
|
16
|
+
"AssertionIncidentBehavior",
|
|
17
|
+
"AssertionsClient",
|
|
18
|
+
"ResolverClient",
|
|
19
|
+
]
|
|
@@ -1,15 +1,610 @@
|
|
|
1
1
|
"""
|
|
2
|
-
This
|
|
2
|
+
This module contains the classes that represent assertions. These
|
|
3
|
+
classes are used to provide a user-friendly interface for creating and
|
|
4
|
+
managing assertions.
|
|
3
5
|
|
|
4
|
-
The Assertion
|
|
6
|
+
The actual Assertion Entity classes are defined in `metadata-ingestion/src/datahub/sdk`.
|
|
5
7
|
"""
|
|
6
8
|
|
|
7
|
-
|
|
9
|
+
import logging
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import Any, Optional, Union
|
|
8
14
|
|
|
9
|
-
from
|
|
15
|
+
from typing_extensions import Self
|
|
10
16
|
|
|
17
|
+
from acryl_datahub_cloud._sdk_extras.assertion_input import (
|
|
18
|
+
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
19
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
20
|
+
DEFAULT_SENSITIVITY,
|
|
21
|
+
AssertionIncidentBehavior,
|
|
22
|
+
DetectionMechanism,
|
|
23
|
+
ExclusionWindowTypes,
|
|
24
|
+
FixedRangeExclusionWindow,
|
|
25
|
+
InferenceSensitivity,
|
|
26
|
+
_DetectionMechanismTypes,
|
|
27
|
+
)
|
|
28
|
+
from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion
|
|
29
|
+
from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
|
|
30
|
+
from acryl_datahub_cloud._sdk_extras.errors import SDKNotYetSupportedError
|
|
31
|
+
from datahub.emitter.mce_builder import parse_ts_millis
|
|
32
|
+
from datahub.metadata import schema_classes as models
|
|
33
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
34
|
+
from datahub.sdk.entity import Entity
|
|
11
35
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AssertionMode(Enum):
    """
    Operating mode of an assertion (active or inactive).

    Modeled after MonitorStatus, but named AssertionMode in this user facing
    interface so that all naming stays related to assertions.
    """

    ACTIVE = "ACTIVE"
    INACTIVE = "INACTIVE"
    # PASSIVE = "PASSIVE"  # Not supported in the user facing interface.
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _get_nested_field_for_entity_with_default(
    entity: Entity,
    field_path: str,
    default: Any = None,
) -> Any:
    """
    Resolve a dotted attribute path on an entity, falling back to a default.

    Each segment of ``field_path`` is looked up with ``getattr`` on the value
    produced by the previous segment. As soon as a lookup raises
    ``AttributeError`` a warning is logged and ``default`` is returned.

    Args:
        entity: The entity to start the lookup from.
        field_path: Dot-separated attribute names, e.g. ``"info.status.mode"``.
        default: Value returned when the path cannot be fully resolved.

    Returns:
        The value found at the end of the path, or ``default``.
    """
    node: Any = entity
    # Segments resolved so far, prefixed with the entity type name; only used
    # to build the warning message.
    resolved_segments = [entity.entity_type_name()]

    for segment in field_path.split("."):
        try:
            node = getattr(node, segment)
        except AttributeError:
            last_valid_path = ".".join(resolved_segments)
            logger.warning(
                f"{entity.entity_type_name().capitalize()} {entity.urn} does not have an `{last_valid_path}` field, defaulting to {default}"
            )
            return default
        resolved_segments.append(segment)

    return node
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class _HasSmartFunctionality:
    """
    Mixin class that provides smart functionality for assertions.

    The mixin stores the smart-assertion settings (sensitivity, exclusion
    windows, training lookback, incident behavior and detection mechanism),
    exposes them as read-only properties, and offers static helpers that derive
    those settings from ``Monitor`` and ``Assertion`` entities.
    """

    def __init__(
        self,
        *,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
    ) -> None:
        """
        Initialize the smart functionality mixin.

        Args:
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
        """
        self._sensitivity = sensitivity
        self._exclusion_windows = exclusion_windows
        self._training_data_lookback_days = training_data_lookback_days
        self._incident_behavior = incident_behavior
        self._detection_mechanism = detection_mechanism

    @property
    def sensitivity(self) -> InferenceSensitivity:
        return self._sensitivity

    @property
    def exclusion_windows(self) -> list[ExclusionWindowTypes]:
        return self._exclusion_windows

    @property
    def training_data_lookback_days(self) -> int:
        return self._training_data_lookback_days

    @property
    def incident_behavior(self) -> list[AssertionIncidentBehavior]:
        return self._incident_behavior

    @property
    def detection_mechanism(self) -> Optional[_DetectionMechanismTypes]:
        return self._detection_mechanism

    @staticmethod
    def _get_sensitivity(monitor: Monitor) -> InferenceSensitivity:
        """Read the monitor's sensitivity level and convert it to the SDK enum."""
        # 1. Look up the raw level; DEFAULT_SENSITIVITY is used when any
        #    attribute on the path cannot be resolved.
        raw_sensitivity = _get_nested_field_for_entity_with_default(
            monitor,
            "info.assertionMonitor.settings.adjustmentSettings.sensitivity.level",
            DEFAULT_SENSITIVITY,
        )

        # 2. Convert the raw sensitivity to the SDK sensitivity enum (1-3: LOW, 4-6: MEDIUM, 7-10: HIGH)
        return InferenceSensitivity.parse(raw_sensitivity)

    @staticmethod
    def _get_exclusion_windows(monitor: Monitor) -> list[ExclusionWindowTypes]:
        """
        Convert the monitor's raw exclusion windows to SDK exclusion windows.

        Fixed-range windows without a ``fixedRange`` payload are skipped with a
        warning.

        Raises:
            SDKNotYetSupportedError: If a window has a type other than FIXED_RANGE.
        """
        # 1. Check if the monitor has an exclusion windows field
        raw_windows = monitor.exclusion_windows or []

        # 2. Convert the raw exclusion windows to the SDK exclusion windows
        exclusion_windows = []
        for raw_window in raw_windows:
            if raw_window.type != models.AssertionExclusionWindowTypeClass.FIXED_RANGE:
                raise SDKNotYetSupportedError(
                    f"AssertionExclusionWindowType {raw_window.type}"
                )
            if raw_window.fixedRange is None:
                logger.warning(
                    f"Monitor {monitor.urn} has a fixed range exclusion window with no fixed range, skipping"
                )
                continue
            exclusion_windows.append(
                FixedRangeExclusionWindow(
                    start=parse_ts_millis(raw_window.fixedRange.startTimeMillis),
                    end=parse_ts_millis(raw_window.fixedRange.endTimeMillis),
                )
            )
        return exclusion_windows

    @staticmethod
    def _get_training_data_lookback_days(monitor: Monitor) -> int:
        """Return the monitor's training lookback in days, or the module default if unset."""
        retrieved = monitor.training_data_lookback_days
        # Explicitly check for None since retrieved can be 0 which is falsy
        if retrieved is None:
            return ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS
        assert isinstance(retrieved, int)
        return retrieved

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Get the detection mechanism from the monitor and assertion.

        Args:
            assertion: The assertion entity (used for the additional filter).
            monitor: The monitor entity holding the evaluation parameters.
            default: Value returned when the monitor lacks the required fields.

        Returns:
            The resolved detection mechanism, or ``default``.

        Raises:
            SDKNotYetSupportedError: If the evaluation parameters are not of
                type DATASET_FRESHNESS.
        """
        if not _HasSmartFunctionality._has_valid_monitor_info(monitor):
            return default

        # 1. Check if the first monitored assertion has a parameters field
        parameters = _HasSmartFunctionality._get_assertion_parameters(monitor, default)
        if parameters is None:
            logger.warning(
                f"Monitor {monitor.urn} does not have an `parameters` field, defaulting detection mechanism to {default}"
            )
            return default

        # 2. Convert the raw detection mechanism to the SDK detection mechanism
        if (
            parameters.type
            != models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS
        ):
            raise SDKNotYetSupportedError(
                f"AssertionEvaluationParametersType {parameters.type}"
            )
        # TODO: Add support for other detection mechanisms when other assertion types are supported
        return _HasSmartFunctionality._get_freshness_detection_mechanism(
            assertion, parameters, default
        )

    @staticmethod
    def _has_valid_monitor_info(monitor: Monitor) -> bool:
        """Check if monitor has valid info and a non-empty assertion monitor."""

        def _warn_and_return_false(field_name: str) -> bool:
            logger.warning(
                f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
            )
            return False

        if monitor.info is None:
            return _warn_and_return_false("info")
        if monitor.info.assertionMonitor is None:
            return _warn_and_return_false("assertionMonitor")
        # Covers both a missing (None) and an empty assertions list.
        if not monitor.info.assertionMonitor.assertions:
            return _warn_and_return_false("assertionMonitor.assertions")

        return True

    @staticmethod
    def _get_assertion_parameters(
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[models.AssertionEvaluationParametersClass]:
        """
        Get the evaluation parameters of the monitor's first assertion.

        Callers must have verified the monitor with ``_has_valid_monitor_info``
        first. ``default`` is only used in the warning message.

        Returns:
            The parameters of ``assertions[0]``, or ``None`` if they are unset.
        """
        # We know these are not None from _has_valid_monitor_info check
        assert (
            monitor is not None
            and monitor.info is not None
            and monitor.info.assertionMonitor is not None
        )
        assertion_monitor = monitor.info.assertionMonitor
        assert (
            assertion_monitor is not None and assertion_monitor.assertions is not None
        )
        first_assertion = assertion_monitor.assertions[0]

        if first_assertion.parameters is None:
            logger.warning(
                f"Monitor {monitor.urn} does not have a assertionMonitor.assertions[0].parameters, defaulting detection mechanism to {default}"
            )
            return None
        return first_assertion.parameters

    @staticmethod
    def _get_freshness_detection_mechanism(
        assertion: Assertion,
        parameters: models.AssertionEvaluationParametersClass,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Get the detection mechanism for freshness assertions.

        Returns:
            The detection mechanism matching the freshness source type, or
            ``default`` when ``datasetFreshnessParameters`` is missing.

        Raises:
            SDKNotYetSupportedError: For FILE_METADATA and any unrecognized
                source type.
        """
        if parameters.datasetFreshnessParameters is None:
            # Report the value that is actually returned, not the module default.
            logger.warning(
                f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
            )
            return default

        source_type = parameters.datasetFreshnessParameters.sourceType
        if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        if source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
            return DetectionMechanism.AUDIT_LOG
        if source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
            return _HasSmartFunctionality._get_field_value_detection_mechanism(
                assertion, parameters, default
            )
        if source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
            return DetectionMechanism.DATAHUB_OPERATION
        if source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
            raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
        raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")

    @staticmethod
    def _get_field_value_detection_mechanism(
        assertion: Assertion,
        parameters: models.AssertionEvaluationParametersClass,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Get the detection mechanism for field value based freshness.

        Args:
            assertion: The assertion entity, read for its additional SQL filter.
            parameters: Evaluation parameters whose ``datasetFreshnessParameters``
                must not be ``None``.
            default: Value returned when the field info is missing or has no kind.

        Raises:
            SDKNotYetSupportedError: If the field kind is neither LAST_MODIFIED
                nor HIGH_WATERMARK.
        """
        # We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
        assert parameters.datasetFreshnessParameters is not None
        field = parameters.datasetFreshnessParameters.field

        if field is None or field.kind is None:
            logger.warning(
                f"Monitor does not have valid field info, defaulting detection mechanism to {default}"
            )
            return default

        column_name = field.path
        additional_filter = _HasSmartFunctionality._get_additional_filter(assertion)

        if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
            return DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name=column_name, additional_filter=additional_filter
            )
        if field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
            return DetectionMechanism.HIGH_WATERMARK_COLUMN(
                column_name=column_name, additional_filter=additional_filter
            )
        raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")

    @staticmethod
    def _get_additional_filter(assertion: Assertion) -> Optional[str]:
        """
        Get the additional filter SQL from the assertion.

        Returns:
            The SQL of the assertion's filter, or ``None`` when the assertion
            has no info, is not a freshness assertion, or has no filter.

        Raises:
            SDKNotYetSupportedError: If the filter type is not SQL.
        """
        if assertion.info is None:
            logger.warning(
                f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
            )
            return None
        if (
            not isinstance(assertion.info, models.FreshnessAssertionInfoClass)
            or assertion.info.filter is None
        ):
            logger.warning(
                f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
            )
            return None
        if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
            raise SDKNotYetSupportedError(
                f"DatasetFilterType {assertion.info.filter.type}"
            )
        return assertion.info.filter.sql
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
class _AssertionPublic(ABC):
    """
    Abstract base class that represents a public facing assertion and contains the common properties of all assertions.

    Values are exposed through read-only properties. The static ``_get_*``
    helpers extract those values from ``Assertion`` / ``Monitor`` entities and
    are meant to be used by ``from_entities`` implementations.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Optional[datetime] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize the public facing assertion class.

        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            tags: The tags of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        self._urn = urn
        self._dataset_urn = dataset_urn
        self._display_name = display_name
        self._mode = mode
        self._created_by = created_by
        self._created_at = created_at
        self._updated_by = updated_by
        self._updated_at = updated_at
        self._tags = tags

    @property
    def urn(self) -> AssertionUrn:
        return self._urn

    @property
    def dataset_urn(self) -> DatasetUrn:
        return self._dataset_urn

    @property
    def display_name(self) -> str:
        return self._display_name

    @property
    def mode(self) -> AssertionMode:
        return self._mode

    @property
    def created_by(self) -> Optional[CorpUserUrn]:
        return self._created_by

    @property
    def created_at(self) -> Optional[datetime]:
        return self._created_at

    @property
    def updated_by(self) -> Optional[CorpUserUrn]:
        return self._updated_by

    @property
    def updated_at(self) -> Optional[datetime]:
        return self._updated_at

    @property
    def tags(self) -> list[TagUrn]:
        return self._tags

    @staticmethod
    def _get_incident_behavior(assertion: Assertion) -> list[AssertionIncidentBehavior]:
        """
        Map the assertion's on-failure and on-success actions to incident behaviors.

        RAISE_INCIDENT actions map to RAISE_ON_FAIL and RESOLVE_INCIDENT actions
        map to RESOLVE_ON_PASS; actions of any other type are ignored.
        """
        incident_behaviors = []
        for action in assertion.on_failure + assertion.on_success:
            if action.type == models.AssertionActionTypeClass.RAISE_INCIDENT:
                incident_behaviors.append(AssertionIncidentBehavior.RAISE_ON_FAIL)
            elif action.type == models.AssertionActionTypeClass.RESOLVE_INCIDENT:
                incident_behaviors.append(AssertionIncidentBehavior.RESOLVE_ON_PASS)

        return incident_behaviors

    @staticmethod
    def _get_created_stamp(assertion: Assertion) -> Optional[Any]:
        """
        Return the ``created`` stamp of the assertion source, or ``None``.

        Shared by ``_get_created_by`` and ``_get_created_at`` so that both
        apply the same checks and emit the same warnings.
        """
        source = assertion.source
        if source is None:
            logger.warning(f"Assertion {assertion.urn} does not have a source")
            return None
        if isinstance(source, models.AssertionSourceClass):
            if source.created is None:
                logger.warning(
                    f"Assertion {assertion.urn} does not have a created by in the source"
                )
                return None
            return source.created
        if isinstance(source, models.AssertionSourceTypeClass):
            logger.warning(
                f"Assertion {assertion.urn} has a source type with no created by"
            )
        return None

    @staticmethod
    def _get_created_by(assertion: Assertion) -> Optional[CorpUserUrn]:
        """Return the urn of the actor that created the assertion, if recorded."""
        created = _AssertionPublic._get_created_stamp(assertion)
        if created is None:
            return None
        return CorpUserUrn.from_string(created.actor)

    @staticmethod
    def _get_created_at(assertion: Assertion) -> Optional[datetime]:
        """Return when the assertion was created, if recorded."""
        created = _AssertionPublic._get_created_stamp(assertion)
        if created is None:
            return None
        return parse_ts_millis(created.time)

    @staticmethod
    def _get_updated_by(assertion: Assertion) -> Optional[CorpUserUrn]:
        """Return the urn of the actor that last updated the assertion, if recorded."""
        if assertion.last_updated is None:
            logger.warning(f"Assertion {assertion.urn} does not have a last updated")
            return None
        return CorpUserUrn.from_string(assertion.last_updated.actor)

    @staticmethod
    def _get_updated_at(assertion: Assertion) -> Optional[datetime]:
        """Return when the assertion was last updated, if recorded."""
        if assertion.last_updated is None:
            logger.warning(f"Assertion {assertion.urn} does not have a last updated")
            return None
        return parse_ts_millis(assertion.last_updated.time)

    @staticmethod
    def _get_tags(assertion: Assertion) -> list[TagUrn]:
        """Return the assertion's tags as ``TagUrn`` objects (empty list if none)."""
        return [TagUrn.from_string(t.tag) for t in assertion.tags or []]

    @staticmethod
    def _get_mode(monitor: Monitor) -> AssertionMode:
        """
        Return the assertion mode derived from the monitor status.

        Falls back to INACTIVE (with a warning) when the monitor has no info.
        The enum lookup raises ``ValueError`` for a status mode that is not a
        member of ``AssertionMode``.
        """
        if monitor.info is None:
            logger.warning(
                f"Monitor {monitor.urn} does not have a info, defaulting status to INACTIVE"
            )
            return AssertionMode.INACTIVE
        return AssertionMode(monitor.info.status.mode)

    # `classmethod` must be the outer decorator and `abstractmethod` the inner
    # one, so subclasses implement this as an alternate constructor taking `cls`.
    @classmethod
    @abstractmethod
    def from_entities(
        cls,
        assertion: Assertion,
        monitor: Monitor,
    ) -> Self:
        """
        Create an assertion from the assertion and monitor entities.
        """
        pass
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
class SmartFreshnessAssertion(_HasSmartFunctionality, _AssertionPublic):
    """
    A class that represents a smart freshness assertion.

    Combines the common assertion properties of ``_AssertionPublic`` with the
    smart settings of ``_HasSmartFunctionality``.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a smart freshness assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.

        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            tags: The tags applied to the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Both base initializers are invoked explicitly because each accepts
        # only its own keyword arguments. The smart-settings mixin goes first.
        smart_settings = dict(
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
        )
        _HasSmartFunctionality.__init__(self, **smart_settings)

        # Then the common public assertion properties.
        common_properties = dict(
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
        _AssertionPublic.__init__(self, **common_properties)

    @classmethod
    def from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a smart freshness assertion from the assertion and monitor entities.

        An assertion without a description gets an empty display name.
        """
        # Values are resolved in a fixed order so that any warnings logged by
        # the helpers appear in a stable sequence.
        urn = assertion.urn
        dataset_urn = assertion.dataset
        display_name = assertion.description or ""
        mode = cls._get_mode(monitor)
        sensitivity = cls._get_sensitivity(monitor)
        exclusion_windows = cls._get_exclusion_windows(monitor)
        lookback_days = cls._get_training_data_lookback_days(monitor)
        incident_behavior = cls._get_incident_behavior(assertion)
        detection_mechanism = cls._get_detection_mechanism(assertion, monitor)
        created_by = cls._get_created_by(assertion)
        created_at = cls._get_created_at(assertion)
        updated_by = cls._get_updated_by(assertion)
        updated_at = cls._get_updated_at(assertion)
        tags = cls._get_tags(assertion)

        return cls(
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=lookback_days,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
AssertionTypes = Union[
|
|
608
|
+
SmartFreshnessAssertion,
|
|
609
|
+
# TODO: Add other assertion types here as we add them.
|
|
610
|
+
]
|