acryl-datahub-cloud 0.3.11.1rc8__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
- acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +29 -13
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +2011 -1955
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
- acryl_datahub_cloud/metadata/schema.avsc +25413 -25425
- acryl_datahub_cloud/metadata/schema_classes.py +1316 -791
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +223 -202
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +36 -7
- acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +40 -8
- acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +39 -10
- acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
- acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
- acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
- acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +39 -0
- acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +1467 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
- acryl_datahub_cloud/sdk/assertion/types.py +20 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1648 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +258 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +914 -0
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +272 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +648 -0
- acryl_datahub_cloud/sdk/assertions_client.py +3206 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/sdk/entities/assertion.py +432 -0
- acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
- acryl_datahub_cloud/sdk/entities/subscription.py +84 -0
- acryl_datahub_cloud/sdk/errors.py +34 -0
- acryl_datahub_cloud/sdk/resolver_client.py +39 -0
- acryl_datahub_cloud/sdk/subscription_client.py +714 -0
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/METADATA +50 -45
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/RECORD +91 -58
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/entry_points.txt +1 -0
- acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
- acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
- {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1467 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains the classes that represent assertions. These
|
|
3
|
+
classes are used to provide a user-friendly interface for creating and
|
|
4
|
+
managing assertions.
|
|
5
|
+
|
|
6
|
+
The actual Assertion Entity classes are defined in `metadata-ingestion/src/datahub/sdk`.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import Optional, Union
|
|
14
|
+
|
|
15
|
+
from typing_extensions import Self
|
|
16
|
+
|
|
17
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
18
|
+
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
19
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
20
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
|
|
21
|
+
DEFAULT_SCHEDULE,
|
|
22
|
+
DEFAULT_SENSITIVITY,
|
|
23
|
+
AssertionIncidentBehavior,
|
|
24
|
+
DetectionMechanism,
|
|
25
|
+
ExclusionWindowTypes,
|
|
26
|
+
FixedRangeExclusionWindow,
|
|
27
|
+
InferenceSensitivity,
|
|
28
|
+
TimeWindowSizeInputTypes,
|
|
29
|
+
_DetectionMechanismTypes,
|
|
30
|
+
)
|
|
31
|
+
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
32
|
+
MetricInputType,
|
|
33
|
+
OperatorInputType,
|
|
34
|
+
RangeInputType,
|
|
35
|
+
RangeTypeInputType,
|
|
36
|
+
ValueInputType,
|
|
37
|
+
ValueTypeInputType,
|
|
38
|
+
)
|
|
39
|
+
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
40
|
+
SqlAssertionChangeType,
|
|
41
|
+
SqlAssertionCriteria,
|
|
42
|
+
SqlAssertionOperator,
|
|
43
|
+
SqlAssertionType,
|
|
44
|
+
)
|
|
45
|
+
from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
|
|
46
|
+
VolumeAssertionDefinition,
|
|
47
|
+
_VolumeAssertionDefinitionTypes,
|
|
48
|
+
)
|
|
49
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
50
|
+
from acryl_datahub_cloud.sdk.entities.monitor import (
|
|
51
|
+
Monitor,
|
|
52
|
+
_get_nested_field_for_entity_with_default,
|
|
53
|
+
)
|
|
54
|
+
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
|
|
55
|
+
from datahub.emitter.mce_builder import parse_ts_millis
|
|
56
|
+
from datahub.metadata import schema_classes as models
|
|
57
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
58
|
+
|
|
59
|
+
logger = logging.getLogger(__name__)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class AssertionMode(Enum):
    """
    Mode of an assertion: either ``ACTIVE`` or ``INACTIVE``.
    """

    # The values are modeled after MonitorStatus. The user facing name is
    # AssertionMode so that all naming in this interface relates to assertions.
    ACTIVE = "ACTIVE"
    INACTIVE = "INACTIVE"
    # PASSIVE is deliberately left out: it is not supported in the user facing
    # interface.
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class _HasSchedule:
    """
    Mixin that stores a cron schedule and exposes it read-only.
    """

    def __init__(self, schedule: models.CronScheduleClass) -> None:
        """
        Args:
            schedule: The cron schedule to expose through ``schedule``.
        """
        self._schedule = schedule

    @property
    def schedule(self) -> models.CronScheduleClass:
        """The schedule passed to the constructor."""
        return self._schedule

    @staticmethod
    def _get_schedule(
        monitor: Monitor, default: models.CronScheduleClass = DEFAULT_SCHEDULE
    ) -> models.CronScheduleClass:
        """Read the schedule of the first evaluation spec of a monitor.

        Args:
            monitor: The monitor whose ``info.assertionMonitor.assertions``
                field is inspected.
            default: Returned when there are no evaluation specs or the first
                spec has no schedule.

        Returns:
            The schedule of the first evaluation spec, or ``default``.
        """
        evaluation_specs = _get_nested_field_for_entity_with_default(
            monitor,
            "info.assertionMonitor.assertions",
            [],
        )
        if len(evaluation_specs) > 0:
            # Only the first evaluation spec is consulted.
            first_schedule = evaluation_specs[0].schedule
            if first_schedule is not None:
                return first_schedule
        return default
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class _HasSmartFunctionality:
    """
    Mixin that stores the smart-assertion settings (sensitivity, exclusion
    windows, training data lookback) and reads them back from a monitor.
    """

    def __init__(
        self,
        *,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
    ) -> None:
        """
        Store the smart functionality settings. All arguments are keyword-only.

        Args:
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
        """
        self._training_data_lookback_days = training_data_lookback_days
        self._exclusion_windows = exclusion_windows
        self._sensitivity = sensitivity

    @property
    def sensitivity(self) -> InferenceSensitivity:
        """The sensitivity passed to the constructor."""
        return self._sensitivity

    @property
    def exclusion_windows(self) -> list[ExclusionWindowTypes]:
        """The exclusion windows passed to the constructor."""
        return self._exclusion_windows

    @property
    def training_data_lookback_days(self) -> int:
        """The training data lookback (in days) passed to the constructor."""
        return self._training_data_lookback_days

    @staticmethod
    def _get_sensitivity(monitor: Monitor) -> InferenceSensitivity:
        """Read the sensitivity level from the monitor and parse it.

        Falls back to ``DEFAULT_SENSITIVITY`` when the nested field is not
        available, then hands the value to ``InferenceSensitivity.parse``
        (raw levels 1-3: LOW, 4-6: MEDIUM, 7-10: HIGH).
        """
        return InferenceSensitivity.parse(
            _get_nested_field_for_entity_with_default(
                monitor,
                "info.assertionMonitor.settings.adjustmentSettings.sensitivity.level",
                DEFAULT_SENSITIVITY,
            )
        )

    @staticmethod
    def _get_exclusion_windows(monitor: Monitor) -> list[ExclusionWindowTypes]:
        """Convert the monitor's raw exclusion windows to SDK exclusion windows.

        Returns:
            One ``FixedRangeExclusionWindow`` per fixed-range window that has
            a fixed range. Fixed-range windows without one are skipped with a
            warning.

        Raises:
            SDKNotYetSupportedError: If a window is not of type FIXED_RANGE.
        """
        converted = []
        for window in monitor.exclusion_windows or []:
            if window.type != models.AssertionExclusionWindowTypeClass.FIXED_RANGE:
                raise SDKNotYetSupportedError(
                    f"AssertionExclusionWindowType {window.type}"
                )
            if window.fixedRange is None:
                logger.warning(
                    f"Monitor {monitor.urn} has a fixed range exclusion window with no fixed range, skipping"
                )
                continue
            start = parse_ts_millis(window.fixedRange.startTimeMillis)
            end = parse_ts_millis(window.fixedRange.endTimeMillis)
            converted.append(FixedRangeExclusionWindow(start=start, end=end))
        return converted

    @staticmethod
    def _get_training_data_lookback_days(monitor: Monitor) -> int:
        """Return the monitor's training data lookback in days.

        Falls back to ``ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS``
        when the monitor has no value.
        """
        lookback_days = monitor.training_data_lookback_days
        # Compare against None explicitly: 0 is a real value but is falsy.
        if lookback_days is not None:
            assert isinstance(lookback_days, int)
            return lookback_days
        return ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class _HasColumnMetricFunctionality:
|
|
194
|
+
"""
|
|
195
|
+
Mixin class that provides column metric functionality for assertions.
|
|
196
|
+
"""
|
|
197
|
+
|
|
198
|
+
def __init__(
|
|
199
|
+
self,
|
|
200
|
+
column_name: str,
|
|
201
|
+
metric_type: MetricInputType,
|
|
202
|
+
operator: OperatorInputType,
|
|
203
|
+
value: Optional[ValueInputType] = None,
|
|
204
|
+
value_type: Optional[ValueTypeInputType] = None,
|
|
205
|
+
range: Optional[RangeInputType] = None,
|
|
206
|
+
range_type: Optional[RangeTypeInputType] = None,
|
|
207
|
+
):
|
|
208
|
+
self._column_name = column_name
|
|
209
|
+
self._metric_type = metric_type
|
|
210
|
+
self._operator = operator
|
|
211
|
+
self._value = value
|
|
212
|
+
self._value_type = value_type
|
|
213
|
+
self._range = range
|
|
214
|
+
self._range_type = range_type
|
|
215
|
+
|
|
216
|
+
@property
|
|
217
|
+
def column_name(self) -> str:
|
|
218
|
+
return self._column_name
|
|
219
|
+
|
|
220
|
+
@property
|
|
221
|
+
def metric_type(self) -> MetricInputType:
|
|
222
|
+
return self._metric_type
|
|
223
|
+
|
|
224
|
+
@property
|
|
225
|
+
def operator(self) -> OperatorInputType:
|
|
226
|
+
return self._operator
|
|
227
|
+
|
|
228
|
+
@property
|
|
229
|
+
def value(self) -> Optional[ValueInputType]:
|
|
230
|
+
return self._value
|
|
231
|
+
|
|
232
|
+
@property
|
|
233
|
+
def value_type(self) -> Optional[ValueTypeInputType]:
|
|
234
|
+
return self._value_type
|
|
235
|
+
|
|
236
|
+
@property
|
|
237
|
+
def range(self) -> Optional[RangeInputType]:
|
|
238
|
+
return self._range
|
|
239
|
+
|
|
240
|
+
@property
|
|
241
|
+
def range_type(self) -> Optional[RangeTypeInputType]:
|
|
242
|
+
return self._range_type
|
|
243
|
+
|
|
244
|
+
@staticmethod
|
|
245
|
+
def _get_column_name(assertion: Assertion) -> str:
|
|
246
|
+
column_name = _get_nested_field_for_entity_with_default(
|
|
247
|
+
assertion,
|
|
248
|
+
field_path="info.fieldMetricAssertion.field.path",
|
|
249
|
+
default=None,
|
|
250
|
+
)
|
|
251
|
+
if column_name is None:
|
|
252
|
+
raise SDKUsageError(
|
|
253
|
+
f"Column name is required for column metric assertions. Assertion {assertion.urn} does not have a column name"
|
|
254
|
+
)
|
|
255
|
+
return column_name
|
|
256
|
+
|
|
257
|
+
@staticmethod
|
|
258
|
+
def _get_metric_type(assertion: Assertion) -> MetricInputType:
|
|
259
|
+
metric_type = _get_nested_field_for_entity_with_default(
|
|
260
|
+
assertion,
|
|
261
|
+
field_path="info.fieldMetricAssertion.metric",
|
|
262
|
+
default=None,
|
|
263
|
+
)
|
|
264
|
+
if metric_type is None:
|
|
265
|
+
raise SDKUsageError(
|
|
266
|
+
f"Metric type is required for column metric assertions. Assertion {assertion.urn} does not have a metric type"
|
|
267
|
+
)
|
|
268
|
+
return metric_type
|
|
269
|
+
|
|
270
|
+
@staticmethod
|
|
271
|
+
def _get_operator(assertion: Assertion) -> OperatorInputType:
|
|
272
|
+
operator = _get_nested_field_for_entity_with_default(
|
|
273
|
+
assertion,
|
|
274
|
+
field_path="info.fieldMetricAssertion.operator",
|
|
275
|
+
default=None,
|
|
276
|
+
)
|
|
277
|
+
if operator is None:
|
|
278
|
+
raise SDKUsageError(
|
|
279
|
+
f"Operator is required for column metric assertions. Assertion {assertion.urn} does not have an operator"
|
|
280
|
+
)
|
|
281
|
+
return operator
|
|
282
|
+
|
|
283
|
+
@staticmethod
|
|
284
|
+
def _get_value(assertion: Assertion) -> Optional[ValueInputType]:
|
|
285
|
+
value = _get_nested_field_for_entity_with_default(
|
|
286
|
+
assertion,
|
|
287
|
+
field_path="info.fieldMetricAssertion.parameters.value.value",
|
|
288
|
+
default=None,
|
|
289
|
+
)
|
|
290
|
+
return value
|
|
291
|
+
|
|
292
|
+
@staticmethod
|
|
293
|
+
def _get_value_type(assertion: Assertion) -> Optional[ValueTypeInputType]:
|
|
294
|
+
value_type = _get_nested_field_for_entity_with_default(
|
|
295
|
+
assertion,
|
|
296
|
+
field_path="info.fieldMetricAssertion.parameters.value.type",
|
|
297
|
+
default=None,
|
|
298
|
+
)
|
|
299
|
+
return value_type
|
|
300
|
+
|
|
301
|
+
@staticmethod
|
|
302
|
+
def _get_range(assertion: Assertion) -> Optional[RangeInputType]:
|
|
303
|
+
min_value = _get_nested_field_for_entity_with_default(
|
|
304
|
+
assertion,
|
|
305
|
+
field_path="info.fieldMetricAssertion.parameters.minValue",
|
|
306
|
+
default=None,
|
|
307
|
+
)
|
|
308
|
+
max_value = _get_nested_field_for_entity_with_default(
|
|
309
|
+
assertion,
|
|
310
|
+
field_path="info.fieldMetricAssertion.parameters.maxValue",
|
|
311
|
+
default=None,
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
# If both are None, return None
|
|
315
|
+
if min_value is None and max_value is None:
|
|
316
|
+
return None
|
|
317
|
+
|
|
318
|
+
# Extract the value from the parameter objects if they exist
|
|
319
|
+
if min_value is not None and hasattr(min_value, "value"):
|
|
320
|
+
min_value = min_value.value
|
|
321
|
+
if max_value is not None and hasattr(max_value, "value"):
|
|
322
|
+
max_value = max_value.value
|
|
323
|
+
|
|
324
|
+
return (min_value, max_value)
|
|
325
|
+
|
|
326
|
+
@staticmethod
|
|
327
|
+
def _get_range_type(assertion: Assertion) -> Optional[RangeTypeInputType]:
|
|
328
|
+
min_value_range_type = _get_nested_field_for_entity_with_default(
|
|
329
|
+
assertion,
|
|
330
|
+
field_path="info.fieldMetricAssertion.parameters.minValue.type",
|
|
331
|
+
default=None,
|
|
332
|
+
)
|
|
333
|
+
max_value_range_type = _get_nested_field_for_entity_with_default(
|
|
334
|
+
assertion,
|
|
335
|
+
field_path="info.fieldMetricAssertion.parameters.maxValue.type",
|
|
336
|
+
default=None,
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
# If both are None, return None instead of a tuple of Nones
|
|
340
|
+
if min_value_range_type is None and max_value_range_type is None:
|
|
341
|
+
return None
|
|
342
|
+
|
|
343
|
+
return (min_value_range_type, max_value_range_type)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class _AssertionPublic(ABC):
|
|
347
|
+
"""
|
|
348
|
+
Abstract base class that represents a public facing assertion and contains the common properties of all assertions.
|
|
349
|
+
"""
|
|
350
|
+
|
|
351
|
+
# TODO: have the individual classes self-declare this
|
|
352
|
+
_SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
|
|
353
|
+
models.FreshnessAssertionInfoClass,
|
|
354
|
+
models.VolumeAssertionInfoClass,
|
|
355
|
+
models.FieldAssertionInfoClass,
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
def __init__(
    self,
    *,
    urn: AssertionUrn,
    dataset_urn: DatasetUrn,
    display_name: str,
    mode: AssertionMode,
    tags: list[TagUrn],
    incident_behavior: list[AssertionIncidentBehavior],
    detection_mechanism: Optional[
        _DetectionMechanismTypes
    ] = DEFAULT_DETECTION_MECHANISM,
    created_by: Optional[CorpUserUrn] = None,
    created_at: Union[datetime, None] = None,
    updated_by: Optional[CorpUserUrn] = None,
    updated_at: Optional[datetime] = None,
):
    """
    Store the properties common to all public facing assertions.

    All arguments are keyword-only.

    Args:
        urn: The urn of the assertion.
        dataset_urn: The urn of the dataset that the assertion is for.
        display_name: The display name of the assertion.
        mode: The mode of the assertion (active, inactive).
        tags: The tags of the assertion.
        incident_behavior: The incident behaviors of the assertion.
        detection_mechanism: The detection mechanism of the assertion.
        created_by: The urn of the user that created the assertion.
        created_at: The timestamp of when the assertion was created.
        updated_by: The urn of the user that updated the assertion.
        updated_at: The timestamp of when the assertion was updated.
    """
    # Identity and presentation.
    self._urn = urn
    self._dataset_urn = dataset_urn
    self._display_name = display_name
    self._tags = tags

    # Behavior.
    self._mode = mode
    self._incident_behavior = incident_behavior
    self._detection_mechanism = detection_mechanism

    # Audit information.
    self._created_by, self._created_at = created_by, created_at
    self._updated_by, self._updated_at = updated_by, updated_at
|
|
400
|
+
|
|
401
|
+
@property
def urn(self) -> AssertionUrn:
    """The urn of the assertion, as passed to the constructor."""
    return self._urn
|
|
404
|
+
|
|
405
|
+
@property
def dataset_urn(self) -> DatasetUrn:
    """The urn of the dataset that the assertion is for."""
    return self._dataset_urn
|
|
408
|
+
|
|
409
|
+
@property
def display_name(self) -> str:
    """The display name of the assertion."""
    return self._display_name
|
|
412
|
+
|
|
413
|
+
@property
def mode(self) -> AssertionMode:
    """The mode of the assertion (active, inactive)."""
    return self._mode
|
|
416
|
+
|
|
417
|
+
@property
def incident_behavior(self) -> list[AssertionIncidentBehavior]:
    """The incident behaviors of the assertion, as passed to the constructor."""
    return self._incident_behavior
|
|
420
|
+
|
|
421
|
+
@property
def detection_mechanism(self) -> Optional[_DetectionMechanismTypes]:
    """The detection mechanism of the assertion; may be ``None``."""
    return self._detection_mechanism
|
|
424
|
+
|
|
425
|
+
@property
def created_by(self) -> Optional[CorpUserUrn]:
    """The urn of the user that created the assertion, or ``None`` if not set."""
    return self._created_by
|
|
428
|
+
|
|
429
|
+
@property
def created_at(self) -> Union[datetime, None]:
    """The timestamp of when the assertion was created, or ``None`` if not set."""
    return self._created_at
|
|
432
|
+
|
|
433
|
+
@property
def updated_by(self) -> Optional[CorpUserUrn]:
    """The urn of the user that updated the assertion, or ``None`` if not set."""
    return self._updated_by
|
|
436
|
+
|
|
437
|
+
@property
def updated_at(self) -> Union[datetime, None]:
    """The timestamp of when the assertion was updated, or ``None`` if not set."""
    return self._updated_at
|
|
440
|
+
|
|
441
|
+
@property
def tags(self) -> list[TagUrn]:
    """The tags of the assertion, as passed to the constructor."""
    return self._tags
|
|
444
|
+
|
|
445
|
+
@staticmethod
def _get_incident_behavior(assertion: Assertion) -> list[AssertionIncidentBehavior]:
    """Translate the assertion's actions into SDK incident behaviors.

    Walks the ``on_failure`` actions followed by the ``on_success`` actions.
    A RAISE_INCIDENT action yields ``RAISE_ON_FAIL`` and a RESOLVE_INCIDENT
    action yields ``RESOLVE_ON_PASS``; any other action type is ignored.
    """
    action_types = models.AssertionActionTypeClass
    behaviors = []
    for action in assertion.on_failure + assertion.on_success:
        if action.type == action_types.RAISE_INCIDENT:
            behaviors.append(AssertionIncidentBehavior.RAISE_ON_FAIL)
            continue
        if action.type == action_types.RESOLVE_INCIDENT:
            behaviors.append(AssertionIncidentBehavior.RESOLVE_ON_PASS)
    return behaviors
|
|
455
|
+
|
|
456
|
+
@staticmethod
@abstractmethod
def _get_detection_mechanism(
    assertion: Assertion,
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """Get the detection mechanism from the monitor and assertion.

    Abstract: this base implementation has no logic. This method should be
    implemented by each assertion class to handle its specific detection
    mechanism logic.

    Args:
        assertion: The assertion entity
        monitor: The monitor entity
        default: Default detection mechanism to return if none is found
            (``DEFAULT_DETECTION_MECHANISM`` unless overridden)

    Returns:
        The detection mechanism or default if none is found
    """
    pass
|
|
477
|
+
|
|
478
|
+
@staticmethod
def _has_valid_monitor_info(monitor: Monitor) -> bool:
    """Tell whether the monitor carries at least one assertion evaluation spec.

    Checks, in order, ``info``, ``info.assertionMonitor`` and
    ``info.assertionMonitor.assertions`` (which must be non-empty). The first
    missing field is reported in a warning and ``False`` is returned.
    """
    info = monitor.info
    if info is None:
        field_name = "info"
    elif info.assertionMonitor is None:
        field_name = "assertionMonitor"
    elif (
        info.assertionMonitor.assertions is None
        or len(info.assertionMonitor.assertions) == 0
    ):
        field_name = "assertionMonitor.assertions"
    else:
        return True

    logger.warning(
        f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
    )
    return False
|
|
499
|
+
|
|
500
|
+
@staticmethod
def _get_assertion_parameters(
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[models.AssertionEvaluationParametersClass]:
    """Get the evaluation parameters of the monitor's first assertion spec.

    Callers are expected to have verified the monitor with
    ``_has_valid_monitor_info`` first; the asserts below restate that
    precondition.

    Args:
        monitor: The monitor entity to read from.
        default: Only used in the warning messages, to tell the reader which
            detection mechanism will be used instead.

    Returns:
        ``assertionMonitor.assertions[0].parameters``, or ``None`` (after
        logging a warning) when there is no evaluation spec or the first
        spec has no parameters.
    """
    # We know these are not None from _has_valid_monitor_info check
    assert (
        monitor is not None
        and monitor.info is not None
        and monitor.info.assertionMonitor is not None
    )
    assertion_monitor = monitor.info.assertionMonitor
    assert (
        assertion_monitor is not None and assertion_monitor.assertions is not None
    )
    assertions = assertion_monitor.assertions

    # An empty list passes the asserts above; return None with a warning
    # instead of failing with an IndexError on assertions[0].
    if len(assertions) == 0:
        logger.warning(
            f"Monitor {monitor.urn} does not have any assertionMonitor.assertions, defaulting detection mechanism to {default}"
        )
        return None

    parameters = assertions[0].parameters
    if parameters is None:
        logger.warning(
            f"Monitor {monitor.urn} does not have a assertionMonitor.assertions[0].parameters, defaulting detection mechanism to {default}"
        )
        return None
    return parameters
|
|
524
|
+
|
|
525
|
+
@staticmethod
def _get_created_by(assertion: Assertion) -> Optional[CorpUserUrn]:
    """Return the urn of the user that created the assertion, if recorded.

    The actor is read from ``assertion.source.created``. ``None`` is returned
    (with a warning where the original cause is known) when the assertion has
    no source, the source has no ``created`` stamp, or the source is only a
    source type.
    """
    source = assertion.source
    if source is None:
        logger.warning(f"Assertion {assertion.urn} does not have a source")
        return None

    if not isinstance(source, models.AssertionSourceClass):
        # A bare source type has no audit information attached.
        if isinstance(source, models.AssertionSourceTypeClass):
            logger.warning(
                f"Assertion {assertion.urn} has a source type with no created by"
            )
        return None

    if source.created is None:
        logger.warning(
            f"Assertion {assertion.urn} does not have a created by in the source"
        )
        return None
    return CorpUserUrn.from_string(source.created.actor)
|
|
543
|
+
|
|
544
|
+
@staticmethod
def _get_created_at(assertion: Assertion) -> Union[datetime, None]:
    """Return when the assertion was created, if recorded.

    The time is read from ``assertion.source.created.time`` and converted
    with ``parse_ts_millis``.

    Returns:
        The converted creation time, or ``None`` (after a warning where the
        cause is known) when the assertion has no source, the source has no
        ``created`` stamp, or the source is only a source type.
    """
    if assertion.source is None:
        logger.warning(f"Assertion {assertion.urn} does not have a source")
        return None
    if isinstance(assertion.source, models.AssertionSourceClass):
        if assertion.source.created is None:
            # The messages name the creation timestamp, not the creator:
            # this getter reads `created.time`.
            logger.warning(
                f"Assertion {assertion.urn} does not have a created timestamp in the source"
            )
            return None
        return parse_ts_millis(assertion.source.created.time)
    elif isinstance(assertion.source, models.AssertionSourceTypeClass):
        logger.warning(
            f"Assertion {assertion.urn} has a source type with no created timestamp"
        )
        return None
    return None
|
|
562
|
+
|
|
563
|
+
@staticmethod
def _get_updated_by(assertion: Assertion) -> Optional[CorpUserUrn]:
    """Return the urn of the actor in ``assertion.last_updated``.

    Logs a warning and returns ``None`` when ``last_updated`` is missing.
    """
    last_updated = assertion.last_updated
    if last_updated is not None:
        return CorpUserUrn.from_string(last_updated.actor)
    logger.warning(f"Assertion {assertion.urn} does not have a last updated")
    return None
|
|
569
|
+
|
|
570
|
+
@staticmethod
def _get_updated_at(assertion: Assertion) -> Union[datetime, None]:
    """Return the time in ``assertion.last_updated``, converted with ``parse_ts_millis``.

    Logs a warning and returns ``None`` when ``last_updated`` is missing.
    """
    last_updated = assertion.last_updated
    if last_updated is not None:
        return parse_ts_millis(last_updated.time)
    logger.warning(f"Assertion {assertion.urn} does not have a last updated")
    return None
|
|
576
|
+
|
|
577
|
+
@staticmethod
def _get_tags(assertion: Assertion) -> list[TagUrn]:
    """Return the assertion's tags as ``TagUrn`` objects.

    A missing or empty ``assertion.tags`` yields an empty list.
    """
    tag_associations = assertion.tags or []
    return [
        TagUrn.from_string(association.tag) for association in tag_associations
    ]
|
|
580
|
+
|
|
581
|
+
@staticmethod
def _get_mode(monitor: Monitor) -> AssertionMode:
    """Return the monitor's status mode as an ``AssertionMode``.

    When the monitor has no ``info``, a warning is logged and
    ``AssertionMode.INACTIVE`` is returned.
    """
    info = monitor.info
    if info is not None:
        return AssertionMode(info.status.mode)
    logger.warning(
        f"Monitor {monitor.urn} does not have a info, defaulting status to INACTIVE"
    )
    return AssertionMode.INACTIVE
|
|
589
|
+
|
|
590
|
+
@classmethod
@abstractmethod
def _from_entities(
    cls,
    assertion: Assertion,
    monitor: Monitor,
) -> Self:
    """
    Create an assertion from the assertion and monitor entities.

    Abstract: this base implementation has no logic; subclasses provide it.

    Args:
        assertion: The assertion entity.
        monitor: The monitor entity.

    Returns:
        An instance of the class the method is called on.

    Note: This is a private method since it is intended to be called internally by the client.
    """
    pass
|
|
603
|
+
|
|
604
|
+
@staticmethod
def _get_additional_filter(assertion: Assertion) -> Optional[str]:
    """Return the SQL of the assertion's additional filter, if it has one.

    Returns:
        ``assertion.info.filter.sql``, or ``None`` (after a warning) when the
        assertion has no info, its info is not one of
        ``_SUPPORTED_WITH_FILTER_ASSERTION_TYPES``, or it has no filter.

    Raises:
        SDKNotYetSupportedError: If the filter is not of type SQL.
    """
    info = assertion.info
    if info is None:
        logger.warning(
            f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
        )
        return None

    # `filter` is only read when the info type is one that can carry it.
    has_filter = (
        isinstance(info, _AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES)
        and info.filter is not None
    )
    if not has_filter:
        logger.warning(
            f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
        )
        return None

    if info.filter.type == models.DatasetFilterTypeClass.SQL:
        return info.filter.sql
    raise SDKNotYetSupportedError(f"DatasetFilterType {info.filter.type}")
|
|
628
|
+
|
|
629
|
+
@staticmethod
def _get_field_value_detection_mechanism(
    assertion: Assertion,
    parameters: models.AssertionEvaluationParametersClass,
) -> _DetectionMechanismTypes:
    """Resolve the detection mechanism for field-value based freshness.

    Args:
        assertion: The assertion entity; used to look up the additional filter.
        parameters: Evaluation parameters whose ``datasetFreshnessParameters``
            must already be set.

    Returns:
        A ``LAST_MODIFIED_COLUMN`` or ``HIGH_WATERMARK_COLUMN`` detection
        mechanism built from the field path and the additional filter, or
        ``DEFAULT_DETECTION_MECHANISM`` (after logging a warning) when the
        field or its kind is missing.

    Raises:
        SDKNotYetSupportedError: If the field kind is neither LAST_MODIFIED
            nor HIGH_WATERMARK.
    """
    freshness_parameters = parameters.datasetFreshnessParameters
    # The callers in this module check for a missing datasetFreshnessParameters
    # before dispatching here, so this only guards against misuse.
    assert freshness_parameters is not None

    field = freshness_parameters.field
    if field is None or field.kind is None:
        logger.warning(
            f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
        )
        return DEFAULT_DETECTION_MECHANISM

    column_name = field.path
    # Resolved before the kind check, so filter warnings/errors surface even
    # for an unsupported kind.
    additional_filter = _AssertionPublic._get_additional_filter(assertion)

    kind = field.kind
    if kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
        return DetectionMechanism.LAST_MODIFIED_COLUMN(
            column_name=column_name, additional_filter=additional_filter
        )
    if kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
        return DetectionMechanism.HIGH_WATERMARK_COLUMN(
            column_name=column_name, additional_filter=additional_filter
        )
    raise SDKNotYetSupportedError(f"FreshnessFieldKind {kind}")
|
|
658
|
+
|
|
659
|
+
@staticmethod
def _warn_and_return_default_detection_mechanism(
    monitor: Monitor,
    field_name: str,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[_DetectionMechanismTypes]:
    """Log that a monitor field is missing and hand back the fallback.

    Args:
        monitor: The monitor whose urn is reported in the warning.
        field_name: Name of the missing field, quoted in the warning.
        default: The fallback detection mechanism to report and return.

    Returns:
        ``default``, unchanged.
    """
    message = f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
    logger.warning(message)
    return default
|
|
670
|
+
|
|
671
|
+
@staticmethod
def _check_valid_monitor_info(
    monitor: Monitor,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[models.AssertionEvaluationParametersClass]:
    """Fetch the assertion parameters of a monitor whose info is valid.

    Args:
        monitor: The monitor entity to inspect.
        default: Accepted for signature parity with the other detection
            helpers; not used by this method.

    Returns:
        The assertion parameters if monitor info is valid, None otherwise
        (including when the parameters themselves are missing).
    """
    if _AssertionPublic._has_valid_monitor_info(monitor):
        # The lookup may itself yield None, which is passed through unchanged.
        return _AssertionPublic._get_assertion_parameters(monitor)
    return None
|
|
689
|
+
|
|
690
|
+
@staticmethod
def _get_validated_detection_context(
    monitor: Monitor,
    assertion: Assertion,
    expected_parameters_type: str,
    expected_info_class: type,
    default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
) -> Optional[models.AssertionEvaluationParametersClass]:
    """
    Validate and extract the detection context (parameters) for detection mechanism logic.

    Args:
        monitor: The monitor entity the parameters are read from.
        assertion: The assertion entity whose info type is checked.
        expected_parameters_type: The evaluation-parameters type the caller can handle.
        expected_info_class: The assertion info class the caller can handle.
        default: The fallback detection mechanism; only used in warning messages here.

    Returns:
        The parameters if all checks pass, otherwise None. Callers are
        expected to fall back to ``default`` on None.
    """
    parameters = _AssertionPublic._check_valid_monitor_info(monitor, default)
    if parameters is None:
        return None

    if parameters.type != expected_parameters_type:
        logger.warning(
            f"Expected {expected_parameters_type} parameters type, got {parameters.type}, defaulting detection mechanism to {default}"
        )
        return None

    if assertion.info is None:
        # The missing field belongs to the assertion, so report the assertion
        # urn rather than routing through the monitor-oriented warning helper.
        logger.warning(
            f"Assertion {assertion.urn} does not have an `info` field, defaulting detection mechanism to {default}"
        )
        return None

    if not isinstance(assertion.info, expected_info_class):
        logger.warning(
            f"Expected {expected_info_class.__name__}, got {type(assertion.info).__name__}, defaulting detection mechanism to {default}"
        )
        return None

    return parameters
|
|
721
|
+
|
|
722
|
+
|
|
723
|
+
class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
    """
    A class that represents a smart freshness assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a smart freshness assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            tags: The tags applied to the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Initialize the mixins first
        _HasSchedule.__init__(self, schedule=schedule)
        _HasSmartFunctionality.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )
        # Then initialize the parent class
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a smart freshness assertion from the assertion and monitor entities.

        Note: This is a private method since it is intended to be called internally by the client.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            sensitivity=cls._get_sensitivity(monitor),
            exclusion_windows=cls._get_exclusion_windows(monitor),
            training_data_lookback_days=cls._get_training_data_lookback_days(monitor),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for freshness assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned whenever the monitor/assertion context cannot be
                validated or the freshness parameters are missing.

        Returns:
            The detection mechanism matching the freshness source type, or
            ``default`` when it cannot be determined.

        Raises:
            SDKNotYetSupportedError: For the FILE_METADATA source type and any
                source type not handled below.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
            models.FreshnessAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetFreshnessParameters is None:
            # Report the value that is actually returned, which may differ
            # from DEFAULT_DETECTION_MECHANISM when the caller passes `default`.
            logger.warning(
                f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
            )
            return default
        source_type = parameters.datasetFreshnessParameters.sourceType
        if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
            return DetectionMechanism.AUDIT_LOG
        elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
            return _AssertionPublic._get_field_value_detection_mechanism(
                assertion, parameters
            )
        elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
            return DetectionMechanism.DATAHUB_OPERATION
        elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
            raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
        else:
            raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
    """
    A class that represents a smart volume assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a smart volume assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            tags: The tags applied to the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Initialize the mixins first
        _HasSchedule.__init__(self, schedule=schedule)
        _HasSmartFunctionality.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )
        # Then initialize the parent class
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a smart volume assertion from the assertion and monitor entities.

        Note: This is a private method since it is intended to be called internally by the client.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            sensitivity=cls._get_sensitivity(monitor),
            exclusion_windows=cls._get_exclusion_windows(monitor),
            training_data_lookback_days=cls._get_training_data_lookback_days(monitor),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for volume assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned when the monitor/assertion context cannot be
                validated. When the volume parameters are missing, a ``None``
                default is replaced by ``DEFAULT_DETECTION_MECHANISM``.

        Returns:
            The detection mechanism matching the volume source type, or the
            fallback described above.

        Raises:
            SDKNotYetSupportedError: For any source type not handled below.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            models.VolumeAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetVolumeParameters is None:
            # Resolve the fallback first so the warning names the value that
            # is actually returned.
            fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
            logger.warning(
                f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {fallback}"
            )
            return fallback
        source_type = parameters.datasetVolumeParameters.sourceType
        if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
            additional_filter = _AssertionPublic._get_additional_filter(assertion)
            return DetectionMechanism.QUERY(additional_filter=additional_filter)
        elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.DATASET_PROFILE
        else:
            raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
class VolumeAssertion(_HasSchedule, _AssertionPublic):
    """
    A class that represents a volume assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        definition: _VolumeAssertionDefinitionTypes,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a volume assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            definition: The volume assertion definition (RowCountTotal or RowCountChange).
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        _HasSchedule.__init__(self, schedule=schedule)
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
        self._definition = definition

    @property
    def definition(self) -> _VolumeAssertionDefinitionTypes:
        """The volume assertion definition supplied at construction (read-only)."""
        return self._definition

    @staticmethod
    def _get_volume_definition(
        assertion: Assertion,
    ) -> _VolumeAssertionDefinitionTypes:
        """Get volume assertion definition from a DataHub assertion entity."""
        return VolumeAssertionDefinition.from_assertion(assertion)

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for volume assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned when the monitor/assertion context cannot be
                validated. When the volume parameters are missing, a ``None``
                default is replaced by ``DEFAULT_DETECTION_MECHANISM``.

        Returns:
            The detection mechanism matching the volume source type, or the
            fallback described above.

        Raises:
            SDKNotYetSupportedError: For any source type not handled below.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            models.VolumeAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetVolumeParameters is None:
            # Resolve the fallback first so the warning names the value that
            # is actually returned.
            fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
            logger.warning(
                f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {fallback}"
            )
            return fallback
        source_type = parameters.datasetVolumeParameters.sourceType
        if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
            additional_filter = _AssertionPublic._get_additional_filter(assertion)
            return DetectionMechanism.QUERY(additional_filter=additional_filter)
        elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.DATASET_PROFILE
        else:
            raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a volume assertion from the assertion and monitor entities.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            definition=cls._get_volume_definition(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
class FreshnessAssertion(_HasSchedule, _AssertionPublic):
    """
    A class that represents a freshness assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        freshness_schedule_check_type: Union[
            str, models.FreshnessAssertionScheduleTypeClass
        ],
        lookback_window: Optional[TimeWindowSizeInputTypes],
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a freshness assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            freshness_schedule_check_type: The type of freshness schedule check to be used for the assertion.
            lookback_window: The lookback window to be used for the assertion.
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        _HasSchedule.__init__(self, schedule=schedule)
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
        self._freshness_schedule_check_type = freshness_schedule_check_type
        self._lookback_window = lookback_window

    @property
    def freshness_schedule_check_type(
        self,
    ) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
        """The freshness schedule check type supplied at construction (read-only)."""
        return self._freshness_schedule_check_type

    @property
    def lookback_window(self) -> Optional[TimeWindowSizeInputTypes]:
        """The lookback window supplied at construction (read-only)."""
        return self._lookback_window

    @staticmethod
    def _get_freshness_schedule(
        assertion: Assertion,
    ) -> models.FreshnessAssertionScheduleClass:
        """Return the freshness schedule stored on the assertion info.

        Shared validation for the schedule-derived getters below.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, its info is
                not a freshness assertion info, or the info has no schedule.
        """
        if assertion.info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
            )
        if not isinstance(assertion.info, models.FreshnessAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a freshness assertion"
            )
        if assertion.info.schedule is None:
            raise SDKNotYetSupportedError(
                f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
            )
        return assertion.info.schedule

    @staticmethod
    def _get_freshness_schedule_check_type(
        assertion: Assertion,
    ) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
        """Return the ``type`` of the assertion's freshness schedule.

        Raises:
            SDKNotYetSupportedError: If the assertion has no freshness info or
                no schedule.
        """
        return FreshnessAssertion._get_freshness_schedule(assertion).type

    @staticmethod
    def _get_lookback_window(
        assertion: Assertion,
    ) -> Optional[models.FixedIntervalScheduleClass]:
        """Return the ``fixedInterval`` of the assertion's freshness schedule.

        Raises:
            SDKNotYetSupportedError: If the assertion has no freshness info or
                no schedule.
        """
        return FreshnessAssertion._get_freshness_schedule(assertion).fixedInterval

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for freshness assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned whenever the monitor/assertion context cannot be
                validated or the freshness parameters are missing.

        Returns:
            The detection mechanism matching the freshness source type, or
            ``default`` when it cannot be determined.

        Raises:
            SDKNotYetSupportedError: For the FILE_METADATA source type and any
                source type not handled below.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
            models.FreshnessAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetFreshnessParameters is None:
            # Report the value that is actually returned, which may differ
            # from DEFAULT_DETECTION_MECHANISM when the caller passes `default`.
            logger.warning(
                f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
            )
            return default
        source_type = parameters.datasetFreshnessParameters.sourceType
        if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
            return DetectionMechanism.AUDIT_LOG
        elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
            return _AssertionPublic._get_field_value_detection_mechanism(
                assertion, parameters
            )
        elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
            return DetectionMechanism.DATAHUB_OPERATION
        elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
            raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
        else:
            raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a freshness assertion from the assertion and monitor entities.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            freshness_schedule_check_type=cls._get_freshness_schedule_check_type(
                assertion
            ),
            lookback_window=cls._get_lookback_window(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )
|
|
1294
|
+
|
|
1295
|
+
|
|
1296
|
+
class SqlAssertion(_AssertionPublic, _HasSchedule):
    """
    Public representation of a SQL assertion.

    Combines the common assertion fields (``_AssertionPublic``) and a
    schedule (``_HasSchedule``) with the SQL statement and the criteria the
    assertion was defined with.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        statement: str,
        criteria: SqlAssertionCriteria,
        schedule: models.CronScheduleClass,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a SQL assertion.

        Note: the values are exposed for reading only; they cannot be set on
        the assertion object. To change an assertion, use the `upsert_*`
        method.

        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset the assertion is defined on.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            statement: The SQL statement used by the assertion.
            criteria: The criteria used by the assertion.
            schedule: The schedule of the assertion.
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when
                the assertion fails / passes.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Both bases are initialised explicitly (not through super()) because
        # each one takes its own set of keyword arguments.
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        _HasSchedule.__init__(self, schedule=schedule)

        # Fields specific to SQL assertions, stored after the bases are set up.
        self._statement = statement
        self._criteria = criteria

    @property
    def statement(self) -> str:
        """The SQL statement this assertion was created with."""
        return self._statement

    @property
    def criteria_type(self) -> Union[SqlAssertionType, str]:
        """The ``type`` field of the assertion's criteria."""
        return self._criteria.type

    @property
    def criteria_change_type(self) -> Optional[Union[SqlAssertionChangeType, str]]:
        """The ``change_type`` field of the assertion's criteria."""
        return self._criteria.change_type

    @property
    def criteria_operator(self) -> Union[SqlAssertionOperator, str]:
        """The ``operator`` field of the assertion's criteria."""
        return self._criteria.operator

    @property
    def criteria_parameters(
        self,
    ) -> Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]:
        """The ``parameters`` field of the assertion's criteria."""
        return self._criteria.parameters

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """
        Sql assertions do not have a detection mechanism.

        All arguments are ignored and ``None`` is always returned.
        """
        return None

    @staticmethod
    def _get_statement(assertion: Assertion) -> str:
        """
        Read the SQL statement from the assertion entity.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, or its
                info is not a ``SqlAssertionInfoClass``.
        """
        info = assertion.info
        if info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
            )
        if not isinstance(info, models.SqlAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a SQL assertion"
            )
        return info.statement

    @staticmethod
    def _get_criteria(assertion: Assertion) -> SqlAssertionCriteria:
        """
        Build the ``SqlAssertionCriteria`` from the assertion entity.

        The criteria parameters are either a single float (when the info's
        ``parameters.value`` is set) or a ``(min, max)`` tuple of floats
        (when both ``minValue`` and ``maxValue`` are set). The type, change
        type and operator are passed through as strings.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, its info
                is not a ``SqlAssertionInfoClass``, or neither a single value
                nor a complete min/max pair is present.
        """
        info = assertion.info
        if info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
            )
        if not isinstance(info, models.SqlAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a SQL assertion"
            )

        std_parameters = info.parameters
        criteria_parameters: Union[float, tuple[float, float]]
        if std_parameters.value is not None:
            # A single value takes precedence over a range.
            criteria_parameters = float(std_parameters.value.value)
        elif (
            std_parameters.maxValue is not None
            and std_parameters.minValue is not None
        ):
            # Ranges are always expressed as (min, max).
            criteria_parameters = (
                float(std_parameters.minValue.value),
                float(std_parameters.maxValue.value),
            )
        else:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a valid parameters for the SQL assertion"
            )

        # Values that are already strings are kept as-is; anything else is
        # converted with str(). A missing change type stays None.
        sql_type = info.type
        if not isinstance(sql_type, str):
            sql_type = str(sql_type)

        change_type = info.changeType
        if change_type is not None and not isinstance(change_type, str):
            change_type = str(change_type)

        operator = info.operator
        if not isinstance(operator, str):
            operator = str(operator)

        return SqlAssertionCriteria(
            type=sql_type,
            change_type=change_type,
            operator=operator,
            parameters=criteria_parameters,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Build a SQL assertion object out of its backing entities.

        The constructor arguments are resolved one at a time, in the same
        order as the constructor keywords, so the first helper that fails is
        the one whose error propagates. When resolving the schedule,
        ``DEFAULT_EVERY_SIX_HOURS_SCHEDULE`` is passed as the default.

        Args:
            assertion: The assertion entity to read from.
            monitor: The monitor entity to read from.

        Returns:
            A new instance of ``cls`` populated from the two entities.
        """
        urn = assertion.urn
        dataset_urn = assertion.dataset
        # A missing (or empty) description is presented as an empty display name.
        display_name = assertion.description or ""

        mode = cls._get_mode(monitor)
        statement = cls._get_statement(assertion)
        criteria = cls._get_criteria(assertion)
        schedule = cls._get_schedule(
            monitor, default=DEFAULT_EVERY_SIX_HOURS_SCHEDULE
        )
        tags = cls._get_tags(assertion)
        incident_behavior = cls._get_incident_behavior(assertion)

        created_by = cls._get_created_by(assertion)
        created_at = cls._get_created_at(assertion)
        updated_by = cls._get_updated_by(assertion)
        updated_at = cls._get_updated_at(assertion)

        return cls(
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            statement=statement,
            criteria=criteria,
            schedule=schedule,
            tags=tags,
            incident_behavior=incident_behavior,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
|