acryl-datahub-cloud 0.3.12rc1__py3-none-any.whl → 0.3.12rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +524 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1842 -1786
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
- acryl_datahub_cloud/metadata/schema.avsc +24747 -23945
- acryl_datahub_cloud/metadata/schema_classes.py +1031 -631
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +31 -7
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +27 -6
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +31 -7
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +27 -6
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +25 -0
- acryl_datahub_cloud/{_sdk_extras → sdk}/assertion.py +202 -45
- acryl_datahub_cloud/{_sdk_extras → sdk}/assertion_input.py +344 -83
- acryl_datahub_cloud/{_sdk_extras → sdk}/assertions_client.py +635 -199
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/{_sdk_extras → sdk}/entities/assertion.py +1 -1
- acryl_datahub_cloud/{_sdk_extras → sdk}/subscription_client.py +146 -33
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/METADATA +48 -43
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/RECORD +69 -54
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/entry_points.txt +1 -0
- acryl_datahub_cloud/_sdk_extras/__init__.py +0 -19
- /acryl_datahub_cloud/{_sdk_extras/entities → datahub_forms_notifications}/__init__.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/monitor.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/subscription.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/errors.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/resolver_client.py +0 -0
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/top_level.txt +0 -0
|
@@ -4,20 +4,26 @@ import logging
|
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
5
|
from typing import TYPE_CHECKING, Any, Optional, Union
|
|
6
6
|
|
|
7
|
-
from acryl_datahub_cloud._sdk_extras.assertion import (
|
|
7
|
+
from acryl_datahub_cloud.sdk.assertion import (
|
|
8
|
+
AssertionMode,
|
|
8
9
|
SmartFreshnessAssertion,
|
|
10
|
+
SmartVolumeAssertion,
|
|
11
|
+
_AssertionPublic,
|
|
9
12
|
)
|
|
10
|
-
from acryl_datahub_cloud._sdk_extras.assertion_input import (
|
|
13
|
+
from acryl_datahub_cloud.sdk.assertion_input import (
|
|
11
14
|
AssertionIncidentBehavior,
|
|
12
15
|
DetectionMechanismInputTypes,
|
|
13
16
|
ExclusionWindowInputTypes,
|
|
14
17
|
InferenceSensitivity,
|
|
18
|
+
_AssertionInput,
|
|
15
19
|
_SmartFreshnessAssertionInput,
|
|
20
|
+
_SmartVolumeAssertionInput,
|
|
16
21
|
)
|
|
17
|
-
from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion, TagsInputType
|
|
18
|
-
from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
|
|
19
|
-
from acryl_datahub_cloud._sdk_extras.errors import SDKUsageError
|
|
22
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
|
|
23
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
24
|
+
from acryl_datahub_cloud.sdk.errors import SDKUsageError
|
|
20
25
|
from datahub.errors import ItemNotFoundError
|
|
26
|
+
from datahub.metadata import schema_classes as models
|
|
21
27
|
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, MonitorUrn
|
|
22
28
|
|
|
23
29
|
if TYPE_CHECKING:
|
|
@@ -34,146 +40,13 @@ class AssertionsClient:
|
|
|
34
40
|
self.client = client
|
|
35
41
|
_print_experimental_warning()
|
|
36
42
|
|
|
37
|
-
def
|
|
38
|
-
self,
|
|
39
|
-
*,
|
|
40
|
-
dataset_urn: Union[str, DatasetUrn],
|
|
41
|
-
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
42
|
-
display_name: Optional[str] = None,
|
|
43
|
-
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
44
|
-
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
45
|
-
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
46
|
-
training_data_lookback_days: Optional[int] = None,
|
|
47
|
-
incident_behavior: Optional[
|
|
48
|
-
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
49
|
-
] = None,
|
|
50
|
-
tags: Optional[TagsInputType] = None,
|
|
51
|
-
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
52
|
-
) -> SmartFreshnessAssertion:
|
|
53
|
-
"""Upsert a smart freshness assertion.
|
|
54
|
-
|
|
55
|
-
Note: keyword arguments are required.
|
|
56
|
-
|
|
57
|
-
Upsert is a combination of create and update. If the assertion does not exist, it will be created.
|
|
58
|
-
If it does exist, it will be overwritten with the input values. If the input value is None,
|
|
59
|
-
the existing value will be overridden with a default value.
|
|
60
|
-
|
|
61
|
-
Args:
|
|
62
|
-
dataset_urn: The urn of the dataset to be monitored.
|
|
63
|
-
urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
|
|
64
|
-
will be _created_ in the DataHub instance.
|
|
65
|
-
display_name: The display name of the assertion. If not provided, a random display name
|
|
66
|
-
will be generated.
|
|
67
|
-
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
68
|
-
schema is recommended. Valid values are:
|
|
69
|
-
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
70
|
-
- "audit_log" or DetectionMechanism.AUDIT_LOG
|
|
71
|
-
- {
|
|
72
|
-
"type": "last_modified_column",
|
|
73
|
-
"column_name": "last_modified",
|
|
74
|
-
"additional_filter": "last_modified > '2021-01-01'",
|
|
75
|
-
} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
|
|
76
|
-
additional_filter='last_modified > 2021-01-01')
|
|
77
|
-
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
78
|
-
sensitivity: The sensitivity to be applied to the assertion. Valid values are:
|
|
79
|
-
- "low" or InferenceSensitivity.LOW
|
|
80
|
-
- "medium" or InferenceSensitivity.MEDIUM
|
|
81
|
-
- "high" or InferenceSensitivity.HIGH
|
|
82
|
-
exclusion_windows: The exclusion windows to be applied to the assertion, currently only
|
|
83
|
-
fixed range exclusion windows are supported. Valid values are:
|
|
84
|
-
- from datetime.datetime objects: {
|
|
85
|
-
"start": "datetime(2025, 1, 1, 0, 0, 0)",
|
|
86
|
-
"end": "datetime(2025, 1, 2, 0, 0, 0)",
|
|
87
|
-
}
|
|
88
|
-
- from string datetimes: {
|
|
89
|
-
"start": "2025-01-01T00:00:00",
|
|
90
|
-
"end": "2025-01-02T00:00:00",
|
|
91
|
-
}
|
|
92
|
-
- from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
|
|
93
|
-
start=datetime(2025, 1, 1, 0, 0, 0),
|
|
94
|
-
end=datetime(2025, 1, 2, 0, 0, 0)
|
|
95
|
-
)
|
|
96
|
-
training_data_lookback_days: The training data lookback days to be applied to the
|
|
97
|
-
assertion as an integer.
|
|
98
|
-
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
99
|
-
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
100
|
-
- "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
|
|
101
|
-
tags: The tags to be applied to the assertion. Valid values are:
|
|
102
|
-
- a list of strings (strings will be converted to TagUrn objects)
|
|
103
|
-
- a list of TagUrn objects
|
|
104
|
-
- a list of TagAssociationClass objects
|
|
105
|
-
updated_by: Optional urn of the user who updated the assertion. The format is
|
|
106
|
-
"urn:li:corpuser:<username>", which you can find on the Users & Groups page.
|
|
107
|
-
The default is the datahub system user.
|
|
108
|
-
TODO: Retrieve the SDK user as the default instead of the datahub system user.
|
|
109
|
-
|
|
110
|
-
Returns:
|
|
111
|
-
SmartFreshnessAssertion: The created or updated assertion.
|
|
112
|
-
"""
|
|
113
|
-
_print_experimental_warning()
|
|
114
|
-
now_utc = datetime.now(timezone.utc)
|
|
115
|
-
|
|
116
|
-
if updated_by is None:
|
|
117
|
-
logger.warning(
|
|
118
|
-
f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
119
|
-
)
|
|
120
|
-
updated_by = DEFAULT_CREATED_BY
|
|
121
|
-
|
|
122
|
-
# 1. If urn is not set, create a new assertion
|
|
123
|
-
if urn is None:
|
|
124
|
-
logger.info("URN is not set, creating a new assertion")
|
|
125
|
-
return self.create_smart_freshness_assertion(
|
|
126
|
-
dataset_urn=dataset_urn,
|
|
127
|
-
display_name=display_name,
|
|
128
|
-
detection_mechanism=detection_mechanism,
|
|
129
|
-
sensitivity=sensitivity,
|
|
130
|
-
exclusion_windows=exclusion_windows,
|
|
131
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
132
|
-
incident_behavior=incident_behavior,
|
|
133
|
-
tags=tags,
|
|
134
|
-
created_by=updated_by,
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
# 2. If urn is set, first validate the input:
|
|
138
|
-
assertion_input = _SmartFreshnessAssertionInput(
|
|
139
|
-
urn=urn,
|
|
140
|
-
entity_client=self.client.entities,
|
|
141
|
-
dataset_urn=dataset_urn,
|
|
142
|
-
display_name=display_name,
|
|
143
|
-
detection_mechanism=detection_mechanism,
|
|
144
|
-
sensitivity=sensitivity,
|
|
145
|
-
exclusion_windows=exclusion_windows,
|
|
146
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
147
|
-
incident_behavior=incident_behavior,
|
|
148
|
-
tags=tags,
|
|
149
|
-
created_by=updated_by, # This will be overridden by the actual created_by
|
|
150
|
-
created_at=now_utc, # This will be overridden by the actual created_at
|
|
151
|
-
updated_by=updated_by,
|
|
152
|
-
updated_at=now_utc,
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
# 3. Upsert the assertion and monitor entities:
|
|
156
|
-
assertion_entity, monitor_entity = (
|
|
157
|
-
assertion_input.to_assertion_and_monitor_entities()
|
|
158
|
-
)
|
|
159
|
-
# If assertion upsert fails, we won't try to upsert the monitor
|
|
160
|
-
self.client.entities.upsert(assertion_entity)
|
|
161
|
-
# TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
162
|
-
# try:
|
|
163
|
-
self.client.entities.upsert(monitor_entity)
|
|
164
|
-
# except Exception as e:
|
|
165
|
-
# logger.error(f"Error upserting monitor: {e}")
|
|
166
|
-
# self.client.entities.delete(assertion_entity)
|
|
167
|
-
# raise e
|
|
168
|
-
|
|
169
|
-
return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
|
|
170
|
-
|
|
171
|
-
def _upsert_and_merge_smart_freshness_assertion(
|
|
43
|
+
def sync_smart_freshness_assertion(
|
|
172
44
|
self,
|
|
173
45
|
*,
|
|
174
46
|
dataset_urn: Union[str, DatasetUrn],
|
|
175
47
|
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
176
48
|
display_name: Optional[str] = None,
|
|
49
|
+
enabled: Optional[bool] = None,
|
|
177
50
|
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
178
51
|
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
179
52
|
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
@@ -194,8 +67,9 @@ class AssertionsClient:
|
|
|
194
67
|
will be preserved. If the input value can be un-set e.g. by passing an empty list or
|
|
195
68
|
empty string.
|
|
196
69
|
|
|
197
|
-
|
|
198
|
-
|
|
70
|
+
Schedule behavior:
|
|
71
|
+
- Create case: Uses default hourly schedule ("0 * * * *")
|
|
72
|
+
- Update case: Preserves existing schedule from backend (not modifiable)
|
|
199
73
|
|
|
200
74
|
Args:
|
|
201
75
|
dataset_urn: The urn of the dataset to be monitored.
|
|
@@ -203,6 +77,8 @@ class AssertionsClient:
|
|
|
203
77
|
assertion will be _created_ in the DataHub instance.
|
|
204
78
|
display_name: The display name of the assertion. If not provided, a random display
|
|
205
79
|
name will be generated.
|
|
80
|
+
enabled: Whether the assertion is enabled. If not provided, the existing value
|
|
81
|
+
will be preserved.
|
|
206
82
|
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
207
83
|
schema is recommended. Valid values are:
|
|
208
84
|
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
@@ -261,9 +137,10 @@ class AssertionsClient:
|
|
|
261
137
|
# 1. If urn is not set, create a new assertion
|
|
262
138
|
if urn is None:
|
|
263
139
|
logger.info("URN is not set, creating a new assertion")
|
|
264
|
-
return self.create_smart_freshness_assertion(
|
|
140
|
+
return self._create_smart_freshness_assertion(
|
|
265
141
|
dataset_urn=dataset_urn,
|
|
266
142
|
display_name=display_name,
|
|
143
|
+
enabled=enabled if enabled is not None else True,
|
|
267
144
|
detection_mechanism=detection_mechanism,
|
|
268
145
|
sensitivity=sensitivity,
|
|
269
146
|
exclusion_windows=exclusion_windows,
|
|
@@ -294,11 +171,12 @@ class AssertionsClient:
|
|
|
294
171
|
# 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
|
|
295
172
|
# if the assertion does not exist:
|
|
296
173
|
merged_assertion_input_or_created_assertion = (
|
|
297
|
-
self.
|
|
174
|
+
self._retrieve_and_merge_freshness_assertion_and_monitor(
|
|
298
175
|
assertion_input=assertion_input,
|
|
299
176
|
dataset_urn=dataset_urn,
|
|
300
177
|
urn=urn,
|
|
301
178
|
display_name=display_name,
|
|
179
|
+
enabled=enabled,
|
|
302
180
|
detection_mechanism=detection_mechanism,
|
|
303
181
|
sensitivity=sensitivity,
|
|
304
182
|
exclusion_windows=exclusion_windows,
|
|
@@ -311,9 +189,11 @@ class AssertionsClient:
|
|
|
311
189
|
)
|
|
312
190
|
|
|
313
191
|
# Return early if we created a new assertion in the merge:
|
|
314
|
-
if isinstance(
|
|
315
|
-
|
|
316
|
-
|
|
192
|
+
if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
|
|
193
|
+
# We know this is the correct type because we passed the assertion_class parameter
|
|
194
|
+
assert isinstance(
|
|
195
|
+
merged_assertion_input_or_created_assertion, SmartFreshnessAssertion
|
|
196
|
+
)
|
|
317
197
|
return merged_assertion_input_or_created_assertion
|
|
318
198
|
|
|
319
199
|
# 4. Upsert the assertion and monitor entities:
|
|
@@ -330,14 +210,15 @@ class AssertionsClient:
|
|
|
330
210
|
# self.client.entities.delete(assertion_entity)
|
|
331
211
|
# raise e
|
|
332
212
|
|
|
333
|
-
return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
|
|
213
|
+
return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
|
|
334
214
|
|
|
335
|
-
def
|
|
215
|
+
def _retrieve_and_merge_freshness_assertion_and_monitor(
|
|
336
216
|
self,
|
|
337
217
|
assertion_input: _SmartFreshnessAssertionInput,
|
|
338
218
|
dataset_urn: Union[str, DatasetUrn],
|
|
339
219
|
urn: Union[str, AssertionUrn],
|
|
340
220
|
display_name: Optional[str],
|
|
221
|
+
enabled: Optional[bool],
|
|
341
222
|
detection_mechanism: DetectionMechanismInputTypes,
|
|
342
223
|
sensitivity: Optional[Union[str, InferenceSensitivity]],
|
|
343
224
|
exclusion_windows: Optional[ExclusionWindowInputTypes],
|
|
@@ -354,25 +235,112 @@ class AssertionsClient:
|
|
|
354
235
|
self._retrieve_assertion_and_monitor(assertion_input)
|
|
355
236
|
)
|
|
356
237
|
|
|
357
|
-
# 2.1 If the assertion and monitor entities exist, create
|
|
238
|
+
# 2.1 If the assertion and monitor entities exist, create an assertion object from them:
|
|
358
239
|
if maybe_assertion_entity and maybe_monitor_entity:
|
|
359
|
-
existing_assertion = SmartFreshnessAssertion.from_entities(
|
|
240
|
+
existing_assertion = SmartFreshnessAssertion._from_entities(
|
|
360
241
|
maybe_assertion_entity, maybe_monitor_entity
|
|
361
242
|
)
|
|
362
243
|
# 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
|
|
363
244
|
elif maybe_assertion_entity and not maybe_monitor_entity:
|
|
364
|
-
|
|
245
|
+
monitor_mode = (
|
|
246
|
+
"ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
|
|
247
|
+
)
|
|
248
|
+
existing_assertion = SmartFreshnessAssertion._from_entities(
|
|
249
|
+
maybe_assertion_entity,
|
|
250
|
+
Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
|
|
251
|
+
)
|
|
252
|
+
# 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
|
|
253
|
+
elif not maybe_assertion_entity:
|
|
254
|
+
logger.info(
|
|
255
|
+
f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
|
|
256
|
+
)
|
|
257
|
+
return self._create_smart_freshness_assertion(
|
|
258
|
+
dataset_urn=dataset_urn,
|
|
259
|
+
display_name=display_name,
|
|
260
|
+
detection_mechanism=detection_mechanism,
|
|
261
|
+
sensitivity=sensitivity,
|
|
262
|
+
exclusion_windows=exclusion_windows,
|
|
263
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
264
|
+
incident_behavior=incident_behavior,
|
|
265
|
+
tags=tags,
|
|
266
|
+
created_by=updated_by,
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
# 3. Check for any issues e.g. different dataset urns
|
|
270
|
+
if (
|
|
271
|
+
existing_assertion
|
|
272
|
+
and hasattr(existing_assertion, "dataset_urn")
|
|
273
|
+
and existing_assertion.dataset_urn != assertion_input.dataset_urn
|
|
274
|
+
):
|
|
275
|
+
raise SDKUsageError(
|
|
276
|
+
f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
# 4. Merge the existing assertion with the validated input:
|
|
280
|
+
merged_assertion_input = self._merge_freshness_input(
|
|
281
|
+
dataset_urn=dataset_urn,
|
|
282
|
+
urn=urn,
|
|
283
|
+
display_name=display_name,
|
|
284
|
+
enabled=enabled,
|
|
285
|
+
detection_mechanism=detection_mechanism,
|
|
286
|
+
sensitivity=sensitivity,
|
|
287
|
+
exclusion_windows=exclusion_windows,
|
|
288
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
289
|
+
incident_behavior=incident_behavior,
|
|
290
|
+
tags=tags,
|
|
291
|
+
now_utc=now_utc,
|
|
292
|
+
assertion_input=assertion_input,
|
|
293
|
+
maybe_assertion_entity=maybe_assertion_entity,
|
|
294
|
+
maybe_monitor_entity=maybe_monitor_entity,
|
|
295
|
+
existing_assertion=existing_assertion,
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
return merged_assertion_input
|
|
299
|
+
|
|
300
|
+
def _retrieve_and_merge_volume_assertion_and_monitor(
|
|
301
|
+
self,
|
|
302
|
+
assertion_input: _SmartVolumeAssertionInput,
|
|
303
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
304
|
+
urn: Union[str, AssertionUrn],
|
|
305
|
+
display_name: Optional[str],
|
|
306
|
+
enabled: Optional[bool],
|
|
307
|
+
detection_mechanism: DetectionMechanismInputTypes,
|
|
308
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]],
|
|
309
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes],
|
|
310
|
+
training_data_lookback_days: Optional[int],
|
|
311
|
+
incident_behavior: Optional[
|
|
312
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
313
|
+
],
|
|
314
|
+
tags: Optional[TagsInputType],
|
|
315
|
+
updated_by: Optional[Union[str, CorpUserUrn]],
|
|
316
|
+
now_utc: datetime,
|
|
317
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
318
|
+
) -> Union[SmartVolumeAssertion, _SmartVolumeAssertionInput]:
|
|
319
|
+
# 1. Retrieve any existing assertion and monitor entities:
|
|
320
|
+
maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
|
|
321
|
+
self._retrieve_assertion_and_monitor(assertion_input)
|
|
322
|
+
)
|
|
323
|
+
|
|
324
|
+
# 2.1 If the assertion and monitor entities exist, create an assertion object from them:
|
|
325
|
+
if maybe_assertion_entity and maybe_monitor_entity:
|
|
326
|
+
existing_assertion = SmartVolumeAssertion._from_entities(
|
|
327
|
+
maybe_assertion_entity, maybe_monitor_entity
|
|
328
|
+
)
|
|
329
|
+
# 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
|
|
330
|
+
elif maybe_assertion_entity and not maybe_monitor_entity:
|
|
331
|
+
monitor_mode = (
|
|
332
|
+
"ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
|
|
333
|
+
)
|
|
334
|
+
existing_assertion = SmartVolumeAssertion._from_entities(
|
|
365
335
|
maybe_assertion_entity,
|
|
366
|
-
Monitor(
|
|
367
|
-
id=monitor_urn, info=("ASSERTION", "ACTIVE")
|
|
368
|
-
), # TODO: Set active based on enabled parameter once it is added
|
|
336
|
+
Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
|
|
369
337
|
)
|
|
370
338
|
# 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
|
|
371
339
|
elif not maybe_assertion_entity:
|
|
372
340
|
logger.info(
|
|
373
341
|
f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
|
|
374
342
|
)
|
|
375
|
-
return self.
|
|
343
|
+
return self._create_smart_volume_assertion(
|
|
376
344
|
dataset_urn=dataset_urn,
|
|
377
345
|
display_name=display_name,
|
|
378
346
|
detection_mechanism=detection_mechanism,
|
|
@@ -387,6 +355,7 @@ class AssertionsClient:
|
|
|
387
355
|
# 3. Check for any issues e.g. different dataset urns
|
|
388
356
|
if (
|
|
389
357
|
existing_assertion
|
|
358
|
+
and hasattr(existing_assertion, "dataset_urn")
|
|
390
359
|
and existing_assertion.dataset_urn != assertion_input.dataset_urn
|
|
391
360
|
):
|
|
392
361
|
raise SDKUsageError(
|
|
@@ -394,16 +363,18 @@ class AssertionsClient:
|
|
|
394
363
|
)
|
|
395
364
|
|
|
396
365
|
# 4. Merge the existing assertion with the validated input:
|
|
397
|
-
merged_assertion_input = self.
|
|
366
|
+
merged_assertion_input = self._merge_volume_input(
|
|
398
367
|
dataset_urn=dataset_urn,
|
|
399
368
|
urn=urn,
|
|
400
369
|
display_name=display_name,
|
|
370
|
+
enabled=enabled,
|
|
401
371
|
detection_mechanism=detection_mechanism,
|
|
402
372
|
sensitivity=sensitivity,
|
|
403
373
|
exclusion_windows=exclusion_windows,
|
|
404
374
|
training_data_lookback_days=training_data_lookback_days,
|
|
405
375
|
incident_behavior=incident_behavior,
|
|
406
376
|
tags=tags,
|
|
377
|
+
schedule=schedule,
|
|
407
378
|
now_utc=now_utc,
|
|
408
379
|
assertion_input=assertion_input,
|
|
409
380
|
maybe_assertion_entity=maybe_assertion_entity,
|
|
@@ -414,7 +385,8 @@ class AssertionsClient:
|
|
|
414
385
|
return merged_assertion_input
|
|
415
386
|
|
|
416
387
|
def _retrieve_assertion_and_monitor(
|
|
417
|
-
self,
|
|
388
|
+
self,
|
|
389
|
+
assertion_input: _AssertionInput,
|
|
418
390
|
) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
|
|
419
391
|
"""Retrieve the assertion and monitor entities from the DataHub instance.
|
|
420
392
|
|
|
@@ -451,11 +423,12 @@ class AssertionsClient:
|
|
|
451
423
|
|
|
452
424
|
return maybe_assertion_entity, monitor_urn, maybe_monitor_entity
|
|
453
425
|
|
|
454
|
-
def
|
|
426
|
+
def _merge_freshness_input(
|
|
455
427
|
self,
|
|
456
428
|
dataset_urn: Union[str, DatasetUrn],
|
|
457
429
|
urn: Union[str, AssertionUrn],
|
|
458
430
|
display_name: Optional[str],
|
|
431
|
+
enabled: Optional[bool],
|
|
459
432
|
detection_mechanism: DetectionMechanismInputTypes,
|
|
460
433
|
sensitivity: Optional[Union[str, InferenceSensitivity]],
|
|
461
434
|
exclusion_windows: Optional[ExclusionWindowInputTypes],
|
|
@@ -476,6 +449,7 @@ class AssertionsClient:
|
|
|
476
449
|
dataset_urn: The urn of the dataset to be monitored.
|
|
477
450
|
urn: The urn of the assertion.
|
|
478
451
|
display_name: The display name of the assertion.
|
|
452
|
+
enabled: Whether the assertion is enabled.
|
|
479
453
|
detection_mechanism: The detection mechanism to be used for the assertion.
|
|
480
454
|
sensitivity: The sensitivity to be applied to the assertion.
|
|
481
455
|
exclusion_windows: The exclusion windows to be applied to the assertion.
|
|
@@ -502,6 +476,22 @@ class AssertionsClient:
|
|
|
502
476
|
existing_assertion,
|
|
503
477
|
maybe_assertion_entity.description if maybe_assertion_entity else None,
|
|
504
478
|
),
|
|
479
|
+
enabled=_merge_field(
|
|
480
|
+
enabled,
|
|
481
|
+
"enabled",
|
|
482
|
+
assertion_input,
|
|
483
|
+
existing_assertion,
|
|
484
|
+
existing_assertion.mode == AssertionMode.ACTIVE
|
|
485
|
+
if existing_assertion
|
|
486
|
+
else None,
|
|
487
|
+
),
|
|
488
|
+
schedule=_merge_field(
|
|
489
|
+
None, # Don't allow schedule modification in updates - always preserve existing
|
|
490
|
+
"schedule",
|
|
491
|
+
assertion_input,
|
|
492
|
+
existing_assertion,
|
|
493
|
+
existing_assertion.schedule if existing_assertion else None,
|
|
494
|
+
),
|
|
505
495
|
detection_mechanism=_merge_field(
|
|
506
496
|
detection_mechanism,
|
|
507
497
|
"detection_mechanism",
|
|
@@ -564,50 +554,186 @@ class AssertionsClient:
|
|
|
564
554
|
|
|
565
555
|
return merged_assertion_input
|
|
566
556
|
|
|
567
|
-
def
|
|
557
|
+
def _merge_volume_input(
|
|
568
558
|
self,
|
|
569
|
-
*,
|
|
570
559
|
dataset_urn: Union[str, DatasetUrn],
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
560
|
+
urn: Union[str, AssertionUrn],
|
|
561
|
+
display_name: Optional[str],
|
|
562
|
+
enabled: Optional[bool],
|
|
563
|
+
detection_mechanism: DetectionMechanismInputTypes,
|
|
564
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]],
|
|
565
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes],
|
|
566
|
+
training_data_lookback_days: Optional[int],
|
|
576
567
|
incident_behavior: Optional[
|
|
577
568
|
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
578
|
-
]
|
|
579
|
-
tags: Optional[TagsInputType]
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
569
|
+
],
|
|
570
|
+
tags: Optional[TagsInputType],
|
|
571
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
572
|
+
now_utc: datetime,
|
|
573
|
+
assertion_input: _SmartVolumeAssertionInput,
|
|
574
|
+
maybe_assertion_entity: Optional[Assertion],
|
|
575
|
+
maybe_monitor_entity: Optional[Monitor],
|
|
576
|
+
existing_assertion: SmartVolumeAssertion,
|
|
577
|
+
) -> _SmartVolumeAssertionInput:
|
|
578
|
+
"""Merge the input with the existing assertion and monitor entities.
|
|
585
579
|
|
|
586
580
|
Args:
|
|
587
581
|
dataset_urn: The urn of the dataset to be monitored.
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
582
|
+
urn: The urn of the assertion.
|
|
583
|
+
display_name: The display name of the assertion.
|
|
584
|
+
enabled: Whether the assertion is enabled.
|
|
585
|
+
detection_mechanism: The detection mechanism to be used for the assertion.
|
|
586
|
+
sensitivity: The sensitivity to be applied to the assertion.
|
|
587
|
+
exclusion_windows: The exclusion windows to be applied to the assertion.
|
|
588
|
+
training_data_lookback_days: The training data lookback days to be applied to the assertion.
|
|
589
|
+
incident_behavior: The incident behavior to be applied to the assertion.
|
|
590
|
+
tags: The tags to be applied to the assertion.
|
|
591
|
+
now_utc: The current UTC time from when the function is called.
|
|
592
|
+
assertion_input: The validated input to the function.
|
|
593
|
+
maybe_assertion_entity: The existing assertion entity from the DataHub instance.
|
|
594
|
+
maybe_monitor_entity: The existing monitor entity from the DataHub instance.
|
|
595
|
+
existing_assertion: The existing assertion from the DataHub instance.
|
|
596
|
+
|
|
597
|
+
Returns:
|
|
598
|
+
The merged assertion input.
|
|
599
|
+
"""
|
|
600
|
+
merged_assertion_input = _SmartVolumeAssertionInput(
|
|
601
|
+
urn=urn,
|
|
602
|
+
entity_client=self.client.entities,
|
|
603
|
+
dataset_urn=dataset_urn,
|
|
604
|
+
display_name=_merge_field(
|
|
605
|
+
display_name,
|
|
606
|
+
"display_name",
|
|
607
|
+
assertion_input,
|
|
608
|
+
existing_assertion,
|
|
609
|
+
maybe_assertion_entity.description if maybe_assertion_entity else None,
|
|
610
|
+
),
|
|
611
|
+
enabled=_merge_field(
|
|
612
|
+
enabled,
|
|
613
|
+
"enabled",
|
|
614
|
+
assertion_input,
|
|
615
|
+
existing_assertion,
|
|
616
|
+
existing_assertion.mode == AssertionMode.ACTIVE
|
|
617
|
+
if existing_assertion
|
|
618
|
+
else None,
|
|
619
|
+
),
|
|
620
|
+
schedule=_merge_field(
|
|
621
|
+
schedule,
|
|
622
|
+
"schedule",
|
|
623
|
+
assertion_input,
|
|
624
|
+
existing_assertion,
|
|
625
|
+
existing_assertion.schedule if existing_assertion else None,
|
|
626
|
+
),
|
|
627
|
+
detection_mechanism=_merge_field(
|
|
628
|
+
detection_mechanism,
|
|
629
|
+
"detection_mechanism",
|
|
630
|
+
assertion_input,
|
|
631
|
+
existing_assertion,
|
|
632
|
+
SmartVolumeAssertion._get_detection_mechanism(
|
|
633
|
+
maybe_assertion_entity, maybe_monitor_entity, default=None
|
|
634
|
+
)
|
|
635
|
+
if maybe_assertion_entity and maybe_monitor_entity
|
|
636
|
+
else None,
|
|
637
|
+
),
|
|
638
|
+
sensitivity=_merge_field(
|
|
639
|
+
sensitivity,
|
|
640
|
+
"sensitivity",
|
|
641
|
+
assertion_input,
|
|
642
|
+
existing_assertion,
|
|
643
|
+
maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
|
|
644
|
+
),
|
|
645
|
+
exclusion_windows=_merge_field(
|
|
646
|
+
exclusion_windows,
|
|
647
|
+
"exclusion_windows",
|
|
648
|
+
assertion_input,
|
|
649
|
+
existing_assertion,
|
|
650
|
+
maybe_monitor_entity.exclusion_windows
|
|
651
|
+
if maybe_monitor_entity
|
|
652
|
+
else None,
|
|
653
|
+
),
|
|
654
|
+
training_data_lookback_days=_merge_field(
|
|
655
|
+
training_data_lookback_days,
|
|
656
|
+
"training_data_lookback_days",
|
|
657
|
+
assertion_input,
|
|
658
|
+
existing_assertion,
|
|
659
|
+
maybe_monitor_entity.training_data_lookback_days
|
|
660
|
+
if maybe_monitor_entity
|
|
661
|
+
else None,
|
|
662
|
+
),
|
|
663
|
+
incident_behavior=_merge_field(
|
|
664
|
+
incident_behavior,
|
|
665
|
+
"incident_behavior",
|
|
666
|
+
assertion_input,
|
|
667
|
+
existing_assertion,
|
|
668
|
+
SmartVolumeAssertion._get_incident_behavior(maybe_assertion_entity)
|
|
669
|
+
if maybe_assertion_entity
|
|
670
|
+
else None,
|
|
671
|
+
),
|
|
672
|
+
tags=_merge_field(
|
|
673
|
+
tags,
|
|
674
|
+
"tags",
|
|
675
|
+
assertion_input,
|
|
676
|
+
existing_assertion,
|
|
677
|
+
maybe_assertion_entity.tags if maybe_assertion_entity else None,
|
|
678
|
+
),
|
|
679
|
+
created_by=existing_assertion.created_by
|
|
680
|
+
or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
|
|
681
|
+
created_at=existing_assertion.created_at
|
|
682
|
+
or now_utc, # Override with the existing assertion's created_at or now if not set
|
|
683
|
+
updated_by=assertion_input.updated_by, # Override with the input's updated_by
|
|
684
|
+
updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
|
|
685
|
+
)
|
|
686
|
+
|
|
687
|
+
return merged_assertion_input
|
|
688
|
+
|
|
689
|
+
def _create_smart_freshness_assertion(
|
|
690
|
+
self,
|
|
691
|
+
*,
|
|
692
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
693
|
+
display_name: Optional[str] = None,
|
|
694
|
+
enabled: bool = True,
|
|
695
|
+
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
696
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
697
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
698
|
+
training_data_lookback_days: Optional[int] = None,
|
|
699
|
+
incident_behavior: Optional[
|
|
700
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
701
|
+
] = None,
|
|
702
|
+
tags: Optional[TagsInputType] = None,
|
|
703
|
+
created_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
704
|
+
) -> SmartFreshnessAssertion:
|
|
705
|
+
"""Create a smart freshness assertion.
|
|
706
|
+
|
|
707
|
+
Note: keyword arguments are required.
|
|
708
|
+
|
|
709
|
+
The created assertion will use the default hourly schedule ("0 * * * *").
|
|
710
|
+
|
|
711
|
+
Args:
|
|
712
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
713
|
+
display_name: The display name of the assertion. If not provided, a random display
|
|
714
|
+
name will be generated.
|
|
715
|
+
enabled: Whether the assertion is enabled. Defaults to True.
|
|
716
|
+
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
717
|
+
schema is recommended. Valid values are:
|
|
718
|
+
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
719
|
+
- "audit_log" or DetectionMechanism.AUDIT_LOG
|
|
720
|
+
- {
|
|
721
|
+
"type": "last_modified_column",
|
|
722
|
+
"column_name": "last_modified",
|
|
723
|
+
"additional_filter": "last_modified > '2021-01-01'",
|
|
724
|
+
} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
|
|
725
|
+
additional_filter="last_modified > '2021-01-01'")
|
|
726
|
+
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
727
|
+
sensitivity: The sensitivity to be applied to the assertion. Valid values are:
|
|
728
|
+
- "low" or InferenceSensitivity.LOW
|
|
729
|
+
- "medium" or InferenceSensitivity.MEDIUM
|
|
730
|
+
- "high" or InferenceSensitivity.HIGH
|
|
731
|
+
exclusion_windows: The exclusion windows to be applied to the assertion, currently only
|
|
732
|
+
fixed range exclusion windows are supported. Valid values are:
|
|
733
|
+
- from datetime.datetime objects: {
|
|
734
|
+
"start": datetime(2025, 1, 1, 0, 0, 0),
|
|
735
|
+
"end": datetime(2025, 1, 2, 0, 0, 0),
|
|
736
|
+
}
|
|
611
737
|
- from string datetimes: {
|
|
612
738
|
"start": "2025-01-01T00:00:00",
|
|
613
739
|
"end": "2025-01-02T00:00:00",
|
|
@@ -645,6 +771,128 @@ class AssertionsClient:
|
|
|
645
771
|
entity_client=self.client.entities,
|
|
646
772
|
dataset_urn=dataset_urn,
|
|
647
773
|
display_name=display_name,
|
|
774
|
+
enabled=enabled,
|
|
775
|
+
detection_mechanism=detection_mechanism,
|
|
776
|
+
sensitivity=sensitivity,
|
|
777
|
+
exclusion_windows=exclusion_windows,
|
|
778
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
779
|
+
incident_behavior=incident_behavior,
|
|
780
|
+
tags=tags,
|
|
781
|
+
created_by=created_by,
|
|
782
|
+
created_at=now_utc,
|
|
783
|
+
updated_by=created_by,
|
|
784
|
+
updated_at=now_utc,
|
|
785
|
+
)
|
|
786
|
+
assertion_entity, monitor_entity = (
|
|
787
|
+
assertion_input.to_assertion_and_monitor_entities()
|
|
788
|
+
)
|
|
789
|
+
# If assertion creation fails, we won't try to create the monitor
|
|
790
|
+
self.client.entities.create(assertion_entity)
|
|
791
|
+
# TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
792
|
+
# try:
|
|
793
|
+
self.client.entities.create(monitor_entity)
|
|
794
|
+
# except Exception as e:
|
|
795
|
+
# logger.error(f"Error creating monitor: {e}")
|
|
796
|
+
# self.client.entities.delete(assertion_entity)
|
|
797
|
+
# raise e
|
|
798
|
+
return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
|
|
799
|
+
|
|
800
|
+
def _create_smart_volume_assertion(
|
|
801
|
+
self,
|
|
802
|
+
*,
|
|
803
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
804
|
+
display_name: Optional[str] = None,
|
|
805
|
+
enabled: bool = True,
|
|
806
|
+
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
807
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
808
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
809
|
+
training_data_lookback_days: Optional[int] = None,
|
|
810
|
+
incident_behavior: Optional[
|
|
811
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
812
|
+
] = None,
|
|
813
|
+
tags: Optional[TagsInputType] = None,
|
|
814
|
+
created_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
815
|
+
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
816
|
+
) -> SmartVolumeAssertion:
|
|
817
|
+
"""Create a smart volume assertion.
|
|
818
|
+
|
|
819
|
+
Note: keyword arguments are required.
|
|
820
|
+
|
|
821
|
+
Args:
|
|
822
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
823
|
+
display_name: The display name of the assertion. If not provided, a random display
|
|
824
|
+
name will be generated.
|
|
825
|
+
enabled: Whether the assertion is enabled. Defaults to True.
|
|
826
|
+
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
827
|
+
schema is recommended. Valid values are:
|
|
828
|
+
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
829
|
+
- "audit_log" or DetectionMechanism.AUDIT_LOG
|
|
830
|
+
- {
|
|
831
|
+
"type": "last_modified_column",
|
|
832
|
+
"column_name": "last_modified",
|
|
833
|
+
"additional_filter": "last_modified > '2021-01-01'",
|
|
834
|
+
} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
|
|
835
|
+
additional_filter="last_modified > '2021-01-01'")
|
|
836
|
+
- {
|
|
837
|
+
"type": "high_watermark_column",
|
|
838
|
+
"column_name": "id",
|
|
839
|
+
"additional_filter": "id > 1000",
|
|
840
|
+
} or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
|
|
841
|
+
additional_filter='id > 1000')
|
|
842
|
+
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
843
|
+
sensitivity: The sensitivity to be applied to the assertion. Valid values are:
|
|
844
|
+
- "low" or InferenceSensitivity.LOW
|
|
845
|
+
- "medium" or InferenceSensitivity.MEDIUM
|
|
846
|
+
- "high" or InferenceSensitivity.HIGH
|
|
847
|
+
exclusion_windows: The exclusion windows to be applied to the assertion, currently only
|
|
848
|
+
fixed range exclusion windows are supported. Valid values are:
|
|
849
|
+
- from datetime.datetime objects: {
|
|
850
|
+
"start": datetime(2025, 1, 1, 0, 0, 0),
|
|
851
|
+
"end": datetime(2025, 1, 2, 0, 0, 0),
|
|
852
|
+
}
|
|
853
|
+
- from string datetimes: {
|
|
854
|
+
"start": "2025-01-01T00:00:00",
|
|
855
|
+
"end": "2025-01-02T00:00:00",
|
|
856
|
+
}
|
|
857
|
+
- from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
|
|
858
|
+
start=datetime(2025, 1, 1, 0, 0, 0),
|
|
859
|
+
end=datetime(2025, 1, 2, 0, 0, 0)
|
|
860
|
+
)
|
|
861
|
+
training_data_lookback_days: The training data lookback days to be applied to the
|
|
862
|
+
assertion as an integer.
|
|
863
|
+
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
864
|
+
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
865
|
+
- "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
|
|
866
|
+
tags: The tags to be applied to the assertion. Valid values are:
|
|
867
|
+
- a list of strings (strings will be converted to TagUrn objects)
|
|
868
|
+
- a list of TagUrn objects
|
|
869
|
+
- a list of TagAssociationClass objects
|
|
870
|
+
created_by: Optional urn of the user who created the assertion. The format is
|
|
871
|
+
"urn:li:corpuser:<username>", which you can find on the Users & Groups page.
|
|
872
|
+
The default is the datahub system user.
|
|
873
|
+
TODO: Retrieve the SDK user as the default instead of the datahub system user.
|
|
874
|
+
schedule: Optional cron formatted schedule for the assertion. If not provided, a default
|
|
875
|
+
schedule will be used. The schedule determines when the assertion will be evaluated.
|
|
876
|
+
The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
|
|
877
|
+
Alternatively, a models.CronScheduleClass object can be provided with string parameters
|
|
878
|
+
cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
|
|
879
|
+
|
|
880
|
+
Returns:
|
|
881
|
+
SmartVolumeAssertion: The created assertion.
|
|
882
|
+
"""
|
|
883
|
+
_print_experimental_warning()
|
|
884
|
+
now_utc = datetime.now(timezone.utc)
|
|
885
|
+
if created_by is None:
|
|
886
|
+
logger.warning(
|
|
887
|
+
f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
888
|
+
)
|
|
889
|
+
created_by = DEFAULT_CREATED_BY
|
|
890
|
+
assertion_input = _SmartVolumeAssertionInput(
|
|
891
|
+
urn=None,
|
|
892
|
+
entity_client=self.client.entities,
|
|
893
|
+
dataset_urn=dataset_urn,
|
|
894
|
+
display_name=display_name,
|
|
895
|
+
enabled=enabled,
|
|
648
896
|
detection_mechanism=detection_mechanism,
|
|
649
897
|
sensitivity=sensitivity,
|
|
650
898
|
exclusion_windows=exclusion_windows,
|
|
@@ -655,6 +903,7 @@ class AssertionsClient:
|
|
|
655
903
|
created_at=now_utc,
|
|
656
904
|
updated_by=created_by,
|
|
657
905
|
updated_at=now_utc,
|
|
906
|
+
schedule=schedule,
|
|
658
907
|
)
|
|
659
908
|
assertion_entity, monitor_entity = (
|
|
660
909
|
assertion_input.to_assertion_and_monitor_entities()
|
|
@@ -668,14 +917,201 @@ class AssertionsClient:
|
|
|
668
917
|
# logger.error(f"Error creating monitor: {e}")
|
|
669
918
|
# self.client.entities.delete(assertion_entity)
|
|
670
919
|
# raise e
|
|
671
|
-
return
|
|
920
|
+
return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
|
|
921
|
+
|
|
922
|
+
def sync_smart_volume_assertion(
|
|
923
|
+
self,
|
|
924
|
+
*,
|
|
925
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
926
|
+
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
927
|
+
display_name: Optional[str] = None,
|
|
928
|
+
enabled: Optional[bool] = None,
|
|
929
|
+
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
930
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
931
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
932
|
+
training_data_lookback_days: Optional[int] = None,
|
|
933
|
+
incident_behavior: Optional[
|
|
934
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
935
|
+
] = None,
|
|
936
|
+
tags: Optional[TagsInputType] = None,
|
|
937
|
+
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
938
|
+
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
939
|
+
) -> SmartVolumeAssertion:
|
|
940
|
+
"""Upsert and merge a smart volume assertion.
|
|
941
|
+
|
|
942
|
+
Note: keyword arguments are required.
|
|
943
|
+
|
|
944
|
+
Upsert and merge is a combination of create and update. If the assertion does not exist,
|
|
945
|
+
it will be created. If it does exist, it will be updated. Existing assertion fields will
|
|
946
|
+
be updated if the input value is not None. If the input value is None, the existing value
|
|
947
|
+
will be preserved. An existing value can be un-set, e.g. by passing an empty list or
|
|
948
|
+
empty string.
|
|
949
|
+
|
|
950
|
+
Schedule behavior:
|
|
951
|
+
- Create case: Uses default hourly schedule ("0 * * * *") or provided schedule
|
|
952
|
+
- Update case: Unlike `sync_smart_freshness_assertion`, the schedule is updated.
|
|
953
|
+
|
|
954
|
+
Args:
|
|
955
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
956
|
+
urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
|
|
957
|
+
will be _created_ in the DataHub instance.
|
|
958
|
+
display_name: The display name of the assertion. If not provided, a random display name
|
|
959
|
+
will be generated.
|
|
960
|
+
enabled: Whether the assertion is enabled. If not provided, the existing value
|
|
961
|
+
will be preserved.
|
|
962
|
+
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
963
|
+
schema is recommended. Valid values are:
|
|
964
|
+
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
965
|
+
- "audit_log" or DetectionMechanism.AUDIT_LOG
|
|
966
|
+
- {
|
|
967
|
+
"type": "last_modified_column",
|
|
968
|
+
"column_name": "last_modified",
|
|
969
|
+
"additional_filter": "last_modified > '2021-01-01'",
|
|
970
|
+
} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
|
|
971
|
+
additional_filter="last_modified > '2021-01-01'")
|
|
972
|
+
- {
|
|
973
|
+
"type": "high_watermark_column",
|
|
974
|
+
"column_name": "id",
|
|
975
|
+
"additional_filter": "id > 1000",
|
|
976
|
+
} or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
|
|
977
|
+
additional_filter='id > 1000')
|
|
978
|
+
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
979
|
+
sensitivity: The sensitivity to be applied to the assertion. Valid values are:
|
|
980
|
+
- "low" or InferenceSensitivity.LOW
|
|
981
|
+
- "medium" or InferenceSensitivity.MEDIUM
|
|
982
|
+
- "high" or InferenceSensitivity.HIGH
|
|
983
|
+
exclusion_windows: The exclusion windows to be applied to the assertion, currently only
|
|
984
|
+
fixed range exclusion windows are supported. Valid values are:
|
|
985
|
+
- from datetime.datetime objects: {
|
|
986
|
+
"start": datetime(2025, 1, 1, 0, 0, 0),
|
|
987
|
+
"end": datetime(2025, 1, 2, 0, 0, 0),
|
|
988
|
+
}
|
|
989
|
+
- from string datetimes: {
|
|
990
|
+
"start": "2025-01-01T00:00:00",
|
|
991
|
+
"end": "2025-01-02T00:00:00",
|
|
992
|
+
}
|
|
993
|
+
- from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
|
|
994
|
+
start=datetime(2025, 1, 1, 0, 0, 0),
|
|
995
|
+
end=datetime(2025, 1, 2, 0, 0, 0)
|
|
996
|
+
)
|
|
997
|
+
training_data_lookback_days: The training data lookback days to be applied to the
|
|
998
|
+
assertion as an integer.
|
|
999
|
+
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
1000
|
+
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
1001
|
+
- "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
|
|
1002
|
+
tags: The tags to be applied to the assertion. Valid values are:
|
|
1003
|
+
- a list of strings (strings will be converted to TagUrn objects)
|
|
1004
|
+
- a list of TagUrn objects
|
|
1005
|
+
- a list of TagAssociationClass objects
|
|
1006
|
+
updated_by: Optional urn of the user who updated the assertion. The format is
|
|
1007
|
+
"urn:li:corpuser:<username>", which you can find on the Users & Groups page.
|
|
1008
|
+
The default is the datahub system user.
|
|
1009
|
+
TODO: Retrieve the SDK user as the default instead of the datahub system user.
|
|
1010
|
+
schedule: Optional cron formatted schedule for the assertion. If not provided, a default
|
|
1011
|
+
schedule will be used. The schedule determines when the assertion will be evaluated.
|
|
1012
|
+
The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
|
|
1013
|
+
Alternatively, a models.CronScheduleClass object can be provided with string parameters
|
|
1014
|
+
cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
|
|
1015
|
+
|
|
1016
|
+
Returns:
|
|
1017
|
+
SmartVolumeAssertion: The created or updated assertion.
|
|
1018
|
+
"""
|
|
1019
|
+
_print_experimental_warning()
|
|
1020
|
+
now_utc = datetime.now(timezone.utc)
|
|
1021
|
+
|
|
1022
|
+
if updated_by is None:
|
|
1023
|
+
logger.warning(
|
|
1024
|
+
f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
1025
|
+
)
|
|
1026
|
+
updated_by = DEFAULT_CREATED_BY
|
|
1027
|
+
|
|
1028
|
+
# 1. If urn is not set, create a new assertion
|
|
1029
|
+
if urn is None:
|
|
1030
|
+
logger.info("URN is not set, creating a new assertion")
|
|
1031
|
+
return self._create_smart_volume_assertion(
|
|
1032
|
+
dataset_urn=dataset_urn,
|
|
1033
|
+
display_name=display_name,
|
|
1034
|
+
enabled=enabled if enabled is not None else True,
|
|
1035
|
+
detection_mechanism=detection_mechanism,
|
|
1036
|
+
sensitivity=sensitivity,
|
|
1037
|
+
exclusion_windows=exclusion_windows,
|
|
1038
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
1039
|
+
incident_behavior=incident_behavior,
|
|
1040
|
+
tags=tags,
|
|
1041
|
+
created_by=updated_by,
|
|
1042
|
+
schedule=schedule,
|
|
1043
|
+
)
|
|
1044
|
+
|
|
1045
|
+
# 2. If urn is set, first validate the input:
|
|
1046
|
+
assertion_input = _SmartVolumeAssertionInput(
|
|
1047
|
+
urn=urn,
|
|
1048
|
+
entity_client=self.client.entities,
|
|
1049
|
+
dataset_urn=dataset_urn,
|
|
1050
|
+
display_name=display_name,
|
|
1051
|
+
detection_mechanism=detection_mechanism,
|
|
1052
|
+
sensitivity=sensitivity,
|
|
1053
|
+
exclusion_windows=exclusion_windows,
|
|
1054
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
1055
|
+
incident_behavior=incident_behavior,
|
|
1056
|
+
tags=tags,
|
|
1057
|
+
created_by=updated_by, # This will be overridden by the actual created_by
|
|
1058
|
+
created_at=now_utc, # This will be overridden by the actual created_at
|
|
1059
|
+
updated_by=updated_by,
|
|
1060
|
+
updated_at=now_utc,
|
|
1061
|
+
schedule=schedule,
|
|
1062
|
+
)
|
|
1063
|
+
|
|
1064
|
+
# 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
|
|
1065
|
+
# if the assertion does not exist:
|
|
1066
|
+
merged_assertion_input_or_created_assertion = (
|
|
1067
|
+
self._retrieve_and_merge_volume_assertion_and_monitor(
|
|
1068
|
+
assertion_input=assertion_input,
|
|
1069
|
+
dataset_urn=dataset_urn,
|
|
1070
|
+
urn=urn,
|
|
1071
|
+
display_name=display_name,
|
|
1072
|
+
enabled=enabled,
|
|
1073
|
+
detection_mechanism=detection_mechanism,
|
|
1074
|
+
sensitivity=sensitivity,
|
|
1075
|
+
exclusion_windows=exclusion_windows,
|
|
1076
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
1077
|
+
incident_behavior=incident_behavior,
|
|
1078
|
+
tags=tags,
|
|
1079
|
+
updated_by=updated_by,
|
|
1080
|
+
now_utc=now_utc,
|
|
1081
|
+
schedule=schedule,
|
|
1082
|
+
)
|
|
1083
|
+
)
|
|
1084
|
+
|
|
1085
|
+
# Return early if we created a new assertion in the merge:
|
|
1086
|
+
if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
|
|
1087
|
+
# Narrow the type for the checker: the volume-specific merge helper is expected to return a SmartVolumeAssertion when it creates one (TODO confirm against the helper)
|
|
1088
|
+
assert isinstance(
|
|
1089
|
+
merged_assertion_input_or_created_assertion, SmartVolumeAssertion
|
|
1090
|
+
)
|
|
1091
|
+
return merged_assertion_input_or_created_assertion
|
|
1092
|
+
|
|
1093
|
+
# 4. Upsert the assertion and monitor entities:
|
|
1094
|
+
assertion_entity, monitor_entity = (
|
|
1095
|
+
merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
|
|
1096
|
+
)
|
|
1097
|
+
# If assertion upsert fails, we won't try to upsert the monitor
|
|
1098
|
+
self.client.entities.upsert(assertion_entity)
|
|
1099
|
+
# TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
1100
|
+
# try:
|
|
1101
|
+
self.client.entities.upsert(monitor_entity)
|
|
1102
|
+
# except Exception as e:
|
|
1103
|
+
# logger.error(f"Error upserting monitor: {e}")
|
|
1104
|
+
# self.client.entities.delete(assertion_entity)
|
|
1105
|
+
# raise e
|
|
1106
|
+
|
|
1107
|
+
return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
|
|
672
1108
|
|
|
673
1109
|
|
|
674
1110
|
def _merge_field(
|
|
675
1111
|
input_field_value: Any,
|
|
676
1112
|
input_field_name: str,
|
|
677
|
-
validated_assertion_input:
|
|
678
|
-
validated_existing_assertion:
|
|
1113
|
+
validated_assertion_input: _AssertionInput,
|
|
1114
|
+
validated_existing_assertion: _AssertionPublic,
|
|
679
1115
|
existing_entity_value: Optional[Any] = None, # TODO: Can we do better than Any?
|
|
680
1116
|
) -> Any:
|
|
681
1117
|
"""Merge the input field value with any existing entity value or default value.
|