acryl-datahub-cloud 0.3.12rc1__py3-none-any.whl → 0.3.12rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (74)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +559 -0
  3. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  7. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  8. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  9. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1842 -1786
  10. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  11. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
  12. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  14. acryl_datahub_cloud/metadata/schema.avsc +24861 -24050
  15. acryl_datahub_cloud/metadata/schema_classes.py +1031 -631
  16. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  17. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  18. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  19. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +40 -7
  20. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +27 -6
  21. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +31 -7
  22. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  23. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  24. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  25. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  26. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  27. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  28. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  29. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  30. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  31. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  32. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  33. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  34. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  35. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  36. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  37. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  39. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +3 -0
  40. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  41. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  42. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  43. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  44. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  45. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  46. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  47. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  48. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +27 -6
  49. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  51. acryl_datahub_cloud/notifications/__init__.py +0 -0
  52. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  53. acryl_datahub_cloud/sdk/__init__.py +29 -0
  54. acryl_datahub_cloud/{_sdk_extras → sdk}/assertion.py +501 -193
  55. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  56. acryl_datahub_cloud/{_sdk_extras → sdk/assertion_input}/assertion_input.py +733 -189
  57. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
  58. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +947 -0
  59. acryl_datahub_cloud/sdk/assertions_client.py +1639 -0
  60. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  61. acryl_datahub_cloud/{_sdk_extras → sdk}/entities/assertion.py +5 -2
  62. acryl_datahub_cloud/{_sdk_extras → sdk}/subscription_client.py +146 -33
  63. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/METADATA +48 -43
  64. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/RECORD +72 -54
  65. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/entry_points.txt +1 -0
  66. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -19
  67. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -717
  68. /acryl_datahub_cloud/{_sdk_extras/entities → datahub_forms_notifications}/__init__.py +0 -0
  69. /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/monitor.py +0 -0
  70. /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/subscription.py +0 -0
  71. /acryl_datahub_cloud/{_sdk_extras → sdk}/errors.py +0 -0
  72. /acryl_datahub_cloud/{_sdk_extras → sdk}/resolver_client.py +0 -0
  73. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/WHEEL +0 -0
  74. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1639 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime, timezone
5
+ from typing import TYPE_CHECKING, Any, Optional, Union
6
+
7
+ from acryl_datahub_cloud.sdk.assertion import (
8
+ AssertionMode,
9
+ FreshnessAssertion,
10
+ SmartFreshnessAssertion,
11
+ SmartVolumeAssertion,
12
+ _AssertionPublic,
13
+ )
14
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
15
+ AssertionIncidentBehavior,
16
+ DetectionMechanismInputTypes,
17
+ ExclusionWindowInputTypes,
18
+ InferenceSensitivity,
19
+ TimeWindowSizeInputTypes,
20
+ _AssertionInput,
21
+ _SmartFreshnessAssertionInput,
22
+ _SmartVolumeAssertionInput,
23
+ )
24
+ from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
25
+ _FreshnessAssertionInput,
26
+ )
27
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
28
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
29
+ from acryl_datahub_cloud.sdk.errors import SDKUsageError
30
+ from datahub.errors import ItemNotFoundError
31
+ from datahub.metadata import schema_classes as models
32
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, MonitorUrn
33
+
34
+ if TYPE_CHECKING:
35
+ from datahub.sdk.main_client import DataHubClient
36
+
37
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# TODO: Replace __datahub_system with the actual datahub system user https://linear.app/acryl-data/issue/OBS-1351/auditstamp-actor-hydration-pattern-for-sdk-calls
# Fallback actor urn: used as `updated_by` when the caller passes none, and as
# `created_by` when an existing assertion carries no creator.
DEFAULT_CREATED_BY = CorpUserUrn.from_string("urn:li:corpuser:__datahub_system")
41
+
42
+
43
+ class AssertionsClient:
44
def __init__(self, client: "DataHubClient"):
    """Bind this assertions client to a DataHub client.

    Args:
        client: The DataHub client. Its ``entities`` attribute is what the
            other methods of this class use to read and upsert entities.
    """
    self.client = client
    # Calls the module's experimental-warning helper on construction.
    _print_experimental_warning()
47
+
48
def sync_smart_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
) -> SmartFreshnessAssertion:
    """Upsert and merge a smart freshness assertion.

    Note: keyword arguments are required.

    "Upsert and merge" combines create and update. When the assertion does not
    exist it is created; when it exists it is updated. On update, a field is
    overwritten only when its input value is not None; a None input preserves
    the existing value. To un-set a field, pass an empty value instead (e.g. an
    empty list or an empty string).

    Schedule behavior:
        - Create case: uses the default hourly schedule ("0 * * * *").
        - Update case: preserves the existing schedule from the backend (not
          modifiable through this method).

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. If not provided, a urn will be generated
            and the assertion will be _created_ in the DataHub instance.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. If not provided, the existing
            value is preserved (a newly created assertion defaults to enabled).
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                  column_name='last_modified',
                  additional_filter='last_modified > 2021-01-01')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity to be applied to the assertion. Valid
            values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the assertion;
            currently only fixed range exclusion windows are supported. Valid
            values are:
            - from datetime.datetime objects: {
                "start": "datetime(2025, 1, 1, 0, 0, 0)",
                "end": "datetime(2025, 1, 2, 0, 0, 0)",
              }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
              }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
              )
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior to be applied to the assertion.
            Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.

    Returns:
        SmartFreshnessAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Settings that are forwarded verbatim to every downstream call below.
    shared_settings: dict[str, Any] = {
        "dataset_urn": dataset_urn,
        "display_name": display_name,
        "detection_mechanism": detection_mechanism,
        "sensitivity": sensitivity,
        "exclusion_windows": exclusion_windows,
        "training_data_lookback_days": training_data_lookback_days,
        "incident_behavior": incident_behavior,
        "tags": tags,
    }

    # 1. Without a urn there is nothing to merge with: create a new assertion.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_smart_freshness_assertion(
            enabled=True if enabled is None else enabled,
            created_by=updated_by,
            **shared_settings,
        )

    # 2. With a urn, validate the raw input first. created_by / created_at are
    #    placeholders here; the merge step replaces them with the values of the
    #    existing assertion.
    validated_input = _SmartFreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        **shared_settings,
    )

    # 3. Merge the validated input with the stored assertion and monitor, or
    #    get back a freshly created assertion when nothing is stored for `urn`.
    merge_result = self._retrieve_and_merge_smart_freshness_assertion_and_monitor(
        assertion_input=validated_input,
        urn=urn,
        enabled=enabled,
        updated_by=updated_by,
        now_utc=now_utc,
        **shared_settings,
    )

    # A public assertion object means the merge step already created it.
    if isinstance(merge_result, _AssertionPublic):
        # The merge helper only ever creates smart freshness assertions.
        assert isinstance(merge_result, SmartFreshnessAssertion)
        return merge_result

    # 4. Upsert the assertion first; if that fails the monitor is not attempted.
    assertion_entity, monitor_entity = (
        merge_result.to_assertion_and_monitor_entities()
    )
    self.client.entities.upsert(assertion_entity)
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    self.client.entities.upsert(monitor_entity)

    return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
219
+
220
def _retrieve_and_merge_smart_freshness_assertion_and_monitor(
    self,
    assertion_input: _SmartFreshnessAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
) -> Union[SmartFreshnessAssertion, _SmartFreshnessAssertionInput]:
    """Merge the input with the stored smart freshness assertion, or create one.

    Args:
        assertion_input: The validated input; supplies the urns used for lookup.
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion to look up.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled; None means "not specified".
        detection_mechanism: The detection mechanism to be used.
        sensitivity: The sensitivity to be applied.
        exclusion_windows: The exclusion windows to be applied.
        training_data_lookback_days: The training data lookback days.
        incident_behavior: The incident behavior to be applied.
        tags: The tags to be applied.
        updated_by: The actor recorded as creator if a new assertion is created.
        now_utc: The timestamp captured by the caller.

    Returns:
        A ``SmartFreshnessAssertion`` when no assertion entity exists for
        ``urn`` and a new one was created; otherwise the merged
        ``_SmartFreshnessAssertionInput`` that the caller still has to upsert.

    Raises:
        SDKUsageError: If the stored assertion belongs to a different dataset
            than the one in ``assertion_input``.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 No stored assertion: create a new one and return the created
    #     assertion (not an input) so the caller can return early.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_smart_freshness_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            # Honor an explicit enabled=False; default to enabled otherwise,
            # matching the create path in sync_smart_freshness_assertion.
            enabled=enabled if enabled is not None else True,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # 2.2 Assertion and monitor both stored: build the assertion from them.
    if maybe_monitor_entity:
        existing_assertion = SmartFreshnessAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    # 2.3 Assertion stored but monitor missing: use a placeholder monitor so the
    #     assertion object can still be built. Only an explicit falsy `enabled`
    #     makes the placeholder inactive.
    else:
        monitor_mode = "INACTIVE" if enabled is not None and not enabled else "ACTIVE"
        existing_assertion = SmartFreshnessAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Merge the existing assertion with the validated input:
    return self._merge_smart_freshness_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )
304
+
305
def _retrieve_and_merge_volume_assertion_and_monitor(
    self,
    assertion_input: _SmartVolumeAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SmartVolumeAssertion, _SmartVolumeAssertionInput]:
    """Merge the input with the stored smart volume assertion, or create one.

    Returns:
        A ``SmartVolumeAssertion`` when no assertion entity exists for ``urn``
        and a new one was created; otherwise the merged
        ``_SmartVolumeAssertionInput`` produced by ``_merge_smart_volume_input``.

    Raises:
        SDKUsageError: If the stored assertion belongs to a different dataset
            than the one in ``assertion_input``.
    """
    # Step 1: look up whatever is already stored for this assertion.
    stored = self._retrieve_assertion_and_monitor(assertion_input)
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = stored

    # Step 2a: nothing stored for this urn, so create a new assertion and hand
    # the created assertion back to the caller.
    # NOTE(review): `enabled` is not forwarded on this path - confirm whether
    # _create_smart_volume_assertion should receive it.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_smart_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    # Step 2b: pick the monitor to pair with the stored assertion. When the
    # monitor is missing, a placeholder is used; it is inactive only when
    # `enabled` was explicitly given as a falsy value.
    if maybe_monitor_entity:
        paired_monitor = maybe_monitor_entity
    else:
        if enabled is None or enabled:
            monitor_mode = "ACTIVE"
        else:
            monitor_mode = "INACTIVE"
        paired_monitor = Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode))
    existing_assertion = SmartVolumeAssertion._from_entities(
        maybe_assertion_entity, paired_monitor
    )

    # Step 3: refuse to re-point an existing assertion at another dataset.
    dataset_changed = (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    )
    if dataset_changed:
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # Step 4: combine the stored state with the validated input.
    return self._merge_smart_volume_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        schedule=schedule,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )
392
+
393
def _retrieve_and_merge_freshness_assertion_and_monitor(
    self,
    assertion_input: _FreshnessAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
) -> Union[FreshnessAssertion, _FreshnessAssertionInput]:
    """Merge the input with the stored freshness assertion, or create one.

    Returns:
        A ``FreshnessAssertion`` when no assertion entity exists for ``urn`` and
        a new one was created; otherwise the merged ``_FreshnessAssertionInput``
        produced by ``_merge_freshness_input``.

    Raises:
        SDKUsageError: If the stored assertion belongs to a different dataset
            than the one in ``assertion_input``.
    """
    # Step 1: look up whatever is already stored for this assertion.
    stored = self._retrieve_assertion_and_monitor(assertion_input)
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = stored

    # Step 2a: nothing stored for this urn, so create a new assertion and hand
    # the created assertion back to the caller.
    # NOTE(review): `enabled` is not forwarded on this path - confirm whether
    # _create_freshness_assertion should receive it.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_freshness_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
            freshness_schedule_check_type=freshness_schedule_check_type,
            lookback_window=lookback_window,
        )

    # Step 2b: pick the monitor to pair with the stored assertion. When the
    # monitor is missing, a placeholder is used; it is inactive only when
    # `enabled` was explicitly given as a falsy value.
    if maybe_monitor_entity:
        paired_monitor = maybe_monitor_entity
    else:
        if enabled is None or enabled:
            monitor_mode = "ACTIVE"
        else:
            monitor_mode = "INACTIVE"
        paired_monitor = Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode))
    existing_assertion = FreshnessAssertion._from_entities(
        maybe_assertion_entity, paired_monitor
    )

    # Step 3: refuse to re-point an existing assertion at another dataset.
    dataset_changed = (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    )
    if dataset_changed:
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # Step 4: combine the stored state with the validated input.
    return self._merge_freshness_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
        schedule=schedule,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
    )
479
+
480
def _retrieve_assertion_and_monitor(
    self,
    assertion_input: _AssertionInput,
) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
    """Fetch the stored assertion and its monitor, if they exist.

    Args:
        assertion_input: The validated input; its ``urn`` and ``dataset_urn``
            identify the assertion and the monitor to look up.

    Returns:
        A ``(assertion, monitor_urn, monitor)`` tuple. ``assertion`` and
        ``monitor`` are None when the lookup returns nothing or raises
        ``ItemNotFoundError``; ``monitor_urn`` is always set.
    """
    assert assertion_input.urn is not None, "URN is required"

    def _get_if_present(entity_urn: Any, expected_type: type) -> Optional[Any]:
        # A missing entity is an expected outcome here, not an error.
        try:
            found = self.client.entities.get(entity_urn)
        except ItemNotFoundError:
            return None
        if found is not None:
            assert isinstance(found, expected_type)
        return found

    # Assertion first, then the monitor keyed by (dataset urn, assertion urn).
    found_assertion: Optional[Assertion] = _get_if_present(
        assertion_input.urn, Assertion
    )
    monitor_urn = Monitor._ensure_id(
        id=(assertion_input.dataset_urn, assertion_input.urn)
    )
    found_monitor: Optional[Monitor] = _get_if_present(monitor_urn, Monitor)

    return found_assertion, monitor_urn, found_monitor
518
+
519
def _merge_smart_freshness_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _SmartFreshnessAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartFreshnessAssertion,
) -> _SmartFreshnessAssertionInput:
    """Build a new assertion input that combines caller input with stored state.

    Each mergeable field is resolved through ``_merge_field``, which receives
    the caller's raw value, the field name, the validated input, the existing
    assertion and the value currently stored on the assertion/monitor entity.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time captured when the sync call started.
        assertion_input: The validated input to the sync call.
        maybe_assertion_entity: The stored assertion entity, if any.
        maybe_monitor_entity: The stored monitor entity, if any.
        existing_assertion: The assertion object built from the stored entities.

    Returns:
        The merged assertion input.
    """

    def resolve(user_value: Any, field_name: str, stored_value: Any) -> Any:
        # Thin wrapper that fills in the two arguments common to every field.
        return _merge_field(
            user_value,
            field_name,
            assertion_input,
            existing_assertion,
            stored_value,
        )

    return _SmartFreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=resolve(
            display_name,
            "display_name",
            maybe_assertion_entity.description if maybe_assertion_entity else None,
        ),
        enabled=resolve(
            enabled,
            "enabled",
            existing_assertion.mode == AssertionMode.ACTIVE
            if existing_assertion
            else None,
        ),
        # The schedule cannot be changed on update: None is passed as the
        # caller value so the existing schedule is what gets merged in.
        schedule=resolve(
            None,
            "schedule",
            existing_assertion.schedule if existing_assertion else None,
        ),
        # TODO: Consider moving this conversion to DetectionMechanism.parse(), it could avoid having to use Optional on the return type of SmartFreshnessAssertion.get_detection_mechanism()
        detection_mechanism=resolve(
            detection_mechanism,
            "detection_mechanism",
            SmartFreshnessAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None,
        ),
        sensitivity=resolve(
            sensitivity,
            "sensitivity",
            maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
        ),
        exclusion_windows=resolve(
            exclusion_windows,
            "exclusion_windows",
            maybe_monitor_entity.exclusion_windows if maybe_monitor_entity else None,
        ),
        training_data_lookback_days=resolve(
            training_data_lookback_days,
            "training_data_lookback_days",
            maybe_monitor_entity.training_data_lookback_days
            if maybe_monitor_entity
            else None,
        ),
        incident_behavior=resolve(
            incident_behavior,
            "incident_behavior",
            SmartFreshnessAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        tags=resolve(
            tags,
            "tags",
            maybe_assertion_entity.tags if maybe_assertion_entity else None,
        ),
        # Creation metadata comes from the existing assertion, with fallbacks
        # when it carries none; update metadata comes from the validated input.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
649
+
650
def _merge_freshness_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _FreshnessAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: FreshnessAssertion,
    schedule: Optional[Union[str, models.CronScheduleClass]],
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
) -> _FreshnessAssertionInput:
    """Combine caller input with the stored freshness assertion and monitor.

    Each mergeable field is resolved by ``_merge_field``, which receives the
    caller-supplied value, the field name, the validated input, the existing
    assertion, and the value currently stored on the existing assertion or
    monitor (``None`` when the relevant object is not available).

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time from when the function is called; used
            as ``created_at`` only when the existing assertion has none.
        assertion_input: The validated input to the function.
        maybe_assertion_entity: The existing assertion entity from the DataHub
            instance, if any.
        maybe_monitor_entity: The existing monitor entity from the DataHub
            instance, if any.
        existing_assertion: The existing assertion from the DataHub instance.
        schedule: The schedule to be applied to the assertion.
        freshness_schedule_check_type: The freshness schedule check type to be
            applied to the assertion.
        lookback_window: The lookback window to be applied to the assertion.

    Returns:
        The merged assertion input.
    """
    entity_client = self.client.entities

    # Each stored fallback is computed right before its merge call so the
    # evaluation order matches a single inline constructor call.

    # display_name: the stored value is the assertion entity's description.
    if maybe_assertion_entity:
        stored_display_name = maybe_assertion_entity.description
    else:
        stored_display_name = None
    merged_display_name = _merge_field(
        display_name,
        "display_name",
        assertion_input,
        existing_assertion,
        stored_display_name,
    )

    # enabled: derived from whether the existing assertion's mode is ACTIVE.
    if existing_assertion:
        stored_enabled = existing_assertion.mode == AssertionMode.ACTIVE
    else:
        stored_enabled = None
    merged_enabled = _merge_field(
        enabled,
        "enabled",
        assertion_input,
        existing_assertion,
        stored_enabled,
    )

    if existing_assertion:
        stored_schedule = existing_assertion.schedule
    else:
        stored_schedule = None
    merged_schedule = _merge_field(
        schedule,
        "schedule",
        assertion_input,
        existing_assertion,
        stored_schedule,
    )

    if existing_assertion:
        stored_check_type = existing_assertion._freshness_schedule_check_type
    else:
        stored_check_type = None
    merged_check_type = _merge_field(
        freshness_schedule_check_type,
        "freshness_schedule_check_type",
        assertion_input,
        existing_assertion,
        stored_check_type,
    )

    if existing_assertion:
        stored_lookback_window = existing_assertion.lookback_window
    else:
        stored_lookback_window = None
    merged_lookback_window = _merge_field(
        lookback_window,
        "lookback_window",
        assertion_input,
        existing_assertion,
        stored_lookback_window,
    )

    # detection_mechanism: only derivable when both entities are present.
    if maybe_assertion_entity and maybe_monitor_entity:
        stored_detection_mechanism = FreshnessAssertion._get_detection_mechanism(
            maybe_assertion_entity, maybe_monitor_entity, default=None
        )
    else:
        stored_detection_mechanism = None
    merged_detection_mechanism = _merge_field(
        detection_mechanism,
        "detection_mechanism",
        assertion_input,
        existing_assertion,
        stored_detection_mechanism,
    )

    if maybe_assertion_entity:
        stored_incident_behavior = FreshnessAssertion._get_incident_behavior(
            maybe_assertion_entity
        )
    else:
        stored_incident_behavior = None
    merged_incident_behavior = _merge_field(
        incident_behavior,
        "incident_behavior",
        assertion_input,
        existing_assertion,
        stored_incident_behavior,
    )

    if maybe_assertion_entity:
        stored_tags = maybe_assertion_entity.tags
    else:
        stored_tags = None
    merged_tags = _merge_field(
        tags,
        "tags",
        assertion_input,
        existing_assertion,
        stored_tags,
    )

    # Creation audit fields come from the existing assertion, falling back to
    # the default creator / the current time when they are not set.
    original_creator = existing_assertion.created_by or DEFAULT_CREATED_BY
    original_created_at = existing_assertion.created_at or now_utc

    return _FreshnessAssertionInput(
        urn=urn,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        display_name=merged_display_name,
        enabled=merged_enabled,
        schedule=merged_schedule,
        freshness_schedule_check_type=merged_check_type,
        lookback_window=merged_lookback_window,
        detection_mechanism=merged_detection_mechanism,
        incident_behavior=merged_incident_behavior,
        tags=merged_tags,
        created_by=original_creator,
        created_at=original_created_at,
        # Update audit fields always come from the validated input.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
771
+
772
def _merge_smart_volume_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    schedule: Optional[Union[str, models.CronScheduleClass]],
    now_utc: datetime,
    assertion_input: _SmartVolumeAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartVolumeAssertion,
) -> _SmartVolumeAssertionInput:
    """Combine caller input with the stored smart volume assertion and monitor.

    Each mergeable field is resolved by ``_merge_field``, which receives the
    caller-supplied value, the field name, the validated input, the existing
    assertion, and the value currently stored on the existing assertion or
    monitor (``None`` when the relevant object is not available).

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        schedule: The schedule to be applied to the assertion.
        now_utc: The current UTC time from when the function is called; used
            as ``created_at`` only when the existing assertion has none.
        assertion_input: The validated input to the function.
        maybe_assertion_entity: The existing assertion entity from the DataHub
            instance, if any.
        maybe_monitor_entity: The existing monitor entity from the DataHub
            instance, if any.
        existing_assertion: The existing assertion from the DataHub instance.

    Returns:
        The merged assertion input.
    """
    entity_client = self.client.entities

    # Each stored fallback is computed right before its merge call so the
    # evaluation order matches a single inline constructor call.

    # display_name: the stored value is the assertion entity's description.
    if maybe_assertion_entity:
        stored_display_name = maybe_assertion_entity.description
    else:
        stored_display_name = None
    merged_display_name = _merge_field(
        display_name,
        "display_name",
        assertion_input,
        existing_assertion,
        stored_display_name,
    )

    # enabled: derived from whether the existing assertion's mode is ACTIVE.
    if existing_assertion:
        stored_enabled = existing_assertion.mode == AssertionMode.ACTIVE
    else:
        stored_enabled = None
    merged_enabled = _merge_field(
        enabled,
        "enabled",
        assertion_input,
        existing_assertion,
        stored_enabled,
    )

    if existing_assertion:
        stored_schedule = existing_assertion.schedule
    else:
        stored_schedule = None
    merged_schedule = _merge_field(
        schedule,
        "schedule",
        assertion_input,
        existing_assertion,
        stored_schedule,
    )

    # detection_mechanism: only derivable when both entities are present.
    if maybe_assertion_entity and maybe_monitor_entity:
        stored_detection_mechanism = SmartVolumeAssertion._get_detection_mechanism(
            maybe_assertion_entity, maybe_monitor_entity, default=None
        )
    else:
        stored_detection_mechanism = None
    merged_detection_mechanism = _merge_field(
        detection_mechanism,
        "detection_mechanism",
        assertion_input,
        existing_assertion,
        stored_detection_mechanism,
    )

    # The inference settings below are read from the monitor entity.
    if maybe_monitor_entity:
        stored_sensitivity = maybe_monitor_entity.sensitivity
    else:
        stored_sensitivity = None
    merged_sensitivity = _merge_field(
        sensitivity,
        "sensitivity",
        assertion_input,
        existing_assertion,
        stored_sensitivity,
    )

    if maybe_monitor_entity:
        stored_exclusion_windows = maybe_monitor_entity.exclusion_windows
    else:
        stored_exclusion_windows = None
    merged_exclusion_windows = _merge_field(
        exclusion_windows,
        "exclusion_windows",
        assertion_input,
        existing_assertion,
        stored_exclusion_windows,
    )

    if maybe_monitor_entity:
        stored_lookback_days = maybe_monitor_entity.training_data_lookback_days
    else:
        stored_lookback_days = None
    merged_lookback_days = _merge_field(
        training_data_lookback_days,
        "training_data_lookback_days",
        assertion_input,
        existing_assertion,
        stored_lookback_days,
    )

    if maybe_assertion_entity:
        stored_incident_behavior = SmartVolumeAssertion._get_incident_behavior(
            maybe_assertion_entity
        )
    else:
        stored_incident_behavior = None
    merged_incident_behavior = _merge_field(
        incident_behavior,
        "incident_behavior",
        assertion_input,
        existing_assertion,
        stored_incident_behavior,
    )

    if maybe_assertion_entity:
        stored_tags = maybe_assertion_entity.tags
    else:
        stored_tags = None
    merged_tags = _merge_field(
        tags,
        "tags",
        assertion_input,
        existing_assertion,
        stored_tags,
    )

    # Creation audit fields come from the existing assertion, falling back to
    # the default creator / the current time when they are not set.
    original_creator = existing_assertion.created_by or DEFAULT_CREATED_BY
    original_created_at = existing_assertion.created_at or now_utc

    return _SmartVolumeAssertionInput(
        urn=urn,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        display_name=merged_display_name,
        enabled=merged_enabled,
        schedule=merged_schedule,
        detection_mechanism=merged_detection_mechanism,
        sensitivity=merged_sensitivity,
        exclusion_windows=merged_exclusion_windows,
        training_data_lookback_days=merged_lookback_days,
        incident_behavior=merged_incident_behavior,
        tags=merged_tags,
        created_by=original_creator,
        created_at=original_created_at,
        # Update audit fields always come from the validated input.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
903
+
904
def _create_smart_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
) -> SmartFreshnessAssertion:
    """Create a new smart freshness assertion together with its monitor.

    All arguments must be passed by keyword.

    The created assertion will use the default hourly schedule ("0 * * * *").

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
              additional_filter='last_modified > 2021-01-01')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity to be applied to the assertion. Valid
            values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the
            assertion; currently only fixed range exclusion windows are
            supported. Valid values are:
            - from datetime.datetime objects: {
                "start": "datetime(2025, 1, 1, 0, 0, 0)",
                "end": "datetime(2025, 1, 2, 0, 0, 0)",
              }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
              }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
              )
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. When omitted, ``DEFAULT_CREATED_BY`` is used
            and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.

    Returns:
        SmartFreshnessAssertion: The created assertion.
    """
    _print_experimental_warning()
    created_timestamp = datetime.now(timezone.utc)

    author = created_by
    if author is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        author = DEFAULT_CREATED_BY

    # On creation the creator and the timestamp double as the "updated" audit values.
    validated_input = _SmartFreshnessAssertionInput(
        urn=None,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=author,
        created_at=created_timestamp,
        updated_by=author,
        updated_at=created_timestamp,
    )
    new_assertion, new_monitor = validated_input.to_assertion_and_monitor_entities()

    # The assertion is written first; if that raises, the monitor is never attempted.
    # TODO: Once delete is implemented
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy),
    # catch a failing monitor creation, log it, delete the assertion and re-raise.
    for new_entity in (new_assertion, new_monitor):
        self.client.entities.create(new_entity)

    return SmartFreshnessAssertion._from_entities(new_assertion, new_monitor)
1014
+
1015
def _create_smart_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartVolumeAssertion:
    """Create a new smart volume assertion together with its monitor.

    All arguments must be passed by keyword.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
              additional_filter='last_modified > 2021-01-01')
            - {
                "type": "high_watermark_column",
                "column_name": "id",
                "additional_filter": "id > 1000",
              } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
              additional_filter='id > 1000')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity to be applied to the assertion. Valid
            values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the
            assertion; currently only fixed range exclusion windows are
            supported. Valid values are:
            - from datetime.datetime objects: {
                "start": "datetime(2025, 1, 1, 0, 0, 0)",
                "end": "datetime(2025, 1, 2, 0, 0, 0)",
              }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
              }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
              )
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. When omitted, ``DEFAULT_CREATED_BY`` is used
            and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        SmartVolumeAssertion: The created assertion.
    """
    _print_experimental_warning()
    created_timestamp = datetime.now(timezone.utc)

    author = created_by
    if author is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        author = DEFAULT_CREATED_BY

    # On creation the creator and the timestamp double as the "updated" audit values.
    validated_input = _SmartVolumeAssertionInput(
        urn=None,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=author,
        created_at=created_timestamp,
        updated_by=author,
        updated_at=created_timestamp,
        schedule=schedule,
    )
    new_assertion, new_monitor = validated_input.to_assertion_and_monitor_entities()

    # The assertion is written first; if that raises, the monitor is never attempted.
    # TODO: Once delete is implemented
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy),
    # catch a failing monitor creation, log it, delete the assertion and re-raise.
    for new_entity in (new_assertion, new_monitor):
        self.client.entities.create(new_entity)

    return SmartVolumeAssertion._from_entities(new_assertion, new_monitor)
1136
+
1137
def _create_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> FreshnessAssertion:
    """Create a new freshness assertion together with its monitor.

    All arguments must be passed by keyword.

    When ``schedule`` is omitted, a default schedule is used (documented as
    the daily schedule "0 0 * * *").

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
              additional_filter='last_modified > 2021-01-01')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        freshness_schedule_check_type: The freshness schedule check type to be
            applied to the assertion. Valid values are:
            - "since_the_last_check" or
              models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK
            - "cron" or models.FreshnessAssertionScheduleTypeClass.CRON
        lookback_window: The lookback window to be applied to the assertion.
            Valid values are:
            - from models.TimeWindowSize objects: models.TimeWindowSizeClass(
                unit=models.CalendarIntervalClass.DAY,
                multiple=1)
            - from TimeWindowSize objects: TimeWindowSize(unit='DAY', multiple=1)
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. When omitted, ``DEFAULT_CREATED_BY`` is used
            and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        FreshnessAssertion: The created assertion.
    """
    _print_experimental_warning()
    created_timestamp = datetime.now(timezone.utc)

    author = created_by
    if author is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        author = DEFAULT_CREATED_BY

    # On creation the creator and the timestamp double as the "updated" audit values.
    validated_input = _FreshnessAssertionInput(
        urn=None,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=author,
        created_at=created_timestamp,
        updated_by=author,
        updated_at=created_timestamp,
        schedule=schedule,
    )
    new_assertion, new_monitor = validated_input.to_assertion_and_monitor_entities()

    # The assertion is written first; if that raises, the monitor is never attempted.
    # TODO: Once delete is implemented
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy),
    # catch a failing monitor creation, log it, delete the assertion and re-raise.
    for new_entity in (new_assertion, new_monitor):
        self.client.entities.create(new_entity)

    return FreshnessAssertion._from_entities(new_assertion, new_monitor)
1242
+
1243
+ def sync_smart_volume_assertion(
1244
+ self,
1245
+ *,
1246
+ dataset_urn: Union[str, DatasetUrn],
1247
+ urn: Optional[Union[str, AssertionUrn]] = None,
1248
+ display_name: Optional[str] = None,
1249
+ enabled: Optional[bool] = None,
1250
+ detection_mechanism: DetectionMechanismInputTypes = None,
1251
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
1252
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
1253
+ training_data_lookback_days: Optional[int] = None,
1254
+ incident_behavior: Optional[
1255
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1256
+ ] = None,
1257
+ tags: Optional[TagsInputType] = None,
1258
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
1259
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1260
+ ) -> SmartVolumeAssertion:
1261
+ """Upsert and merge a smart volume assertion.
1262
+
1263
+ Note: keyword arguments are required.
1264
+
1265
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
1266
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
1267
+ be updated if the input value is not None. If the input value is None, the existing value
1268
+ will be preserved. If the input value can be un-set e.g. by passing an empty list or
1269
+ empty string.
1270
+
1271
+ Schedule behavior:
1272
+ - Create case: Uses default hourly schedule (\"0 * * * *\") or provided schedule
1273
+ - Update case: Different than `sync_smart_freshness_assertion`, schedule is updated.
1274
+
1275
+ Args:
1276
+ dataset_urn: The urn of the dataset to be monitored.
1277
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
1278
+ will be _created_ in the DataHub instance.
1279
+ display_name: The display name of the assertion. If not provided, a random display name
1280
+ will be generated.
1281
+ enabled: Whether the assertion is enabled. If not provided, the existing value
1282
+ will be preserved.
1283
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
1284
+ schema is recommended. Valid values are:
1285
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
1286
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
1287
+ - {
1288
+ "type": "last_modified_column",
1289
+ "column_name": "last_modified",
1290
+ "additional_filter": "last_modified > '2021-01-01'",
1291
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
1292
+ additional_filter='last_modified > 2021-01-01')
1293
+ - {
1294
+ "type": "high_watermark_column",
1295
+ "column_name": "id",
1296
+ "additional_filter": "id > 1000",
1297
+ } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
1298
+ additional_filter='id > 1000')
1299
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
1300
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
1301
+ - "low" or InferenceSensitivity.LOW
1302
+ - "medium" or InferenceSensitivity.MEDIUM
1303
+ - "high" or InferenceSensitivity.HIGH
1304
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
1305
+ fixed range exclusion windows are supported. Valid values are:
1306
+ - from datetime.datetime objects: {
1307
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
1308
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
1309
+ }
1310
+ - from string datetimes: {
1311
+ "start": "2025-01-01T00:00:00",
1312
+ "end": "2025-01-02T00:00:00",
1313
+ }
1314
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
1315
+ start=datetime(2025, 1, 1, 0, 0, 0),
1316
+ end=datetime(2025, 1, 2, 0, 0, 0)
1317
+ )
1318
+ training_data_lookback_days: The training data lookback days to be applied to the
1319
+ assertion as an integer.
1320
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1321
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1322
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
1323
+ tags: The tags to be applied to the assertion. Valid values are:
1324
+ - a list of strings (strings will be converted to TagUrn objects)
1325
+ - a list of TagUrn objects
1326
+ - a list of TagAssociationClass objects
1327
+ updated_by: Optional urn of the user who updated the assertion. The format is
1328
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1329
+ The default is the datahub system user.
1330
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
1331
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
1332
+ schedule will be used. The schedule determines when the assertion will be evaluated.
1333
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1334
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
1335
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1336
+
1337
+ Returns:
1338
+ SmartVolumeAssertion: The created or updated assertion.
1339
+ """
1340
+ _print_experimental_warning()
1341
+ now_utc = datetime.now(timezone.utc)
1342
+
1343
+ if updated_by is None:
1344
+ logger.warning(
1345
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1346
+ )
1347
+ updated_by = DEFAULT_CREATED_BY
1348
+
1349
+ # 1. If urn is not set, create a new assertion
1350
+ if urn is None:
1351
+ logger.info("URN is not set, creating a new assertion")
1352
+ return self._create_smart_volume_assertion(
1353
+ dataset_urn=dataset_urn,
1354
+ display_name=display_name,
1355
+ enabled=enabled if enabled is not None else True,
1356
+ detection_mechanism=detection_mechanism,
1357
+ sensitivity=sensitivity,
1358
+ exclusion_windows=exclusion_windows,
1359
+ training_data_lookback_days=training_data_lookback_days,
1360
+ incident_behavior=incident_behavior,
1361
+ tags=tags,
1362
+ created_by=updated_by,
1363
+ schedule=schedule,
1364
+ )
1365
+
1366
+ # 2. If urn is set, first validate the input:
1367
+ assertion_input = _SmartVolumeAssertionInput(
1368
+ urn=urn,
1369
+ entity_client=self.client.entities,
1370
+ dataset_urn=dataset_urn,
1371
+ display_name=display_name,
1372
+ detection_mechanism=detection_mechanism,
1373
+ sensitivity=sensitivity,
1374
+ exclusion_windows=exclusion_windows,
1375
+ training_data_lookback_days=training_data_lookback_days,
1376
+ incident_behavior=incident_behavior,
1377
+ tags=tags,
1378
+ created_by=updated_by, # This will be overridden by the actual created_by
1379
+ created_at=now_utc, # This will be overridden by the actual created_at
1380
+ updated_by=updated_by,
1381
+ updated_at=now_utc,
1382
+ schedule=schedule,
1383
+ )
1384
+
1385
+ # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
1386
+ # if the assertion does not exist:
1387
+ merged_assertion_input_or_created_assertion = (
1388
+ self._retrieve_and_merge_volume_assertion_and_monitor(
1389
+ assertion_input=assertion_input,
1390
+ dataset_urn=dataset_urn,
1391
+ urn=urn,
1392
+ display_name=display_name,
1393
+ enabled=enabled,
1394
+ detection_mechanism=detection_mechanism,
1395
+ sensitivity=sensitivity,
1396
+ exclusion_windows=exclusion_windows,
1397
+ training_data_lookback_days=training_data_lookback_days,
1398
+ incident_behavior=incident_behavior,
1399
+ tags=tags,
1400
+ updated_by=updated_by,
1401
+ now_utc=now_utc,
1402
+ schedule=schedule,
1403
+ )
1404
+ )
1405
+
1406
+ # Return early if we created a new assertion in the merge:
1407
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
1408
+ # We know this is the correct type because we passed the assertion_class parameter
1409
+ assert isinstance(
1410
+ merged_assertion_input_or_created_assertion, SmartVolumeAssertion
1411
+ )
1412
+ return merged_assertion_input_or_created_assertion
1413
+
1414
+ # 4. Upsert the assertion and monitor entities:
1415
+ assertion_entity, monitor_entity = (
1416
+ merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
1417
+ )
1418
+ # If assertion upsert fails, we won't try to upsert the monitor
1419
+ self.client.entities.upsert(assertion_entity)
1420
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
1421
+ # try:
1422
+ self.client.entities.upsert(monitor_entity)
1423
+ # except Exception as e:
1424
+ # logger.error(f"Error upserting monitor: {e}")
1425
+ # self.client.entities.delete(assertion_entity)
1426
+ # raise e
1427
+
1428
+ return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
1429
+
1430
def sync_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
) -> FreshnessAssertion:
    """Create a freshness assertion, or merge new values into an existing one.

    All arguments are keyword-only.

    Flow:
      1. With no ``urn``, a new assertion is created and returned.
      2. With a ``urn``, the input is validated and handed to the merge helper
         together with the raw arguments. If the helper returns an assertion
         object (it creates one when the assertion does not exist yet), that
         object is returned as-is.
      3. Otherwise the merged input is converted to an assertion entity and a
         monitor entity, which are upserted in that order.

    Merge semantics: a field whose input is None keeps its existing value; a
    non-None input replaces it. Passing an empty list or empty string is a
    non-None input, so it can be used to un-set a field.

    Schedule behavior:
      - Create case: uses the default daily schedule ("0 0 * * *") or the
        provided schedule.
      - Update case: uses the existing schedule or the provided schedule.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. If not provided, a urn will be generated
            and the assertion will be _created_ in the DataHub instance.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. On create, None is treated
            as True; on update, None preserves the existing value.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                  column_name='last_modified',
                  additional_filter='last_modified > 2021-01-01')
            - {
                "type": "high_watermark_column",
                "column_name": "id",
                "additional_filter": "id > 1000",
              } or DetectionMechanism.HIGH_WATERMARK_COLUMN(
                  column_name='id', additional_filter='id > 1000')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. When omitted, a warning is logged and
            DEFAULT_CREATED_BY (the datahub system user) is used.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        freshness_schedule_check_type: Optional schedule check type, either a
            string or a models.FreshnessAssertionScheduleTypeClass value. It is
            forwarded unchanged to the create/validate/merge helpers.
            NOTE(review): the accepted string values are not visible in this
            method -- confirm against _FreshnessAssertionInput.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.
        lookback_window: Optional time-window size, forwarded unchanged to the
            create/validate/merge helpers.
            NOTE(review): the accepted shapes are defined by
            TimeWindowSizeInputTypes, which is not visible here -- confirm.

    Returns:
        FreshnessAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Caller-supplied fields that every helper below receives unchanged.
    caller_fields: dict[str, Any] = {
        "dataset_urn": dataset_urn,
        "display_name": display_name,
        "detection_mechanism": detection_mechanism,
        "incident_behavior": incident_behavior,
        "tags": tags,
        "schedule": schedule,
        "freshness_schedule_check_type": freshness_schedule_check_type,
        "lookback_window": lookback_window,
    }

    # Step 1: no urn means there is nothing to merge with -- create outright.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_freshness_assertion(
            enabled=True if enabled is None else enabled,
            created_by=updated_by,
            **caller_fields,
        )

    # Step 2: validate the raw input. created_by / created_at are placeholders
    # here; per the original notes they are overridden by the actual stored
    # values.
    candidate_input = _FreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        **caller_fields,
    )

    # Step 3: merge with the existing assertion and monitor entities. The
    # helper creates a new assertion instead when none exists for this urn.
    merge_outcome = self._retrieve_and_merge_freshness_assertion_and_monitor(
        assertion_input=candidate_input,
        urn=urn,
        enabled=enabled,
        updated_by=updated_by,
        now_utc=now_utc,
        **caller_fields,
    )

    # An assertion object (rather than a merged input) means the helper
    # already created it, so there is nothing left to upsert.
    if isinstance(merge_outcome, _AssertionPublic):
        # Narrow to the concrete return type.
        assert isinstance(merge_outcome, FreshnessAssertion)
        return merge_outcome

    # Step 4: persist. The assertion goes first so that a failure there
    # prevents the monitor upsert from being attempted.
    entities = merge_outcome.to_assertion_and_monitor_entities()
    assertion_entity, monitor_entity = entities
    self.client.entities.upsert(assertion_entity)
    # TODO: Catch a failed monitor upsert, delete the assertion that was just
    # written, and re-raise (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    self.client.entities.upsert(monitor_entity)

    return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
1594
+
1595
+
1596
def _merge_field(
    input_field_value: Any,
    input_field_name: str,
    validated_assertion_input: _AssertionInput,
    validated_existing_assertion: _AssertionPublic,
    existing_entity_value: Optional[Any] = None,  # TODO: Can we do better than Any?
) -> Any:
    """Pick the value a single field should have after an upsert-and-merge.

    Candidates are tried in this order and the first that is not None wins:
      1. ``input_field_value`` -- the caller's explicit input. Only None counts
         as "not provided"; an empty list or empty string is still an explicit
         input and is returned, which is how a field can be un-set.
      2. ``existing_entity_value`` -- the value read directly from the
         existing entity.
      3. The attribute named ``input_field_name`` on
         ``validated_existing_assertion``.
      4. The attribute named ``input_field_name`` on
         ``validated_assertion_input``, returned even when it is None.

    Args:
        input_field_value: The value of the field as passed by the caller.
        input_field_name: The attribute name looked up on both validated objects.
        validated_assertion_input: The *validated* input to the calling function.
        validated_existing_assertion: The *validated* existing assertion from
            the DataHub instance.
        existing_entity_value: The value of the field taken directly from the
            existing entity, if any.

    Returns:
        The merged value of the field.
    """
    # An explicit input always wins, including falsy values such as [] or "".
    if input_field_value is not None:
        return input_field_value

    # Next preference: the value read straight from the existing entity.
    if existing_entity_value is not None:
        return existing_entity_value

    # Then the validated existing assertion, and finally the validated input.
    stored_value = getattr(validated_existing_assertion, input_field_name)
    if stored_value is not None:
        return stored_value
    return getattr(validated_assertion_input, input_field_name)
1634
+
1635
+
1636
def _print_experimental_warning() -> None:
    """Print a one-line notice that the assertions client is experimental."""
    notice = "Warning: The assertions client is experimental and under heavy development. Expect breaking changes."
    print(notice)