acryl-datahub-cloud 0.3.11.1rc8__py3-none-any.whl → 0.3.12rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (82) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  3. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +524 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  9. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  10. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  11. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2014 -1958
  12. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  14. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  17. acryl_datahub_cloud/metadata/schema.avsc +26713 -26274
  18. acryl_datahub_cloud/metadata/schema_classes.py +1302 -777
  19. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  20. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  21. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  22. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +220 -208
  23. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +36 -7
  24. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  25. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +40 -8
  26. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  27. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  28. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  29. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  30. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  31. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  32. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  33. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  34. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  35. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  36. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  37. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  38. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  39. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  40. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  41. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  42. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  43. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  44. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  45. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  46. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  47. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  48. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  49. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  51. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  52. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  53. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  54. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  55. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +39 -10
  56. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  57. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  58. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  59. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  60. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  61. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  62. acryl_datahub_cloud/notifications/__init__.py +0 -0
  63. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  64. acryl_datahub_cloud/sdk/__init__.py +25 -0
  65. acryl_datahub_cloud/sdk/assertion.py +767 -0
  66. acryl_datahub_cloud/sdk/assertion_input.py +1335 -0
  67. acryl_datahub_cloud/sdk/assertions_client.py +1153 -0
  68. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  69. acryl_datahub_cloud/sdk/entities/assertion.py +425 -0
  70. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  71. acryl_datahub_cloud/sdk/entities/subscription.py +84 -0
  72. acryl_datahub_cloud/sdk/errors.py +34 -0
  73. acryl_datahub_cloud/sdk/resolver_client.py +39 -0
  74. acryl_datahub_cloud/sdk/subscription_client.py +678 -0
  75. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/METADATA +44 -39
  76. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/RECORD +79 -55
  77. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/WHEEL +1 -1
  78. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/entry_points.txt +1 -0
  79. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
  80. acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
  81. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
  82. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1153 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime, timezone
5
+ from typing import TYPE_CHECKING, Any, Optional, Union
6
+
7
+ from acryl_datahub_cloud.sdk.assertion import (
8
+ AssertionMode,
9
+ SmartFreshnessAssertion,
10
+ SmartVolumeAssertion,
11
+ _AssertionPublic,
12
+ )
13
+ from acryl_datahub_cloud.sdk.assertion_input import (
14
+ AssertionIncidentBehavior,
15
+ DetectionMechanismInputTypes,
16
+ ExclusionWindowInputTypes,
17
+ InferenceSensitivity,
18
+ _AssertionInput,
19
+ _SmartFreshnessAssertionInput,
20
+ _SmartVolumeAssertionInput,
21
+ )
22
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
23
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
24
+ from acryl_datahub_cloud.sdk.errors import SDKUsageError
25
+ from datahub.errors import ItemNotFoundError
26
+ from datahub.metadata import schema_classes as models
27
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, MonitorUrn
28
+
29
+ if TYPE_CHECKING:
30
+ from datahub.sdk.main_client import DataHubClient
31
+
32
logger = logging.getLogger(__name__)

# TODO: Replace __datahub_system with the actual datahub system user https://linear.app/acryl-data/issue/OBS-1351/auditstamp-actor-hydration-pattern-for-sdk-calls
# Placeholder actor urn. It is substituted when a caller does not pass
# `updated_by`, and it is the fallback `created_by` when an existing assertion
# carries no creator.
DEFAULT_CREATED_BY = CorpUserUrn.from_string("urn:li:corpuser:__datahub_system")
36
+
37
+
38
+ class AssertionsClient:
39
def __init__(self, client: "DataHubClient"):
    """Bind this assertions client to a DataHub client.

    Args:
        client: The DataHub client. Its ``entities`` attribute is what the
            other methods of this class use to fetch and upsert entities.
    """
    self.client = client
    # Calls the module-level warning helper once at construction time.
    _print_experimental_warning()
42
+
43
def sync_smart_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
) -> SmartFreshnessAssertion:
    """Upsert and merge a smart freshness assertion.

    Note: all arguments must be passed by keyword.

    "Upsert and merge" combines create and update. If the assertion does not
    exist, it is created. If it does exist, it is updated: every field whose
    input value is not None overwrites the existing value, and every field
    whose input value is None keeps its existing value. A field that supports
    it can be un-set by passing an empty value, e.g. an empty list or an
    empty string.

    Schedule behavior:
        - Create case: Uses default hourly schedule ("0 * * * *")
        - Update case: Preserves existing schedule from backend (not modifiable)

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. If not provided, a urn will be generated and
            the assertion will be _created_ in the DataHub instance.
        display_name: The display name of the assertion. If not provided, a random
            display name will be generated.
        enabled: Whether the assertion is enabled. If not provided, the existing
            value will be preserved (a newly created assertion is enabled).
        detection_mechanism: The detection mechanism to be used for the assertion.
            Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                  column_name='last_modified',
                  additional_filter="last_modified > '2021-01-01'")
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity to be applied to the assertion. Valid values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the assertion.
            Currently only fixed range exclusion windows are supported. Valid
            values are:
            - from datetime.datetime objects: {
                "start": datetime(2025, 1, 1, 0, 0, 0),
                "end": datetime(2025, 1, 2, 0, 0, 0),
              }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
              }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
              )
        training_data_lookback_days: The training data lookback days to be applied
            to the assertion, as an integer.
        incident_behavior: The incident behavior to be applied to the assertion.
            Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The format is
            "urn:li:corpuser:<username>", which you can find on the Users & Groups
            page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub system user.

    Returns:
        SmartFreshnessAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Caller-supplied fields that are forwarded unchanged to every step below.
    user_fields: dict[str, Any] = {
        "dataset_urn": dataset_urn,
        "display_name": display_name,
        "detection_mechanism": detection_mechanism,
        "sensitivity": sensitivity,
        "exclusion_windows": exclusion_windows,
        "training_data_lookback_days": training_data_lookback_days,
        "incident_behavior": incident_behavior,
        "tags": tags,
    }

    # 1. No urn supplied: this is a plain create.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        create_enabled = True if enabled is None else enabled
        return self._create_smart_freshness_assertion(
            enabled=create_enabled,
            created_by=updated_by,
            **user_fields,
        )

    # 2. A urn was supplied: validate the raw input first. created_by/created_at
    #    are provisional here and get replaced by the stored values in the merge.
    validated_input = _SmartFreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        **user_fields,
    )

    # 3. Merge with whatever is stored for this urn. If nothing is stored, the
    #    helper creates the assertion itself and hands back the finished object.
    merge_outcome = self._retrieve_and_merge_freshness_assertion_and_monitor(
        assertion_input=validated_input,
        urn=urn,
        enabled=enabled,
        updated_by=updated_by,
        now_utc=now_utc,
        **user_fields,
    )

    if isinstance(merge_outcome, _AssertionPublic):
        # The freshness merge helper only ever returns a SmartFreshnessAssertion
        # or a merged input, so a public assertion here must be the freshness kind.
        assert isinstance(merge_outcome, SmartFreshnessAssertion)
        return merge_outcome

    # 4. Persist: assertion first, then monitor. If the assertion upsert raises,
    #    the monitor upsert is never attempted.
    assertion_entity, monitor_entity = (
        merge_outcome.to_assertion_and_monitor_entities()
    )
    self.client.entities.upsert(assertion_entity)
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    self.client.entities.upsert(monitor_entity)

    return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
214
+
215
def _retrieve_and_merge_freshness_assertion_and_monitor(
    self,
    assertion_input: _SmartFreshnessAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
) -> Union[SmartFreshnessAssertion, _SmartFreshnessAssertionInput]:
    """Fetch the stored assertion/monitor for ``urn`` and merge the input into them.

    Args:
        assertion_input: The validated input built from the caller's arguments.
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion to look up.
        display_name: The display name supplied by the caller, if any.
        enabled: Whether the assertion should be enabled; None means "not specified".
        detection_mechanism: The detection mechanism supplied by the caller, if any.
        sensitivity: The sensitivity supplied by the caller, if any.
        exclusion_windows: The exclusion windows supplied by the caller, if any.
        training_data_lookback_days: The lookback days supplied by the caller, if any.
        incident_behavior: The incident behavior supplied by the caller, if any.
        tags: The tags supplied by the caller, if any.
        updated_by: The actor recorded for this change.
        now_utc: The timestamp captured when the public sync method was called.

    Returns:
        A ``SmartFreshnessAssertion`` when no assertion was stored for ``urn`` and
        a new one was created here; otherwise the merged
        ``_SmartFreshnessAssertionInput``, which the caller still has to upsert.

    Raises:
        SDKUsageError: If the stored assertion targets a different dataset than
            the validated input.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 If the assertion does not exist, create a new assertion with a generated
    #     urn and return the created assertion (not an input object):
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_smart_freshness_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            # Forward the caller's flag, as the urn-less create path does.
            # Omitting it silently ignored an explicit enabled=False.
            enabled=enabled if enabled is not None else True,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # 2.2 If the assertion and monitor entities exist, create an assertion object from them:
    if maybe_monitor_entity:
        existing_assertion = SmartFreshnessAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    # 2.3 If the assertion exists but the monitor does not, create a placeholder
    #     monitor entity to be able to build the assertion object:
    else:
        # An unspecified `enabled` (None) is treated as active.
        monitor_mode = (
            "ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
        )
        existing_assertion = SmartFreshnessAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Merge the existing assertion with the validated input:
    return self._merge_freshness_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )
299
+
300
def _retrieve_and_merge_volume_assertion_and_monitor(
    self,
    assertion_input: _SmartVolumeAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SmartVolumeAssertion, _SmartVolumeAssertionInput]:
    """Fetch the stored assertion/monitor for ``urn`` and merge the input into them.

    Returns:
        A ``SmartVolumeAssertion`` when no assertion was stored for ``urn`` and a
        new one was created here; otherwise the merged
        ``_SmartVolumeAssertionInput``, which the caller still has to upsert.

    Raises:
        SDKUsageError: If the stored assertion targets a different dataset than
            the validated input.
    """
    # Step 1: look up what is currently stored for this urn.
    stored_assertion, monitor_urn, stored_monitor = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # Step 2a: nothing stored -> create a brand new assertion (with a generated
    # urn) and return the created assertion object directly.
    if not stored_assertion:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        # NOTE(review): `enabled` and `schedule` are not forwarded to the create
        # call here, so caller-supplied values for them are not applied on this
        # path. Confirm against _create_smart_volume_assertion's signature.
        return self._create_smart_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # Step 2b: assertion stored. Use the stored monitor when there is one,
    # otherwise stand in a placeholder monitor so the assertion object can be built.
    if stored_monitor:
        monitor_for_build = stored_monitor
    else:
        # An unspecified `enabled` (None) is treated as active.
        if enabled or enabled is None:
            placeholder_mode = "ACTIVE"
        else:
            placeholder_mode = "INACTIVE"
        monitor_for_build = Monitor(
            id=monitor_urn, info=("ASSERTION", placeholder_mode)
        )
    existing_assertion = SmartVolumeAssertion._from_entities(
        stored_assertion, monitor_for_build
    )

    # Step 3: sanity checks, e.g. the stored assertion must target the same dataset.
    targets_other_dataset = (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    )
    if targets_other_dataset:
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # Step 4: fold the validated input into the stored state.
    return self._merge_volume_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        schedule=schedule,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=stored_assertion,
        maybe_monitor_entity=stored_monitor,
        existing_assertion=existing_assertion,
    )
386
+
387
def _retrieve_assertion_and_monitor(
    self,
    assertion_input: _AssertionInput,
) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
    """Look up the assertion and its monitor in the DataHub instance.

    A lookup that raises ``ItemNotFoundError`` is treated as "not stored" and
    yields None for that entity.

    Args:
        assertion_input: The validated input; its ``urn`` must be set, and its
            ``dataset_urn`` and ``urn`` together identify the monitor.

    Returns:
        A ``(assertion, monitor_urn, monitor)`` tuple. ``assertion`` and
        ``monitor`` are None when the respective entity was not found;
        ``monitor_urn`` is always the urn derived from the input.
    """
    assertion_urn = assertion_input.urn
    assert assertion_urn is not None, "URN is required"

    # --- assertion lookup ---
    found_assertion: Optional[Assertion] = None
    try:
        fetched = self.client.entities.get(assertion_urn)
    except ItemNotFoundError:
        fetched = None
    if fetched is not None:
        assert isinstance(fetched, Assertion)
        found_assertion = fetched

    # --- monitor lookup (its id is the (dataset urn, assertion urn) pair) ---
    monitor_urn = Monitor._ensure_id(
        id=(assertion_input.dataset_urn, assertion_urn)
    )
    found_monitor: Optional[Monitor] = None
    try:
        fetched = self.client.entities.get(monitor_urn)
    except ItemNotFoundError:
        fetched = None
    if fetched is not None:
        assert isinstance(fetched, Monitor)
        found_monitor = fetched

    return found_assertion, monitor_urn, found_monitor
425
+
426
def _merge_freshness_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _SmartFreshnessAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartFreshnessAssertion,
) -> _SmartFreshnessAssertionInput:
    """Build a new freshness input by merging caller values with stored state.

    Each mergeable field is resolved through ``_merge_field``, which receives the
    caller's value, the field name, the validated input, the existing assertion
    object and the value currently stored on the assertion/monitor entity.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be applied
            to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time from when the public method was called; used
            as ``created_at`` only when the existing assertion has none.
        assertion_input: The validated input to the function.
        maybe_assertion_entity: The existing assertion entity from the DataHub
            instance, if any.
        maybe_monitor_entity: The existing monitor entity from the DataHub
            instance, if any.
        existing_assertion: The existing assertion from the DataHub instance.

    Returns:
        The merged assertion input.
    """
    entity_client = self.client.entities

    def merge(new_value: Any, field_name: str, stored_value: Any) -> Any:
        # Thin wrapper so each field below only states what differs per field.
        return _merge_field(
            new_value, field_name, assertion_input, existing_assertion, stored_value
        )

    stored_display_name = (
        maybe_assertion_entity.description if maybe_assertion_entity else None
    )
    merged_display_name = merge(display_name, "display_name", stored_display_name)

    stored_enabled = (
        existing_assertion.mode == AssertionMode.ACTIVE
        if existing_assertion
        else None
    )
    merged_enabled = merge(enabled, "enabled", stored_enabled)

    # Schedule cannot be modified through an update: the "new" value is always
    # None so the existing schedule is what the merge sees.
    stored_schedule = existing_assertion.schedule if existing_assertion else None
    merged_schedule = merge(None, "schedule", stored_schedule)

    # TODO: Consider moving this conversion to DetectionMechanism.parse(), it could avoid having to use Optional on the return type of SmartFreshnessAssertion.get_detection_mechanism()
    if maybe_assertion_entity and maybe_monitor_entity:
        stored_detection_mechanism = SmartFreshnessAssertion._get_detection_mechanism(
            maybe_assertion_entity, maybe_monitor_entity, default=None
        )
    else:
        stored_detection_mechanism = None
    merged_detection_mechanism = merge(
        detection_mechanism, "detection_mechanism", stored_detection_mechanism
    )

    stored_sensitivity = (
        maybe_monitor_entity.sensitivity if maybe_monitor_entity else None
    )
    merged_sensitivity = merge(sensitivity, "sensitivity", stored_sensitivity)

    stored_exclusion_windows = (
        maybe_monitor_entity.exclusion_windows if maybe_monitor_entity else None
    )
    merged_exclusion_windows = merge(
        exclusion_windows, "exclusion_windows", stored_exclusion_windows
    )

    stored_lookback_days = (
        maybe_monitor_entity.training_data_lookback_days
        if maybe_monitor_entity
        else None
    )
    merged_lookback_days = merge(
        training_data_lookback_days,
        "training_data_lookback_days",
        stored_lookback_days,
    )

    if maybe_assertion_entity:
        stored_incident_behavior = SmartFreshnessAssertion._get_incident_behavior(
            maybe_assertion_entity
        )
    else:
        stored_incident_behavior = None
    merged_incident_behavior = merge(
        incident_behavior, "incident_behavior", stored_incident_behavior
    )

    stored_tags = maybe_assertion_entity.tags if maybe_assertion_entity else None
    merged_tags = merge(tags, "tags", stored_tags)

    # Creation metadata comes from the existing assertion, falling back to the
    # placeholder actor / current time when it is not set.
    created_by = existing_assertion.created_by or DEFAULT_CREATED_BY
    created_at = existing_assertion.created_at or now_utc

    return _SmartFreshnessAssertionInput(
        urn=urn,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        display_name=merged_display_name,
        enabled=merged_enabled,
        schedule=merged_schedule,
        detection_mechanism=merged_detection_mechanism,
        sensitivity=merged_sensitivity,
        exclusion_windows=merged_exclusion_windows,
        training_data_lookback_days=merged_lookback_days,
        incident_behavior=merged_incident_behavior,
        tags=merged_tags,
        created_by=created_by,
        created_at=created_at,
        # Update metadata always comes from the freshly validated input.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
556
+
557
+ def _merge_volume_input(
558
+ self,
559
+ dataset_urn: Union[str, DatasetUrn],
560
+ urn: Union[str, AssertionUrn],
561
+ display_name: Optional[str],
562
+ enabled: Optional[bool],
563
+ detection_mechanism: DetectionMechanismInputTypes,
564
+ sensitivity: Optional[Union[str, InferenceSensitivity]],
565
+ exclusion_windows: Optional[ExclusionWindowInputTypes],
566
+ training_data_lookback_days: Optional[int],
567
+ incident_behavior: Optional[
568
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
569
+ ],
570
+ tags: Optional[TagsInputType],
571
+ schedule: Optional[Union[str, models.CronScheduleClass]],
572
+ now_utc: datetime,
573
+ assertion_input: _SmartVolumeAssertionInput,
574
+ maybe_assertion_entity: Optional[Assertion],
575
+ maybe_monitor_entity: Optional[Monitor],
576
+ existing_assertion: SmartVolumeAssertion,
577
+ ) -> _SmartVolumeAssertionInput:
578
+ """Merge the input with the existing assertion and monitor entities.
579
+
580
+ Args:
581
+ dataset_urn: The urn of the dataset to be monitored.
582
+ urn: The urn of the assertion.
583
+ display_name: The display name of the assertion.
584
+ enabled: Whether the assertion is enabled.
585
+ detection_mechanism: The detection mechanism to be used for the assertion.
586
+ sensitivity: The sensitivity to be applied to the assertion.
587
+ exclusion_windows: The exclusion windows to be applied to the assertion.
588
+ training_data_lookback_days: The training data lookback days to be applied to the assertion.
589
+ incident_behavior: The incident behavior to be applied to the assertion.
590
+ tags: The tags to be applied to the assertion.
591
+ now_utc: The current UTC time from when the function is called.
592
+ assertion_input: The validated input to the function.
593
+ maybe_assertion_entity: The existing assertion entity from the DataHub instance.
594
+ maybe_monitor_entity: The existing monitor entity from the DataHub instance.
595
+ existing_assertion: The existing assertion from the DataHub instance.
596
+
597
+ Returns:
598
+ The merged assertion input.
599
+ """
600
+ merged_assertion_input = _SmartVolumeAssertionInput(
601
+ urn=urn,
602
+ entity_client=self.client.entities,
603
+ dataset_urn=dataset_urn,
604
+ display_name=_merge_field(
605
+ display_name,
606
+ "display_name",
607
+ assertion_input,
608
+ existing_assertion,
609
+ maybe_assertion_entity.description if maybe_assertion_entity else None,
610
+ ),
611
+ enabled=_merge_field(
612
+ enabled,
613
+ "enabled",
614
+ assertion_input,
615
+ existing_assertion,
616
+ existing_assertion.mode == AssertionMode.ACTIVE
617
+ if existing_assertion
618
+ else None,
619
+ ),
620
+ schedule=_merge_field(
621
+ schedule,
622
+ "schedule",
623
+ assertion_input,
624
+ existing_assertion,
625
+ existing_assertion.schedule if existing_assertion else None,
626
+ ),
627
+ detection_mechanism=_merge_field(
628
+ detection_mechanism,
629
+ "detection_mechanism",
630
+ assertion_input,
631
+ existing_assertion,
632
+ SmartVolumeAssertion._get_detection_mechanism(
633
+ maybe_assertion_entity, maybe_monitor_entity, default=None
634
+ )
635
+ if maybe_assertion_entity and maybe_monitor_entity
636
+ else None,
637
+ ),
638
+ sensitivity=_merge_field(
639
+ sensitivity,
640
+ "sensitivity",
641
+ assertion_input,
642
+ existing_assertion,
643
+ maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
644
+ ),
645
+ exclusion_windows=_merge_field(
646
+ exclusion_windows,
647
+ "exclusion_windows",
648
+ assertion_input,
649
+ existing_assertion,
650
+ maybe_monitor_entity.exclusion_windows
651
+ if maybe_monitor_entity
652
+ else None,
653
+ ),
654
+ training_data_lookback_days=_merge_field(
655
+ training_data_lookback_days,
656
+ "training_data_lookback_days",
657
+ assertion_input,
658
+ existing_assertion,
659
+ maybe_monitor_entity.training_data_lookback_days
660
+ if maybe_monitor_entity
661
+ else None,
662
+ ),
663
+ incident_behavior=_merge_field(
664
+ incident_behavior,
665
+ "incident_behavior",
666
+ assertion_input,
667
+ existing_assertion,
668
+ SmartVolumeAssertion._get_incident_behavior(maybe_assertion_entity)
669
+ if maybe_assertion_entity
670
+ else None,
671
+ ),
672
+ tags=_merge_field(
673
+ tags,
674
+ "tags",
675
+ assertion_input,
676
+ existing_assertion,
677
+ maybe_assertion_entity.tags if maybe_assertion_entity else None,
678
+ ),
679
+ created_by=existing_assertion.created_by
680
+ or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
681
+ created_at=existing_assertion.created_at
682
+ or now_utc, # Override with the existing assertion's created_at or now if not set
683
+ updated_by=assertion_input.updated_by, # Override with the input's updated_by
684
+ updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
685
+ )
686
+
687
+ return merged_assertion_input
688
+
689
def _create_smart_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
) -> SmartFreshnessAssertion:
    """Create a new smart freshness assertion together with its monitor.

    All arguments must be passed by keyword.

    No schedule can be supplied here; the created assertion uses the default
    hourly schedule ("0 * * * *").

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. When omitted, a random
            display name is generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism used by the assertion.
            Information schema is recommended. Accepted values:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
            } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name="last_modified",
                additional_filter="last_modified > '2021-01-01'",
            )
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity applied to the assertion. Accepted values:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows applied to the assertion. Only
            fixed range exclusion windows are currently supported. Accepted forms:
            - a dict of datetime.datetime objects: {
                "start": datetime(2025, 1, 1, 0, 0, 0),
                "end": datetime(2025, 1, 2, 0, 0, 0),
            }
            - a dict of datetime strings: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
            }
            - a FixedRangeExclusionWindow object: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
            )
        training_data_lookback_days: The number of training data lookback days
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior applied to the assertion.
            Accepted values:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags applied to the assertion. Accepted values:
            - a list of strings (strings are converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user creating the assertion, in the form
            "urn:li:corpuser:<username>" (shown on the Users & Groups page).
            When omitted, DEFAULT_CREATED_BY is used and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub system user.

    Returns:
        SmartFreshnessAssertion: The created assertion, built from the assertion
        and monitor entities that were sent to DataHub.
    """
    _print_experimental_warning()
    creation_time = datetime.now(timezone.utc)

    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    entity_client = self.client.entities

    # On creation the creator is also recorded as the last updater, and both
    # timestamps are the same instant.
    validated_input = _SmartFreshnessAssertionInput(
        urn=None,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=created_by,
        created_at=creation_time,
        updated_by=created_by,
        updated_at=creation_time,
    )
    entities = validated_input.to_assertion_and_monitor_entities()
    assertion_entity, monitor_entity = entities

    # The assertion goes first: if its creation raises, the monitor is never attempted.
    entity_client.create(assertion_entity)
    # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor
    # creation fails (log the error, delete the assertion entity, re-raise), once delete
    # is implemented:
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy
    entity_client.create(monitor_entity)

    return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
799
+
800
def _create_smart_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartVolumeAssertion:
    """Create a new smart volume assertion together with its monitor.

    All arguments must be passed by keyword.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. When omitted, a random
            display name is generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism used by the assertion.
            Information schema is recommended. Accepted values:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
            } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name="last_modified",
                additional_filter="last_modified > '2021-01-01'",
            )
            - {
                "type": "high_watermark_column",
                "column_name": "id",
                "additional_filter": "id > 1000",
            } or DetectionMechanism.HIGH_WATERMARK_COLUMN(
                column_name="id",
                additional_filter="id > 1000",
            )
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity applied to the assertion. Accepted values:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows applied to the assertion. Only
            fixed range exclusion windows are currently supported. Accepted forms:
            - a dict of datetime.datetime objects: {
                "start": datetime(2025, 1, 1, 0, 0, 0),
                "end": datetime(2025, 1, 2, 0, 0, 0),
            }
            - a dict of datetime strings: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
            }
            - a FixedRangeExclusionWindow object: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
            )
        training_data_lookback_days: The number of training data lookback days
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior applied to the assertion.
            Accepted values:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags applied to the assertion. Accepted values:
            - a list of strings (strings are converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user creating the assertion, in the form
            "urn:li:corpuser:<username>" (shown on the Users & Groups page).
            When omitted, DEFAULT_CREATED_BY is used and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub system user.
        schedule: Optional schedule that determines when the assertion is evaluated.
            Either a cron expression string, e.g. "0 * * * *" for every hour using
            the UTC timezone, or a models.CronScheduleClass object with string
            parameters cron and timezone (import it with
            `from datahub.metadata import schema_classes as models`). When omitted,
            a default schedule is used.

    Returns:
        SmartVolumeAssertion: The created assertion, built from the assertion
        and monitor entities that were sent to DataHub.
    """
    _print_experimental_warning()
    creation_time = datetime.now(timezone.utc)

    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    entity_client = self.client.entities

    # On creation the creator is also recorded as the last updater, and both
    # timestamps are the same instant.
    validated_input = _SmartVolumeAssertionInput(
        urn=None,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=created_by,
        created_at=creation_time,
        updated_by=created_by,
        updated_at=creation_time,
        schedule=schedule,
    )
    entities = validated_input.to_assertion_and_monitor_entities()
    assertion_entity, monitor_entity = entities

    # The assertion goes first: if its creation raises, the monitor is never attempted.
    entity_client.create(assertion_entity)
    # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor
    # creation fails (log the error, delete the assertion entity, re-raise), once delete
    # is implemented:
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy
    entity_client.create(monitor_entity)

    return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
921
+
922
def sync_smart_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartVolumeAssertion:
    """Upsert and merge a smart volume assertion.

    All arguments must be passed by keyword.

    Upsert and merge combines create and update. If the assertion does not exist,
    it is created. If it does exist, it is updated: a field is overwritten when the
    input value is not None, and the existing value is preserved when the input
    value is None. A field can be un-set by passing an empty value, e.g. an empty
    list or an empty string.

    Schedule behavior:
        - Create case: Uses default hourly schedule ("0 * * * *") or provided schedule
        - Update case: Different than `sync_smart_freshness_assertion`, schedule is updated.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. When omitted, a urn is generated and the
            assertion is _created_ in the DataHub instance.
        display_name: The display name of the assertion. When omitted, a random
            display name is generated.
        enabled: Whether the assertion is enabled. When omitted, the existing value
            is preserved; on the create path (no urn) it is treated as True.
        detection_mechanism: The detection mechanism used by the assertion.
            Information schema is recommended. Accepted values:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
            } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name="last_modified",
                additional_filter="last_modified > '2021-01-01'",
            )
            - {
                "type": "high_watermark_column",
                "column_name": "id",
                "additional_filter": "id > 1000",
            } or DetectionMechanism.HIGH_WATERMARK_COLUMN(
                column_name="id",
                additional_filter="id > 1000",
            )
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity applied to the assertion. Accepted values:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows applied to the assertion. Only
            fixed range exclusion windows are currently supported. Accepted forms:
            - a dict of datetime.datetime objects: {
                "start": datetime(2025, 1, 1, 0, 0, 0),
                "end": datetime(2025, 1, 2, 0, 0, 0),
            }
            - a dict of datetime strings: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
            }
            - a FixedRangeExclusionWindow object: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
            )
        training_data_lookback_days: The number of training data lookback days
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior applied to the assertion.
            Accepted values:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags applied to the assertion. Accepted values:
            - a list of strings (strings are converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user updating the assertion, in the form
            "urn:li:corpuser:<username>" (shown on the Users & Groups page).
            When omitted, DEFAULT_CREATED_BY is used and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub system user.
        schedule: Optional schedule that determines when the assertion is evaluated.
            Either a cron expression string, e.g. "0 * * * *" for every hour using
            the UTC timezone, or a models.CronScheduleClass object with string
            parameters cron and timezone (import it with
            `from datahub.metadata import schema_classes as models`). When omitted,
            a default schedule is used.

    Returns:
        SmartVolumeAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    sync_time = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with, so take the plain
    # create path. The updater doubles as the creator.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_smart_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=True if enabled is None else enabled,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    entity_client = self.client.entities

    # Step 2: validate the raw input by building the input object. `enabled` is
    # deliberately not forwarded here; it is passed to the merge step below.
    validated_input = _SmartVolumeAssertionInput(
        urn=urn,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,  # Placeholder, overridden by the actual created_by
        created_at=sync_time,  # Placeholder, overridden by the actual created_at
        updated_by=updated_by,
        updated_at=sync_time,
        schedule=schedule,
    )

    # Step 3: merge the input with the existing assertion and monitor entities,
    # or create a new assertion if the assertion does not exist.
    merge_result = self._retrieve_and_merge_volume_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=sync_time,
        schedule=schedule,
    )

    # A public assertion coming back means the merge step already created it,
    # so there is nothing left to upsert.
    if isinstance(merge_result, _AssertionPublic):
        # Type narrowing: the volume-specific merge helper is expected to hand
        # back a SmartVolumeAssertion on its create path.
        assert isinstance(merge_result, SmartVolumeAssertion)
        return merge_result

    # Step 4: upsert the merged assertion and monitor entities.
    entities = merge_result.to_assertion_and_monitor_entities()
    assertion_entity, monitor_entity = entities

    # The assertion goes first: if its upsert raises, the monitor is never attempted.
    entity_client.upsert(assertion_entity)
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor
    # upsert fails (log the error, delete the assertion entity, re-raise), once delete
    # is implemented:
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy
    entity_client.upsert(monitor_entity)

    return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
1108
+
1109
+
1110
+ def _merge_field(
1111
+ input_field_value: Any,
1112
+ input_field_name: str,
1113
+ validated_assertion_input: _AssertionInput,
1114
+ validated_existing_assertion: _AssertionPublic,
1115
+ existing_entity_value: Optional[Any] = None, # TODO: Can we do better than Any?
1116
+ ) -> Any:
1117
+ """Merge the input field value with any existing entity value or default value.
1118
+
1119
+ The merge logic is as follows:
1120
+ - If the input is None, use the existing value
1121
+ - If the input is not None, use the input value
1122
+ - If the input is an empty list or empty string, still use the input value (falsy values can be used to unset fields)
1123
+ - If the input is a non-empty list or non-empty string, use the input value
1124
+ - If the input is None and the existing value is None, use the default value from _AssertionInput
1125
+
1126
+ Args:
1127
+ input_field_value: The value of the field in the input e.g. passed to the function.
1128
+ input_field_name: The name of the field in the input.
1129
+ validated_assertion_input: The *validated* input to the function.
1130
+ validated_existing_assertion: The *validated* existing assertion from the DataHub instance.
1131
+ existing_entity_value: The value of the field in the existing entity from the DataHub instance, directly retrieved from the entity.
1132
+
1133
+ Returns:
1134
+ The merged value of the field.
1135
+
1136
+ """
1137
+ if input_field_value is None: # Input value default
1138
+ if existing_entity_value is not None: # Existing entity value set
1139
+ return existing_entity_value
1140
+ elif (
1141
+ getattr(validated_existing_assertion, input_field_name) is None
1142
+ ): # Validated existing value not set
1143
+ return getattr(validated_assertion_input, input_field_name)
1144
+ else: # Validated existing value set
1145
+ return getattr(validated_existing_assertion, input_field_name)
1146
+ else: # Input value set
1147
+ return input_field_value
1148
+
1149
+
1150
+ def _print_experimental_warning() -> None:
1151
+ print(
1152
+ "Warning: The assertions client is experimental and under heavy development. Expect breaking changes."
1153
+ )