acryl-datahub-cloud 0.3.12rc1__py3-none-any.whl → 0.3.12rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (70)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +524 -0
  3. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  7. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  8. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  9. acryl_datahub_cloud/metadata/_urns/urn_defs.py +1842 -1786
  10. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  11. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
  12. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  14. acryl_datahub_cloud/metadata/schema.avsc +24747 -23945
  15. acryl_datahub_cloud/metadata/schema_classes.py +1031 -631
  16. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  17. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  18. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  19. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +31 -7
  20. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +27 -6
  21. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +31 -7
  22. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  23. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  24. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  25. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  26. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  27. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  28. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  29. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  30. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  31. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  32. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  33. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  34. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  35. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  36. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  37. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  39. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +3 -0
  40. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  41. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  42. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  43. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  44. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  45. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  46. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  47. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  48. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +27 -6
  49. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  51. acryl_datahub_cloud/notifications/__init__.py +0 -0
  52. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  53. acryl_datahub_cloud/sdk/__init__.py +25 -0
  54. acryl_datahub_cloud/{_sdk_extras → sdk}/assertion.py +202 -45
  55. acryl_datahub_cloud/{_sdk_extras → sdk}/assertion_input.py +344 -83
  56. acryl_datahub_cloud/{_sdk_extras → sdk}/assertions_client.py +635 -199
  57. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  58. acryl_datahub_cloud/{_sdk_extras → sdk}/entities/assertion.py +1 -1
  59. acryl_datahub_cloud/{_sdk_extras → sdk}/subscription_client.py +146 -33
  60. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/METADATA +48 -43
  61. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/RECORD +69 -54
  62. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/entry_points.txt +1 -0
  63. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -19
  64. /acryl_datahub_cloud/{_sdk_extras/entities → datahub_forms_notifications}/__init__.py +0 -0
  65. /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/monitor.py +0 -0
  66. /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/subscription.py +0 -0
  67. /acryl_datahub_cloud/{_sdk_extras → sdk}/errors.py +0 -0
  68. /acryl_datahub_cloud/{_sdk_extras → sdk}/resolver_client.py +0 -0
  69. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/WHEEL +0 -0
  70. {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc3.dist-info}/top_level.txt +0 -0
@@ -4,20 +4,26 @@ import logging
4
4
  from datetime import datetime, timezone
5
5
  from typing import TYPE_CHECKING, Any, Optional, Union
6
6
 
7
- from acryl_datahub_cloud._sdk_extras.assertion import (
7
+ from acryl_datahub_cloud.sdk.assertion import (
8
+ AssertionMode,
8
9
  SmartFreshnessAssertion,
10
+ SmartVolumeAssertion,
11
+ _AssertionPublic,
9
12
  )
10
- from acryl_datahub_cloud._sdk_extras.assertion_input import (
13
+ from acryl_datahub_cloud.sdk.assertion_input import (
11
14
  AssertionIncidentBehavior,
12
15
  DetectionMechanismInputTypes,
13
16
  ExclusionWindowInputTypes,
14
17
  InferenceSensitivity,
18
+ _AssertionInput,
15
19
  _SmartFreshnessAssertionInput,
20
+ _SmartVolumeAssertionInput,
16
21
  )
17
- from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion, TagsInputType
18
- from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
19
- from acryl_datahub_cloud._sdk_extras.errors import SDKUsageError
22
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
23
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
24
+ from acryl_datahub_cloud.sdk.errors import SDKUsageError
20
25
  from datahub.errors import ItemNotFoundError
26
+ from datahub.metadata import schema_classes as models
21
27
  from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, MonitorUrn
22
28
 
23
29
  if TYPE_CHECKING:
@@ -34,146 +40,13 @@ class AssertionsClient:
34
40
  self.client = client
35
41
  _print_experimental_warning()
36
42
 
37
- def upsert_smart_freshness_assertion(
38
- self,
39
- *,
40
- dataset_urn: Union[str, DatasetUrn],
41
- urn: Optional[Union[str, AssertionUrn]] = None,
42
- display_name: Optional[str] = None,
43
- detection_mechanism: DetectionMechanismInputTypes = None,
44
- sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
45
- exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
46
- training_data_lookback_days: Optional[int] = None,
47
- incident_behavior: Optional[
48
- Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
49
- ] = None,
50
- tags: Optional[TagsInputType] = None,
51
- updated_by: Optional[Union[str, CorpUserUrn]] = None,
52
- ) -> SmartFreshnessAssertion:
53
- """Upsert a smart freshness assertion.
54
-
55
- Note: keyword arguments are required.
56
-
57
- Upsert is a combination of create and update. If the assertion does not exist, it will be created.
58
- If it does exist, it will be overwritten with the input values. If the input value is None,
59
- the existing value will be overridden with a default value.
60
-
61
- Args:
62
- dataset_urn: The urn of the dataset to be monitored.
63
- urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
64
- will be _created_ in the DataHub instance.
65
- display_name: The display name of the assertion. If not provided, a random display name
66
- will be generated.
67
- detection_mechanism: The detection mechanism to be used for the assertion. Information
68
- schema is recommended. Valid values are:
69
- - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
70
- - "audit_log" or DetectionMechanism.AUDIT_LOG
71
- - {
72
- "type": "last_modified_column",
73
- "column_name": "last_modified",
74
- "additional_filter": "last_modified > '2021-01-01'",
75
- } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
76
- additional_filter='last_modified > 2021-01-01')
77
- - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
78
- sensitivity: The sensitivity to be applied to the assertion. Valid values are:
79
- - "low" or InferenceSensitivity.LOW
80
- - "medium" or InferenceSensitivity.MEDIUM
81
- - "high" or InferenceSensitivity.HIGH
82
- exclusion_windows: The exclusion windows to be applied to the assertion, currently only
83
- fixed range exclusion windows are supported. Valid values are:
84
- - from datetime.datetime objects: {
85
- "start": "datetime(2025, 1, 1, 0, 0, 0)",
86
- "end": "datetime(2025, 1, 2, 0, 0, 0)",
87
- }
88
- - from string datetimes: {
89
- "start": "2025-01-01T00:00:00",
90
- "end": "2025-01-02T00:00:00",
91
- }
92
- - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
93
- start=datetime(2025, 1, 1, 0, 0, 0),
94
- end=datetime(2025, 1, 2, 0, 0, 0)
95
- )
96
- training_data_lookback_days: The training data lookback days to be applied to the
97
- assertion as an integer.
98
- incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
99
- - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
100
- - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
101
- tags: The tags to be applied to the assertion. Valid values are:
102
- - a list of strings (strings will be converted to TagUrn objects)
103
- - a list of TagUrn objects
104
- - a list of TagAssociationClass objects
105
- updated_by: Optional urn of the user who updated the assertion. The format is
106
- "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
107
- The default is the datahub system user.
108
- TODO: Retrieve the SDK user as the default instead of the datahub system user.
109
-
110
- Returns:
111
- SmartFreshnessAssertion: The created or updated assertion.
112
- """
113
- _print_experimental_warning()
114
- now_utc = datetime.now(timezone.utc)
115
-
116
- if updated_by is None:
117
- logger.warning(
118
- f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
119
- )
120
- updated_by = DEFAULT_CREATED_BY
121
-
122
- # 1. If urn is not set, create a new assertion
123
- if urn is None:
124
- logger.info("URN is not set, creating a new assertion")
125
- return self.create_smart_freshness_assertion(
126
- dataset_urn=dataset_urn,
127
- display_name=display_name,
128
- detection_mechanism=detection_mechanism,
129
- sensitivity=sensitivity,
130
- exclusion_windows=exclusion_windows,
131
- training_data_lookback_days=training_data_lookback_days,
132
- incident_behavior=incident_behavior,
133
- tags=tags,
134
- created_by=updated_by,
135
- )
136
-
137
- # 2. If urn is set, first validate the input:
138
- assertion_input = _SmartFreshnessAssertionInput(
139
- urn=urn,
140
- entity_client=self.client.entities,
141
- dataset_urn=dataset_urn,
142
- display_name=display_name,
143
- detection_mechanism=detection_mechanism,
144
- sensitivity=sensitivity,
145
- exclusion_windows=exclusion_windows,
146
- training_data_lookback_days=training_data_lookback_days,
147
- incident_behavior=incident_behavior,
148
- tags=tags,
149
- created_by=updated_by, # This will be overridden by the actual created_by
150
- created_at=now_utc, # This will be overridden by the actual created_at
151
- updated_by=updated_by,
152
- updated_at=now_utc,
153
- )
154
-
155
- # 3. Upsert the assertion and monitor entities:
156
- assertion_entity, monitor_entity = (
157
- assertion_input.to_assertion_and_monitor_entities()
158
- )
159
- # If assertion upsert fails, we won't try to upsert the monitor
160
- self.client.entities.upsert(assertion_entity)
161
- # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
162
- # try:
163
- self.client.entities.upsert(monitor_entity)
164
- # except Exception as e:
165
- # logger.error(f"Error upserting monitor: {e}")
166
- # self.client.entities.delete(assertion_entity)
167
- # raise e
168
-
169
- return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
170
-
171
- def _upsert_and_merge_smart_freshness_assertion(
43
+ def sync_smart_freshness_assertion(
172
44
  self,
173
45
  *,
174
46
  dataset_urn: Union[str, DatasetUrn],
175
47
  urn: Optional[Union[str, AssertionUrn]] = None,
176
48
  display_name: Optional[str] = None,
49
+ enabled: Optional[bool] = None,
177
50
  detection_mechanism: DetectionMechanismInputTypes = None,
178
51
  sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
179
52
  exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
@@ -194,8 +67,9 @@ class AssertionsClient:
194
67
  will be preserved. The input value can be un-set, e.g. by passing an empty list or
195
68
  empty string.
196
69
 
197
- NOTE: This method is private and is not part of the public API. It will be used by the
198
- yaml client to manage assertions.
70
+ Schedule behavior:
71
+ - Create case: Uses default hourly schedule ("0 * * * *")
72
+ - Update case: Preserves existing schedule from backend (not modifiable)
199
73
 
200
74
  Args:
201
75
  dataset_urn: The urn of the dataset to be monitored.
@@ -203,6 +77,8 @@ class AssertionsClient:
203
77
  assertion will be _created_ in the DataHub instance.
204
78
  display_name: The display name of the assertion. If not provided, a random display
205
79
  name will be generated.
80
+ enabled: Whether the assertion is enabled. If not provided, the existing value
81
+ will be preserved.
206
82
  detection_mechanism: The detection mechanism to be used for the assertion. Information
207
83
  schema is recommended. Valid values are:
208
84
  - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
@@ -261,9 +137,10 @@ class AssertionsClient:
261
137
  # 1. If urn is not set, create a new assertion
262
138
  if urn is None:
263
139
  logger.info("URN is not set, creating a new assertion")
264
- return self.create_smart_freshness_assertion(
140
+ return self._create_smart_freshness_assertion(
265
141
  dataset_urn=dataset_urn,
266
142
  display_name=display_name,
143
+ enabled=enabled if enabled is not None else True,
267
144
  detection_mechanism=detection_mechanism,
268
145
  sensitivity=sensitivity,
269
146
  exclusion_windows=exclusion_windows,
@@ -294,11 +171,12 @@ class AssertionsClient:
294
171
  # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
295
172
  # if the assertion does not exist:
296
173
  merged_assertion_input_or_created_assertion = (
297
- self._retrieve_and_merge_assertion_and_monitor(
174
+ self._retrieve_and_merge_freshness_assertion_and_monitor(
298
175
  assertion_input=assertion_input,
299
176
  dataset_urn=dataset_urn,
300
177
  urn=urn,
301
178
  display_name=display_name,
179
+ enabled=enabled,
302
180
  detection_mechanism=detection_mechanism,
303
181
  sensitivity=sensitivity,
304
182
  exclusion_windows=exclusion_windows,
@@ -311,9 +189,11 @@ class AssertionsClient:
311
189
  )
312
190
 
313
191
  # Return early if we created a new assertion in the merge:
314
- if isinstance(
315
- merged_assertion_input_or_created_assertion, SmartFreshnessAssertion
316
- ):
192
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
193
+ # We know this is the correct type because we passed the assertion_class parameter
194
+ assert isinstance(
195
+ merged_assertion_input_or_created_assertion, SmartFreshnessAssertion
196
+ )
317
197
  return merged_assertion_input_or_created_assertion
318
198
 
319
199
  # 4. Upsert the assertion and monitor entities:
@@ -330,14 +210,15 @@ class AssertionsClient:
330
210
  # self.client.entities.delete(assertion_entity)
331
211
  # raise e
332
212
 
333
- return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
213
+ return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
334
214
 
335
- def _retrieve_and_merge_assertion_and_monitor(
215
+ def _retrieve_and_merge_freshness_assertion_and_monitor(
336
216
  self,
337
217
  assertion_input: _SmartFreshnessAssertionInput,
338
218
  dataset_urn: Union[str, DatasetUrn],
339
219
  urn: Union[str, AssertionUrn],
340
220
  display_name: Optional[str],
221
+ enabled: Optional[bool],
341
222
  detection_mechanism: DetectionMechanismInputTypes,
342
223
  sensitivity: Optional[Union[str, InferenceSensitivity]],
343
224
  exclusion_windows: Optional[ExclusionWindowInputTypes],
@@ -354,25 +235,112 @@ class AssertionsClient:
354
235
  self._retrieve_assertion_and_monitor(assertion_input)
355
236
  )
356
237
 
357
- # 2.1 If the assertion and monitor entities exist, create a SmartFreshnessAssertion object from them:
238
+ # 2.1 If the assertion and monitor entities exist, create an assertion object from them:
358
239
  if maybe_assertion_entity and maybe_monitor_entity:
359
- existing_assertion = SmartFreshnessAssertion.from_entities(
240
+ existing_assertion = SmartFreshnessAssertion._from_entities(
360
241
  maybe_assertion_entity, maybe_monitor_entity
361
242
  )
362
243
  # 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
363
244
  elif maybe_assertion_entity and not maybe_monitor_entity:
364
- existing_assertion = SmartFreshnessAssertion.from_entities(
245
+ monitor_mode = (
246
+ "ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
247
+ )
248
+ existing_assertion = SmartFreshnessAssertion._from_entities(
249
+ maybe_assertion_entity,
250
+ Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
251
+ )
252
+ # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the created assertion:
253
+ elif not maybe_assertion_entity:
254
+ logger.info(
255
+ f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
256
+ )
257
+ return self._create_smart_freshness_assertion(
258
+ dataset_urn=dataset_urn,
259
+ display_name=display_name,
260
+ detection_mechanism=detection_mechanism,
261
+ sensitivity=sensitivity,
262
+ exclusion_windows=exclusion_windows,
263
+ training_data_lookback_days=training_data_lookback_days,
264
+ incident_behavior=incident_behavior,
265
+ tags=tags,
266
+ created_by=updated_by,
267
+ )
268
+
269
+ # 3. Check for any issues e.g. different dataset urns
270
+ if (
271
+ existing_assertion
272
+ and hasattr(existing_assertion, "dataset_urn")
273
+ and existing_assertion.dataset_urn != assertion_input.dataset_urn
274
+ ):
275
+ raise SDKUsageError(
276
+ f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
277
+ )
278
+
279
+ # 4. Merge the existing assertion with the validated input:
280
+ merged_assertion_input = self._merge_freshness_input(
281
+ dataset_urn=dataset_urn,
282
+ urn=urn,
283
+ display_name=display_name,
284
+ enabled=enabled,
285
+ detection_mechanism=detection_mechanism,
286
+ sensitivity=sensitivity,
287
+ exclusion_windows=exclusion_windows,
288
+ training_data_lookback_days=training_data_lookback_days,
289
+ incident_behavior=incident_behavior,
290
+ tags=tags,
291
+ now_utc=now_utc,
292
+ assertion_input=assertion_input,
293
+ maybe_assertion_entity=maybe_assertion_entity,
294
+ maybe_monitor_entity=maybe_monitor_entity,
295
+ existing_assertion=existing_assertion,
296
+ )
297
+
298
+ return merged_assertion_input
299
+
300
+ def _retrieve_and_merge_volume_assertion_and_monitor(
301
+ self,
302
+ assertion_input: _SmartVolumeAssertionInput,
303
+ dataset_urn: Union[str, DatasetUrn],
304
+ urn: Union[str, AssertionUrn],
305
+ display_name: Optional[str],
306
+ enabled: Optional[bool],
307
+ detection_mechanism: DetectionMechanismInputTypes,
308
+ sensitivity: Optional[Union[str, InferenceSensitivity]],
309
+ exclusion_windows: Optional[ExclusionWindowInputTypes],
310
+ training_data_lookback_days: Optional[int],
311
+ incident_behavior: Optional[
312
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
313
+ ],
314
+ tags: Optional[TagsInputType],
315
+ updated_by: Optional[Union[str, CorpUserUrn]],
316
+ now_utc: datetime,
317
+ schedule: Optional[Union[str, models.CronScheduleClass]],
318
+ ) -> Union[SmartVolumeAssertion, _SmartVolumeAssertionInput]:
319
+ # 1. Retrieve any existing assertion and monitor entities:
320
+ maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
321
+ self._retrieve_assertion_and_monitor(assertion_input)
322
+ )
323
+
324
+ # 2.1 If the assertion and monitor entities exist, create an assertion object from them:
325
+ if maybe_assertion_entity and maybe_monitor_entity:
326
+ existing_assertion = SmartVolumeAssertion._from_entities(
327
+ maybe_assertion_entity, maybe_monitor_entity
328
+ )
329
+ # 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
330
+ elif maybe_assertion_entity and not maybe_monitor_entity:
331
+ monitor_mode = (
332
+ "ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
333
+ )
334
+ existing_assertion = SmartVolumeAssertion._from_entities(
365
335
  maybe_assertion_entity,
366
- Monitor(
367
- id=monitor_urn, info=("ASSERTION", "ACTIVE")
368
- ), # TODO: Set active based on enabled parameter once it is added
336
+ Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
369
337
  )
370
338
  # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the created assertion:
371
339
  elif not maybe_assertion_entity:
372
340
  logger.info(
373
341
  f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
374
342
  )
375
- return self.create_smart_freshness_assertion(
343
+ return self._create_smart_volume_assertion(
376
344
  dataset_urn=dataset_urn,
377
345
  display_name=display_name,
378
346
  detection_mechanism=detection_mechanism,
@@ -387,6 +355,7 @@ class AssertionsClient:
387
355
  # 3. Check for any issues e.g. different dataset urns
388
356
  if (
389
357
  existing_assertion
358
+ and hasattr(existing_assertion, "dataset_urn")
390
359
  and existing_assertion.dataset_urn != assertion_input.dataset_urn
391
360
  ):
392
361
  raise SDKUsageError(
@@ -394,16 +363,18 @@ class AssertionsClient:
394
363
  )
395
364
 
396
365
  # 4. Merge the existing assertion with the validated input:
397
- merged_assertion_input = self._merge_input(
366
+ merged_assertion_input = self._merge_volume_input(
398
367
  dataset_urn=dataset_urn,
399
368
  urn=urn,
400
369
  display_name=display_name,
370
+ enabled=enabled,
401
371
  detection_mechanism=detection_mechanism,
402
372
  sensitivity=sensitivity,
403
373
  exclusion_windows=exclusion_windows,
404
374
  training_data_lookback_days=training_data_lookback_days,
405
375
  incident_behavior=incident_behavior,
406
376
  tags=tags,
377
+ schedule=schedule,
407
378
  now_utc=now_utc,
408
379
  assertion_input=assertion_input,
409
380
  maybe_assertion_entity=maybe_assertion_entity,
@@ -414,7 +385,8 @@ class AssertionsClient:
414
385
  return merged_assertion_input
415
386
 
416
387
  def _retrieve_assertion_and_monitor(
417
- self, assertion_input: _SmartFreshnessAssertionInput
388
+ self,
389
+ assertion_input: _AssertionInput,
418
390
  ) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
419
391
  """Retrieve the assertion and monitor entities from the DataHub instance.
420
392
 
@@ -451,11 +423,12 @@ class AssertionsClient:
451
423
 
452
424
  return maybe_assertion_entity, monitor_urn, maybe_monitor_entity
453
425
 
454
- def _merge_input(
426
+ def _merge_freshness_input(
455
427
  self,
456
428
  dataset_urn: Union[str, DatasetUrn],
457
429
  urn: Union[str, AssertionUrn],
458
430
  display_name: Optional[str],
431
+ enabled: Optional[bool],
459
432
  detection_mechanism: DetectionMechanismInputTypes,
460
433
  sensitivity: Optional[Union[str, InferenceSensitivity]],
461
434
  exclusion_windows: Optional[ExclusionWindowInputTypes],
@@ -476,6 +449,7 @@ class AssertionsClient:
476
449
  dataset_urn: The urn of the dataset to be monitored.
477
450
  urn: The urn of the assertion.
478
451
  display_name: The display name of the assertion.
452
+ enabled: Whether the assertion is enabled.
479
453
  detection_mechanism: The detection mechanism to be used for the assertion.
480
454
  sensitivity: The sensitivity to be applied to the assertion.
481
455
  exclusion_windows: The exclusion windows to be applied to the assertion.
@@ -502,6 +476,22 @@ class AssertionsClient:
502
476
  existing_assertion,
503
477
  maybe_assertion_entity.description if maybe_assertion_entity else None,
504
478
  ),
479
+ enabled=_merge_field(
480
+ enabled,
481
+ "enabled",
482
+ assertion_input,
483
+ existing_assertion,
484
+ existing_assertion.mode == AssertionMode.ACTIVE
485
+ if existing_assertion
486
+ else None,
487
+ ),
488
+ schedule=_merge_field(
489
+ None, # Don't allow schedule modification in updates - always preserve existing
490
+ "schedule",
491
+ assertion_input,
492
+ existing_assertion,
493
+ existing_assertion.schedule if existing_assertion else None,
494
+ ),
505
495
  detection_mechanism=_merge_field(
506
496
  detection_mechanism,
507
497
  "detection_mechanism",
@@ -564,50 +554,186 @@ class AssertionsClient:
564
554
 
565
555
  return merged_assertion_input
566
556
 
567
- def create_smart_freshness_assertion(
557
+ def _merge_volume_input(
568
558
  self,
569
- *,
570
559
  dataset_urn: Union[str, DatasetUrn],
571
- display_name: Optional[str] = None,
572
- detection_mechanism: DetectionMechanismInputTypes = None,
573
- sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
574
- exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
575
- training_data_lookback_days: Optional[int] = None,
560
+ urn: Union[str, AssertionUrn],
561
+ display_name: Optional[str],
562
+ enabled: Optional[bool],
563
+ detection_mechanism: DetectionMechanismInputTypes,
564
+ sensitivity: Optional[Union[str, InferenceSensitivity]],
565
+ exclusion_windows: Optional[ExclusionWindowInputTypes],
566
+ training_data_lookback_days: Optional[int],
576
567
  incident_behavior: Optional[
577
568
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
578
- ] = None,
579
- tags: Optional[TagsInputType] = None,
580
- created_by: Optional[Union[str, CorpUserUrn]] = None,
581
- ) -> SmartFreshnessAssertion:
582
- """Create a smart freshness assertion.
583
-
584
- Note: keyword arguments are required.
569
+ ],
570
+ tags: Optional[TagsInputType],
571
+ schedule: Optional[Union[str, models.CronScheduleClass]],
572
+ now_utc: datetime,
573
+ assertion_input: _SmartVolumeAssertionInput,
574
+ maybe_assertion_entity: Optional[Assertion],
575
+ maybe_monitor_entity: Optional[Monitor],
576
+ existing_assertion: SmartVolumeAssertion,
577
+ ) -> _SmartVolumeAssertionInput:
578
+ """Merge the input with the existing assertion and monitor entities.
585
579
 
586
580
  Args:
587
581
  dataset_urn: The urn of the dataset to be monitored.
588
- display_name: The display name of the assertion. If not provided, a random display
589
- name will be generated.
590
- detection_mechanism: The detection mechanism to be used for the assertion. Information
591
- schema is recommended. Valid values are:
592
- - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
593
- - "audit_log" or DetectionMechanism.AUDIT_LOG
594
- - {
595
- "type": "last_modified_column",
596
- "column_name": "last_modified",
597
- "additional_filter": "last_modified > '2021-01-01'",
598
- } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
599
- additional_filter='last_modified > 2021-01-01')
600
- - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
601
- sensitivity: The sensitivity to be applied to the assertion. Valid values are:
602
- - "low" or InferenceSensitivity.LOW
603
- - "medium" or InferenceSensitivity.MEDIUM
604
- - "high" or InferenceSensitivity.HIGH
605
- exclusion_windows: The exclusion windows to be applied to the assertion, currently only
606
- fixed range exclusion windows are supported. Valid values are:
607
- - from datetime.datetime objects: {
608
- "start": "datetime(2025, 1, 1, 0, 0, 0)",
609
- "end": "datetime(2025, 1, 2, 0, 0, 0)",
610
- }
582
+ urn: The urn of the assertion.
583
+ display_name: The display name of the assertion.
584
+ enabled: Whether the assertion is enabled.
585
+ detection_mechanism: The detection mechanism to be used for the assertion.
586
+ sensitivity: The sensitivity to be applied to the assertion.
587
+ exclusion_windows: The exclusion windows to be applied to the assertion.
588
+ training_data_lookback_days: The training data lookback days to be applied to the assertion.
589
+ incident_behavior: The incident behavior to be applied to the assertion.
590
+ tags: The tags to be applied to the assertion.
591
+ now_utc: The current UTC time from when the function is called.
592
+ assertion_input: The validated input to the function.
593
+ maybe_assertion_entity: The existing assertion entity from the DataHub instance.
594
+ maybe_monitor_entity: The existing monitor entity from the DataHub instance.
595
+ existing_assertion: The existing assertion from the DataHub instance.
596
+
597
+ Returns:
598
+ The merged assertion input.
599
+ """
600
+ merged_assertion_input = _SmartVolumeAssertionInput(
601
+ urn=urn,
602
+ entity_client=self.client.entities,
603
+ dataset_urn=dataset_urn,
604
+ display_name=_merge_field(
605
+ display_name,
606
+ "display_name",
607
+ assertion_input,
608
+ existing_assertion,
609
+ maybe_assertion_entity.description if maybe_assertion_entity else None,
610
+ ),
611
+ enabled=_merge_field(
612
+ enabled,
613
+ "enabled",
614
+ assertion_input,
615
+ existing_assertion,
616
+ existing_assertion.mode == AssertionMode.ACTIVE
617
+ if existing_assertion
618
+ else None,
619
+ ),
620
+ schedule=_merge_field(
621
+ schedule,
622
+ "schedule",
623
+ assertion_input,
624
+ existing_assertion,
625
+ existing_assertion.schedule if existing_assertion else None,
626
+ ),
627
+ detection_mechanism=_merge_field(
628
+ detection_mechanism,
629
+ "detection_mechanism",
630
+ assertion_input,
631
+ existing_assertion,
632
+ SmartVolumeAssertion._get_detection_mechanism(
633
+ maybe_assertion_entity, maybe_monitor_entity, default=None
634
+ )
635
+ if maybe_assertion_entity and maybe_monitor_entity
636
+ else None,
637
+ ),
638
+ sensitivity=_merge_field(
639
+ sensitivity,
640
+ "sensitivity",
641
+ assertion_input,
642
+ existing_assertion,
643
+ maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
644
+ ),
645
+ exclusion_windows=_merge_field(
646
+ exclusion_windows,
647
+ "exclusion_windows",
648
+ assertion_input,
649
+ existing_assertion,
650
+ maybe_monitor_entity.exclusion_windows
651
+ if maybe_monitor_entity
652
+ else None,
653
+ ),
654
+ training_data_lookback_days=_merge_field(
655
+ training_data_lookback_days,
656
+ "training_data_lookback_days",
657
+ assertion_input,
658
+ existing_assertion,
659
+ maybe_monitor_entity.training_data_lookback_days
660
+ if maybe_monitor_entity
661
+ else None,
662
+ ),
663
+ incident_behavior=_merge_field(
664
+ incident_behavior,
665
+ "incident_behavior",
666
+ assertion_input,
667
+ existing_assertion,
668
+ SmartVolumeAssertion._get_incident_behavior(maybe_assertion_entity)
669
+ if maybe_assertion_entity
670
+ else None,
671
+ ),
672
+ tags=_merge_field(
673
+ tags,
674
+ "tags",
675
+ assertion_input,
676
+ existing_assertion,
677
+ maybe_assertion_entity.tags if maybe_assertion_entity else None,
678
+ ),
679
+ created_by=existing_assertion.created_by
680
+ or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
681
+ created_at=existing_assertion.created_at
682
+ or now_utc, # Override with the existing assertion's created_at or now if not set
683
+ updated_by=assertion_input.updated_by, # Override with the input's updated_by
684
+ updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
685
+ )
686
+
687
+ return merged_assertion_input
688
+
689
+ def _create_smart_freshness_assertion(
690
+ self,
691
+ *,
692
+ dataset_urn: Union[str, DatasetUrn],
693
+ display_name: Optional[str] = None,
694
+ enabled: bool = True,
695
+ detection_mechanism: DetectionMechanismInputTypes = None,
696
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
697
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
698
+ training_data_lookback_days: Optional[int] = None,
699
+ incident_behavior: Optional[
700
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
701
+ ] = None,
702
+ tags: Optional[TagsInputType] = None,
703
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
704
+ ) -> SmartFreshnessAssertion:
705
+ """Create a smart freshness assertion.
706
+
707
+ Note: keyword arguments are required.
708
+
709
+ The created assertion will use the default hourly schedule ("0 * * * *").
710
+
711
+ Args:
712
+ dataset_urn: The urn of the dataset to be monitored.
713
+ display_name: The display name of the assertion. If not provided, a random display
714
+ name will be generated.
715
+ enabled: Whether the assertion is enabled. Defaults to True.
716
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
717
+ schema is recommended. Valid values are:
718
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
719
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
720
+ - {
721
+ "type": "last_modified_column",
722
+ "column_name": "last_modified",
723
+ "additional_filter": "last_modified > '2021-01-01'",
724
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
725
+ additional_filter='last_modified > 2021-01-01')
726
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
727
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
728
+ - "low" or InferenceSensitivity.LOW
729
+ - "medium" or InferenceSensitivity.MEDIUM
730
+ - "high" or InferenceSensitivity.HIGH
731
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
732
+ fixed range exclusion windows are supported. Valid values are:
733
+ - from datetime.datetime objects: {
734
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
735
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
736
+ }
611
737
  - from string datetimes: {
612
738
  "start": "2025-01-01T00:00:00",
613
739
  "end": "2025-01-02T00:00:00",
@@ -645,6 +771,128 @@ class AssertionsClient:
645
771
  entity_client=self.client.entities,
646
772
  dataset_urn=dataset_urn,
647
773
  display_name=display_name,
774
+ enabled=enabled,
775
+ detection_mechanism=detection_mechanism,
776
+ sensitivity=sensitivity,
777
+ exclusion_windows=exclusion_windows,
778
+ training_data_lookback_days=training_data_lookback_days,
779
+ incident_behavior=incident_behavior,
780
+ tags=tags,
781
+ created_by=created_by,
782
+ created_at=now_utc,
783
+ updated_by=created_by,
784
+ updated_at=now_utc,
785
+ )
786
+ assertion_entity, monitor_entity = (
787
+ assertion_input.to_assertion_and_monitor_entities()
788
+ )
789
+ # If assertion creation fails, we won't try to create the monitor
790
+ self.client.entities.create(assertion_entity)
791
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
792
+ # try:
793
+ self.client.entities.create(monitor_entity)
794
+ # except Exception as e:
795
+ # logger.error(f"Error creating monitor: {e}")
796
+ # self.client.entities.delete(assertion_entity)
797
+ # raise e
798
+ return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
799
+
800
+ def _create_smart_volume_assertion(
801
+ self,
802
+ *,
803
+ dataset_urn: Union[str, DatasetUrn],
804
+ display_name: Optional[str] = None,
805
+ enabled: bool = True,
806
+ detection_mechanism: DetectionMechanismInputTypes = None,
807
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
808
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
809
+ training_data_lookback_days: Optional[int] = None,
810
+ incident_behavior: Optional[
811
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
812
+ ] = None,
813
+ tags: Optional[TagsInputType] = None,
814
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
815
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
816
+ ) -> SmartVolumeAssertion:
817
+ """Create a smart volume assertion.
818
+
819
+ Note: keyword arguments are required.
820
+
821
+ Args:
822
+ dataset_urn: The urn of the dataset to be monitored.
823
+ display_name: The display name of the assertion. If not provided, a random display
824
+ name will be generated.
825
+ enabled: Whether the assertion is enabled. Defaults to True.
826
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
827
+ schema is recommended. Valid values are:
828
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
829
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
830
+ - {
831
+ "type": "last_modified_column",
832
+ "column_name": "last_modified",
833
+ "additional_filter": "last_modified > '2021-01-01'",
834
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
835
+ additional_filter='last_modified > 2021-01-01')
836
+ - {
837
+ "type": "high_watermark_column",
838
+ "column_name": "id",
839
+ "additional_filter": "id > 1000",
840
+ } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
841
+ additional_filter='id > 1000')
842
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
843
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
844
+ - "low" or InferenceSensitivity.LOW
845
+ - "medium" or InferenceSensitivity.MEDIUM
846
+ - "high" or InferenceSensitivity.HIGH
847
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
848
+ fixed range exclusion windows are supported. Valid values are:
849
+ - from datetime.datetime objects: {
850
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
851
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
852
+ }
853
+ - from string datetimes: {
854
+ "start": "2025-01-01T00:00:00",
855
+ "end": "2025-01-02T00:00:00",
856
+ }
857
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
858
+ start=datetime(2025, 1, 1, 0, 0, 0),
859
+ end=datetime(2025, 1, 2, 0, 0, 0)
860
+ )
861
+ training_data_lookback_days: The training data lookback days to be applied to the
862
+ assertion as an integer.
863
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
864
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
865
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
866
+ tags: The tags to be applied to the assertion. Valid values are:
867
+ - a list of strings (strings will be converted to TagUrn objects)
868
+ - a list of TagUrn objects
869
+ - a list of TagAssociationClass objects
870
+ created_by: Optional urn of the user who created the assertion. The format is
871
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
872
+ The default is the datahub system user.
873
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
874
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
875
+ schedule will be used. The schedule determines when the assertion will be evaluated.
876
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
877
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
878
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
879
+
880
+ Returns:
881
+ SmartVolumeAssertion: The created assertion.
882
+ """
883
+ _print_experimental_warning()
884
+ now_utc = datetime.now(timezone.utc)
885
+ if created_by is None:
886
+ logger.warning(
887
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
888
+ )
889
+ created_by = DEFAULT_CREATED_BY
890
+ assertion_input = _SmartVolumeAssertionInput(
891
+ urn=None,
892
+ entity_client=self.client.entities,
893
+ dataset_urn=dataset_urn,
894
+ display_name=display_name,
895
+ enabled=enabled,
648
896
  detection_mechanism=detection_mechanism,
649
897
  sensitivity=sensitivity,
650
898
  exclusion_windows=exclusion_windows,
@@ -655,6 +903,7 @@ class AssertionsClient:
655
903
  created_at=now_utc,
656
904
  updated_by=created_by,
657
905
  updated_at=now_utc,
906
+ schedule=schedule,
658
907
  )
659
908
  assertion_entity, monitor_entity = (
660
909
  assertion_input.to_assertion_and_monitor_entities()
@@ -668,14 +917,201 @@ class AssertionsClient:
668
917
  # logger.error(f"Error creating monitor: {e}")
669
918
  # self.client.entities.delete(assertion_entity)
670
919
  # raise e
671
- return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
920
+ return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
921
+
922
def sync_smart_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartVolumeAssertion:
    """Create a smart volume assertion, or merge updates into an existing one.

    Note: every argument must be passed by keyword.

    This is an "upsert and merge": if the assertion does not exist yet it is
    created; if it does exist it is updated. On update, an argument that is
    not None replaces the stored value, while an argument left as None keeps
    the stored value. To un-set a stored value, pass an empty value instead
    of None (e.g. an empty list or an empty string).

    Schedule behavior:
        - Create case: the provided schedule is used, otherwise the default
          hourly schedule ("0 * * * *").
        - Update case: unlike `sync_smart_freshness_assertion`, the schedule
          is updated.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. If not provided, a urn will be generated
            and the assertion will be _created_ in the DataHub instance.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. If not provided, the existing
            value will be preserved (a newly created assertion is enabled).
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
              additional_filter='last_modified > 2021-01-01')
            - {
                "type": "high_watermark_column",
                "column_name": "id",
                "additional_filter": "id > 1000",
              } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
              additional_filter='id > 1000')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity to be applied to the assertion. Valid
            values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the assertion;
            currently only fixed range exclusion windows are supported. Valid
            values are:
            - from datetime.datetime objects: {
                "start": "datetime(2025, 1, 1, 0, 0, 0)",
                "end": "datetime(2025, 1, 2, 0, 0, 0)",
              }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
              }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
              )
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior to be applied to the assertion.
            Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        SmartVolumeAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    # Fall back to the placeholder actor when the caller did not name one.
    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with, so create.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_smart_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=True if enabled is None else enabled,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    # Step 2: a urn was given, so validate the raw input first. The creation
    # audit fields passed here are placeholders; they are overridden by the
    # actual created_by / created_at of the existing assertion.
    validated_input = _SmartVolumeAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        schedule=schedule,
    )

    # Step 3: merge the input with the existing assertion and monitor
    # entities, or create a new assertion if the assertion does not exist.
    merge_outcome = self._retrieve_and_merge_volume_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=now_utc,
        schedule=schedule,
    )

    # The merge step created a brand-new assertion: nothing left to upsert.
    if isinstance(merge_outcome, _AssertionPublic):
        # Narrow the type for the checker; the merge helper is expected to
        # hand back the volume flavour here.
        assert isinstance(merge_outcome, SmartVolumeAssertion)
        return merge_outcome

    # Step 4: upsert the merged assertion, then its monitor. If the assertion
    # upsert fails, the monitor upsert is never attempted.
    assertion_entity, monitor_entity = (
        merge_outcome.to_assertion_and_monitor_entities()
    )
    self.client.entities.upsert(assertion_entity)
    # TODO: Once delete is implemented
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy),
    # wrap the monitor upsert in a try-except that logs the error, deletes
    # the assertion again and re-raises, so no orphaned assertion is left.
    self.client.entities.upsert(monitor_entity)

    return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
672
1108
 
673
1109
 
674
1110
  def _merge_field(
675
1111
  input_field_value: Any,
676
1112
  input_field_name: str,
677
- validated_assertion_input: _SmartFreshnessAssertionInput,
678
- validated_existing_assertion: SmartFreshnessAssertion,
1113
+ validated_assertion_input: _AssertionInput,
1114
+ validated_existing_assertion: _AssertionPublic,
679
1115
  existing_entity_value: Optional[Any] = None, # TODO: Can we do better than Any?
680
1116
  ) -> Any:
681
1117
  """Merge the input field value with any existing entity value or default value.