acryl-datahub-cloud 0.3.11.1rc8__py3-none-any.whl → 0.3.12rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (37) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/_sdk_extras/__init__.py +17 -2
  3. acryl_datahub_cloud/_sdk_extras/assertion.py +603 -8
  4. acryl_datahub_cloud/_sdk_extras/assertion_input.py +1074 -0
  5. acryl_datahub_cloud/_sdk_extras/assertions_client.py +705 -11
  6. acryl_datahub_cloud/_sdk_extras/entities/__init__.py +0 -0
  7. acryl_datahub_cloud/_sdk_extras/entities/assertion.py +425 -0
  8. acryl_datahub_cloud/_sdk_extras/entities/monitor.py +291 -0
  9. acryl_datahub_cloud/_sdk_extras/entities/subscription.py +84 -0
  10. acryl_datahub_cloud/_sdk_extras/errors.py +34 -0
  11. acryl_datahub_cloud/_sdk_extras/resolver_client.py +39 -0
  12. acryl_datahub_cloud/_sdk_extras/subscription_client.py +565 -0
  13. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  14. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2023 -2023
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
  17. acryl_datahub_cloud/metadata/schema.avsc +24889 -25252
  18. acryl_datahub_cloud/metadata/schema_classes.py +1133 -1008
  19. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +189 -201
  20. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +9 -1
  21. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  22. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +9 -1
  23. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  24. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  25. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
  26. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +27 -0
  27. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  28. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +12 -4
  29. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  30. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  31. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  32. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  33. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/METADATA +46 -46
  34. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/RECORD +37 -28
  35. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/WHEEL +1 -1
  36. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/entry_points.txt +0 -0
  37. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,717 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Union
3
+ import logging
4
+ from datetime import datetime, timezone
5
+ from typing import TYPE_CHECKING, Any, Optional, Union
4
6
 
5
- from acryl_datahub_cloud._sdk_extras.assertion import Assertion
6
- from datahub.metadata.urns import AssertionUrn
7
+ from acryl_datahub_cloud._sdk_extras.assertion import (
8
+ SmartFreshnessAssertion,
9
+ )
10
+ from acryl_datahub_cloud._sdk_extras.assertion_input import (
11
+ AssertionIncidentBehavior,
12
+ DetectionMechanismInputTypes,
13
+ ExclusionWindowInputTypes,
14
+ InferenceSensitivity,
15
+ _SmartFreshnessAssertionInput,
16
+ )
17
+ from acryl_datahub_cloud._sdk_extras.entities.assertion import Assertion, TagsInputType
18
+ from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
19
+ from acryl_datahub_cloud._sdk_extras.errors import SDKUsageError
20
+ from datahub.errors import ItemNotFoundError
21
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, MonitorUrn
7
22
 
8
23
  if TYPE_CHECKING:
9
24
  from datahub.sdk.main_client import DataHubClient
10
25
 
26
# Module-level logger shared by AssertionsClient and the helpers in this file.
logger = logging.getLogger(__name__)

# Fallback actor urn: AssertionsClient substitutes it (and logs a warning) whenever a caller
# leaves created_by / updated_by unset.
# TODO: Replace __datahub_system with the actual datahub system user https://linear.app/acryl-data/issue/OBS-1351/auditstamp-actor-hydration-pattern-for-sdk-calls
DEFAULT_CREATED_BY = CorpUserUrn.from_string("urn:li:corpuser:__datahub_system")
30
+
11
31
 
12
32
  class AssertionsClient:
13
- def __init__(self, client: DataHubClient):
33
+ def __init__(self, client: "DataHubClient"):
14
34
  self.client = client
35
+ _print_experimental_warning()
36
+
37
+ def upsert_smart_freshness_assertion(
38
+ self,
39
+ *,
40
+ dataset_urn: Union[str, DatasetUrn],
41
+ urn: Optional[Union[str, AssertionUrn]] = None,
42
+ display_name: Optional[str] = None,
43
+ detection_mechanism: DetectionMechanismInputTypes = None,
44
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
45
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
46
+ training_data_lookback_days: Optional[int] = None,
47
+ incident_behavior: Optional[
48
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
49
+ ] = None,
50
+ tags: Optional[TagsInputType] = None,
51
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
52
+ ) -> SmartFreshnessAssertion:
53
+ """Upsert a smart freshness assertion.
54
+
55
+ Note: keyword arguments are required.
56
+
57
+ Upsert is a combination of create and update. If the assertion does not exist, it will be created.
58
+ If it does exist, it will be overwritten with the input values. If the input value is None,
59
+ the existing value will be overridden with a default value.
60
+
61
+ Args:
62
+ dataset_urn: The urn of the dataset to be monitored.
63
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
64
+ will be _created_ in the DataHub instance.
65
+ display_name: The display name of the assertion. If not provided, a random display name
66
+ will be generated.
67
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
68
+ schema is recommended. Valid values are:
69
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
70
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
71
+ - {
72
+ "type": "last_modified_column",
73
+ "column_name": "last_modified",
74
+ "additional_filter": "last_modified > '2021-01-01'",
75
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
76
+ additional_filter='last_modified > 2021-01-01')
77
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
78
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
79
+ - "low" or InferenceSensitivity.LOW
80
+ - "medium" or InferenceSensitivity.MEDIUM
81
+ - "high" or InferenceSensitivity.HIGH
82
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
83
+ fixed range exclusion windows are supported. Valid values are:
84
+ - from datetime.datetime objects: {
85
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
86
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
87
+ }
88
+ - from string datetimes: {
89
+ "start": "2025-01-01T00:00:00",
90
+ "end": "2025-01-02T00:00:00",
91
+ }
92
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
93
+ start=datetime(2025, 1, 1, 0, 0, 0),
94
+ end=datetime(2025, 1, 2, 0, 0, 0)
95
+ )
96
+ training_data_lookback_days: The training data lookback days to be applied to the
97
+ assertion as an integer.
98
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
99
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
100
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
101
+ tags: The tags to be applied to the assertion. Valid values are:
102
+ - a list of strings (strings will be converted to TagUrn objects)
103
+ - a list of TagUrn objects
104
+ - a list of TagAssociationClass objects
105
+ updated_by: Optional urn of the user who updated the assertion. The format is
106
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
107
+ The default is the datahub system user.
108
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
109
+
110
+ Returns:
111
+ SmartFreshnessAssertion: The created or updated assertion.
112
+ """
113
+ _print_experimental_warning()
114
+ now_utc = datetime.now(timezone.utc)
115
+
116
+ if updated_by is None:
117
+ logger.warning(
118
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
119
+ )
120
+ updated_by = DEFAULT_CREATED_BY
15
121
 
16
- def get_assertions(
17
- self, urn: Union[str, list[str], AssertionUrn, list[AssertionUrn]]
18
- ) -> list[Assertion]:
19
- print(
20
- "get_assertions is not implemented, this is a placeholder. Returning empty list."
122
+ # 1. If urn is not set, create a new assertion
123
+ if urn is None:
124
+ logger.info("URN is not set, creating a new assertion")
125
+ return self.create_smart_freshness_assertion(
126
+ dataset_urn=dataset_urn,
127
+ display_name=display_name,
128
+ detection_mechanism=detection_mechanism,
129
+ sensitivity=sensitivity,
130
+ exclusion_windows=exclusion_windows,
131
+ training_data_lookback_days=training_data_lookback_days,
132
+ incident_behavior=incident_behavior,
133
+ tags=tags,
134
+ created_by=updated_by,
135
+ )
136
+
137
+ # 2. If urn is set, first validate the input:
138
+ assertion_input = _SmartFreshnessAssertionInput(
139
+ urn=urn,
140
+ entity_client=self.client.entities,
141
+ dataset_urn=dataset_urn,
142
+ display_name=display_name,
143
+ detection_mechanism=detection_mechanism,
144
+ sensitivity=sensitivity,
145
+ exclusion_windows=exclusion_windows,
146
+ training_data_lookback_days=training_data_lookback_days,
147
+ incident_behavior=incident_behavior,
148
+ tags=tags,
149
+ created_by=updated_by, # This will be overridden by the actual created_by
150
+ created_at=now_utc, # This will be overridden by the actual created_at
151
+ updated_by=updated_by,
152
+ updated_at=now_utc,
153
+ )
154
+
155
+ # 3. Upsert the assertion and monitor entities:
156
+ assertion_entity, monitor_entity = (
157
+ assertion_input.to_assertion_and_monitor_entities()
21
158
  )
22
- print(f"urn provided: {urn}")
23
- return []
159
+ # If assertion upsert fails, we won't try to upsert the monitor
160
+ self.client.entities.upsert(assertion_entity)
161
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
162
+ # try:
163
+ self.client.entities.upsert(monitor_entity)
164
+ # except Exception as e:
165
+ # logger.error(f"Error upserting monitor: {e}")
166
+ # self.client.entities.delete(assertion_entity)
167
+ # raise e
168
+
169
+ return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
170
+
171
+ def _upsert_and_merge_smart_freshness_assertion(
172
+ self,
173
+ *,
174
+ dataset_urn: Union[str, DatasetUrn],
175
+ urn: Optional[Union[str, AssertionUrn]] = None,
176
+ display_name: Optional[str] = None,
177
+ detection_mechanism: DetectionMechanismInputTypes = None,
178
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
179
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
180
+ training_data_lookback_days: Optional[int] = None,
181
+ incident_behavior: Optional[
182
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
183
+ ] = None,
184
+ tags: Optional[TagsInputType] = None,
185
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
186
+ ) -> SmartFreshnessAssertion:
187
+ """Upsert and merge a smart freshness assertion.
188
+
189
+ Note: keyword arguments are required.
190
+
191
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
192
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
193
+ be updated if the input value is not None. If the input value is None, the existing value
194
+ will be preserved. If the input value can be un-set e.g. by passing an empty list or
195
+ empty string.
196
+
197
+ NOTE: This method is private and is not part of the public API. It will be used by the
198
+ yaml client to manage assertions.
199
+
200
+ Args:
201
+ dataset_urn: The urn of the dataset to be monitored.
202
+ urn: The urn of the assertion. If not provided, a urn will be generated and the
203
+ assertion will be _created_ in the DataHub instance.
204
+ display_name: The display name of the assertion. If not provided, a random display
205
+ name will be generated.
206
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
207
+ schema is recommended. Valid values are:
208
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
209
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
210
+ - {
211
+ "type": "last_modified_column",
212
+ "column_name": "last_modified",
213
+ "additional_filter": "last_modified > '2021-01-01'",
214
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
215
+ additional_filter='last_modified > 2021-01-01')
216
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
217
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
218
+ - "low" or InferenceSensitivity.LOW
219
+ - "medium" or InferenceSensitivity.MEDIUM
220
+ - "high" or InferenceSensitivity.HIGH
221
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
222
+ fixed range exclusion windows are supported. Valid values are:
223
+ - from datetime.datetime objects: {
224
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
225
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
226
+ }
227
+ - from string datetimes: {
228
+ "start": "2025-01-01T00:00:00",
229
+ "end": "2025-01-02T00:00:00",
230
+ }
231
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
232
+ start=datetime(2025, 1, 1, 0, 0, 0),
233
+ end=datetime(2025, 1, 2, 0, 0, 0)
234
+ )
235
+ training_data_lookback_days: The training data lookback days to be applied to the
236
+ assertion as an integer.
237
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
238
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
239
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
240
+ tags: The tags to be applied to the assertion. Valid values are:
241
+ - a list of strings (strings will be converted to TagUrn objects)
242
+ - a list of TagUrn objects
243
+ - a list of TagAssociationClass objects
244
+ updated_by: Optional urn of the user who updated the assertion. The format is
245
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
246
+ The default is the datahub system user.
247
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
248
+
249
+ Returns:
250
+ SmartFreshnessAssertion: The created or updated assertion.
251
+ """
252
+ _print_experimental_warning()
253
+ now_utc = datetime.now(timezone.utc)
254
+
255
+ if updated_by is None:
256
+ logger.warning(
257
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
258
+ )
259
+ updated_by = DEFAULT_CREATED_BY
260
+
261
+ # 1. If urn is not set, create a new assertion
262
+ if urn is None:
263
+ logger.info("URN is not set, creating a new assertion")
264
+ return self.create_smart_freshness_assertion(
265
+ dataset_urn=dataset_urn,
266
+ display_name=display_name,
267
+ detection_mechanism=detection_mechanism,
268
+ sensitivity=sensitivity,
269
+ exclusion_windows=exclusion_windows,
270
+ training_data_lookback_days=training_data_lookback_days,
271
+ incident_behavior=incident_behavior,
272
+ tags=tags,
273
+ created_by=updated_by,
274
+ )
275
+
276
+ # 2. If urn is set, first validate the input:
277
+ assertion_input = _SmartFreshnessAssertionInput(
278
+ urn=urn,
279
+ entity_client=self.client.entities,
280
+ dataset_urn=dataset_urn,
281
+ display_name=display_name,
282
+ detection_mechanism=detection_mechanism,
283
+ sensitivity=sensitivity,
284
+ exclusion_windows=exclusion_windows,
285
+ training_data_lookback_days=training_data_lookback_days,
286
+ incident_behavior=incident_behavior,
287
+ tags=tags,
288
+ created_by=updated_by, # This will be overridden by the actual created_by
289
+ created_at=now_utc, # This will be overridden by the actual created_at
290
+ updated_by=updated_by,
291
+ updated_at=now_utc,
292
+ )
293
+
294
+ # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
295
+ # if the assertion does not exist:
296
+ merged_assertion_input_or_created_assertion = (
297
+ self._retrieve_and_merge_assertion_and_monitor(
298
+ assertion_input=assertion_input,
299
+ dataset_urn=dataset_urn,
300
+ urn=urn,
301
+ display_name=display_name,
302
+ detection_mechanism=detection_mechanism,
303
+ sensitivity=sensitivity,
304
+ exclusion_windows=exclusion_windows,
305
+ training_data_lookback_days=training_data_lookback_days,
306
+ incident_behavior=incident_behavior,
307
+ tags=tags,
308
+ updated_by=updated_by,
309
+ now_utc=now_utc,
310
+ )
311
+ )
312
+
313
+ # Return early if we created a new assertion in the merge:
314
+ if isinstance(
315
+ merged_assertion_input_or_created_assertion, SmartFreshnessAssertion
316
+ ):
317
+ return merged_assertion_input_or_created_assertion
318
+
319
+ # 4. Upsert the assertion and monitor entities:
320
+ assertion_entity, monitor_entity = (
321
+ merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
322
+ )
323
+ # If assertion upsert fails, we won't try to upsert the monitor
324
+ self.client.entities.upsert(assertion_entity)
325
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
326
+ # try:
327
+ self.client.entities.upsert(monitor_entity)
328
+ # except Exception as e:
329
+ # logger.error(f"Error upserting monitor: {e}")
330
+ # self.client.entities.delete(assertion_entity)
331
+ # raise e
332
+
333
+ return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
334
+
335
+ def _retrieve_and_merge_assertion_and_monitor(
336
+ self,
337
+ assertion_input: _SmartFreshnessAssertionInput,
338
+ dataset_urn: Union[str, DatasetUrn],
339
+ urn: Union[str, AssertionUrn],
340
+ display_name: Optional[str],
341
+ detection_mechanism: DetectionMechanismInputTypes,
342
+ sensitivity: Optional[Union[str, InferenceSensitivity]],
343
+ exclusion_windows: Optional[ExclusionWindowInputTypes],
344
+ training_data_lookback_days: Optional[int],
345
+ incident_behavior: Optional[
346
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
347
+ ],
348
+ tags: Optional[TagsInputType],
349
+ updated_by: Optional[Union[str, CorpUserUrn]],
350
+ now_utc: datetime,
351
+ ) -> Union[SmartFreshnessAssertion, _SmartFreshnessAssertionInput]:
352
+ # 1. Retrieve any existing assertion and monitor entities:
353
+ maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
354
+ self._retrieve_assertion_and_monitor(assertion_input)
355
+ )
356
+
357
+ # 2.1 If the assertion and monitor entities exist, create a SmartFreshnessAssertion object from them:
358
+ if maybe_assertion_entity and maybe_monitor_entity:
359
+ existing_assertion = SmartFreshnessAssertion.from_entities(
360
+ maybe_assertion_entity, maybe_monitor_entity
361
+ )
362
+ # 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
363
+ elif maybe_assertion_entity and not maybe_monitor_entity:
364
+ existing_assertion = SmartFreshnessAssertion.from_entities(
365
+ maybe_assertion_entity,
366
+ Monitor(
367
+ id=monitor_urn, info=("ASSERTION", "ACTIVE")
368
+ ), # TODO: Set active based on enabled parameter once it is added
369
+ )
370
+ # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
371
+ elif not maybe_assertion_entity:
372
+ logger.info(
373
+ f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
374
+ )
375
+ return self.create_smart_freshness_assertion(
376
+ dataset_urn=dataset_urn,
377
+ display_name=display_name,
378
+ detection_mechanism=detection_mechanism,
379
+ sensitivity=sensitivity,
380
+ exclusion_windows=exclusion_windows,
381
+ training_data_lookback_days=training_data_lookback_days,
382
+ incident_behavior=incident_behavior,
383
+ tags=tags,
384
+ created_by=updated_by,
385
+ )
386
+
387
+ # 3. Check for any issues e.g. different dataset urns
388
+ if (
389
+ existing_assertion
390
+ and existing_assertion.dataset_urn != assertion_input.dataset_urn
391
+ ):
392
+ raise SDKUsageError(
393
+ f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
394
+ )
395
+
396
+ # 4. Merge the existing assertion with the validated input:
397
+ merged_assertion_input = self._merge_input(
398
+ dataset_urn=dataset_urn,
399
+ urn=urn,
400
+ display_name=display_name,
401
+ detection_mechanism=detection_mechanism,
402
+ sensitivity=sensitivity,
403
+ exclusion_windows=exclusion_windows,
404
+ training_data_lookback_days=training_data_lookback_days,
405
+ incident_behavior=incident_behavior,
406
+ tags=tags,
407
+ now_utc=now_utc,
408
+ assertion_input=assertion_input,
409
+ maybe_assertion_entity=maybe_assertion_entity,
410
+ maybe_monitor_entity=maybe_monitor_entity,
411
+ existing_assertion=existing_assertion,
412
+ )
413
+
414
+ return merged_assertion_input
415
+
416
+ def _retrieve_assertion_and_monitor(
417
+ self, assertion_input: _SmartFreshnessAssertionInput
418
+ ) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
419
+ """Retrieve the assertion and monitor entities from the DataHub instance.
420
+
421
+ Args:
422
+ assertion_input: The validated input to the function.
423
+
424
+ Returns:
425
+ The assertion and monitor entities.
426
+ """
427
+ assert assertion_input.urn is not None, "URN is required"
428
+
429
+ # Get assertion entity
430
+ maybe_assertion_entity: Optional[Assertion] = None
431
+ try:
432
+ entity = self.client.entities.get(assertion_input.urn)
433
+ if entity is not None:
434
+ assert isinstance(entity, Assertion)
435
+ maybe_assertion_entity = entity
436
+ except ItemNotFoundError:
437
+ pass
438
+
439
+ # Get monitor entity
440
+ monitor_urn = Monitor._ensure_id(
441
+ id=(assertion_input.dataset_urn, assertion_input.urn)
442
+ )
443
+ maybe_monitor_entity: Optional[Monitor] = None
444
+ try:
445
+ entity = self.client.entities.get(monitor_urn)
446
+ if entity is not None:
447
+ assert isinstance(entity, Monitor)
448
+ maybe_monitor_entity = entity
449
+ except ItemNotFoundError:
450
+ pass
451
+
452
+ return maybe_assertion_entity, monitor_urn, maybe_monitor_entity
453
+
454
+ def _merge_input(
455
+ self,
456
+ dataset_urn: Union[str, DatasetUrn],
457
+ urn: Union[str, AssertionUrn],
458
+ display_name: Optional[str],
459
+ detection_mechanism: DetectionMechanismInputTypes,
460
+ sensitivity: Optional[Union[str, InferenceSensitivity]],
461
+ exclusion_windows: Optional[ExclusionWindowInputTypes],
462
+ training_data_lookback_days: Optional[int],
463
+ incident_behavior: Optional[
464
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
465
+ ],
466
+ tags: Optional[TagsInputType],
467
+ now_utc: datetime,
468
+ assertion_input: _SmartFreshnessAssertionInput,
469
+ maybe_assertion_entity: Optional[Assertion],
470
+ maybe_monitor_entity: Optional[Monitor],
471
+ existing_assertion: SmartFreshnessAssertion,
472
+ ) -> _SmartFreshnessAssertionInput:
473
+ """Merge the input with the existing assertion and monitor entities.
474
+
475
+ Args:
476
+ dataset_urn: The urn of the dataset to be monitored.
477
+ urn: The urn of the assertion.
478
+ display_name: The display name of the assertion.
479
+ detection_mechanism: The detection mechanism to be used for the assertion.
480
+ sensitivity: The sensitivity to be applied to the assertion.
481
+ exclusion_windows: The exclusion windows to be applied to the assertion.
482
+ training_data_lookback_days: The training data lookback days to be applied to the assertion.
483
+ incident_behavior: The incident behavior to be applied to the assertion.
484
+ tags: The tags to be applied to the assertion.
485
+ now_utc: The current UTC time from when the function is called.
486
+ assertion_input: The validated input to the function.
487
+ maybe_assertion_entity: The existing assertion entity from the DataHub instance.
488
+ maybe_monitor_entity: The existing monitor entity from the DataHub instance.
489
+ existing_assertion: The existing assertion from the DataHub instance.
490
+
491
+ Returns:
492
+ The merged assertion input.
493
+ """
494
+ merged_assertion_input = _SmartFreshnessAssertionInput(
495
+ urn=urn,
496
+ entity_client=self.client.entities,
497
+ dataset_urn=dataset_urn,
498
+ display_name=_merge_field(
499
+ display_name,
500
+ "display_name",
501
+ assertion_input,
502
+ existing_assertion,
503
+ maybe_assertion_entity.description if maybe_assertion_entity else None,
504
+ ),
505
+ detection_mechanism=_merge_field(
506
+ detection_mechanism,
507
+ "detection_mechanism",
508
+ assertion_input,
509
+ existing_assertion,
510
+ SmartFreshnessAssertion._get_detection_mechanism( # TODO: Consider moving this conversion to DetectionMechanism.parse(), it could avoid having to use Optional on the return type of SmartFreshnessAssertion.get_detection_mechanism()
511
+ maybe_assertion_entity, maybe_monitor_entity, default=None
512
+ )
513
+ if maybe_assertion_entity and maybe_monitor_entity
514
+ else None,
515
+ ),
516
+ sensitivity=_merge_field(
517
+ sensitivity,
518
+ "sensitivity",
519
+ assertion_input,
520
+ existing_assertion,
521
+ maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
522
+ ),
523
+ exclusion_windows=_merge_field(
524
+ exclusion_windows,
525
+ "exclusion_windows",
526
+ assertion_input,
527
+ existing_assertion,
528
+ maybe_monitor_entity.exclusion_windows
529
+ if maybe_monitor_entity
530
+ else None,
531
+ ),
532
+ training_data_lookback_days=_merge_field(
533
+ training_data_lookback_days,
534
+ "training_data_lookback_days",
535
+ assertion_input,
536
+ existing_assertion,
537
+ maybe_monitor_entity.training_data_lookback_days
538
+ if maybe_monitor_entity
539
+ else None,
540
+ ),
541
+ incident_behavior=_merge_field(
542
+ incident_behavior,
543
+ "incident_behavior",
544
+ assertion_input,
545
+ existing_assertion,
546
+ SmartFreshnessAssertion._get_incident_behavior(maybe_assertion_entity)
547
+ if maybe_assertion_entity
548
+ else None,
549
+ ),
550
+ tags=_merge_field(
551
+ tags,
552
+ "tags",
553
+ assertion_input,
554
+ existing_assertion,
555
+ maybe_assertion_entity.tags if maybe_assertion_entity else None,
556
+ ),
557
+ created_by=existing_assertion.created_by
558
+ or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
559
+ created_at=existing_assertion.created_at
560
+ or now_utc, # Override with the existing assertion's created_at or now if not set
561
+ updated_by=assertion_input.updated_by, # Override with the input's updated_by
562
+ updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
563
+ )
564
+
565
+ return merged_assertion_input
566
+
567
+ def create_smart_freshness_assertion(
568
+ self,
569
+ *,
570
+ dataset_urn: Union[str, DatasetUrn],
571
+ display_name: Optional[str] = None,
572
+ detection_mechanism: DetectionMechanismInputTypes = None,
573
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
574
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
575
+ training_data_lookback_days: Optional[int] = None,
576
+ incident_behavior: Optional[
577
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
578
+ ] = None,
579
+ tags: Optional[TagsInputType] = None,
580
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
581
+ ) -> SmartFreshnessAssertion:
582
+ """Create a smart freshness assertion.
583
+
584
+ Note: keyword arguments are required.
585
+
586
+ Args:
587
+ dataset_urn: The urn of the dataset to be monitored.
588
+ display_name: The display name of the assertion. If not provided, a random display
589
+ name will be generated.
590
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
591
+ schema is recommended. Valid values are:
592
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
593
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
594
+ - {
595
+ "type": "last_modified_column",
596
+ "column_name": "last_modified",
597
+ "additional_filter": "last_modified > '2021-01-01'",
598
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
599
+ additional_filter='last_modified > 2021-01-01')
600
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
601
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
602
+ - "low" or InferenceSensitivity.LOW
603
+ - "medium" or InferenceSensitivity.MEDIUM
604
+ - "high" or InferenceSensitivity.HIGH
605
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
606
+ fixed range exclusion windows are supported. Valid values are:
607
+ - from datetime.datetime objects: {
608
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
609
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
610
+ }
611
+ - from string datetimes: {
612
+ "start": "2025-01-01T00:00:00",
613
+ "end": "2025-01-02T00:00:00",
614
+ }
615
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
616
+ start=datetime(2025, 1, 1, 0, 0, 0),
617
+ end=datetime(2025, 1, 2, 0, 0, 0)
618
+ )
619
+ training_data_lookback_days: The training data lookback days to be applied to the
620
+ assertion as an integer.
621
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
622
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
623
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
624
+ tags: The tags to be applied to the assertion. Valid values are:
625
+ - a list of strings (strings will be converted to TagUrn objects)
626
+ - a list of TagUrn objects
627
+ - a list of TagAssociationClass objects
628
+ created_by: Optional urn of the user who created the assertion. The format is
629
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
630
+ The default is the datahub system user.
631
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
632
+
633
+ Returns:
634
+ SmartFreshnessAssertion: The created assertion.
635
+ """
636
+ _print_experimental_warning()
637
+ now_utc = datetime.now(timezone.utc)
638
+ if created_by is None:
639
+ logger.warning(
640
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
641
+ )
642
+ created_by = DEFAULT_CREATED_BY
643
+ assertion_input = _SmartFreshnessAssertionInput(
644
+ urn=None,
645
+ entity_client=self.client.entities,
646
+ dataset_urn=dataset_urn,
647
+ display_name=display_name,
648
+ detection_mechanism=detection_mechanism,
649
+ sensitivity=sensitivity,
650
+ exclusion_windows=exclusion_windows,
651
+ training_data_lookback_days=training_data_lookback_days,
652
+ incident_behavior=incident_behavior,
653
+ tags=tags,
654
+ created_by=created_by,
655
+ created_at=now_utc,
656
+ updated_by=created_by,
657
+ updated_at=now_utc,
658
+ )
659
+ assertion_entity, monitor_entity = (
660
+ assertion_input.to_assertion_and_monitor_entities()
661
+ )
662
+ # If assertion creation fails, we won't try to create the monitor
663
+ self.client.entities.create(assertion_entity)
664
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
665
+ # try:
666
+ self.client.entities.create(monitor_entity)
667
+ # except Exception as e:
668
+ # logger.error(f"Error creating monitor: {e}")
669
+ # self.client.entities.delete(assertion_entity)
670
+ # raise e
671
+ return SmartFreshnessAssertion.from_entities(assertion_entity, monitor_entity)
672
+
673
+
674
+ def _merge_field(
675
+ input_field_value: Any,
676
+ input_field_name: str,
677
+ validated_assertion_input: _SmartFreshnessAssertionInput,
678
+ validated_existing_assertion: SmartFreshnessAssertion,
679
+ existing_entity_value: Optional[Any] = None, # TODO: Can we do better than Any?
680
+ ) -> Any:
681
+ """Merge the input field value with any existing entity value or default value.
682
+
683
+ The merge logic is as follows:
684
+ - If the input is None, use the existing value
685
+ - If the input is not None, use the input value
686
+ - If the input is an empty list or empty string, still use the input value (falsy values can be used to unset fields)
687
+ - If the input is a non-empty list or non-empty string, use the input value
688
+ - If the input is None and the existing value is None, use the default value from _AssertionInput
689
+
690
+ Args:
691
+ input_field_value: The value of the field in the input e.g. passed to the function.
692
+ input_field_name: The name of the field in the input.
693
+ validated_assertion_input: The *validated* input to the function.
694
+ validated_existing_assertion: The *validated* existing assertion from the DataHub instance.
695
+ existing_entity_value: The value of the field in the existing entity from the DataHub instance, directly retrieved from the entity.
696
+
697
+ Returns:
698
+ The merged value of the field.
699
+
700
+ """
701
+ if input_field_value is None: # Input value default
702
+ if existing_entity_value is not None: # Existing entity value set
703
+ return existing_entity_value
704
+ elif (
705
+ getattr(validated_existing_assertion, input_field_name) is None
706
+ ): # Validated existing value not set
707
+ return getattr(validated_assertion_input, input_field_name)
708
+ else: # Validated existing value set
709
+ return getattr(validated_existing_assertion, input_field_name)
710
+ else: # Input value set
711
+ return input_field_value
712
+
713
+
714
+ def _print_experimental_warning() -> None:
715
+ print(
716
+ "Warning: The assertions client is experimental and under heavy development. Expect breaking changes."
717
+ )