acryl-datahub-cloud 0.3.11.1rc7__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (94)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  3. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  9. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  10. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +29 -13
  11. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  12. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  13. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2011 -1955
  14. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  17. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  18. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  19. acryl_datahub_cloud/metadata/schema.avsc +25413 -25425
  20. acryl_datahub_cloud/metadata/schema_classes.py +1316 -791
  21. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  22. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  23. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  24. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +223 -202
  25. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +36 -7
  26. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  27. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +40 -8
  28. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  29. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  30. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  31. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  32. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  33. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  34. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  35. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  36. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  37. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  39. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  40. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  41. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  42. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  43. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  44. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  45. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  46. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  47. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  48. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  49. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  51. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  52. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  53. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  54. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  55. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  56. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  57. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +39 -10
  58. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  60. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  61. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  62. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  63. acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
  64. acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
  65. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  66. acryl_datahub_cloud/notifications/__init__.py +0 -0
  67. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  68. acryl_datahub_cloud/sdk/__init__.py +39 -0
  69. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  70. acryl_datahub_cloud/sdk/assertion/assertion_base.py +1467 -0
  71. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  72. acryl_datahub_cloud/sdk/assertion/types.py +20 -0
  73. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  74. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1648 -0
  75. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +258 -0
  76. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +914 -0
  77. acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +272 -0
  78. acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +648 -0
  79. acryl_datahub_cloud/sdk/assertions_client.py +3206 -0
  80. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  81. acryl_datahub_cloud/sdk/entities/assertion.py +432 -0
  82. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  83. acryl_datahub_cloud/sdk/entities/subscription.py +84 -0
  84. acryl_datahub_cloud/sdk/errors.py +34 -0
  85. acryl_datahub_cloud/sdk/resolver_client.py +39 -0
  86. acryl_datahub_cloud/sdk/subscription_client.py +714 -0
  87. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/METADATA +47 -42
  88. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/RECORD +91 -58
  89. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/WHEEL +1 -1
  90. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/entry_points.txt +1 -0
  91. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
  92. acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
  93. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
  94. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,3206 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime, timezone
5
+ from typing import TYPE_CHECKING, Any, Optional, Union
6
+
7
+ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
8
+ AssertionMode,
9
+ FreshnessAssertion,
10
+ SmartFreshnessAssertion,
11
+ SmartVolumeAssertion,
12
+ SqlAssertion,
13
+ VolumeAssertion,
14
+ _AssertionPublic,
15
+ )
16
+ from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
17
+ SmartColumnMetricAssertion,
18
+ )
19
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
20
+ AssertionIncidentBehaviorInputTypes,
21
+ DetectionMechanismInputTypes,
22
+ ExclusionWindowInputTypes,
23
+ InferenceSensitivity,
24
+ TimeWindowSizeInputTypes,
25
+ _AssertionInput,
26
+ _SmartFreshnessAssertionInput,
27
+ _SmartVolumeAssertionInput,
28
+ )
29
+ from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
30
+ _FreshnessAssertionInput,
31
+ )
32
+ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
33
+ MetricInputType,
34
+ OperatorInputType,
35
+ RangeInputType,
36
+ RangeTypeInputType,
37
+ ValueInputType,
38
+ ValueTypeInputType,
39
+ _SmartColumnMetricAssertionInput,
40
+ )
41
+ from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
42
+ SqlAssertionChangeType,
43
+ SqlAssertionCriteria,
44
+ SqlAssertionOperator,
45
+ SqlAssertionType,
46
+ _SqlAssertionInput,
47
+ )
48
+ from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
49
+ RowCountChange,
50
+ RowCountTotal,
51
+ VolumeAssertionDefinition,
52
+ VolumeAssertionDefinitionChangeKind,
53
+ VolumeAssertionDefinitionInputTypes,
54
+ VolumeAssertionDefinitionParameters,
55
+ VolumeAssertionDefinitionType,
56
+ VolumeAssertionOperator,
57
+ _VolumeAssertionDefinitionTypes,
58
+ _VolumeAssertionInput,
59
+ )
60
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
61
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
62
+ from acryl_datahub_cloud.sdk.errors import SDKUsageError
63
+ from datahub.errors import ItemNotFoundError
64
+ from datahub.metadata import schema_classes as models
65
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, MonitorUrn
66
+
67
+ if TYPE_CHECKING:
68
+ from datahub.sdk.main_client import DataHubClient
69
+
70
+ logger = logging.getLogger(__name__)
71
+
72
+ # TODO: Replace __datahub_system with the actual datahub system user https://linear.app/acryl-data/issue/OBS-1351/auditstamp-actor-hydration-pattern-for-sdk-calls
73
+ DEFAULT_CREATED_BY = CorpUserUrn.from_string("urn:li:corpuser:__datahub_system")
74
+
75
+
76
+ class AssertionsClient:
77
def __init__(self, client: "DataHubClient") -> None:
    """Bind this assertions client to a DataHub client.

    Args:
        client: The DataHub client whose ``entities`` interface the sync
            methods of this class read from and upsert into.
    """
    self.client = client
    # Invoked on every construction; the helper is defined elsewhere in this module.
    _print_experimental_warning()
80
+
81
def sync_smart_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
) -> SmartFreshnessAssertion:
    """Create a smart freshness assertion, or merge changes into an existing one.

    Note:
        All arguments must be passed by keyword.

    "Upsert and merge" semantics: when the assertion does not exist it is created;
    when it exists it is updated. For an update, an input of ``None`` keeps the
    existing value, any other input replaces it, and fields that can be un-set
    (e.g. by passing an empty list or empty string) are un-set that way.

    Schedule behavior:
        - Create case: Uses default hourly schedule ("0 * * * *")
        - Update case: Preserves existing schedule from backend (not modifiable)

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. When omitted,
            a urn is generated and a new assertion is created in the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. When omitted,
            a random display name is generated.
        enabled (Optional[bool]): Whether the assertion is enabled. When omitted, the
            existing value is preserved; a brand-new assertion (no ``urn``) defaults to enabled.
        detection_mechanism (DetectionMechanismInputTypes): The detection mechanism to use.
            Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {"type": "last_modified_column", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity (Optional[Union[str, InferenceSensitivity]]): The inference sensitivity.
            Valid values are: "low", "medium", "high".
        exclusion_windows (Optional[ExclusionWindowInputTypes]): Exclusion windows to apply.
            Only fixed range exclusion windows are supported.
        training_data_lookback_days (Optional[int]): Training data lookback, in days.
        incident_behavior (Optional[AssertionIncidentBehaviorInputTypes]): Incident behavior
            to apply. Valid values are: "raise_on_fail", "resolve_on_pass" or the typed ones
            (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS),
            singly or as a list.
        tags (Optional[TagsInputType]): Tags to apply: a list of strings, TagUrn objects,
            or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Urn of the user performing the update,
            formatted as "urn:li:corpuser:<username>". Defaults to the datahub system user.

    Returns:
        SmartFreshnessAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Keyword arguments shared verbatim by the create, validate and merge calls below.
    shared_fields: dict[str, Any] = {
        "dataset_urn": dataset_urn,
        "display_name": display_name,
        "detection_mechanism": detection_mechanism,
        "sensitivity": sensitivity,
        "exclusion_windows": exclusion_windows,
        "training_data_lookback_days": training_data_lookback_days,
        "incident_behavior": incident_behavior,
        "tags": tags,
    }

    # Step 1: without a urn there is nothing to merge with, so create directly.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_smart_freshness_assertion(
            enabled=True if enabled is None else enabled,
            created_by=updated_by,
            **shared_fields,
        )

    # Step 2: a urn was supplied, so validate the raw input first.
    assertion_input = _SmartFreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        created_by=updated_by,  # This will be overridden by the actual created_by
        created_at=now_utc,  # This will be overridden by the actual created_at
        updated_by=updated_by,
        updated_at=now_utc,
        **shared_fields,
    )

    # Step 3: merge with whatever already exists for this urn; the helper creates
    # the assertion itself when nothing exists and hands back the created object.
    merge_result = self._retrieve_and_merge_smart_freshness_assertion_and_monitor(
        assertion_input=assertion_input,
        urn=urn,
        enabled=enabled,
        updated_by=updated_by,
        now_utc=now_utc,
        **shared_fields,
    )

    if isinstance(merge_result, _AssertionPublic):
        # The helper only ever creates smart freshness assertions; the assert
        # narrows the type for static checkers.
        assert isinstance(merge_result, SmartFreshnessAssertion)
        return merge_result

    # Step 4: persist the merged state. The assertion is upserted first so that a
    # failure there prevents the monitor upsert from being attempted.
    assertion_entity, monitor_entity = (
        merge_result.to_assertion_and_monitor_entities()
    )
    self.client.entities.upsert(assertion_entity)
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    self.client.entities.upsert(monitor_entity)

    return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
217
+
218
def _retrieve_and_merge_smart_freshness_assertion_and_monitor(
    self,
    assertion_input: _SmartFreshnessAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
) -> Union[SmartFreshnessAssertion, _SmartFreshnessAssertionInput]:
    """Fetch the stored assertion/monitor for ``urn`` and merge the new input into it.

    Args:
        assertion_input: The already-validated input built from the caller's arguments.
        dataset_urn: Dataset the caller wants monitored; must match the stored assertion.
        urn: Urn of the assertion being synced.
        enabled: Requested enabled state; ``None`` means "not specified".
        updated_by: Actor recorded as ``created_by`` if a new assertion is created.
        now_utc: Timestamp forwarded to the merge step.
        display_name, detection_mechanism, sensitivity, exclusion_windows,
        training_data_lookback_days, incident_behavior, tags: The caller's raw
            values, forwarded unchanged to the create or merge step.

    Returns:
        A ``SmartFreshnessAssertion`` if no assertion entity existed and one was
        created here; otherwise the merged ``_SmartFreshnessAssertionInput`` that
        the caller is expected to upsert.

    Raises:
        SDKUsageError: If the stored assertion targets a different dataset urn.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 No stored assertion: create one and return the created assertion
    # (not an input object), so the caller can return early.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_smart_freshness_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            # Honour the caller's choice; mirrors the direct-create path in
            # sync_smart_freshness_assertion, which defaults to enabled.
            enabled=enabled if enabled is not None else True,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # 2.2 Stored assertion and monitor: rebuild the public assertion from both.
    if maybe_monitor_entity:
        existing_assertion = SmartFreshnessAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    # 2.3 Stored assertion without a monitor: use a placeholder monitor so the
    # public assertion can still be built. An unspecified `enabled` counts as active.
    else:
        monitor_mode = "ACTIVE" if enabled is None or enabled else "INACTIVE"
        existing_assertion = SmartFreshnessAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Merge the existing assertion with the validated input:
    return self._merge_smart_freshness_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )
300
+
301
def _retrieve_and_merge_volume_assertion_and_monitor(
    self,
    assertion_input: _SmartVolumeAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SmartVolumeAssertion, _SmartVolumeAssertionInput]:
    """Fetch the stored smart volume assertion/monitor for ``urn`` and merge the input.

    Returns:
        A ``SmartVolumeAssertion`` if no assertion entity existed and one was
        created here; otherwise the merged ``_SmartVolumeAssertionInput``.

    Raises:
        SDKUsageError: If the stored assertion targets a different dataset urn.
    """
    # Look up what is already stored for this assertion.
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # Nothing stored: create the assertion and return the created object.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        # NOTE(review): `enabled` is not forwarded on this path, so the create
        # helper's own default applies - confirm that is intended.
        return self._create_smart_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    if maybe_monitor_entity:
        # Both entities are stored: rebuild the public assertion from them.
        monitor_for_existing = maybe_monitor_entity
    else:
        # The monitor is missing: substitute a placeholder so the assertion can
        # still be built. An unspecified `enabled` counts as active.
        placeholder_mode = "ACTIVE" if enabled is None or enabled else "INACTIVE"
        monitor_for_existing = Monitor(
            id=monitor_urn, info=("ASSERTION", placeholder_mode)
        )
    existing_assertion = SmartVolumeAssertion._from_entities(
        maybe_assertion_entity, monitor_for_existing
    )

    # Refuse to re-point an existing assertion at a different dataset.
    dataset_mismatch = (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    )
    if dataset_mismatch:
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # Merge the stored state with the validated input.
    return self._merge_smart_volume_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        schedule=schedule,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )
386
+
387
def _retrieve_and_merge_freshness_assertion_and_monitor(
    self,
    assertion_input: _FreshnessAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
) -> Union[FreshnessAssertion, _FreshnessAssertionInput]:
    """Fetch the stored freshness assertion/monitor for ``urn`` and merge the input.

    Returns:
        A ``FreshnessAssertion`` if no assertion entity existed and one was
        created here; otherwise the merged ``_FreshnessAssertionInput``.

    Raises:
        SDKUsageError: If the stored assertion targets a different dataset urn.
    """
    # Look up what is already stored for this assertion.
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # Nothing stored: create the assertion and return the created object.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        # NOTE(review): `enabled` is not forwarded on this path, so the create
        # helper's own default applies - confirm that is intended.
        return self._create_freshness_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
            freshness_schedule_check_type=freshness_schedule_check_type,
            lookback_window=lookback_window,
        )

    if maybe_monitor_entity:
        # Both entities are stored: rebuild the public assertion from them.
        monitor_for_existing = maybe_monitor_entity
    else:
        # The monitor is missing: substitute a placeholder so the assertion can
        # still be built. An unspecified `enabled` counts as active.
        placeholder_mode = "ACTIVE" if enabled is None or enabled else "INACTIVE"
        monitor_for_existing = Monitor(
            id=monitor_urn, info=("ASSERTION", placeholder_mode)
        )
    existing_assertion = FreshnessAssertion._from_entities(
        maybe_assertion_entity, monitor_for_existing
    )

    # Refuse to re-point an existing assertion at a different dataset.
    dataset_mismatch = (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    )
    if dataset_mismatch:
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # Merge the stored state with the validated input.
    return self._merge_freshness_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
        schedule=schedule,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
    )
471
+
472
def _retrieve_and_merge_native_volume_assertion_and_monitor(
    self,
    assertion_input: _VolumeAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
    definition: VolumeAssertionDefinitionInputTypes,
    use_backend_definition: bool = False,
) -> Union[VolumeAssertion, _VolumeAssertionInput]:
    """Fetch the stored volume assertion/monitor for ``urn`` and merge the input.

    Args:
        assertion_input: The already-validated input. When ``use_backend_definition``
            is set, its ``definition`` attribute is overwritten in place with the
            definition read from the stored assertion.
        definition: Caller-supplied definition; parsed only when a new assertion
            has to be created.
        use_backend_definition: If true, take the definition from the stored
            assertion instead of from ``assertion_input``.
        dataset_urn, urn, display_name, enabled, detection_mechanism,
        incident_behavior, tags, updated_by, now_utc, schedule: Forwarded
            unchanged to the create or merge step.

    Returns:
        A ``VolumeAssertion`` if no assertion entity existed and one was created
        here; otherwise the merged ``_VolumeAssertionInput``.

    Raises:
        SDKUsageError: If no assertion is stored and ``use_backend_definition`` is
            set (there is no definition to fall back on), or if the stored
            assertion targets a different dataset urn.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 No stored assertion: either fail (no definition available) or create one
    # and return the created assertion.
    if not maybe_assertion_entity:
        if use_backend_definition:
            raise SDKUsageError(
                f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
            )
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        # The create helper takes the criteria as separate fields, so unpack the
        # parsed definition. The assert narrows the type for static checkers.
        parsed_definition = VolumeAssertionDefinition.parse(definition)
        assert isinstance(parsed_definition, (RowCountTotal, RowCountChange))
        # NOTE(review): `enabled` is not forwarded on this path, so the create
        # helper's own default applies - confirm that is intended.
        return self._create_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
            criteria_type=parsed_definition.type,
            # Only row-count-change definitions carry a change kind.
            criteria_change_type=(
                parsed_definition.kind
                if isinstance(parsed_definition, RowCountChange)
                else None
            ),
            criteria_operator=parsed_definition.operator,
            criteria_parameters=parsed_definition.parameters,
        )

    # 2.2 Stored assertion and monitor: rebuild the public assertion from both.
    if maybe_monitor_entity:
        existing_assertion = VolumeAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    # 2.3 Stored assertion without a monitor: use a placeholder monitor so the
    # public assertion can still be built. An unspecified `enabled` counts as active.
    else:
        monitor_mode = "ACTIVE" if enabled is None or enabled else "INACTIVE"
        existing_assertion = VolumeAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Pick the definition to merge. A stored assertion entity is guaranteed to
    # exist here (the missing case returned or raised in step 2.1), so the
    # backend definition can be read without a further None check.
    if use_backend_definition:
        effective_definition = VolumeAssertionDefinition.from_assertion(
            maybe_assertion_entity
        )
        # Keep the validated input consistent with what will be merged.
        assertion_input.definition = effective_definition
        logger.info("Using definition from backend assertion")
    else:
        # Use the already-parsed definition from assertion_input
        effective_definition = assertion_input.definition

    # 5. Merge the existing assertion with the validated input:
    return self._merge_volume_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
        schedule=schedule,
        definition=effective_definition,
    )
584
+
585
def _retrieve_and_merge_sql_assertion_and_monitor(
    self,
    assertion_input: _SqlAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    criteria: SqlAssertionCriteria,
    statement: str,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SqlAssertion, _SqlAssertionInput]:
    """Look up the stored SQL assertion and monitor, then merge or create.

    If no assertion entity is found for the input's urn, a new SQL assertion is
    created via ``_create_sql_assertion`` and that result is returned. Otherwise
    the stored state is wrapped in a ``SqlAssertion`` and handed, together with
    the request values, to ``_merge_sql_input``.

    Args:
        assertion_input: The validated input for this request.
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        criteria: The criteria of the assertion.
        statement: The SQL statement of the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        updated_by: The user performing the change; forwarded as ``created_by``
            when a new assertion has to be created.
        now_utc: The current UTC time from when the caller was invoked.
        schedule: The schedule to be applied to the assertion.

    Returns:
        The newly created ``SqlAssertion`` when nothing was stored, otherwise
        the merged ``_SqlAssertionInput``.

    Raises:
        SDKUsageError: If the existing assertion has a ``dataset_urn`` that
            differs from the one on ``assertion_input``.
    """
    # 1. Fetch whatever is currently stored for this assertion and its monitor.
    stored_assertion, monitor_urn, stored_monitor = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2. No stored assertion: create a brand new one and return it directly.
    if not stored_assertion:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_sql_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            criteria_type=criteria.type,
            criteria_change_type=criteria.change_type,
            criteria_operator=criteria.operator,
            criteria_parameters=criteria.parameters,
            statement=statement,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    # 3. The assertion exists. Use the stored monitor when there is one,
    #    otherwise build a placeholder monitor so an assertion object can still
    #    be constructed. The placeholder is INACTIVE only when the caller
    #    explicitly passed enabled=False.
    if stored_monitor:
        monitor_for_wrapper = stored_monitor
    else:
        placeholder_mode = (
            "INACTIVE" if enabled is not None and not enabled else "ACTIVE"
        )
        monitor_for_wrapper = Monitor(
            id=monitor_urn, info=("ASSERTION", placeholder_mode)
        )
    existing_assertion = SqlAssertion._from_entities(
        stored_assertion, monitor_for_wrapper
    )

    # 4. Refuse to continue if the stored assertion targets a different dataset.
    if existing_assertion and hasattr(existing_assertion, "dataset_urn"):
        if existing_assertion.dataset_urn != assertion_input.dataset_urn:
            raise SDKUsageError(
                f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
            )

    # 5. Merge the stored state with the validated input.
    return self._merge_sql_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=stored_assertion,
        existing_assertion=existing_assertion,
        schedule=schedule,
    )
666
+
667
def _retrieve_assertion_and_monitor(
    self,
    assertion_input: _AssertionInput,
) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
    """Fetch the assertion and its monitor from the DataHub instance.

    Args:
        assertion_input: The validated input; its ``urn`` must be set and its
            ``dataset_urn`` is used to derive the monitor urn.

    Returns:
        A 3-tuple of (assertion entity or None, monitor urn, monitor entity or
        None). An entity is None when the lookup returned None or raised
        ``ItemNotFoundError``.
    """
    assert assertion_input.urn is not None, "URN is required"

    # Assertion lookup: a missing item is not an error here.
    found_assertion: Optional[Assertion] = None
    try:
        fetched = self.client.entities.get(assertion_input.urn)
    except ItemNotFoundError:
        pass
    else:
        if fetched is not None:
            assert isinstance(fetched, Assertion)
            found_assertion = fetched

    # The monitor urn is derived from the (dataset urn, assertion urn) pair.
    monitor_urn = Monitor._ensure_id(
        id=(assertion_input.dataset_urn, assertion_input.urn)
    )

    # Monitor lookup: same tolerance for a missing item.
    found_monitor: Optional[Monitor] = None
    try:
        fetched = self.client.entities.get(monitor_urn)
    except ItemNotFoundError:
        pass
    else:
        if fetched is not None:
            assert isinstance(fetched, Monitor)
            found_monitor = fetched

    return found_assertion, monitor_urn, found_monitor
705
+
706
def _merge_smart_freshness_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _SmartFreshnessAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartFreshnessAssertion,
) -> _SmartFreshnessAssertionInput:
    """Build a smart freshness input that combines the request with stored state.

    Each mergeable field is resolved by ``_merge_field``, which is given the
    value from the request, the field name, the validated input, the existing
    assertion and the value read back from the stored entities.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time; used as ``created_at`` when the existing
            assertion has none.
        assertion_input: The validated input for this request.
        maybe_assertion_entity: The stored assertion entity, if any.
        maybe_monitor_entity: The stored monitor entity, if any.
        existing_assertion: The existing assertion built from the stored entities.

    Returns:
        The merged assertion input.
    """

    def merged(field_name, requested, stored):
        # Single funnel into _merge_field so every field is resolved the same way.
        return _merge_field(
            requested, field_name, assertion_input, existing_assertion, stored
        )

    return _SmartFreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=merged(
            "display_name",
            display_name,
            maybe_assertion_entity.description if maybe_assertion_entity else None,
        ),
        enabled=merged(
            "enabled",
            enabled,
            (existing_assertion.mode == AssertionMode.ACTIVE)
            if existing_assertion
            else None,
        ),
        schedule=merged(
            "schedule",
            None,  # Don't allow schedule modification in updates - always preserve existing
            existing_assertion.schedule if existing_assertion else None,
        ),
        detection_mechanism=merged(
            "detection_mechanism",
            detection_mechanism,
            # TODO: Consider moving this conversion to DetectionMechanism.parse(),
            # it could avoid having to use Optional on the return type of
            # SmartFreshnessAssertion.get_detection_mechanism()
            SmartFreshnessAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None,
        ),
        sensitivity=merged(
            "sensitivity",
            sensitivity,
            maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
        ),
        exclusion_windows=merged(
            "exclusion_windows",
            exclusion_windows,
            maybe_monitor_entity.exclusion_windows if maybe_monitor_entity else None,
        ),
        training_data_lookback_days=merged(
            "training_data_lookback_days",
            training_data_lookback_days,
            maybe_monitor_entity.training_data_lookback_days
            if maybe_monitor_entity
            else None,
        ),
        incident_behavior=merged(
            "incident_behavior",
            incident_behavior,
            SmartFreshnessAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        tags=merged(
            "tags",
            tags,
            maybe_assertion_entity.tags if maybe_assertion_entity else None,
        ),
        # Creation metadata comes from the existing assertion, with defaults
        # when it is unset.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        # Update metadata always comes from the current request.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
834
+
835
def _merge_freshness_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _FreshnessAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: FreshnessAssertion,
    schedule: Optional[Union[str, models.CronScheduleClass]],
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
) -> _FreshnessAssertionInput:
    """Build a freshness input that combines the request with stored state.

    Each mergeable field is resolved by ``_merge_field``, which is given the
    value from the request, the field name, the validated input, the existing
    assertion and the value read back from the stored entities.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time; used as ``created_at`` when the existing
            assertion has none.
        assertion_input: The validated input for this request.
        maybe_assertion_entity: The stored assertion entity, if any.
        maybe_monitor_entity: The stored monitor entity, if any.
        existing_assertion: The existing assertion built from the stored entities.
        schedule: The schedule to be applied to the assertion.
        freshness_schedule_check_type: The freshness schedule check type to be
            applied to the assertion.
        lookback_window: The lookback window to be applied to the assertion.

    Returns:
        The merged assertion input.
    """

    def merged(field_name, requested, stored):
        # Single funnel into _merge_field so every field is resolved the same way.
        return _merge_field(
            requested, field_name, assertion_input, existing_assertion, stored
        )

    return _FreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=merged(
            "display_name",
            display_name,
            maybe_assertion_entity.description if maybe_assertion_entity else None,
        ),
        enabled=merged(
            "enabled",
            enabled,
            (existing_assertion.mode == AssertionMode.ACTIVE)
            if existing_assertion
            else None,
        ),
        schedule=merged(
            "schedule",
            schedule,
            existing_assertion.schedule if existing_assertion else None,
        ),
        freshness_schedule_check_type=merged(
            "freshness_schedule_check_type",
            freshness_schedule_check_type,
            existing_assertion._freshness_schedule_check_type
            if existing_assertion
            else None,
        ),
        lookback_window=merged(
            "lookback_window",
            lookback_window,
            existing_assertion.lookback_window if existing_assertion else None,
        ),
        detection_mechanism=merged(
            "detection_mechanism",
            detection_mechanism,
            # Only derivable when both stored entities are available.
            FreshnessAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None,
        ),
        incident_behavior=merged(
            "incident_behavior",
            incident_behavior,
            FreshnessAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        tags=merged(
            "tags",
            tags,
            maybe_assertion_entity.tags if maybe_assertion_entity else None,
        ),
        # Creation metadata comes from the existing assertion, with defaults
        # when it is unset.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        # Update metadata always comes from the current request.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
954
+
955
def _merge_volume_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _VolumeAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: VolumeAssertion,
    schedule: Optional[Union[str, models.CronScheduleClass]],
    definition: Optional[_VolumeAssertionDefinitionTypes],
) -> _VolumeAssertionInput:
    """Build a volume input that combines the request with stored state.

    Each mergeable field is resolved by ``_merge_field``, which is given the
    value from the request, the field name, the validated input, the existing
    assertion and the value read back from the stored entities.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time; used as ``created_at`` when the existing
            assertion has none.
        assertion_input: The validated input for this request.
        maybe_assertion_entity: The stored assertion entity, if any.
        maybe_monitor_entity: The stored monitor entity, if any.
        existing_assertion: The existing assertion built from the stored entities.
        schedule: The schedule to be applied to the assertion.
        definition: The volume assertion definition to be applied to the assertion.

    Returns:
        The merged assertion input.
    """

    def merged(field_name, requested, stored):
        # Single funnel into _merge_field so every field is resolved the same way.
        return _merge_field(
            requested, field_name, assertion_input, existing_assertion, stored
        )

    return _VolumeAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=merged(
            "display_name",
            display_name,
            maybe_assertion_entity.description if maybe_assertion_entity else None,
        ),
        enabled=merged(
            "enabled",
            enabled,
            (existing_assertion.mode == AssertionMode.ACTIVE)
            if existing_assertion
            else None,
        ),
        schedule=merged(
            "schedule",
            schedule,
            existing_assertion.schedule if existing_assertion else None,
        ),
        detection_mechanism=merged(
            "detection_mechanism",
            detection_mechanism,
            # Only derivable when both stored entities are available.
            VolumeAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None,
        ),
        incident_behavior=merged(
            "incident_behavior",
            incident_behavior,
            VolumeAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        tags=merged(
            "tags",
            tags,
            maybe_assertion_entity.tags if maybe_assertion_entity else None,
        ),
        definition=merged(
            "definition",
            definition,
            existing_assertion.definition if existing_assertion else None,
        ),
        # Creation metadata comes from the existing assertion, with defaults
        # when it is unset.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        # Update metadata always comes from the current request.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
1062
+
1063
def _merge_sql_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    criteria: SqlAssertionCriteria,
    statement: str,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _SqlAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    # not used: maybe_monitor_entity: Optional[Monitor], as schedule is already set in existing_assertion
    existing_assertion: SqlAssertion,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> _SqlAssertionInput:
    """Build a SQL assertion input that combines the request with stored state.

    Each mergeable field is resolved by ``_merge_field``, which is given the
    value from the request, the field name, the validated input, the existing
    assertion and the value read back from the stored entities.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        criteria: The criteria of the assertion.
        statement: The statement of the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time; used as ``created_at`` when the existing
            assertion has none.
        assertion_input: The validated input for this request.
        maybe_assertion_entity: The stored assertion entity, if any.
        existing_assertion: The existing assertion built from the stored entities.
        schedule: The schedule to be applied to the assertion.

    Returns:
        The merged assertion input.
    """

    def merged(field_name, requested, stored):
        # Single funnel into _merge_field so every field is resolved the same way.
        return _merge_field(
            requested, field_name, assertion_input, existing_assertion, stored
        )

    return _SqlAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=merged(
            "display_name",
            display_name,
            maybe_assertion_entity.description if maybe_assertion_entity else None,
        ),
        enabled=merged(
            "enabled",
            enabled,
            (existing_assertion.mode == AssertionMode.ACTIVE)
            if existing_assertion
            else None,
        ),
        schedule=merged(
            "schedule",
            schedule,
            # TODO should this use maybe_monitor_entity.schedule?
            existing_assertion.schedule if existing_assertion else None,
        ),
        criteria=merged(
            "criteria",
            criteria,
            existing_assertion._criteria if existing_assertion else None,
        ),
        statement=merged(
            "statement",
            statement,
            existing_assertion.statement if existing_assertion else None,
        ),
        incident_behavior=merged(
            "incident_behavior",
            incident_behavior,
            SqlAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        tags=merged(
            "tags",
            tags,
            maybe_assertion_entity.tags if maybe_assertion_entity else None,
        ),
        # Creation metadata comes from the existing assertion, with defaults
        # when it is unset.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        # Update metadata always comes from the current request.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
1166
+
1167
def _merge_smart_volume_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    schedule: Optional[Union[str, models.CronScheduleClass]],
    now_utc: datetime,
    assertion_input: _SmartVolumeAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartVolumeAssertion,
) -> _SmartVolumeAssertionInput:
    """Build a smart volume input that combines the request with stored state.

    Each mergeable field is resolved by ``_merge_field``, which is given the
    value from the request, the field name, the validated input, the existing
    assertion and the value read back from the stored entities.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        schedule: The schedule to be applied to the assertion.
        now_utc: The current UTC time; used as ``created_at`` when the existing
            assertion has none.
        assertion_input: The validated input for this request.
        maybe_assertion_entity: The stored assertion entity, if any.
        maybe_monitor_entity: The stored monitor entity, if any.
        existing_assertion: The existing assertion built from the stored entities.

    Returns:
        The merged assertion input.
    """

    def merged(field_name, requested, stored):
        # Single funnel into _merge_field so every field is resolved the same way.
        return _merge_field(
            requested, field_name, assertion_input, existing_assertion, stored
        )

    return _SmartVolumeAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=merged(
            "display_name",
            display_name,
            maybe_assertion_entity.description if maybe_assertion_entity else None,
        ),
        enabled=merged(
            "enabled",
            enabled,
            (existing_assertion.mode == AssertionMode.ACTIVE)
            if existing_assertion
            else None,
        ),
        schedule=merged(
            "schedule",
            schedule,
            existing_assertion.schedule if existing_assertion else None,
        ),
        detection_mechanism=merged(
            "detection_mechanism",
            detection_mechanism,
            # Only derivable when both stored entities are available.
            SmartVolumeAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None,
        ),
        sensitivity=merged(
            "sensitivity",
            sensitivity,
            maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
        ),
        exclusion_windows=merged(
            "exclusion_windows",
            exclusion_windows,
            maybe_monitor_entity.exclusion_windows if maybe_monitor_entity else None,
        ),
        training_data_lookback_days=merged(
            "training_data_lookback_days",
            training_data_lookback_days,
            maybe_monitor_entity.training_data_lookback_days
            if maybe_monitor_entity
            else None,
        ),
        incident_behavior=merged(
            "incident_behavior",
            incident_behavior,
            SmartVolumeAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        tags=merged(
            "tags",
            tags,
            maybe_assertion_entity.tags if maybe_assertion_entity else None,
        ),
        # Creation metadata comes from the existing assertion, with defaults
        # when it is unset.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        # Update metadata always comes from the current request.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
1296
+
1297
def _create_smart_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
) -> SmartFreshnessAssertion:
    """Create a smart freshness assertion together with its monitor.

    All arguments are keyword-only. The created assertion will use the default
    hourly schedule ("0 * * * *").

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name='last_modified',
                additional_filter='last_modified > 2021-01-01')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        sensitivity: The sensitivity to be applied to the assertion. Valid
            values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the
            assertion; currently only fixed range exclusion windows are
            supported. Valid values are:
            - from datetime.datetime objects: {
                "start": datetime(2025, 1, 1, 0, 0, 0),
                "end": datetime(2025, 1, 2, 0, 0, 0),
              }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
              }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
              )
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion, as an integer.
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
            - A list of the above values (strings or enum values)
            - None (default behavior)
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion, in the
            form "urn:li:corpuser:<username>" (shown on the Users & Groups
            page). When omitted, ``DEFAULT_CREATED_BY`` is used and a warning
            is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.

    Returns:
        SmartFreshnessAssertion: The created assertion.
    """
    _print_experimental_warning()
    creation_time = datetime.now(timezone.utc)

    # Fall back to the placeholder author when the caller did not name one.
    author = created_by
    if author is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        author = DEFAULT_CREATED_BY

    # A brand new assertion has no urn yet, and creator and updater coincide.
    validated_input = _SmartFreshnessAssertionInput(
        urn=None,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=author,
        created_at=creation_time,
        updated_by=author,
        updated_at=creation_time,
    )
    new_assertion, new_monitor = validated_input.to_assertion_and_monitor_entities()

    # The assertion is created first; if that raises, the monitor is never
    # attempted.
    self.client.entities.create(new_assertion)
    # TODO: Wrap monitor creation in a try-except and delete the assertion if
    # monitor creation fails (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    self.client.entities.create(new_monitor)

    return SmartFreshnessAssertion._from_entities(new_assertion, new_monitor)
1407
+
1408
def _create_smart_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartVolumeAssertion:
    """Create a smart volume assertion and its monitor.

    Note: keyword arguments are required.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are
            the ones documented on ``sync_smart_volume_assertion``, which
            forwards this argument here unchanged:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - {
                "type": "query",
                "additional_filter": "value > 1000",
              } or DetectionMechanism.QUERY(additional_filter='value > 1000')
            - "dataset_profile" or DetectionMechanism.DATASET_PROFILE
        sensitivity: The sensitivity to be applied to the assertion. Valid
            values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the
            assertion; currently only fixed range exclusion windows are
            supported. Valid values are:
            - from datetime.datetime objects: {
                "start": datetime(2025, 1, 1, 0, 0, 0),
                "end": datetime(2025, 1, 2, 0, 0, 0),
              }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
              }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
              )
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion as an integer.
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
            - A list of the above values (strings or enum values)
            - None (default behavior)
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. When omitted, DEFAULT_CREATED_BY (documented
            as the datahub system user) is used and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        SmartVolumeAssertion: The created assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)
    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    entities_client = self.client.entities
    assertion_input = _SmartVolumeAssertionInput(
        urn=None,  # no existing assertion urn is supplied on the create path
        entity_client=entities_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        # On creation the "created" and "updated" audit fields are identical.
        created_by=created_by,
        created_at=now_utc,
        updated_by=created_by,
        updated_at=now_utc,
        schedule=schedule,
    )
    assertion_entity, monitor_entity = (
        assertion_input.to_assertion_and_monitor_entities()
    )

    # The assertion is written first: if this call raises, the monitor is
    # never created.
    entities_client.create(assertion_entity)
    # TODO: Once delete is implemented
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy),
    # wrap the monitor creation in a try-except that logs the error, deletes
    # the assertion created above and re-raises.
    entities_client.create(monitor_entity)
    return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
1529
+
1530
def _create_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> FreshnessAssertion:
    """Create a freshness assertion and its monitor.

    Note: keyword arguments are required.

    The created assertion will use the default daily schedule ("0 0 * * *").
    NOTE(review): a ``schedule`` argument is also accepted and forwarded, so
    this presumably describes the case where ``schedule`` is omitted - confirm.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name='last_modified',
                additional_filter="last_modified > '2021-01-01'")
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        freshness_schedule_check_type: The freshness schedule check type to
            be applied to the assertion. Valid values are:
            - "since_the_last_check" or
              models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK
            - "cron" or models.FreshnessAssertionScheduleTypeClass.CRON
        lookback_window: The lookback window to be applied to the assertion.
            Valid values are:
            - from models.TimeWindowSize objects: models.TimeWindowSizeClass(
                unit=models.CalendarIntervalClass.DAY,
                multiple=1)
            - from TimeWindowSize objects: TimeWindowSize(unit='DAY', multiple=1)
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
            - A list of the above values (strings or enum values)
            - None (default behavior)
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        FreshnessAssertion: The created assertion.
    """
    _print_experimental_warning()
    timestamp = datetime.now(timezone.utc)
    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    entities_client = self.client.entities
    freshness_input = _FreshnessAssertionInput(
        urn=None,
        entity_client=entities_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
        incident_behavior=incident_behavior,
        tags=tags,
        # Creator and last updater are the same actor and instant on creation.
        created_by=created_by,
        created_at=timestamp,
        updated_by=created_by,
        updated_at=timestamp,
        schedule=schedule,
    )
    assertion_entity, monitor_entity = (
        freshness_input.to_assertion_and_monitor_entities()
    )

    # Assertion first; a failure here raises before the monitor is attempted.
    entities_client.create(assertion_entity)
    # TODO: Wrap monitor creation in a try-except and delete the assertion if
    # monitor creation fails (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy).
    # The intended handler logs the error, deletes the assertion and re-raises.
    entities_client.create(monitor_entity)
    return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
1635
+
1636
def _create_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    criteria_type: Union[str, VolumeAssertionDefinitionType],
    criteria_change_type: Optional[
        Union[str, VolumeAssertionDefinitionChangeKind]
    ] = None,
    criteria_operator: Union[str, VolumeAssertionOperator],
    criteria_parameters: VolumeAssertionDefinitionParameters,
) -> VolumeAssertion:
    """Create a volume assertion and its monitor.

    Note: keyword arguments are required.

    The created assertion will use the default daily schedule ("0 0 * * *").
    NOTE(review): a ``schedule`` argument is also accepted and forwarded, so
    this presumably describes the case where ``schedule`` is omitted - confirm.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name='last_modified',
                additional_filter="last_modified > '2021-01-01'")
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
            NOTE(review): this list matches the freshness assertion docs,
            while ``sync_smart_volume_assertion`` documents
            "information_schema", "query" and "dataset_profile" for volume
            assertions - confirm which set applies here.
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
            - A list of the above values (strings or enum values)
            - None (default behavior)
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.
        criteria_type: The type of volume assertion. Must be either
            VolumeAssertionDefinitionType.ROW_COUNT_TOTAL or
            VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. Raw string values
            are also accepted: "ROW_COUNT_TOTAL" or "ROW_COUNT_CHANGE".
        criteria_change_type: Required when criteria_type is
            VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. Must be either
            VolumeAssertionDefinitionChangeKind.ABSOLUTE or
            VolumeAssertionDefinitionChangeKind.PERCENT. Optional (ignored)
            when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_TOTAL.
            Raw string values are also accepted: "ABSOLUTE" or "PERCENTAGE".
            NOTE(review): the enum member is written PERCENT but the raw
            string PERCENTAGE - confirm against the enum definition.
        criteria_operator: The comparison operator for the assertion. Must be
            a VolumeAssertionOperator value:
            - VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO
            - VolumeAssertionOperator.LESS_THAN_OR_EQUAL_TO
            - VolumeAssertionOperator.BETWEEN
            Raw string values are also accepted: "GREATER_THAN_OR_EQUAL_TO",
            "LESS_THAN_OR_EQUAL_TO", "BETWEEN".
        criteria_parameters: The parameters for the assertion. For
            single-value operators (GREATER_THAN_OR_EQUAL_TO,
            LESS_THAN_OR_EQUAL_TO), provide a single number. For the BETWEEN
            operator, provide a tuple of two numbers (min_value, max_value).

            Examples:
            - For single value: 100 or 50.5
            - For BETWEEN: (10, 100) or (5.0, 15.5)

    Returns:
        VolumeAssertion: The created assertion.
    """
    _print_experimental_warning()
    timestamp = datetime.now(timezone.utc)
    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    # Assemble the criteria into a single definition mapping. Full validation
    # of this mapping is left to the _VolumeAssertionInput class.
    definition: dict[str, Any] = {
        "type": criteria_type,
        "operator": criteria_operator,
        "parameters": criteria_parameters,
    }
    # The change kind is attached only for change-based assertions.
    is_row_count_change = (
        criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
    )
    if is_row_count_change:
        definition["kind"] = criteria_change_type

    entities_client = self.client.entities
    volume_input = _VolumeAssertionInput(
        urn=None,
        entity_client=entities_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        # Creator and last updater are the same actor and instant on creation.
        created_by=created_by,
        created_at=timestamp,
        updated_by=created_by,
        updated_at=timestamp,
        schedule=schedule,
        definition=definition,
    )
    assertion_entity, monitor_entity = (
        volume_input.to_assertion_and_monitor_entities()
    )

    # Assertion first; a failure here raises before the monitor is attempted.
    entities_client.create(assertion_entity)
    # TODO: Wrap monitor creation in a try-except and delete the assertion if
    # monitor creation fails (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy).
    # The intended handler logs the error, deletes the assertion and re-raises.
    entities_client.create(monitor_entity)
    return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
1762
+
1763
def _create_sql_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    display_name: Optional[str] = None,
    enabled: bool = True,
    criteria_type: Union[SqlAssertionType, str],
    criteria_change_type: Optional[Union[SqlAssertionChangeType, str]] = None,
    criteria_operator: Union[SqlAssertionOperator, str],
    criteria_parameters: Union[
        Union[float, int], tuple[Union[float, int], Union[float, int]]
    ],
    statement: str,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SqlAssertion:
    """Create a sql assertion and its monitor.

    Note: keyword arguments are required.

    ``incident_behavior`` and ``tags`` default to None, matching the other
    ``_create_*_assertion`` methods of this class.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        criteria_type: The type of sql assertion. Valid values are:
            - "METRIC" -> Looks at the current value of the metric.
            - "METRIC_CHANGE" -> Looks at the change in the metric between
              the current and previous run.
        criteria_change_type: The change type of the assertion, if the type
            is "METRIC_CHANGE". Valid values are:
            - "ABSOLUTE" -> Looks at the absolute change in the metric.
            - "PERCENTAGE" -> Looks at the percentage change in the metric.
        criteria_operator: The operator to be used for the assertion. Valid
            values are:
            - "GREATER_THAN" -> The metric value is greater than the threshold.
            - "LESS_THAN" -> The metric value is less than the threshold.
            - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than
              or equal to the threshold.
            - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or
              equal to the threshold.
            - "EQUAL_TO" -> The metric value is equal to the threshold.
            - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
            - "BETWEEN" -> The metric value is between the two thresholds.
        criteria_parameters: The parameters to be used for the assertion.
            This can be a single value or a tuple range.
            - If the operator is "BETWEEN", the value is a tuple of two
              values, with format min, max.
            - If the operator is not "BETWEEN", the value is a single value.
        statement: The statement to be used for the assertion.
        incident_behavior: The incident behavior to be applied to the
            assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
            - A list of the above values (strings or enum values)
            - None (default behavior)
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
            - None (the default)
        created_by: Optional urn of the user who created the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. When omitted, DEFAULT_CREATED_BY is used as
            a placeholder and a warning is logged.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        SqlAssertion: The created assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)
    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    # Bundle the individual criteria arguments into one criteria object.
    criteria = SqlAssertionCriteria(
        type=criteria_type,
        change_type=criteria_change_type,
        operator=criteria_operator,
        parameters=criteria_parameters,
    )
    entities_client = self.client.entities
    assertion_input = _SqlAssertionInput(
        urn=None,  # no existing assertion urn is supplied on the create path
        entity_client=entities_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        enabled=enabled,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        # On creation the "created" and "updated" audit fields are identical.
        created_by=created_by,
        created_at=now_utc,
        updated_by=created_by,
        updated_at=now_utc,
        schedule=schedule,
    )
    assertion_entity, monitor_entity = (
        assertion_input.to_assertion_and_monitor_entities()
    )

    # The assertion is written first: if this call raises, the monitor is
    # never created.
    entities_client.create(assertion_entity)
    # TODO: Once delete is implemented
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy),
    # wrap the monitor creation in a try-except that logs the error, deletes
    # the assertion created above and re-raises.
    entities_client.create(monitor_entity)
    return SqlAssertion._from_entities(assertion_entity, monitor_entity)
1868
+
1869
def sync_smart_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartVolumeAssertion:
    """Upsert and merge a smart volume assertion.

    Note:
        Keyword arguments are required.

    Upsert and merge is a combination of create and update. If the assertion
    does not exist, it will be created. If it does exist, it will be updated.
    Existing assertion fields will be updated if the input value is not None.
    If the input value is None, the existing value will be preserved. If the
    input value can be un-set (e.g. by passing an empty list or empty
    string), it will be unset.

    Schedule behavior:
        - Create case: Uses default hourly schedule ("0 * * * *") or provided
          schedule.
        - Update case: Schedule is updated if provided, otherwise existing
          schedule is preserved.

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be
            monitored.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion.
            If not provided, a urn will be generated and the assertion will
            be created in the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. If
            not provided, a random display name will be generated.
        enabled (Optional[bool]): Whether the assertion is enabled. If not
            provided, the existing value will be preserved (a newly created
            assertion is enabled).
        detection_mechanism (DetectionMechanismInputTypes): The detection
            mechanism to be used for the assertion. Information schema is
            recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - {"type": "query", "additional_filter": "value > 1000"} or
              DetectionMechanism.QUERY(additional_filter='value > 1000')
            - "dataset_profile" or DetectionMechanism.DATASET_PROFILE
        sensitivity (Optional[Union[str, InferenceSensitivity]]): The
            sensitivity to be applied to the assertion. Valid values are:
            "low", "medium", "high".
        exclusion_windows (Optional[ExclusionWindowInputTypes]): The
            exclusion windows to be applied to the assertion. Only fixed
            range exclusion windows are supported.
        training_data_lookback_days (Optional[int]): The training data
            lookback days to be applied to the assertion as an integer.
        incident_behavior (Optional[Union[str, list[str],
            AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]):
            The incident behavior to be applied to the assertion. Valid
            values are: "raise_on_fail", "resolve_on_pass", or the typed
            ones (AssertionIncidentBehavior.RAISE_ON_FAIL and
            AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags to be applied to the
            assertion. Valid values are: a list of strings, TagUrn objects,
            or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the
            user who updated the assertion. The format is
            "urn:li:corpuser:<username>". The default is the datahub system
            user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional
            cron formatted schedule for the assertion. If not provided, a
            default schedule will be used. The format is a cron expression,
            e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided.

    Returns:
        SmartVolumeAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    timestamp = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with, so delegate to
    # the plain create path (the updater doubles as the creator).
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_smart_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=True if enabled is None else enabled,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    entities_client = self.client.entities

    # Step 2: a urn was given - build the input object first, which validates
    # the caller's arguments.
    requested_input = _SmartVolumeAssertionInput(
        urn=urn,
        entity_client=entities_client,
        dataset_urn=dataset_urn,
        display_name=display_name,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,  # This will be overridden by the actual created_by
        created_at=timestamp,  # This will be overridden by the actual created_at
        updated_by=updated_by,
        updated_at=timestamp,
        schedule=schedule,
    )

    # Step 3: merge the requested input with the stored assertion and monitor
    # entities; the helper creates a new assertion instead when none exists.
    merge_outcome = self._retrieve_and_merge_volume_assertion_and_monitor(
        assertion_input=requested_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=timestamp,
        schedule=schedule,
    )

    # A public assertion object means the merge step already created the
    # assertion, so there is nothing left to upsert.
    if isinstance(merge_outcome, _AssertionPublic):
        # Narrow the type for the declared return type of this method.
        assert isinstance(merge_outcome, SmartVolumeAssertion)
        return merge_outcome

    # Step 4: upsert the merged assertion, then its monitor.
    assertion_entity, monitor_entity = (
        merge_outcome.to_assertion_and_monitor_entities()
    )
    # If the assertion upsert raises, the monitor upsert is never attempted.
    entities_client.upsert(assertion_entity)
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if
    # monitor upsert fails (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy).
    # The intended handler logs the error, deletes the assertion and re-raises.
    entities_client.upsert(monitor_entity)

    return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
2009
+
2010
def sync_smart_column_metric_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType] = None,
    value_type: Optional[ValueTypeInputType] = None,
    range: Optional[RangeInputType] = None,
    range_type: Optional[RangeTypeInputType] = None,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartColumnMetricAssertion:
    """Create or update (upsert and merge) a smart column metric assertion.

    Note:
        All arguments must be passed by keyword.

        When ``urn`` is omitted a new assertion is created. When ``urn`` is given, the
        assertion is looked up; if it does not exist it is created, otherwise it is updated.

        On update, an input that is not None replaces the existing value, while an input
        of None keeps the existing value. Fields that can be un-set (e.g. by passing an
        empty list or an empty string) are un-set when such a value is passed.

    Schedule behavior:
        - Create: the provided schedule, or the default of every 6 hours.
        - Update: the provided schedule, or the existing schedule.

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
        column_name (str): The name of the column to be monitored.
        metric_type (MetricInputType): The type of the metric to be monitored.
        operator (OperatorInputType): The operator to be used for the assertion.
        value (Optional[ValueInputType]): The value to compare against. Required if the operator requires a value.
        value_type (Optional[ValueTypeInputType]): The type of ``value``. Required if the operator requires a value.
        range (Optional[RangeInputType]): The range to compare against. Required if the operator requires a range.
        range_type (Optional[RangeTypeInputType]): The type of ``range``. Required if the operator requires a range.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn is generated and the assertion is created in the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
        enabled (Optional[bool]): Whether the assertion is enabled. On create, None is treated as True; on update, None preserves the existing value.
        detection_mechanism (DetectionMechanismInputTypes): The detection mechanism to be used for the assertion. Valid values are (additional_filter is optional):
            - "all_rows_query_datahub_dataset_profile" or DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
            - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY(), or with additional_filter: {"type": "all_rows_query", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.ALL_ROWS_QUERY(additional_filter='last_modified > 2021-01-01')
            - {"type": "changed_rows_query", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
        sensitivity (Optional[Union[str, InferenceSensitivity]]): The sensitivity to be applied to the assertion. Valid values are: "low", "medium", "high".
        exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported.
        training_data_lookback_days (Optional[int]): The training data lookback days to be applied to the assertion as an integer.
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion, in the format "urn:li:corpuser:<username>". When omitted, DEFAULT_CREATED_BY is used and a warning is logged.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.

    Returns:
        SmartColumnMetricAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with, so create directly.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_smart_column_metric_assertion(
            dataset_urn=dataset_urn,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            value=value,
            value_type=value_type,
            range=range,
            range_type=range_type,
            display_name=display_name,
            # An unspecified `enabled` means "enabled" for a brand-new assertion.
            enabled=True if enabled is None else enabled,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    # Step 2: a urn was given, so validate the raw input first.
    # created_by / created_at are placeholders here; the merge step replaces
    # them with the values stored on the existing assertion.
    validated_input = _SmartColumnMetricAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        display_name=display_name,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        schedule=schedule,
    )

    # Step 3: merge with the stored assertion/monitor, or create a new
    # assertion if none is stored under this urn.
    merge_result = self._retrieve_and_merge_smart_column_metric_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=now_utc,
        schedule=schedule,
    )

    if not isinstance(merge_result, _AssertionPublic):
        # Step 4: the merge produced an input object; upsert its entities.
        assertion_entity, monitor_entity = (
            merge_result.to_assertion_and_monitor_entities()
        )
        # The assertion goes first: if its upsert raises, the monitor upsert
        # is never attempted.
        self.client.entities.upsert(assertion_entity)
        # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
        self.client.entities.upsert(monitor_entity)

        return SmartColumnMetricAssertion._from_entities(
            assertion_entity, monitor_entity
        )

    # The merge step created a brand-new assertion; hand it back unchanged.
    # The assert narrows the type for the checker.
    assert isinstance(merge_result, SmartColumnMetricAssertion)
    return merge_result
2188
+
2189
def _create_smart_column_metric_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType] = None,
    value_type: Optional[ValueTypeInputType] = None,
    range: Optional[RangeInputType] = None,
    range_type: Optional[RangeTypeInputType] = None,
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartColumnMetricAssertion:
    """Create a smart column metric assertion.

    Note: keyword arguments are required.

    Args:
        dataset_urn: The urn of the dataset to be monitored. (Required)
        column_name: The name of the column to be monitored. (Required)
        metric_type: The type of the metric to be monitored. (Required)
        operator: The operator to be used for the assertion. (Required)
        value: The value to be used for the assertion. (Required if operator requires a value)
        value_type: The type of the value to be used for the assertion. (Required if operator requires a value)
        range: The range to be used for the assertion. (Required if operator requires a range)
        range_type: The type of the range to be used for the assertion. (Required if operator requires a range)
        display_name: The display name of the assertion. If not provided, a random display
            name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the assertion.
            Valid values are (additional_filter is optional):
            - "all_rows_query_datahub_dataset_profile" or
                DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
            - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY(), or with a filter: {
                "type": "all_rows_query",
                "additional_filter": "last_modified > '2021-01-01'",
            } or DetectionMechanism.ALL_ROWS_QUERY(
                additional_filter='last_modified > 2021-01-01')
            - {
                "type": "changed_rows_query",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
            } or DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified',
                additional_filter='last_modified > 2021-01-01')
        sensitivity: The sensitivity to be applied to the assertion. Valid values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the assertion, currently only
            fixed range exclusion windows are supported. Valid values are:
            - from datetime.datetime objects: {
                "start": datetime(2025, 1, 1, 0, 0, 0),
                "end": datetime(2025, 1, 2, 0, 0, 0),
            }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
            }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
            )
        training_data_lookback_days: The training data lookback days to be applied to the
            assertion as an integer.
        incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
            - A list of the above values (strings or enum values)
            - None (default behavior)
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The format is
            "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
            When omitted, DEFAULT_CREATED_BY is used and a warning is logged.
            TODO: Retrieve the SDK user as the default instead of DEFAULT_CREATED_BY.
        schedule: Optional cron formatted schedule for the assertion. If not provided, a default
            schedule will be used. The schedule determines when the assertion will be evaluated.
            The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided with string parameters
            cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.

    Returns:
        SmartColumnMetricAssertion: The created assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)
    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY
    # urn=None: no urn is supplied for a brand-new assertion. The creator is
    # also recorded as the last updater, with the same timestamp for both.
    assertion_input = _SmartColumnMetricAssertionInput(
        urn=None,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=created_by,
        created_at=now_utc,
        updated_by=created_by,
        updated_at=now_utc,
        schedule=schedule,
    )
    assertion_entity, monitor_entity = (
        assertion_input.to_assertion_and_monitor_entities()
    )
    # If assertion creation fails, we won't try to create the monitor
    self.client.entities.create(assertion_entity)
    # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    # try:
    self.client.entities.create(monitor_entity)
    # except Exception as e:
    #     logger.error(f"Error creating monitor: {e}")
    #     self.client.entities.delete(assertion_entity)
    #     raise e
    return SmartColumnMetricAssertion._from_entities(
        assertion_entity, monitor_entity
    )
2333
+
2334
def _retrieve_and_merge_smart_column_metric_assertion_and_monitor(
    self,
    assertion_input: _SmartColumnMetricAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType],
    value_type: Optional[ValueTypeInputType],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeInputType],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SmartColumnMetricAssertion, _SmartColumnMetricAssertionInput]:
    """Look up the stored assertion/monitor and merge the caller's input into it.

    Args:
        assertion_input: The already-validated input; used to retrieve the
            stored entities and as the source of validated field values.
        urn: The assertion urn supplied by the caller.
        enabled: Requested enabled state; None means "not specified".
        updated_by: Urn of the acting user; used as ``created_by`` if a new
            assertion has to be created.
        now_utc: Timestamp taken by the caller at the start of the sync.
        (all remaining arguments): The raw user inputs, forwarded unchanged to
            the create or merge helper.

    Returns:
        - A ``SmartColumnMetricAssertion`` if no assertion entity was found and a
          new assertion was therefore created (nothing more for the caller to do).
        - Otherwise a merged ``_SmartColumnMetricAssertionInput`` that the caller
          is expected to convert to entities and upsert.

    Raises:
        SDKUsageError: If the existing assertion's dataset urn differs from the
            dataset urn on ``assertion_input``.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 If the assertion and monitor entities exist, create an assertion object from them:
    if maybe_assertion_entity and maybe_monitor_entity:
        existing_assertion = SmartColumnMetricAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    # 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
    elif maybe_assertion_entity and not maybe_monitor_entity:
        # Only an explicit falsy `enabled` yields INACTIVE; None (unspecified)
        # and True both yield ACTIVE.
        monitor_mode = (
            "INACTIVE" if (enabled is not None and not enabled) else "ACTIVE"
        )
        existing_assertion = SmartColumnMetricAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )
    # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the created assertion:
    elif not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_smart_column_metric_assertion(
            dataset_urn=dataset_urn,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            value=value,
            value_type=value_type,
            range=range,
            range_type=range_type,
            schedule=schedule,
            display_name=display_name,
            # Honour an explicit enabled=False, as the urn-less create path in
            # the public sync method does; previously the value was dropped
            # and the new assertion was always created enabled.
            enabled=enabled if enabled is not None else True,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Merge the existing assertion with the validated input:
    merged_assertion_input = self._merge_smart_column_metric_input(
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        schedule=schedule,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )

    return merged_assertion_input
2440
+
2441
def _merge_smart_column_metric_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType],
    value_type: Optional[ValueTypeInputType],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeInputType],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    schedule: Optional[Union[str, models.CronScheduleClass]],
    now_utc: datetime,
    assertion_input: _SmartColumnMetricAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartColumnMetricAssertion,
) -> _SmartColumnMetricAssertionInput:
    """Build a new assertion input by merging user input with what is already stored.

    Each mergeable field is resolved through ``_merge_field``, which is given the raw
    user input, the validated input, the existing assertion, and the value read from
    the existing entity (or None when the relevant entity is missing).

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        column_name: The name of the column to be monitored.
        metric_type: The type of the metric to be monitored.
        operator: The operator to be used for the assertion.
        value: The value to be used for the assertion.
        value_type: The type of the value to be used for the assertion.
        range: The range to be used for the assertion.
        range_type: The type of the range to be used for the assertion.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be applied to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        schedule: The schedule to be used for the assertion.
        now_utc: The current UTC time from when the function is called.
        assertion_input: The validated input to the function.
        maybe_assertion_entity: The existing assertion entity from the DataHub instance.
        maybe_monitor_entity: The existing monitor entity from the DataHub instance.
        existing_assertion: The existing assertion from the DataHub instance.

    Returns:
        The merged assertion input.
    """
    entity_client = self.client.entities

    # --- Fields whose stored value lives on the assertion entity ---
    merged_column_name = _merge_field(
        input_field_name="column_name",
        input_field_value=column_name,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_column_name(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_metric_type = _merge_field(
        input_field_name="metric_type",
        input_field_value=metric_type,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_metric_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_operator = _merge_field(
        input_field_name="operator",
        input_field_value=operator,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_operator(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_value = _merge_field(
        input_field_name="value",
        input_field_value=value,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_value(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_value_type = _merge_field(
        input_field_name="value_type",
        input_field_value=value_type,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_value_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_range = _merge_field(
        input_field_name="range",
        input_field_value=range,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_range(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_range_type = _merge_field(
        input_field_name="range_type",
        input_field_value=range_type,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_range_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_display_name = _merge_field(
        input_field_name="display_name",
        input_field_value=display_name,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_assertion_entity.description if maybe_assertion_entity else None
        ),
    )

    # --- Fields whose stored value is read from the existing assertion object ---
    merged_enabled = _merge_field(
        input_field_name="enabled",
        input_field_value=enabled,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            (existing_assertion.mode == AssertionMode.ACTIVE)
            if existing_assertion
            else None
        ),
    )
    merged_schedule = _merge_field(
        input_field_name="schedule",
        input_field_value=schedule,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            existing_assertion.schedule if existing_assertion else None
        ),
    )

    # --- Detection mechanism needs both the assertion and the monitor entity ---
    merged_detection_mechanism = _merge_field(
        input_field_name="detection_mechanism",
        input_field_value=detection_mechanism,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None
        ),
    )

    # --- Fields whose stored value lives on the monitor entity ---
    merged_sensitivity = _merge_field(
        input_field_name="sensitivity",
        input_field_value=sensitivity,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_monitor_entity.sensitivity if maybe_monitor_entity else None
        ),
    )
    merged_exclusion_windows = _merge_field(
        input_field_name="exclusion_windows",
        input_field_value=exclusion_windows,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_monitor_entity.exclusion_windows if maybe_monitor_entity else None
        ),
    )
    merged_training_data_lookback_days = _merge_field(
        input_field_name="training_data_lookback_days",
        input_field_value=training_data_lookback_days,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_monitor_entity.training_data_lookback_days
            if maybe_monitor_entity
            else None
        ),
    )

    # --- Remaining assertion-entity fields ---
    merged_incident_behavior = _merge_field(
        input_field_name="incident_behavior",
        input_field_value=incident_behavior,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_tags = _merge_field(
        input_field_name="tags",
        input_field_value=tags,
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_assertion_entity.tags if maybe_assertion_entity else None
        ),
    )

    return _SmartColumnMetricAssertionInput(
        urn=urn,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        column_name=merged_column_name,
        metric_type=merged_metric_type,
        operator=merged_operator,
        value=merged_value,
        value_type=merged_value_type,
        range=merged_range,
        range_type=merged_range_type,
        display_name=merged_display_name,
        enabled=merged_enabled,
        schedule=merged_schedule,
        detection_mechanism=merged_detection_mechanism,
        sensitivity=merged_sensitivity,
        exclusion_windows=merged_exclusion_windows,
        training_data_lookback_days=merged_training_data_lookback_days,
        incident_behavior=merged_incident_behavior,
        tags=merged_tags,
        # Creation audit info comes from the existing assertion, falling back
        # to the default creator / current time when it is not set.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        # Update audit info always comes from the validated input.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
2671
+
2672
def sync_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
) -> FreshnessAssertion:
    """Create a freshness assertion, or merge the given values into an existing one.

    Note:
        All arguments must be passed by keyword.

    This is an "upsert and merge": when the assertion does not exist it is created,
    otherwise it is updated. On update, a field is overwritten only when its input
    value is not None; a None input keeps the existing value. Fields that can be
    un-set (e.g. by passing an empty list or an empty string) are un-set when such
    a value is passed.

    Schedule behavior:
        - Create case: Uses default daily schedule ("0 0 * * *") or provided schedule
        - Update case: Uses existing schedule or provided schedule.

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
        enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved (a newly created assertion defaults to enabled).
        detection_mechanism (DetectionMechanismInputTypes): The detection mechanism to be used for the assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {"type": "last_modified_column", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
            - {"type": "high_watermark_column", "column_name": "id", "additional_filter": "id > 1000"} or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id', additional_filter='id > 1000')
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
        freshness_schedule_check_type (Optional[Union[str, models.FreshnessAssertionScheduleTypeClass]]): The freshness schedule check type to be applied to the assertion. Valid values are: "since_the_last_check", "cron".
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
        lookback_window (Optional[TimeWindowSizeInputTypes]): The lookback window to be applied to the assertion.

    Returns:
        FreshnessAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Creation path: without a urn there is no existing assertion to merge with.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        enabled_on_create = True if enabled is None else enabled
        return self._create_freshness_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=enabled_on_create,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
            freshness_schedule_check_type=freshness_schedule_check_type,
            lookback_window=lookback_window,
        )

    # Update path: constructing the input object validates the raw arguments.
    # created_by / created_at are placeholders here; the merge step replaces
    # them with the values of the existing assertion.
    validated_input = _FreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        schedule=schedule,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
    )

    # Merge with the stored assertion/monitor entities. The helper returns either
    # a merged input (assertion exists) or an already-created public assertion
    # (assertion did not exist).
    merge_outcome = self._retrieve_and_merge_freshness_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=now_utc,
        schedule=schedule,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
    )

    if isinstance(merge_outcome, _AssertionPublic):
        # The merge created the assertion; narrow to the concrete public type.
        assert isinstance(merge_outcome, FreshnessAssertion)
        return merge_outcome

    assertion_entity, monitor_entity = (
        merge_outcome.to_assertion_and_monitor_entities()
    )

    # Order matters: the assertion is written first, so a failing assertion upsert
    # raises before the monitor upsert is attempted.
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor
    # upsert fails (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    for entity in (assertion_entity, monitor_entity):
        self.client.entities.upsert(entity)

    return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
2811
+
2812
def sync_volume_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    criteria_type: Optional[Union[str, VolumeAssertionDefinitionType]] = None,
    criteria_change_type: Optional[
        Union[str, VolumeAssertionDefinitionChangeKind]
    ] = None,
    criteria_operator: Optional[Union[str, VolumeAssertionOperator]] = None,
    criteria_parameters: Optional[VolumeAssertionDefinitionParameters] = None,
) -> VolumeAssertion:
    """Create a volume assertion, or merge the given values into an existing one.

    Note:
        All arguments must be passed by keyword.

    This is an "upsert and merge": when the assertion does not exist it is created,
    otherwise it is updated. On update, a field is overwritten only when its input
    value is not None; a None input keeps the existing value. Fields that can be
    un-set (e.g. by passing an empty list or an empty string) are un-set when such
    a value is passed.

    Schedule behavior:
        - Create case: Uses default daily schedule ("0 0 * * *") or provided schedule
        - Update case: Uses existing schedule or provided schedule.

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
        enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved (a newly created assertion defaults to enabled).
        detection_mechanism (DetectionMechanismInputTypes): The detection mechanism to be used for the assertion. Information schema is recommended. Valid values are (additional_filter is optional):
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - {"type": "query", "additional_filter": "value > 1000"} or DetectionMechanism.QUERY(additional_filter='value > 1000')
            - "dataset_profile" or DetectionMechanism.DATASET_PROFILE
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
        criteria_type (Optional[Union[str, VolumeAssertionDefinitionType]]): Optional type of volume assertion. Must be either VolumeAssertionDefinitionType.ROW_COUNT_TOTAL or VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. If not provided, the existing definition from the backend will be preserved (for update operations). Required when creating a new assertion (when urn is None).
        criteria_change_type (Optional[Union[str, VolumeAssertionDefinitionChangeKind]]): Optional change type for row count change assertions. Required when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. Ignored when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_TOTAL. If not provided, existing value is preserved for updates.
        criteria_operator (Optional[Union[str, VolumeAssertionOperator]]): Optional comparison operator for the assertion. Must be a VolumeAssertionOperator value. If not provided, existing value is preserved for updates. Required when creating a new assertion.
        criteria_parameters (Optional[VolumeAssertionDefinitionParameters]): Optional parameters for the assertion. For single-value operators provide a single number. For BETWEEN operator, provide a tuple of two numbers (min_value, max_value). If not provided, existing value is preserved for updates. Required when creating a new assertion.

    Returns:
        VolumeAssertion: The created or updated assertion.

    Raises:
        SDKUsageError: If only part of the criteria (type, operator, parameters, and
            the change type for row-count-change assertions) is provided, or if no
            criteria are provided when creating a new assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Criteria are all-or-nothing: once any of type/operator/parameters is given,
    # all three are required, plus the change type for row-count-change assertions.
    core_criteria = (criteria_type, criteria_operator, criteria_parameters)
    any_criteria_given = any(value is not None for value in core_criteria)
    all_criteria_given = all(value is not None for value in core_criteria)
    change_type_missing = (
        criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
        and criteria_change_type is None
    )
    if any_criteria_given and (not all_criteria_given or change_type_missing):
        raise SDKUsageError(
            "When providing volume assertion criteria, all required parameters must be provided "
            "(criteria_type, criteria_operator, criteria_parameters must be provided, "
            "and criteria_change_type is required when criteria_type is 'row_count_change')"
        )

    # Invariant after validation: a provided criteria_type implies a complete set.
    assert criteria_type is None or (
        criteria_operator is not None
        and criteria_parameters is not None
        and (
            criteria_type != VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
            or criteria_change_type is not None
        )
    ), "criteria fields already validated"

    # Creation path: without a urn there is no existing assertion to merge with,
    # so the criteria are mandatory.
    if urn is None:
        if criteria_type is None:
            raise SDKUsageError(
                "Volume assertion criteria are required when creating a new assertion"
            )
        logger.info("URN is not set, creating a new assertion")
        # Type narrowing only; both were established by the validation above.
        assert criteria_operator is not None
        assert criteria_parameters is not None
        enabled_on_create = True if enabled is None else enabled
        return self._create_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=enabled_on_create,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
            criteria_type=criteria_type,
            criteria_change_type=criteria_change_type,
            criteria_operator=criteria_operator,
            criteria_parameters=criteria_parameters,
        )

    # Update path: build the definition used for input validation.
    # When no criteria were given, a throw-away placeholder definition is used only
    # so that _VolumeAssertionInput can be constructed; use_backend_definition
    # records that fact and is handed to the merge helper together with the
    # placeholder, so that the definition stored in the backend is used instead.
    use_backend_definition = criteria_type is None
    definition: dict[str, Any]
    if use_backend_definition:
        definition = {
            "type": VolumeAssertionDefinitionType.ROW_COUNT_TOTAL,
            "operator": VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO,
            "parameters": 0,  # Temporary placeholder
        }
    else:
        definition = {
            "type": criteria_type,
            "operator": criteria_operator,
            "parameters": criteria_parameters,
        }
        if criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE:
            definition["kind"] = criteria_change_type

    # Constructing the input object validates the raw arguments. created_by /
    # created_at are placeholders; the merge step replaces them with the values
    # of the existing assertion.
    # NOTE(review): unlike sync_freshness_assertion and sync_sql_assertion,
    # display_name is not passed to the input constructor here (only to the merge
    # helper below) - confirm this is intentional.
    validated_input = _VolumeAssertionInput(
        urn=urn,
        dataset_urn=dataset_urn,
        entity_client=self.client.entities,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        schedule=schedule,
        definition=definition,
    )

    # Merge with the stored assertion/monitor entities. The helper returns either
    # a merged input (assertion exists) or an already-created public assertion
    # (assertion did not exist).
    merge_outcome = self._retrieve_and_merge_native_volume_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        definition=definition,
        use_backend_definition=use_backend_definition,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=now_utc,
        schedule=schedule,
    )

    if isinstance(merge_outcome, _AssertionPublic):
        # The merge created the assertion; narrow to the concrete public type.
        assert isinstance(merge_outcome, VolumeAssertion)
        return merge_outcome

    assertion_entity, monitor_entity = (
        merge_outcome.to_assertion_and_monitor_entities()
    )

    # Order matters: the assertion is written first, so a failing assertion upsert
    # raises before the monitor upsert is attempted.
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor
    # upsert fails (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    for entity in (assertion_entity, monitor_entity):
        self.client.entities.upsert(entity)

    return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
3018
+
3019
def sync_sql_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    statement: str,
    criteria_type: Union[SqlAssertionType, str],
    criteria_change_type: Optional[Union[SqlAssertionChangeType, str]] = None,
    criteria_operator: Union[SqlAssertionOperator, str],
    criteria_parameters: Union[
        Union[float, int], tuple[Union[float, int], Union[float, int]]
    ],
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SqlAssertion:
    """Create a sql assertion, or merge the given values into an existing one.

    Note:
        All arguments must be passed by keyword.

    This is an "upsert and merge": when the assertion does not exist it is created,
    otherwise it is updated. On update, a field is overwritten only when its input
    value is not None; a None input keeps the existing value. Fields that can be
    un-set (e.g. by passing an empty list or an empty string) are un-set when such
    a value is passed.

    Schedule behavior:
        - Create case: Uses default daily schedule ("0 0 * * *") or provided schedule
        - Update case: Uses existing schedule or provided schedule.

    Args:
        dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
        urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
        display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
        enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved (a newly created assertion defaults to enabled).
        statement (str): The SQL statement to be used for the assertion.
        criteria_type (Union[SqlAssertionType, str]): The type of sql assertion. Valid values are: "METRIC", "METRIC_CHANGE".
        criteria_change_type (Optional[Union[SqlAssertionChangeType, str]]): The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are: "ABSOLUTE", "PERCENTAGE".
        criteria_operator (Union[SqlAssertionOperator, str]): The operator to be used for the assertion. Valid values are: "GREATER_THAN", "LESS_THAN", "GREATER_THAN_OR_EQUAL_TO", "LESS_THAN_OR_EQUAL_TO", "EQUAL_TO", "NOT_EQUAL_TO", "BETWEEN".
        criteria_parameters (Union[float, int, tuple[float, int]]): The parameters to be used for the assertion. This can be a single value or a tuple range. If the operator is "BETWEEN", the value is a tuple of two values, with format min, max. If the operator is not "BETWEEN", the value is a single value.
        incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
        tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
        updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
        schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.

    Returns:
        SqlAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Creation path: without a urn there is no existing assertion to merge with.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        enabled_on_create = True if enabled is None else enabled
        return self._create_sql_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=enabled_on_create,
            criteria_type=criteria_type,
            criteria_change_type=criteria_change_type,
            criteria_operator=criteria_operator,
            criteria_parameters=criteria_parameters,
            statement=statement,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    # Update path: bundle the individual criteria arguments, then construct the
    # input object, which validates the raw arguments. created_by / created_at
    # are placeholders; the merge step replaces them with the values of the
    # existing assertion.
    criteria = SqlAssertionCriteria(
        type=criteria_type,
        change_type=criteria_change_type,
        operator=criteria_operator,
        parameters=criteria_parameters,
    )
    validated_input = _SqlAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        schedule=schedule,
    )

    # Merge with the stored assertion/monitor entities. The helper returns either
    # a merged input (assertion exists) or an already-created public assertion
    # (assertion did not exist).
    merge_outcome = self._retrieve_and_merge_sql_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=now_utc,
        schedule=schedule,
    )

    if isinstance(merge_outcome, _AssertionPublic):
        # The merge created the assertion; narrow to the concrete public type.
        assert isinstance(merge_outcome, SqlAssertion)
        return merge_outcome

    assertion_entity, monitor_entity = (
        merge_outcome.to_assertion_and_monitor_entities()
    )

    # Order matters: the assertion is written first, so a failing assertion upsert
    # raises before the monitor upsert is attempted.
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor
    # upsert fails (once delete is implemented
    # https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    for entity in (assertion_entity, monitor_entity):
        self.client.entities.upsert(entity)

    return SqlAssertion._from_entities(assertion_entity, monitor_entity)
3161
+
3162
+
3163
def _merge_field(
    input_field_value: Any,
    input_field_name: str,
    validated_assertion_input: _AssertionInput,
    validated_existing_assertion: _AssertionPublic,
    existing_entity_value: Optional[Any] = None,  # TODO: Can we do better than Any?
) -> Any:
    """Pick the effective value for one field when merging input with existing state.

    Precedence, highest first:
        1. ``input_field_value`` whenever it is not None. Falsy values such as an
           empty list or an empty string count as "set", which lets callers use
           them to un-set a field.
        2. ``existing_entity_value`` when it is not None.
        3. The ``input_field_name`` attribute of ``validated_existing_assertion``
           when it is not None.
        4. The ``input_field_name`` attribute of ``validated_assertion_input``
           (i.e. the default carried by the validated input).

    Args:
        input_field_value: The value of the field as passed by the caller.
        input_field_name: The attribute name of the field, looked up on both
            validated objects.
        validated_assertion_input: The *validated* input to the function.
        validated_existing_assertion: The *validated* existing assertion from the
            DataHub instance.
        existing_entity_value: The value of the field taken directly from the
            existing entity in the DataHub instance.

    Returns:
        The merged value of the field.
    """
    # An explicit input always wins, even when it is falsy.
    if input_field_value is not None:
        return input_field_value

    # Next preference: the raw value stored on the existing entity.
    if existing_entity_value is not None:
        return existing_entity_value

    # Then the validated existing assertion, falling back to the validated input.
    validated_existing_value = getattr(validated_existing_assertion, input_field_name)
    if validated_existing_value is not None:
        return validated_existing_value
    return getattr(validated_assertion_input, input_field_name)
3201
+
3202
+
3203
def _print_experimental_warning() -> None:
    """Print a notice to stdout that the assertions client is experimental."""
    notice = "Warning: The assertions client is experimental and under heavy development. Expect breaking changes."
    print(notice)