acryl-datahub-cloud 0.3.11.1rc7__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (94) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  3. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  9. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  10. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +29 -13
  11. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  12. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  13. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2011 -1955
  14. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  17. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  18. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  19. acryl_datahub_cloud/metadata/schema.avsc +25413 -25425
  20. acryl_datahub_cloud/metadata/schema_classes.py +1316 -791
  21. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  22. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  23. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  24. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +223 -202
  25. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +36 -7
  26. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  27. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +40 -8
  28. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  29. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  30. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  31. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  32. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  33. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  34. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  35. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  36. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  37. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  39. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  40. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  41. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  42. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  43. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  44. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  45. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  46. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  47. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  48. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  49. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  51. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  52. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  53. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  54. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  55. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  56. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  57. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +39 -10
  58. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  60. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  61. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  62. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  63. acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
  64. acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
  65. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  66. acryl_datahub_cloud/notifications/__init__.py +0 -0
  67. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  68. acryl_datahub_cloud/sdk/__init__.py +39 -0
  69. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  70. acryl_datahub_cloud/sdk/assertion/assertion_base.py +1467 -0
  71. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  72. acryl_datahub_cloud/sdk/assertion/types.py +20 -0
  73. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  74. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1648 -0
  75. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +258 -0
  76. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +914 -0
  77. acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +272 -0
  78. acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +648 -0
  79. acryl_datahub_cloud/sdk/assertions_client.py +3206 -0
  80. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  81. acryl_datahub_cloud/sdk/entities/assertion.py +432 -0
  82. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  83. acryl_datahub_cloud/sdk/entities/subscription.py +84 -0
  84. acryl_datahub_cloud/sdk/errors.py +34 -0
  85. acryl_datahub_cloud/sdk/resolver_client.py +39 -0
  86. acryl_datahub_cloud/sdk/subscription_client.py +714 -0
  87. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/METADATA +47 -42
  88. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/RECORD +91 -58
  89. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/WHEEL +1 -1
  90. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/entry_points.txt +1 -0
  91. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
  92. acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
  93. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
  94. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1467 @@
1
+ """
2
+ This module contains the classes that represent assertions. These
3
+ classes are used to provide a user-friendly interface for creating and
4
+ managing assertions.
5
+
6
+ The actual Assertion Entity classes are defined in `metadata-ingestion/src/datahub/sdk`.
7
+ """
8
+
9
+ import logging
10
+ from abc import ABC, abstractmethod
11
+ from datetime import datetime
12
+ from enum import Enum
13
+ from typing import Optional, Union
14
+
15
+ from typing_extensions import Self
16
+
17
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
18
+ ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
19
+ DEFAULT_DETECTION_MECHANISM,
20
+ DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
21
+ DEFAULT_SCHEDULE,
22
+ DEFAULT_SENSITIVITY,
23
+ AssertionIncidentBehavior,
24
+ DetectionMechanism,
25
+ ExclusionWindowTypes,
26
+ FixedRangeExclusionWindow,
27
+ InferenceSensitivity,
28
+ TimeWindowSizeInputTypes,
29
+ _DetectionMechanismTypes,
30
+ )
31
+ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
32
+ MetricInputType,
33
+ OperatorInputType,
34
+ RangeInputType,
35
+ RangeTypeInputType,
36
+ ValueInputType,
37
+ ValueTypeInputType,
38
+ )
39
+ from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
40
+ SqlAssertionChangeType,
41
+ SqlAssertionCriteria,
42
+ SqlAssertionOperator,
43
+ SqlAssertionType,
44
+ )
45
+ from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
46
+ VolumeAssertionDefinition,
47
+ _VolumeAssertionDefinitionTypes,
48
+ )
49
+ from acryl_datahub_cloud.sdk.entities.assertion import Assertion
50
+ from acryl_datahub_cloud.sdk.entities.monitor import (
51
+ Monitor,
52
+ _get_nested_field_for_entity_with_default,
53
+ )
54
+ from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
55
+ from datahub.emitter.mce_builder import parse_ts_millis
56
+ from datahub.metadata import schema_classes as models
57
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
58
+
59
+ logger = logging.getLogger(__name__)
60
+
61
+
62
class AssertionMode(Enum):
    """
    User-facing mode of an assertion: whether it is active or inactive.
    """

    # The values mirror MonitorStatus; the name "AssertionMode" is used in this
    # user-facing interface so that all naming stays assertion-related.
    ACTIVE = "ACTIVE"
    INACTIVE = "INACTIVE"
    # A "PASSIVE" member is deliberately left out: it is not supported in the
    # user-facing interface.
72
+
73
+
74
class _HasSchedule:
    """
    Mixin that stores an assertion's cron schedule and exposes it read-only.
    """

    def __init__(self, schedule: models.CronScheduleClass) -> None:
        self._schedule = schedule

    @property
    def schedule(self) -> models.CronScheduleClass:
        return self._schedule

    @staticmethod
    def _get_schedule(
        monitor: Monitor, default: models.CronScheduleClass = DEFAULT_SCHEDULE
    ) -> models.CronScheduleClass:
        """
        Read the schedule of the monitor's first assertion evaluation spec.

        Args:
            monitor: The monitor entity to read from.
            default: Returned when the monitor has no evaluation specs, or when
                the first spec has no schedule.

        Returns:
            The schedule of the first evaluation spec, or `default`.
        """
        evaluation_specs = _get_nested_field_for_entity_with_default(
            monitor,
            "info.assertionMonitor.assertions",
            [],
        )
        if len(evaluation_specs) == 0:
            return default

        # Only the first evaluation spec is consulted.
        configured = evaluation_specs[0].schedule
        return configured if configured is not None else default
103
+
104
+
105
class _HasSmartFunctionality:
    """
    Mixin class that provides smart functionality for assertions.

    It stores the sensitivity, exclusion windows and training-data lookback
    window, and offers static helpers that read those values from a Monitor.
    """

    def __init__(
        self,
        *,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
    ) -> None:
        """
        Initialize the smart functionality mixin.

        Args:
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
        """
        self._sensitivity = sensitivity
        self._exclusion_windows = exclusion_windows
        self._training_data_lookback_days = training_data_lookback_days

    @property
    def sensitivity(self) -> InferenceSensitivity:
        return self._sensitivity

    @property
    def exclusion_windows(self) -> list[ExclusionWindowTypes]:
        return self._exclusion_windows

    @property
    def training_data_lookback_days(self) -> int:
        return self._training_data_lookback_days

    @staticmethod
    def _get_sensitivity(monitor: Monitor) -> InferenceSensitivity:
        """
        Read the sensitivity level from the monitor's adjustment settings.

        Falls back to DEFAULT_SENSITIVITY when the nested field is not
        available, then converts the value with `InferenceSensitivity.parse`.
        """
        # 1. Check if the monitor has a sensitivity field
        raw_sensitivity = _get_nested_field_for_entity_with_default(
            monitor,
            "info.assertionMonitor.settings.adjustmentSettings.sensitivity.level",
            DEFAULT_SENSITIVITY,
        )

        # 2. Convert the raw sensitivity to the SDK sensitivity enum (1-3: LOW, 4-6: MEDIUM, 7-10: HIGH)
        return InferenceSensitivity.parse(raw_sensitivity)

    @staticmethod
    def _get_exclusion_windows(monitor: Monitor) -> list[ExclusionWindowTypes]:
        """
        Convert the monitor's raw exclusion windows into SDK exclusion windows.

        Fixed-range windows without a `fixedRange` payload are skipped with a
        warning.

        Raises:
            SDKNotYetSupportedError: If a window has a type other than FIXED_RANGE.
        """
        # 1. Check if the monitor has an exclusion windows field
        raw_windows = monitor.exclusion_windows or []

        # 2. Convert the raw exclusion windows to the SDK exclusion windows
        exclusion_windows: list[ExclusionWindowTypes] = []
        for raw_window in raw_windows:
            if raw_window.type != models.AssertionExclusionWindowTypeClass.FIXED_RANGE:
                raise SDKNotYetSupportedError(
                    f"AssertionExclusionWindowType {raw_window.type}"
                )
            if raw_window.fixedRange is None:
                logger.warning(
                    f"Monitor {monitor.urn} has a fixed range exclusion window with no fixed range, skipping"
                )
                continue
            exclusion_windows.append(
                FixedRangeExclusionWindow(
                    start=parse_ts_millis(raw_window.fixedRange.startTimeMillis),
                    end=parse_ts_millis(raw_window.fixedRange.endTimeMillis),
                )
            )
        return exclusion_windows

    @staticmethod
    def _get_training_data_lookback_days(monitor: Monitor) -> int:
        """
        Return the monitor's training-data lookback window in days.

        Falls back to ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS
        only when the monitor reports no value at all.
        """
        retrieved = monitor.training_data_lookback_days
        if (
            retrieved is None
        ):  # Explicitly check for None since retrieved can be 0 which is falsy
            return ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS
        # NOTE(review): presumably present to narrow the type for static
        # checkers; it is skipped when Python runs with -O.
        assert isinstance(retrieved, int)
        return retrieved
191
+
192
+
193
class _HasColumnMetricFunctionality:
    """
    Mixin that carries the column-metric settings of an assertion (column,
    metric, operator and the value / range criteria) and knows how to read
    them back from an Assertion entity.
    """

    def __init__(
        self,
        column_name: str,
        metric_type: MetricInputType,
        operator: OperatorInputType,
        value: Optional[ValueInputType] = None,
        value_type: Optional[ValueTypeInputType] = None,
        range: Optional[RangeInputType] = None,
        range_type: Optional[RangeTypeInputType] = None,
    ):
        # Identification of what is measured.
        self._column_name = column_name
        self._metric_type = metric_type
        self._operator = operator
        # Criteria: single value and/or range, each with its type.
        self._value = value
        self._value_type = value_type
        self._range = range
        self._range_type = range_type

    @property
    def column_name(self) -> str:
        return self._column_name

    @property
    def metric_type(self) -> MetricInputType:
        return self._metric_type

    @property
    def operator(self) -> OperatorInputType:
        return self._operator

    @property
    def value(self) -> Optional[ValueInputType]:
        return self._value

    @property
    def value_type(self) -> Optional[ValueTypeInputType]:
        return self._value_type

    @property
    def range(self) -> Optional[RangeInputType]:
        return self._range

    @property
    def range_type(self) -> Optional[RangeTypeInputType]:
        return self._range_type

    @staticmethod
    def _get_column_name(assertion: Assertion) -> str:
        """Return the assertion's column path; raise SDKUsageError if absent."""
        found = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.field.path",
            default=None,
        )
        if found is not None:
            return found
        raise SDKUsageError(
            f"Column name is required for column metric assertions. Assertion {assertion.urn} does not have a column name"
        )

    @staticmethod
    def _get_metric_type(assertion: Assertion) -> MetricInputType:
        """Return the assertion's metric; raise SDKUsageError if absent."""
        found = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.metric",
            default=None,
        )
        if found is not None:
            return found
        raise SDKUsageError(
            f"Metric type is required for column metric assertions. Assertion {assertion.urn} does not have a metric type"
        )

    @staticmethod
    def _get_operator(assertion: Assertion) -> OperatorInputType:
        """Return the assertion's operator; raise SDKUsageError if absent."""
        found = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.operator",
            default=None,
        )
        if found is not None:
            return found
        raise SDKUsageError(
            f"Operator is required for column metric assertions. Assertion {assertion.urn} does not have an operator"
        )

    @staticmethod
    def _get_value(assertion: Assertion) -> Optional[ValueInputType]:
        """Return the single-value parameter's value, or None if not set."""
        return _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.value.value",
            default=None,
        )

    @staticmethod
    def _get_value_type(assertion: Assertion) -> Optional[ValueTypeInputType]:
        """Return the single-value parameter's type, or None if not set."""
        return _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.value.type",
            default=None,
        )

    @staticmethod
    def _get_range(assertion: Assertion) -> Optional[RangeInputType]:
        """
        Return `(min, max)` from the assertion's range parameters.

        Returns None when neither bound is present. A bound that exposes a
        `value` attribute is replaced by that attribute.
        """

        def _unwrap(bound):
            # Parameter objects carry the actual bound in `.value`.
            if bound is not None and hasattr(bound, "value"):
                return bound.value
            return bound

        lower = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.minValue",
            default=None,
        )
        upper = _get_nested_field_for_entity_with_default(
            assertion,
            field_path="info.fieldMetricAssertion.parameters.maxValue",
            default=None,
        )

        if lower is None and upper is None:
            return None

        return (_unwrap(lower), _unwrap(upper))

    @staticmethod
    def _get_range_type(assertion: Assertion) -> Optional[RangeTypeInputType]:
        """
        Return `(min_type, max_type)` from the assertion's range parameters.

        Returns None, rather than a tuple of Nones, when neither type is set.
        """
        bound_types = (
            _get_nested_field_for_entity_with_default(
                assertion,
                field_path="info.fieldMetricAssertion.parameters.minValue.type",
                default=None,
            ),
            _get_nested_field_for_entity_with_default(
                assertion,
                field_path="info.fieldMetricAssertion.parameters.maxValue.type",
                default=None,
            ),
        )
        if all(bound_type is None for bound_type in bound_types):
            return None
        return bound_types
344
+
345
+
346
class _AssertionPublic(ABC):
    """
    Abstract base class that represents a public facing assertion and contains the common properties of all assertions.

    Besides the read-only properties, it provides static helpers that extract
    those properties from the underlying Assertion and Monitor entities.
    """

    # TODO: have the individual classes self-declare this
    _SUPPORTED_WITH_FILTER_ASSERTION_TYPES = (
        models.FreshnessAssertionInfoClass,
        models.VolumeAssertionInfoClass,
        models.FieldAssertionInfoClass,
    )

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        created_by: Optional[CorpUserUrn] = None,
        created_at: Optional[datetime] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize the public facing assertion class.

        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            tags: The tags of the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        self._urn = urn
        self._dataset_urn = dataset_urn
        self._display_name = display_name
        self._mode = mode
        self._incident_behavior = incident_behavior
        self._detection_mechanism = detection_mechanism
        self._created_by = created_by
        self._created_at = created_at
        self._updated_by = updated_by
        self._updated_at = updated_at
        self._tags = tags

    @property
    def urn(self) -> AssertionUrn:
        return self._urn

    @property
    def dataset_urn(self) -> DatasetUrn:
        return self._dataset_urn

    @property
    def display_name(self) -> str:
        return self._display_name

    @property
    def mode(self) -> AssertionMode:
        return self._mode

    @property
    def incident_behavior(self) -> list[AssertionIncidentBehavior]:
        return self._incident_behavior

    @property
    def detection_mechanism(self) -> Optional[_DetectionMechanismTypes]:
        return self._detection_mechanism

    @property
    def created_by(self) -> Optional[CorpUserUrn]:
        return self._created_by

    @property
    def created_at(self) -> Optional[datetime]:
        return self._created_at

    @property
    def updated_by(self) -> Optional[CorpUserUrn]:
        return self._updated_by

    @property
    def updated_at(self) -> Optional[datetime]:
        return self._updated_at

    @property
    def tags(self) -> list[TagUrn]:
        return self._tags

    @staticmethod
    def _get_incident_behavior(assertion: Assertion) -> list[AssertionIncidentBehavior]:
        """
        Map the assertion's on-failure and on-success actions to incident behaviors.

        Actions of any type other than RAISE_INCIDENT / RESOLVE_INCIDENT are ignored.
        """
        incident_behaviors = []
        for action in assertion.on_failure + assertion.on_success:
            if action.type == models.AssertionActionTypeClass.RAISE_INCIDENT:
                incident_behaviors.append(AssertionIncidentBehavior.RAISE_ON_FAIL)
            elif action.type == models.AssertionActionTypeClass.RESOLVE_INCIDENT:
                incident_behaviors.append(AssertionIncidentBehavior.RESOLVE_ON_PASS)

        return incident_behaviors

    @staticmethod
    @abstractmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism from the monitor and assertion.

        This method should be implemented by each assertion class to handle
        its specific detection mechanism logic.

        Args:
            assertion: The assertion entity
            monitor: The monitor entity
            default: Default detection mechanism to return if none is found

        Returns:
            The detection mechanism or default if none is found
        """
        pass

    @staticmethod
    def _has_valid_monitor_info(
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> bool:
        """Check if monitor has valid info and assertion monitor.

        Args:
            monitor: The monitor entity to inspect.
            default: The detection mechanism named in the warning that is logged
                when a required field is missing.

        Returns:
            True if `info`, `info.assertionMonitor` and a non-empty
            `info.assertionMonitor.assertions` are all present, False otherwise.
        """

        def _warn_and_return_false(field_name: str) -> bool:
            # Reuse the shared helper so the warning text stays in one place.
            _AssertionPublic._warn_and_return_default_detection_mechanism(
                monitor, field_name, default
            )
            return False

        if monitor.info is None:
            return _warn_and_return_false("info")
        if monitor.info.assertionMonitor is None:
            return _warn_and_return_false("assertionMonitor")
        if (
            monitor.info.assertionMonitor.assertions is None
            or len(monitor.info.assertionMonitor.assertions) == 0
        ):
            return _warn_and_return_false("assertionMonitor.assertions")

        return True

    @staticmethod
    def _get_assertion_parameters(
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[models.AssertionEvaluationParametersClass]:
        """Get the assertion parameters from the monitor.

        Callers are expected to have verified the monitor with
        `_has_valid_monitor_info` first; only the first assertion evaluation
        spec is consulted.

        Args:
            monitor: The monitor entity to read from.
            default: Only used in the warning logged when parameters are missing.

        Returns:
            The parameters of the first evaluation spec, or None if it has none.
        """
        # We know these are not None from _has_valid_monitor_info check
        assert (
            monitor is not None
            and monitor.info is not None
            and monitor.info.assertionMonitor is not None
        )
        assertion_monitor = monitor.info.assertionMonitor
        assert (
            assertion_monitor is not None and assertion_monitor.assertions is not None
        )
        assertions = assertion_monitor.assertions

        if assertions[0].parameters is None:
            logger.warning(
                f"Monitor {monitor.urn} does not have a assertionMonitor.assertions[0].parameters, defaulting detection mechanism to {default}"
            )
            return None
        return assertions[0].parameters

    @staticmethod
    def _get_created_by(assertion: Assertion) -> Optional[CorpUserUrn]:
        """Return the actor of the assertion source's `created` stamp, or None (with a warning) if unavailable."""
        if assertion.source is None:
            logger.warning(f"Assertion {assertion.urn} does not have a source")
            return None
        if isinstance(assertion.source, models.AssertionSourceClass):
            if assertion.source.created is None:
                logger.warning(
                    f"Assertion {assertion.urn} does not have a created by in the source"
                )
                return None
            return CorpUserUrn.from_string(assertion.source.created.actor)
        elif isinstance(assertion.source, models.AssertionSourceTypeClass):
            logger.warning(
                f"Assertion {assertion.urn} has a source type with no created by"
            )
            return None
        return None

    @staticmethod
    def _get_created_at(assertion: Assertion) -> Optional[datetime]:
        """Return the time of the assertion source's `created` stamp, or None (with a warning) if unavailable."""
        if assertion.source is None:
            logger.warning(f"Assertion {assertion.urn} does not have a source")
            return None
        if isinstance(assertion.source, models.AssertionSourceClass):
            if assertion.source.created is None:
                logger.warning(
                    f"Assertion {assertion.urn} does not have a created by in the source"
                )
                return None
            return parse_ts_millis(assertion.source.created.time)
        elif isinstance(assertion.source, models.AssertionSourceTypeClass):
            logger.warning(
                f"Assertion {assertion.urn} has a source type with no created by"
            )
            return None
        return None

    @staticmethod
    def _get_updated_by(assertion: Assertion) -> Optional[CorpUserUrn]:
        """Return the actor of the assertion's `last_updated` stamp, or None if it is missing."""
        if assertion.last_updated is None:
            logger.warning(f"Assertion {assertion.urn} does not have a last updated")
            return None
        return CorpUserUrn.from_string(assertion.last_updated.actor)

    @staticmethod
    def _get_updated_at(assertion: Assertion) -> Optional[datetime]:
        """Return the time of the assertion's `last_updated` stamp, or None if it is missing."""
        if assertion.last_updated is None:
            logger.warning(f"Assertion {assertion.urn} does not have a last updated")
            return None
        return parse_ts_millis(assertion.last_updated.time)

    @staticmethod
    def _get_tags(assertion: Assertion) -> list[TagUrn]:
        """Return the assertion's tags as TagUrns (empty list when there are none)."""
        return [TagUrn.from_string(t.tag) for t in assertion.tags or []]

    @staticmethod
    def _get_mode(monitor: Monitor) -> AssertionMode:
        """Return the monitor's status mode as an AssertionMode.

        Defaults to INACTIVE (with a warning) when the monitor has no info.

        Raises:
            ValueError: If the monitor's mode is not a member of AssertionMode.
        """
        if monitor.info is None:
            logger.warning(
                f"Monitor {monitor.urn} does not have a info, defaulting status to INACTIVE"
            )
            return AssertionMode.INACTIVE
        return AssertionMode(monitor.info.status.mode)

    @classmethod
    @abstractmethod
    def _from_entities(
        cls,
        assertion: Assertion,
        monitor: Monitor,
    ) -> Self:
        """
        Create an assertion from the assertion and monitor entities.

        Note: This is a private method since it is intended to be called internally by the client.
        """
        pass

    @staticmethod
    def _get_additional_filter(assertion: Assertion) -> Optional[str]:
        """Get the additional filter SQL from the assertion.

        Returns:
            The filter SQL, or None when the assertion has no info, its info is
            not one of the filter-capable types, or no filter is set.

        Raises:
            SDKNotYetSupportedError: If the filter type is not SQL.
        """
        if assertion.info is None:
            logger.warning(
                f"Assertion {assertion.urn} does not have an info, defaulting additional filter to None"
            )
            return None
        if (
            not isinstance(
                assertion.info,
                _AssertionPublic._SUPPORTED_WITH_FILTER_ASSERTION_TYPES,
            )
            or assertion.info.filter is None
        ):
            logger.warning(
                f"Assertion {assertion.urn} does not have a filter, defaulting additional filter to None"
            )
            return None
        if assertion.info.filter.type != models.DatasetFilterTypeClass.SQL:
            raise SDKNotYetSupportedError(
                f"DatasetFilterType {assertion.info.filter.type}"
            )
        return assertion.info.filter.sql

    @staticmethod
    def _get_field_value_detection_mechanism(
        assertion: Assertion,
        parameters: models.AssertionEvaluationParametersClass,
    ) -> _DetectionMechanismTypes:
        """Get the detection mechanism for field value based freshness.

        Returns DEFAULT_DETECTION_MECHANISM (with a warning) when the freshness
        parameters carry no field or no field kind.

        Raises:
            SDKNotYetSupportedError: If the field kind is neither LAST_MODIFIED
                nor HIGH_WATERMARK.
        """
        # We know datasetFreshnessParameters is not None from _get_freshness_detection_mechanism check
        assert parameters.datasetFreshnessParameters is not None
        field = parameters.datasetFreshnessParameters.field

        if field is None or field.kind is None:
            logger.warning(
                f"Monitor does not have valid field info, defaulting detection mechanism to {DEFAULT_DETECTION_MECHANISM}"
            )
            return DEFAULT_DETECTION_MECHANISM

        column_name = field.path
        additional_filter = _AssertionPublic._get_additional_filter(assertion)

        if field.kind == models.FreshnessFieldKindClass.LAST_MODIFIED:
            return DetectionMechanism.LAST_MODIFIED_COLUMN(
                column_name=column_name, additional_filter=additional_filter
            )
        elif field.kind == models.FreshnessFieldKindClass.HIGH_WATERMARK:
            return DetectionMechanism.HIGH_WATERMARK_COLUMN(
                column_name=column_name, additional_filter=additional_filter
            )
        else:
            raise SDKNotYetSupportedError(f"FreshnessFieldKind {field.kind}")

    @staticmethod
    def _warn_and_return_default_detection_mechanism(
        monitor: Monitor,
        field_name: str,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Helper method to log a warning and return default detection mechanism."""
        logger.warning(
            f"Monitor {monitor.urn} does not have an `{field_name}` field, defaulting detection mechanism to {default}"
        )
        return default

    @staticmethod
    def _check_valid_monitor_info(
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[models.AssertionEvaluationParametersClass]:
        """Check if monitor has valid info and get assertion parameters.

        Args:
            monitor: The monitor entity to inspect.
            default: The detection mechanism named in any warning that is logged.

        Returns:
            The assertion parameters if monitor info is valid, None otherwise.
        """
        # Forward `default` so the warnings name the caller's fallback rather
        # than always naming DEFAULT_DETECTION_MECHANISM.
        if not _AssertionPublic._has_valid_monitor_info(monitor, default):
            return None

        return _AssertionPublic._get_assertion_parameters(monitor, default)

    @staticmethod
    def _get_validated_detection_context(
        monitor: Monitor,
        assertion: Assertion,
        expected_parameters_type: str,
        expected_info_class: type,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[models.AssertionEvaluationParametersClass]:
        """
        Validate and extract the detection context (parameters) for detection mechanism logic.
        Returns the parameters if all checks pass, otherwise None.

        Args:
            monitor: The monitor entity that holds the evaluation parameters.
            assertion: The assertion entity whose info type is checked.
            expected_parameters_type: The required value of `parameters.type`.
            expected_info_class: The class `assertion.info` must be an instance of.
            default: The detection mechanism named in any warning that is logged.
        """
        parameters = _AssertionPublic._check_valid_monitor_info(monitor, default)
        if parameters is None:
            return None
        if parameters.type != expected_parameters_type:
            logger.warning(
                f"Expected {expected_parameters_type} parameters type, got {parameters.type}, defaulting detection mechanism to {default}"
            )
            return None
        if assertion.info is None:
            # It is the assertion, not the monitor, that lacks `info` here.
            logger.warning(
                f"Assertion {assertion.urn} does not have an `info` field, defaulting detection mechanism to {default}"
            )
            return None
        if not isinstance(assertion.info, expected_info_class):
            logger.warning(
                f"Expected {expected_info_class.__name__}, got {type(assertion.info).__name__}, defaulting detection mechanism to {default}"
            )
            return None
        return parameters
721
+
722
+
723
class SmartFreshnessAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
    """
    A class that represents a smart freshness assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a smart freshness assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            tags: The tags applied to the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Initialize the mixins first
        _HasSchedule.__init__(self, schedule=schedule)
        _HasSmartFunctionality.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )
        # Then initialize the parent class
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a smart freshness assertion from the assertion and monitor entities.

        Note: This is a private method since it is intended to be called internally by the client.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            # A missing description is surfaced as an empty display name.
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            sensitivity=cls._get_sensitivity(monitor),
            exclusion_windows=cls._get_exclusion_windows(monitor),
            training_data_lookback_days=cls._get_training_data_lookback_days(monitor),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for freshness assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned when the monitor / assertion fail validation or
                the monitor has no dataset freshness parameters.

        Returns:
            The detection mechanism matching the monitor's freshness source
            type, or ``default`` when it cannot be determined.

        Raises:
            SDKNotYetSupportedError: If the source type is FILE_METADATA or
                any other value not handled here.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
            models.FreshnessAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetFreshnessParameters is None:
            # Report the value that is actually returned, which may differ
            # from DEFAULT_DETECTION_MECHANISM when the caller overrides it.
            logger.warning(
                f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
            )
            return default
        source_type = parameters.datasetFreshnessParameters.sourceType
        if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
            return DetectionMechanism.AUDIT_LOG
        elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
            return _AssertionPublic._get_field_value_detection_mechanism(
                assertion, parameters
            )
        elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
            return DetectionMechanism.DATAHUB_OPERATION
        elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
            raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
        else:
            raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")
856
+
857
+
858
class SmartVolumeAssertion(_HasSchedule, _HasSmartFunctionality, _AssertionPublic):
    """
    A class that represents a smart volume assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a smart volume assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            sensitivity: The sensitivity of the assertion (low, medium, high).
            exclusion_windows: The exclusion windows of the assertion.
            training_data_lookback_days: The max number of days of data to use for training the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            tags: The tags applied to the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Initialize the mixins first
        _HasSchedule.__init__(self, schedule=schedule)
        _HasSmartFunctionality.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )
        # Then initialize the parent class
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a smart volume assertion from the assertion and monitor entities.

        Note: This is a private method since it is intended to be called internally by the client.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            # A missing description is surfaced as an empty display name.
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            sensitivity=cls._get_sensitivity(monitor),
            exclusion_windows=cls._get_exclusion_windows(monitor),
            training_data_lookback_days=cls._get_training_data_lookback_days(monitor),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for volume assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned when the monitor / assertion fail validation.
                When the monitor has no dataset volume parameters, ``default``
                is returned unless it is None, in which case
                ``DEFAULT_DETECTION_MECHANISM`` is returned instead.

        Returns:
            The detection mechanism matching the monitor's volume source type,
            or the fallback described above.

        Raises:
            SDKNotYetSupportedError: If the source type is not handled here.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            models.VolumeAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetVolumeParameters is None:
            # Resolve the fallback before logging so the warning names the
            # value that is actually returned.
            fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
            logger.warning(
                f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {fallback}"
            )
            return fallback
        source_type = parameters.datasetVolumeParameters.sourceType
        if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
            additional_filter = _AssertionPublic._get_additional_filter(assertion)
            return DetectionMechanism.QUERY(additional_filter=additional_filter)
        elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.DATASET_PROFILE
        else:
            raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")
989
+
990
+
991
class VolumeAssertion(_HasSchedule, _AssertionPublic):
    """
    A class that represents a volume assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        definition: _VolumeAssertionDefinitionTypes,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a volume assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            definition: The volume assertion definition (RowCountTotal or RowCountChange).
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        _HasSchedule.__init__(self, schedule=schedule)
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
        self._definition = definition

    @property
    def definition(self) -> _VolumeAssertionDefinitionTypes:
        """The volume assertion definition supplied at construction time."""
        return self._definition

    @staticmethod
    def _get_volume_definition(
        assertion: Assertion,
    ) -> _VolumeAssertionDefinitionTypes:
        """Get volume assertion definition from a DataHub assertion entity."""
        return VolumeAssertionDefinition.from_assertion(assertion)

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for volume assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned when the monitor / assertion fail validation.
                When the monitor has no dataset volume parameters, ``default``
                is returned unless it is None, in which case
                ``DEFAULT_DETECTION_MECHANISM`` is returned instead.

        Returns:
            The detection mechanism matching the monitor's volume source type,
            or the fallback described above.

        Raises:
            SDKNotYetSupportedError: If the source type is not handled here.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            models.VolumeAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetVolumeParameters is None:
            # Resolve the fallback before logging so the warning names the
            # value that is actually returned.
            fallback = DEFAULT_DETECTION_MECHANISM if default is None else default
            logger.warning(
                f"Monitor does not have datasetVolumeParameters, defaulting detection mechanism to {fallback}"
            )
            return fallback
        source_type = parameters.datasetVolumeParameters.sourceType
        if source_type == models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetVolumeSourceTypeClass.QUERY:
            additional_filter = _AssertionPublic._get_additional_filter(assertion)
            return DetectionMechanism.QUERY(additional_filter=additional_filter)
        elif source_type == models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.DATASET_PROFILE
        else:
            raise SDKNotYetSupportedError(f"DatasetVolumeSourceType {source_type}")

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a volume assertion from the assertion and monitor entities.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            # A missing description is surfaced as an empty display name.
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            definition=cls._get_volume_definition(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )
1118
+
1119
+
1120
class FreshnessAssertion(_HasSchedule, _AssertionPublic):
    """
    A class that represents a freshness assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass,
        freshness_schedule_check_type: Union[
            str, models.FreshnessAssertionScheduleTypeClass
        ],
        lookback_window: Optional[TimeWindowSizeInputTypes],
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a freshness assertion.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            schedule: The schedule of the assertion.
            freshness_schedule_check_type: The type of freshness schedule check to be used for the assertion.
            lookback_window: The lookback window to be used for the assertion.
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            detection_mechanism: The detection mechanism of the assertion.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        _HasSchedule.__init__(self, schedule=schedule)
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
            tags=tags,
        )
        self._freshness_schedule_check_type = freshness_schedule_check_type
        self._lookback_window = lookback_window

    @property
    def freshness_schedule_check_type(
        self,
    ) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
        """The freshness schedule check type supplied at construction time."""
        return self._freshness_schedule_check_type

    @property
    def lookback_window(self) -> Optional[TimeWindowSizeInputTypes]:
        """The lookback window supplied at construction time (may be None)."""
        return self._lookback_window

    @staticmethod
    def _get_freshness_schedule_check_type(
        assertion: Assertion,
    ) -> Union[str, models.FreshnessAssertionScheduleTypeClass]:
        """Return the schedule type stored on the assertion's freshness info.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, its info is
                not a freshness assertion info, or the info has no schedule.
        """
        if assertion.info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
            )
        if isinstance(assertion.info, models.FreshnessAssertionInfoClass):
            if assertion.info.schedule is None:
                raise SDKNotYetSupportedError(
                    f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
                )
            return assertion.info.schedule.type
        else:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a freshness assertion"
            )

    @staticmethod
    def _get_lookback_window(
        assertion: Assertion,
    ) -> Optional[models.FixedIntervalScheduleClass]:
        """Return the fixed interval stored on the assertion's freshness schedule.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, its info is
                not a freshness assertion info, or the info has no schedule.
        """
        if assertion.info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a freshness assertion info, which is not supported"
            )
        if isinstance(assertion.info, models.FreshnessAssertionInfoClass):
            if assertion.info.schedule is None:
                raise SDKNotYetSupportedError(
                    f"Traditional freshness assertion {assertion.urn} does not have a schedule, which is not supported"
                )
            return assertion.info.schedule.fixedInterval
        else:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a freshness assertion"
            )

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for freshness assertions.

        Args:
            assertion: The assertion entity.
            monitor: The monitor entity holding the evaluation parameters.
            default: Returned when the monitor / assertion fail validation or
                the monitor has no dataset freshness parameters.

        Returns:
            The detection mechanism matching the monitor's freshness source
            type, or ``default`` when it cannot be determined.

        Raises:
            SDKNotYetSupportedError: If the source type is FILE_METADATA or
                any other value not handled here.
        """
        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
            models.FreshnessAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default
        if parameters.datasetFreshnessParameters is None:
            # Report the value that is actually returned, which may differ
            # from DEFAULT_DETECTION_MECHANISM when the caller overrides it.
            logger.warning(
                f"Monitor does not have datasetFreshnessParameters, defaulting detection mechanism to {default}"
            )
            return default
        source_type = parameters.datasetFreshnessParameters.sourceType
        if source_type == models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA:
            return DetectionMechanism.INFORMATION_SCHEMA
        elif source_type == models.DatasetFreshnessSourceTypeClass.AUDIT_LOG:
            return DetectionMechanism.AUDIT_LOG
        elif source_type == models.DatasetFreshnessSourceTypeClass.FIELD_VALUE:
            return _AssertionPublic._get_field_value_detection_mechanism(
                assertion, parameters
            )
        elif source_type == models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION:
            return DetectionMechanism.DATAHUB_OPERATION
        elif source_type == models.DatasetFreshnessSourceTypeClass.FILE_METADATA:
            raise SDKNotYetSupportedError("FILE_METADATA DatasetFreshnessSourceType")
        else:
            raise SDKNotYetSupportedError(f"DatasetFreshnessSourceType {source_type}")

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a freshness assertion from the assertion and monitor entities.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            # A missing description is surfaced as an empty display name.
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            freshness_schedule_check_type=cls._get_freshness_schedule_check_type(
                assertion
            ),
            lookback_window=cls._get_lookback_window(assertion),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
            tags=cls._get_tags(assertion),
        )
1294
+
1295
+
1296
class SqlAssertion(_AssertionPublic, _HasSchedule):
    """
    A class that represents a SQL assertion.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        display_name: str,
        mode: AssertionMode,
        statement: str,
        criteria: SqlAssertionCriteria,
        schedule: models.CronScheduleClass,
        tags: list[TagUrn],
        incident_behavior: list[AssertionIncidentBehavior],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Union[datetime, None] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Build a SQL assertion from its already-resolved field values.

        Note: Values can be accessed, but not set on the assertion object.
        To update an assertion, use the `upsert_*` method.
        Args:
            urn: The urn of the assertion.
            dataset_urn: The urn of the dataset that the assertion is for.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active, inactive).
            statement: The SQL statement to be used for the assertion.
            criteria: The criteria to be used for the assertion.
            schedule: The schedule of the assertion.
            tags: The tags applied to the assertion.
            incident_behavior: Whether to raise or resolve an incident when the assertion fails / passes.
            created_by: The urn of the user that created the assertion.
            created_at: The timestamp of when the assertion was created.
            updated_by: The urn of the user that updated the assertion.
            updated_at: The timestamp of when the assertion was updated.
        """
        # Common assertion state first ...
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        # ... then the schedule mixin ...
        _HasSchedule.__init__(self, schedule=schedule)
        # ... and finally the SQL-specific fields.
        self._statement, self._criteria = statement, criteria

    @property
    def statement(self) -> str:
        """The SQL statement of the assertion."""
        return self._statement

    @property
    def criteria_type(self) -> Union[SqlAssertionType, str]:
        """The ``type`` of the stored criteria."""
        return self._criteria.type

    @property
    def criteria_change_type(self) -> Optional[Union[SqlAssertionChangeType, str]]:
        """The ``change_type`` of the stored criteria."""
        return self._criteria.change_type

    @property
    def criteria_operator(self) -> Union[SqlAssertionOperator, str]:
        """The ``operator`` of the stored criteria."""
        return self._criteria.operator

    @property
    def criteria_parameters(
        self,
    ) -> Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]:
        """The ``parameters`` of the stored criteria (single value or pair)."""
        return self._criteria.parameters

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Sql assertions do not have a detection mechanism; always None."""
        return None

    @staticmethod
    def _get_statement(assertion: Assertion) -> str:
        """Return the SQL statement stored on the assertion's info.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info or the info
                is not a SQL assertion info.
        """
        info = assertion.info
        if info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
            )
        if not isinstance(info, models.SqlAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a SQL assertion"
            )
        return info.statement

    @staticmethod
    def _get_criteria(assertion: Assertion) -> SqlAssertionCriteria:
        """Build the SQL assertion criteria from the assertion's info.

        Raises:
            SDKNotYetSupportedError: If the assertion has no info, the info is
                not a SQL assertion info, or neither a single value nor a
                min/max pair is present in its parameters.
        """
        info = assertion.info
        if info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a SQL assertion info, which is not supported"
            )
        if not isinstance(info, models.SqlAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a SQL assertion"
            )

        def _as_text(raw):
            # Strings pass through untouched; anything else is stringified.
            return raw if isinstance(raw, str) else str(raw)

        std_params = info.parameters
        resolved: Union[float, tuple[float, float]]
        if std_params.value is not None:
            resolved = float(std_params.value.value)
        elif std_params.maxValue is None or std_params.minValue is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a valid parameters for the SQL assertion"
            )
        else:
            # min and max values are in the order of min, max
            resolved = (
                float(std_params.minValue.value),
                float(std_params.maxValue.value),
            )

        return SqlAssertionCriteria(
            type=_as_text(info.type),
            change_type=None if info.changeType is None else _as_text(info.changeType),
            operator=_as_text(info.operator),
            parameters=resolved,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a SQL assertion from the assertion and monitor entities.
        """
        # Values are resolved in the same order as the keyword arguments below.
        fields = {
            "urn": assertion.urn,
            "dataset_urn": assertion.dataset,
            "display_name": assertion.description or "",
            "mode": cls._get_mode(monitor),
            "statement": cls._get_statement(assertion),
            "criteria": cls._get_criteria(assertion),
            "schedule": cls._get_schedule(
                monitor, default=DEFAULT_EVERY_SIX_HOURS_SCHEDULE
            ),
            "tags": cls._get_tags(assertion),
            "incident_behavior": cls._get_incident_behavior(assertion),
            "created_by": cls._get_created_by(assertion),
            "created_at": cls._get_created_at(assertion),
            "updated_by": cls._get_updated_by(assertion),
            "updated_at": cls._get_updated_at(assertion),
        }
        return cls(**fields)