acryl-datahub-cloud 0.3.11.1rc7__py3-none-any.whl → 0.3.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (94) hide show
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  3. acryl_datahub_cloud/datahub_forms_notifications/__init__.py +0 -0
  4. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +569 -0
  5. acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
  6. acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
  7. acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
  8. acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
  9. acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
  10. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +29 -13
  11. acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
  12. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
  13. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2011 -1955
  14. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +8 -0
  17. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
  18. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  19. acryl_datahub_cloud/metadata/schema.avsc +25413 -25425
  20. acryl_datahub_cloud/metadata/schema_classes.py +1316 -791
  21. acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
  22. acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
  23. acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
  24. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +223 -202
  25. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +36 -7
  26. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  27. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +40 -8
  28. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  29. acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
  30. acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
  31. acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
  32. acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
  33. acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
  34. acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
  35. acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
  36. acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
  37. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  38. acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  39. acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
  40. acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
  41. acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
  42. acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
  43. acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
  44. acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
  45. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +3 -1
  46. acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
  47. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +30 -0
  48. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
  49. acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
  50. acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
  51. acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  52. acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
  53. acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
  54. acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  55. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
  56. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  57. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +39 -10
  58. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  59. acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
  60. acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
  61. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  62. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  63. acryl_datahub_cloud/metadata/schemas/SubscriptionKey.avsc +2 -1
  64. acryl_datahub_cloud/metadata/schemas/UsageFeatures.avsc +10 -0
  65. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  66. acryl_datahub_cloud/notifications/__init__.py +0 -0
  67. acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
  68. acryl_datahub_cloud/sdk/__init__.py +39 -0
  69. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  70. acryl_datahub_cloud/sdk/assertion/assertion_base.py +1467 -0
  71. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  72. acryl_datahub_cloud/sdk/assertion/types.py +20 -0
  73. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  74. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +1648 -0
  75. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +258 -0
  76. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +914 -0
  77. acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +272 -0
  78. acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +648 -0
  79. acryl_datahub_cloud/sdk/assertions_client.py +3206 -0
  80. acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
  81. acryl_datahub_cloud/sdk/entities/assertion.py +432 -0
  82. acryl_datahub_cloud/sdk/entities/monitor.py +291 -0
  83. acryl_datahub_cloud/sdk/entities/subscription.py +84 -0
  84. acryl_datahub_cloud/sdk/errors.py +34 -0
  85. acryl_datahub_cloud/sdk/resolver_client.py +39 -0
  86. acryl_datahub_cloud/sdk/subscription_client.py +714 -0
  87. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/METADATA +47 -42
  88. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/RECORD +91 -58
  89. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/WHEEL +1 -1
  90. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/entry_points.txt +1 -0
  91. acryl_datahub_cloud/_sdk_extras/__init__.py +0 -4
  92. acryl_datahub_cloud/_sdk_extras/assertion.py +0 -15
  93. acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -23
  94. {acryl_datahub_cloud-0.3.11.1rc7.dist-info → acryl_datahub_cloud-0.3.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1648 @@
1
+ """
2
+ This file contains the AssertionInput class and related classes, which are used to
3
+ validate and represent the input for creating an Assertion in DataHub.
4
+ """
5
+
6
+ import random
7
+ import string
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+ from datetime import datetime
11
+ from enum import Enum
12
+ from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
13
+
14
+ import pydantic
15
+ import pytz
16
+ import tzlocal
17
+ from avrogen.dict_wrapper import DictWrapper
18
+ from croniter import croniter
19
+ from pydantic import BaseModel, Extra, ValidationError
20
+
21
+ from acryl_datahub_cloud.sdk.entities.assertion import (
22
+ Assertion,
23
+ AssertionActionsInputType,
24
+ AssertionInfoInputType,
25
+ TagsInputType,
26
+ )
27
+ from acryl_datahub_cloud.sdk.entities.monitor import Monitor
28
+ from acryl_datahub_cloud.sdk.errors import (
29
+ SDKNotYetSupportedError,
30
+ SDKUsageError,
31
+ SDKUsageErrorWithExamples,
32
+ )
33
+ from datahub.emitter.enum_helpers import get_enum_options
34
+ from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
35
+ from datahub.metadata import schema_classes as models
36
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
37
+ from datahub.sdk import Dataset
38
+ from datahub.sdk.entity_client import EntityClient
39
+
40
# TODO: Import ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS from datahub_executor.config
# Fallback for how many days of history assertion inference trains on when the
# caller does not specify a lookback window.
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60

# Auto-generated assertion names look like "<prefix>-<random suffix>"
# (see _generate_default_name below).
DEFAULT_NAME_PREFIX = "New Assertion"
DEFAULT_NAME_SUFFIX_LENGTH = 8


DEFAULT_HOURLY_SCHEDULE: models.CronScheduleClass = models.CronScheduleClass(
    cron="0 * * * *",  # Every hour, matches the UI default
    timezone=str(
        tzlocal.get_localzone()
    ),  # User local timezone, matches the UI default
)
# The overall default schedule is the hourly one.
DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_HOURLY_SCHEDULE

DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
    cron="0 0 * * *",  # Every day at midnight, matches the UI default
    timezone=str(
        tzlocal.get_localzone()
    ),  # User local timezone, matches the UI default
)

DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
    cron="0 */6 * * *",  # Every 6 hours, matches the UI default
    timezone=str(
        tzlocal.get_localzone()
    ),  # User local timezone, matches the UI default
)
70
class AbstractDetectionMechanism(BaseModel, ABC):
    """Base config for a change-detection mechanism.

    Concrete subclasses are discriminated by their literal ``type`` field.
    """

    type: str

    class Config:
        # Reject unknown fields so typos in user-supplied config fail loudly.
        extra = Extra.forbid


class _InformationSchema(AbstractDetectionMechanism):
    # Marker config: detection via information_schema (implemented server-side).
    type: Literal["information_schema"] = "information_schema"


class _AuditLog(AbstractDetectionMechanism):
    # Marker config: detection via the platform audit log (implemented server-side).
    type: Literal["audit_log"] = "audit_log"


# Keep this in sync with the allowed field types in the UI, currently in
# datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertion/builder/constants.ts: LAST_MODIFIED_FIELD_TYPES
LAST_MODIFIED_ALLOWED_FIELD_TYPES = [models.DateTypeClass(), models.TimeTypeClass()]


class _LastModifiedColumn(AbstractDetectionMechanism):
    # Tracks a column holding each row's last-modified timestamp.
    type: Literal["last_modified_column"] = "last_modified_column"
    column_name: str
    additional_filter: Optional[str] = None  # extra filter clause, optional


# Keep this in sync with the allowed field types in the UI, currently in
# datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertion/builder/constants.ts: HIGH_WATERMARK_FIELD_TYPES
HIGH_WATERMARK_ALLOWED_FIELD_TYPES = [
    models.NumberTypeClass(),
    models.DateTypeClass(),
    models.TimeTypeClass(),
]


class _HighWatermarkColumn(AbstractDetectionMechanism):
    # Tracks a high-watermark column (numeric, date, or time — see
    # HIGH_WATERMARK_ALLOWED_FIELD_TYPES above).
    type: Literal["high_watermark_column"] = "high_watermark_column"
    column_name: str
    additional_filter: Optional[str] = None


class _DataHubOperation(AbstractDetectionMechanism):
    # Marker config: detection via DataHub Operation metadata (implemented server-side).
    type: Literal["datahub_operation"] = "datahub_operation"


class _Query(AbstractDetectionMechanism):
    # COUNT(*) query
    type: Literal["query"] = "query"
    additional_filter: Optional[str] = None


class _AllRowsQuery(AbstractDetectionMechanism):
    # For column-based assertions, this is the default detection mechanism.
    type: Literal["all_rows_query"] = "all_rows_query"
    additional_filter: Optional[str] = None


class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
    # Used for column-based assertions.
    type: Literal["all_rows_query_datahub_dataset_profile"] = (
        "all_rows_query_datahub_dataset_profile"
    )


class _ChangedRowsQuery(AbstractDetectionMechanism):
    # Used for column-based assertions.
    type: Literal["changed_rows_query"] = "changed_rows_query"
    column_name: str
    additional_filter: Optional[str] = None


class _DatasetProfile(AbstractDetectionMechanism):
    # Marker config: detection via dataset profile data (implemented server-side).
    type: Literal["dataset_profile"] = "dataset_profile"
145
# Operators that require a single value numeric parameter
SINGLE_VALUE_NUMERIC_OPERATORS = [
    models.AssertionStdOperatorClass.EQUAL_TO,
    models.AssertionStdOperatorClass.NOT_EQUAL_TO,
    models.AssertionStdOperatorClass.GREATER_THAN,
    models.AssertionStdOperatorClass.LESS_THAN,
    models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
    models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO,
]

# Operators that require a single value parameter
# (a superset of SINGLE_VALUE_NUMERIC_OPERATORS above).
SINGLE_VALUE_OPERATORS = [
    models.AssertionStdOperatorClass.CONTAIN,
    models.AssertionStdOperatorClass.END_WITH,
    models.AssertionStdOperatorClass.START_WITH,
    models.AssertionStdOperatorClass.REGEX_MATCH,
    models.AssertionStdOperatorClass.IN,
    models.AssertionStdOperatorClass.NOT_IN,
] + SINGLE_VALUE_NUMERIC_OPERATORS

# Operators that require a numeric range parameter
RANGE_OPERATORS = [
    models.AssertionStdOperatorClass.BETWEEN,
]

# Operators that require no parameters
NO_PARAMETER_OPERATORS = [
    models.AssertionStdOperatorClass.NULL,
    models.AssertionStdOperatorClass.NOT_NULL,
    models.AssertionStdOperatorClass.IS_TRUE,
    models.AssertionStdOperatorClass.IS_FALSE,
]
177
+
178
+
179
# Keep these two lists in sync:
# Tuple form: used for isinstance() checks in DetectionMechanism.parse.
_DETECTION_MECHANISM_CONCRETE_TYPES = (
    _InformationSchema,
    _AuditLog,
    _LastModifiedColumn,
    _HighWatermarkColumn,
    _DataHubOperation,
    _Query,
    _DatasetProfile,
    _AllRowsQuery,
    _ChangedRowsQuery,
    _AllRowsQueryDataHubDatasetProfile,
)
# Union form: used for type annotations.
_DetectionMechanismTypes = Union[
    _InformationSchema,
    _AuditLog,
    _LastModifiedColumn,
    _HighWatermarkColumn,
    _DataHubOperation,
    _Query,
    _DatasetProfile,
    _AllRowsQuery,
    _ChangedRowsQuery,
    _AllRowsQueryDataHubDatasetProfile,
]

# Mechanisms that accept an optional `additional_filter` sub-parameter.
_DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
    _LastModifiedColumn,
    _HighWatermarkColumn,
    _Query,
    _AllRowsQuery,
    _ChangedRowsQuery,
)

# Used by DetectionMechanism.parse when the caller passes None.
DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
214
+
215
+
216
class DetectionMechanism:
    """Enum-like accessor for assertion change-detection mechanisms.

    To have a more enum-like user experience even with sub parameters, the
    detection mechanisms are defined as class attributes: options that take
    sub-parameters are the classes themselves (so parameters can be applied,
    e.g. ``DetectionMechanism.QUERY(additional_filter='id > 1000')``), while
    parameter-less options are already-instantiated instances
    (e.g. ``DetectionMechanism.INFORMATION_SCHEMA``).
    """

    INFORMATION_SCHEMA = _InformationSchema()
    AUDIT_LOG = _AuditLog()
    LAST_MODIFIED_COLUMN = _LastModifiedColumn
    HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
    DATAHUB_OPERATION = _DataHubOperation()
    QUERY = _Query
    ALL_ROWS_QUERY = _AllRowsQuery
    CHANGED_ROWS_QUERY = _ChangedRowsQuery
    ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
    DATASET_PROFILE = _DatasetProfile()

    # User-facing examples embedded in parse-error messages.
    _DETECTION_MECHANISM_EXAMPLES = {
        "Information Schema from string": "information_schema",
        "Information Schema from DetectionMechanism": "DetectionMechanism.INFORMATION_SCHEMA",
        "Audit Log from string": "audit_log",
        "Audit Log from DetectionMechanism": "DetectionMechanism.AUDIT_LOG",
        "Last Modified Column from dict": {
            "type": "last_modified_column",
            "column_name": "last_modified",
            "additional_filter": "last_modified > '2021-01-01'",
        },
        "Last Modified Column from DetectionMechanism": "DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified', additional_filter='last_modified > 2021-01-01')",
        "High Watermark Column from dict": {
            "type": "high_watermark_column",
            "column_name": "id",
            "additional_filter": "id > 1000",
        },
        "High Watermark Column from DetectionMechanism": "DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id', additional_filter='id > 1000')",
        "DataHub Operation from string": "datahub_operation",
        "DataHub Operation from DetectionMechanism": "DetectionMechanism.DATAHUB_OPERATION",
        "Query from string": "query",
        "Query from dict": {
            "type": "query",
            "additional_filter": "id > 1000",
        },
        "Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
        "Dataset Profile from string": "dataset_profile",
        "Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
        "All Rows Query from string": "all_rows_query",
        "All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
        "All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
        "Changed Rows Query from dict (with optional additional filter)": {
            "type": "changed_rows_query",
            "column_name": "id",
            "additional_filter": "id > 1000",
        },
        "Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
        "Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
        "All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
        "All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
    }

    @staticmethod
    def parse(
        detection_mechanism_config: Optional[
            Union[str, dict[str, str], _DetectionMechanismTypes]
        ] = None,
        default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
    ) -> _DetectionMechanismTypes:
        """Parse user input into a concrete detection mechanism instance.

        Args:
            detection_mechanism_config: A mechanism instance, a type string
                (e.g. ``"audit_log"``), a dict with a ``"type"`` key plus
                sub-parameters, or None.
            default_detection_mechanism: Returned when the config is None.

        Raises:
            SDKUsageErrorWithExamples: If the input cannot be parsed.
        """
        if detection_mechanism_config is None:
            return default_detection_mechanism
        if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
            return detection_mechanism_config
        elif isinstance(detection_mechanism_config, str):
            return DetectionMechanism._try_parse_from_string(detection_mechanism_config)
        elif isinstance(detection_mechanism_config, dict):
            return DetectionMechanism._try_parse_from_dict(detection_mechanism_config)
        else:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid detection mechanism: {detection_mechanism_config}",
                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
            )

    @staticmethod
    def _try_parse_from_string(
        detection_mechanism_config: str,
    ) -> _DetectionMechanismTypes:
        """Resolve a type string to the matching class attribute, instantiating if needed."""
        try:
            return_value = getattr(
                DetectionMechanism, detection_mechanism_config.upper()
            )
            if isinstance(return_value, pydantic.main.ModelMetaclass):
                try:
                    # We try to instantiate here to let pydantic raise a helpful error
                    # about which parameters are missing
                    return_value = return_value()
                except ValidationError as e:
                    raise SDKUsageErrorWithExamples(
                        msg=f"Detection mechanism type '{detection_mechanism_config}' requires additional parameters: {e}",
                        examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
                    ) from e
            return return_value
        except AttributeError as e:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid detection mechanism type: {detection_mechanism_config}",
                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
            ) from e

    @staticmethod
    def _try_parse_from_dict(
        detection_mechanism_config: dict[str, str],
    ) -> _DetectionMechanismTypes:
        """Build a mechanism from a dict containing a ``"type"`` key plus sub-parameters.

        The caller's dict is not mutated.
        """
        # Fix: operate on a copy so pop() does not mutate the caller's dict.
        # Previously, parsing the same dict twice failed (the 'type' key was
        # gone), and error messages printed the dict with 'type' removed.
        config = dict(detection_mechanism_config)
        try:
            detection_mechanism_type = config.pop("type")
        except KeyError as e:
            raise SDKUsageErrorWithExamples(
                msg="Detection mechanism type is required if using a dict to create a DetectionMechanism",
                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
            ) from e
        try:
            detection_mechanism_obj = getattr(
                DetectionMechanism, detection_mechanism_type.upper()
            )
        except AttributeError as e:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid detection mechanism type: {detection_mechanism_type}",
                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
            ) from e

        try:
            return detection_mechanism_obj(**config)
        except TypeError as e:
            # Fix: use str(e) instead of e.args[0] — a TypeError may carry no
            # args, which would have raised IndexError here.
            if "object is not callable" not in str(e):
                raise e
            if config:
                # If we are here in the TypeError case, the detection mechanism is an instance of a class,
                # not a class itself, so we can't instantiate it with the config dict.
                # In this case, the config dict should be empty after the type is popped.
                # If it is not empty, we raise an error.
                raise SDKUsageErrorWithExamples(
                    msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {config}",
                    examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
                ) from e
            return detection_mechanism_obj
        except ValidationError as e:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {config} {e}",
                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
            ) from e
358
+
359
+
360
# All input shapes DetectionMechanism.parse accepts: a type string, a config
# dict, an already-constructed mechanism instance, or None (use the default).
DetectionMechanismInputTypes: TypeAlias = Union[
    str, dict[str, str], _DetectionMechanismTypes, None
]
363
+
364
+
365
class InferenceSensitivity(Enum):
    """Sensitivity level for smart-assertion inference (high / medium / low)."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"

    @staticmethod
    def parse(
        sensitivity: Optional[
            Union[
                str,
                int,
                "InferenceSensitivity",
                models.AssertionMonitorSensitivityClass,
            ]
        ],
    ) -> "InferenceSensitivity":
        """Coerce any supported representation into an InferenceSensitivity.

        None falls back to DEFAULT_SENSITIVITY. Integers 1-10 are bucketed:
        1-3 -> LOW, 4-6 -> MEDIUM, 7-10 -> HIGH.

        Raises:
            SDKUsageErrorWithExamples: For out-of-range ints or unknown strings.
        """
        if sensitivity is None:
            return DEFAULT_SENSITIVITY
        examples = {
            "High sensitivity from string": "high",
            "High sensitivity from enum": "InferenceSensitivity.HIGH",
            "Medium sensitivity from string": "medium",
            "Medium sensitivity from enum": "InferenceSensitivity.MEDIUM",
            "Low sensitivity from string": "low",
            "Low sensitivity from enum": "InferenceSensitivity.LOW",
            "Sensitivity from int (1-3: low, 4-6: medium, 7-10: high)": "10",
        }

        if isinstance(sensitivity, InferenceSensitivity):
            return sensitivity
        if isinstance(sensitivity, models.AssertionMonitorSensitivityClass):
            # Unwrap the schema-level wrapper down to its numeric level.
            sensitivity = sensitivity.level
        if isinstance(sensitivity, int):
            if not 1 <= sensitivity <= 10:
                raise SDKUsageErrorWithExamples(
                    msg=f"Invalid inference sensitivity: {sensitivity}",
                    examples=examples,
                )
            if sensitivity <= 3:
                return InferenceSensitivity.LOW
            if sensitivity <= 6:
                return InferenceSensitivity.MEDIUM
            return InferenceSensitivity.HIGH
        try:
            return InferenceSensitivity(sensitivity)
        except ValueError as e:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid inference sensitivity: {sensitivity}",
                examples=examples,
            ) from e

    @staticmethod
    def to_int(sensitivity: "InferenceSensitivity") -> int:
        """Map the enum back to a representative numeric level (1, 5, or 10)."""
        level_by_member = {
            InferenceSensitivity.LOW: 1,
            InferenceSensitivity.MEDIUM: 5,
            InferenceSensitivity.HIGH: 10,
        }
        return level_by_member[sensitivity]
424
+
425
+
426
# Used by InferenceSensitivity.parse when the caller passes None.
DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM

# User-facing examples embedded in time-window parse-error messages.
TIME_WINDOW_SIZE_EXAMPLES = {
    "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
    "Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
}
432
+
433
+
434
class CalendarInterval(Enum):
    # SDK-level interval units; values are converted to
    # models.CalendarIntervalClass in _try_parse_time_window_size.
    MINUTE = "MINUTE"
    HOUR = "HOUR"
    DAY = "DAY"


class TimeWindowSize(BaseModel):
    """SDK-friendly time-window spec: a unit plus a multiple (e.g. 10 minutes)."""

    # unit accepts either the CalendarInterval enum or its string value.
    unit: Union[CalendarInterval, str]
    multiple: int


# All input shapes _try_parse_time_window_size accepts.
TimeWindowSizeInputTypes: TypeAlias = Union[
    models.TimeWindowSizeClass,
    models.FixedIntervalScheduleClass,
    TimeWindowSize,
]
450
+
451
+
452
def _try_parse_time_window_size(
    config: TimeWindowSizeInputTypes,
) -> models.TimeWindowSizeClass:
    """Normalize any accepted time-window input into models.TimeWindowSizeClass.

    Raises:
        SDKUsageErrorWithExamples: If config is not one of the supported types.
    """
    if isinstance(config, models.TimeWindowSizeClass):
        return config
    elif isinstance(config, models.FixedIntervalScheduleClass):
        # Keep only unit + multiple from the schedule.
        # NOTE(review): _try_parse_and_validate_schema_classes_enum is defined
        # later in this file — presumably it validates enum membership.
        return models.TimeWindowSizeClass(
            unit=_try_parse_and_validate_schema_classes_enum(
                config.unit, models.CalendarIntervalClass
            ),
            multiple=config.multiple,
        )
    elif isinstance(config, TimeWindowSize):
        # Two-step conversion: first coerce unit into the SDK CalendarInterval
        # enum (which also accepts plain strings), then map its value onto the
        # schema-level CalendarIntervalClass.
        return models.TimeWindowSizeClass(
            unit=_try_parse_and_validate_schema_classes_enum(
                _try_parse_and_validate_schema_classes_enum(
                    config.unit, CalendarInterval
                ).value,
                models.CalendarIntervalClass,
            ),
            multiple=config.multiple,
        )
    else:
        raise SDKUsageErrorWithExamples(
            msg=f"Invalid time window size: {config}",
            examples=TIME_WINDOW_SIZE_EXAMPLES,
        )
479
+
480
+
481
class FixedRangeExclusionWindow(BaseModel):
    """An absolute [start, end] datetime range to exclude."""

    type: Literal["fixed_range_exclusion_window"] = "fixed_range_exclusion_window"
    start: datetime
    end: datetime


ExclusionWindowTypes: TypeAlias = Union[
    FixedRangeExclusionWindow,
    # Add other exclusion window types here as they are added to the SDK.
]

# User-facing examples embedded in exclusion-window parse-error messages.
FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES = {
    "Exclusion Window from datetimes": {
        "start": "datetime(2025, 1, 1, 0, 0, 0)",
        "end": "datetime(2025, 1, 2, 0, 0, 0)",
    },
    "Exclusion Window from strings": {
        "start": "2025-01-01T00:00:00",
        "end": "2025-01-02T00:00:00",
    },
    "Exclusion Window from object": "ExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0), end=datetime(2025, 1, 2, 0, 0, 0))",
}
# Dict / list-of-dict / object input shapes for fixed-range windows.
FixedRangeExclusionWindowInputTypes: TypeAlias = Union[
    dict[str, datetime],
    dict[str, str],
    list[dict[str, datetime]],
    list[dict[str, str]],
    FixedRangeExclusionWindow,
    list[FixedRangeExclusionWindow],
]

# All input shapes _try_parse_exclusion_window accepts.
ExclusionWindowInputTypes: TypeAlias = Union[
    models.AssertionExclusionWindowClass,
    list[models.AssertionExclusionWindowClass],
    FixedRangeExclusionWindowInputTypes,
    # Add other exclusion window types here as they are added to the SDK.
]

# List-shaped subset, accepted by _try_parse_list_of_exclusion_windows.
IterableExclusionWindowInputTypes: TypeAlias = Union[
    list[dict[str, datetime]],
    list[dict[str, str]],
    list[FixedRangeExclusionWindow],
    list[models.AssertionExclusionWindowClass],
]
525
+
526
+
527
def _try_parse_exclusion_window(
    config: Optional[ExclusionWindowInputTypes],
) -> Union[FixedRangeExclusionWindow, list[FixedRangeExclusionWindow], None]:
    """Normalize exclusion-window input into a list of FixedRangeExclusionWindow.

    None yields an empty list; single inputs are wrapped in a one-element list;
    lists are delegated to _try_parse_list_of_exclusion_windows.

    Raises:
        SDKUsageErrorWithExamples: If the input cannot be converted.
    """
    if config is None:
        return []
    if isinstance(config, dict):
        # Fix: wrap pydantic's ValidationError the same way the list path does,
        # so a malformed dict raises a consistent SDK error instead of leaking
        # a raw ValidationError to the caller.
        try:
            return [FixedRangeExclusionWindow(**config)]
        except ValidationError as e:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid exclusion window: {config}",
                examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
            ) from e
    if isinstance(config, FixedRangeExclusionWindow):
        return [config]
    elif isinstance(config, models.AssertionExclusionWindowClass):
        # Only fixed-range windows are supported here.
        assert config.fixedRange is not None
        return [
            FixedRangeExclusionWindow(
                start=parse_ts_millis(config.fixedRange.startTimeMillis),
                end=parse_ts_millis(config.fixedRange.endTimeMillis),
            )
        ]
    elif isinstance(config, list):
        return _try_parse_list_of_exclusion_windows(config)
    else:
        raise SDKUsageErrorWithExamples(
            msg=f"Invalid exclusion window: {config}",
            examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
        )
551
+
552
+
553
def _try_parse_list_of_exclusion_windows(
    config: IterableExclusionWindowInputTypes,
) -> Union[list[FixedRangeExclusionWindow], None]:
    """Convert a list of exclusion-window inputs into FixedRangeExclusionWindow objects.

    NOTE(review): a mixed list containing both AssertionExclusionWindowClass
    items and dicts/objects falls into the second branch, where the
    schema-class items are rejected as invalid — confirm this is intended.

    Raises:
        SDKUsageErrorWithExamples: For items that cannot be converted.
    """
    # Homogeneous schema-class lists take the first branch (an empty list also
    # satisfies all() and returns []).
    if all(isinstance(item, models.AssertionExclusionWindowClass) for item in config):
        exclusion_windows = []
        for item in config:
            assert isinstance(item, models.AssertionExclusionWindowClass)
            assert item.fixedRange is not None
            exclusion_windows.append(
                FixedRangeExclusionWindow(
                    start=parse_ts_millis(item.fixedRange.startTimeMillis),
                    end=parse_ts_millis(item.fixedRange.endTimeMillis),
                )
            )
        return exclusion_windows
    else:
        exclusion_windows = []
        for item in config:
            if isinstance(item, dict):
                try:
                    exclusion_windows.append(FixedRangeExclusionWindow(**item))
                except ValidationError as e:
                    raise SDKUsageErrorWithExamples(
                        msg=f"Invalid exclusion window: {item}",
                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
                    ) from e
            elif isinstance(item, FixedRangeExclusionWindow):
                exclusion_windows.append(item)
            elif item is None:
                # None items are silently skipped.
                pass
            else:
                raise SDKUsageErrorWithExamples(
                    msg=f"Invalid exclusion window: {item}",
                    examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
                )
        return exclusion_windows
589
+
590
+
591
+ class AssertionIncidentBehavior(Enum):
592
+ RAISE_ON_FAIL = "raise_on_fail"
593
+ RESOLVE_ON_PASS = "resolve_on_pass"
594
+
595
+
596
+ ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES = {
597
+ "Raise on fail from string": "raise_on_fail",
598
+ "Raise on fail from enum": "AssertionIncidentBehavior.RAISE_ON_FAIL",
599
+ "Resolve on pass from string": "resolve_on_pass",
600
+ "Resolve on pass from enum": "AssertionIncidentBehavior.RESOLVE_ON_PASS",
601
+ }
602
+
603
+ AssertionIncidentBehaviorInputTypes: TypeAlias = Union[
604
+ str,
605
+ list[str],
606
+ AssertionIncidentBehavior,
607
+ list[AssertionIncidentBehavior],
608
+ None,
609
+ ]
610
+
611
+
612
+ def _try_parse_incident_behavior(
613
+ config: AssertionIncidentBehaviorInputTypes,
614
+ ) -> Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior], None]:
615
+ if config is None:
616
+ return []
617
+ if isinstance(config, str):
618
+ try:
619
+ return [AssertionIncidentBehavior(config)]
620
+ except ValueError as e:
621
+ raise SDKUsageErrorWithExamples(
622
+ msg=f"Invalid incident behavior: {config}",
623
+ examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
624
+ ) from e
625
+ if isinstance(config, AssertionIncidentBehavior):
626
+ return [config]
627
+ elif isinstance(config, list):
628
+ incident_behaviors = []
629
+ for item in config:
630
+ if isinstance(item, str):
631
+ try:
632
+ incident_behaviors.append(AssertionIncidentBehavior(item))
633
+ except ValueError as e:
634
+ raise SDKUsageErrorWithExamples(
635
+ msg=f"Invalid incident behavior: {item}",
636
+ examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
637
+ ) from e
638
+ elif isinstance(item, AssertionIncidentBehavior):
639
+ incident_behaviors.append(item)
640
+ else:
641
+ raise SDKUsageErrorWithExamples(
642
+ msg=f"Invalid incident behavior: {item}",
643
+ examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
644
+ )
645
+ return incident_behaviors
646
+ else:
647
+ raise SDKUsageErrorWithExamples(
648
+ msg=f"Invalid incident behavior: {config}",
649
+ examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
650
+ )
651
+
652
+
653
+ def _generate_default_name(prefix: str, suffix_length: int) -> str:
654
+ return f"{prefix}-{''.join(random.choices(string.ascii_letters + string.digits, k=suffix_length))}"
655
+
656
+
657
# User-facing examples embedded in training-lookback parse-error messages.
TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES = {
    "Training data lookback days from int": ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
    f"Training data lookback days from None (uses default of {ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS} days)": None,
}
661
+
662
+
663
def _try_parse_training_data_lookback_days(
    # Annotation widened from Optional[int]: the body has always accepted and
    # parsed numeric strings, so the declared type now matches the behavior.
    training_data_lookback_days: Optional[Union[int, str]],
) -> int:
    """Parse and validate the training data lookback window in days.

    Args:
        training_data_lookback_days: A non-negative int, a numeric string,
            or None to use the default.

    Returns:
        The validated number of days (the default when input is None).

    Raises:
        SDKUsageErrorWithExamples: If the value is not an int or parseable string.
        SDKUsageError: If the value is negative.
    """
    if training_data_lookback_days is None:
        return ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS
    if isinstance(training_data_lookback_days, str):
        try:
            training_data_lookback_days = int(training_data_lookback_days)
        except ValueError as e:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid training data lookback days: {training_data_lookback_days}",
                examples=TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES,
            ) from e
    if not isinstance(training_data_lookback_days, int):
        raise SDKUsageErrorWithExamples(
            msg=f"Invalid training data lookback days: {training_data_lookback_days}",
            examples=TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES,
        )
    if training_data_lookback_days < 0:
        raise SDKUsageError("Training data lookback days must be non-negative")
    return training_data_lookback_days
684
+
685
+
686
def _validate_cron_schedule(schedule: str, timezone: str) -> None:
    """We are using the POSIX.1-2017 standard for cron expressions.

    Note: We are using the croniter library for cron parsing which is different from executor, which uses apscheduler, so there is a risk of mismatch here.

    Args:
        schedule: A 5-field cron expression (minute hour day-of-month month day-of-week).
        timezone: An IANA timezone name; an empty string skips timezone validation.

    Raises:
        SDKUsageError: If the timezone is unknown, the expression is not
            5 fields, day-of-week uses 7 for Sunday, or croniter rejects it.
            Any internal ValueError/UnknownTimeZoneError is chained as the cause.
    """
    try:
        # Validate timezone - pytz.timezone() raises UnknownTimeZoneError for invalid timezones
        # Skip timezone validation when empty
        if timezone:
            pytz.timezone(timezone)

        # Validate 5-field cron expression only (POSIX.1-2017 standard)
        fields = schedule.strip().split()
        if len(fields) != 5:
            raise ValueError("POSIX.1-2017 requires exactly 5 fields")

        # POSIX.1-2017 specific validation: Sunday must be 0, not 7
        # However croniter accepts 7 as Sunday, so custom check is needed here.
        # Check the day-of-week field (5th field, index 4)
        dow_field = fields[4]
        if "7" in dow_field:
            # Check if 7 appears as a standalone value or in ranges
            import re

            # Match 7 as standalone, in lists, or in ranges
            # NOTE(review): the `\b7\b` alternative already subsumes most of the
            # others; the extra alternatives also catch substrings like "17-" —
            # presumably intentional belt-and-braces, left as-is.
            if re.search(r"\b7\b|7-|,7,|^7,|,7$|-7\b", dow_field):
                raise ValueError(
                    "POSIX.1-2017 standard: Sunday must be represented as 0, not 7"
                )

        # Validate cron expression - croniter constructor validates the expression
        croniter(schedule)

    # Broad catch is deliberate: every validation failure above (including the
    # ValueErrors we raise ourselves) is rewrapped into one SDK-facing error.
    except Exception as e:
        raise SDKUsageError(
            f"Invalid cron expression or timezone: {schedule} {timezone}, please use a POSIX.1-2017 compatible cron expression and timezone."
        ) from e
723
+
724
+
725
def _try_parse_schedule(
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Optional[models.CronScheduleClass]:
    """Parse and validate a schedule input into a CronScheduleClass.

    Args:
        schedule: A cron string (interpreted in UTC), an existing
            CronScheduleClass, or None.

    Returns:
        A validated CronScheduleClass, or None when schedule is None.

    Raises:
        SDKUsageError: If the cron expression/timezone is invalid, or the
            argument is of an unsupported type.
    """
    if schedule is None:
        return None
    if isinstance(schedule, str):
        # Bare cron strings are always interpreted in UTC.
        _validate_cron_schedule(schedule, "UTC")
        return models.CronScheduleClass(
            cron=schedule,
            timezone="UTC",
        )
    if isinstance(schedule, models.CronScheduleClass):
        _validate_cron_schedule(schedule.cron, schedule.timezone)
        return schedule
    # Fix: an unsupported type previously fell through and silently returned
    # None; fail loudly so callers get actionable feedback.
    raise SDKUsageError(
        f"Invalid schedule type: {type(schedule).__name__}, expected a cron string or models.CronScheduleClass"
    )
739
+
740
+
741
# Field specs that can accompany an assertion's evaluation parameters.
FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]


# Generic type parameter for _try_parse_and_validate_schema_classes_enum.
T = TypeVar("T")
745
+
746
+
747
def _try_parse_and_validate_schema_classes_enum(
    value: Union[str, T],
    enum_class: Type[T],
) -> T:
    """Parse a string (case-insensitive) or pass through an existing enum value.

    Args:
        value: Either an instance of enum_class or the (case-insensitive)
            name of one of its options.
        enum_class: The schema-classes enum type to validate against.

    Returns:
        The matching enum value.

    Raises:
        SDKUsageError: If value is neither an enum_class instance nor a
            string naming one of its options.
    """
    if isinstance(value, enum_class):
        return value
    if not isinstance(value, str):
        # Fix: was a bare `assert`, which is stripped under `python -O`;
        # raise an explicit, user-facing error instead.
        raise SDKUsageError(
            f"Invalid value for {enum_class.__name__}: {value!r}, expected a string or {enum_class.__name__}"
        )
    if value.upper() not in get_enum_options(enum_class):
        raise SDKUsageError(
            f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
        )
    return getattr(enum_class, value.upper())
759
+
760
+
761
@dataclass(frozen=True)
class DatasetSourceType:
    """
    DatasetSourceType is used to represent a dataset source type.
    It is used to check if a source type is valid for a dataset type and assertion type.

    Args:
        source_type: The source type (e.g. information schema, field value, etc. aka detection mechanism)
        platform: The platform of the dataset as a string OR "all" for all platforms.
        assertion_type: The assertion type as a models.AssertionTypeClass string e.g. models.AssertionTypeClass.FRESHNESS OR "all" for all assertion types.

    Example:
        DatasetSourceType(
            source_type=_InformationSchema,
            platform="databricks",
            assertion_type="all",
        )
        When listed in INVALID_SOURCE_TYPES, this entry means that the source type
        _InformationSchema is invalid for the dataset type "databricks" and assertion type "all".
        "all" in this example means that the source type is invalid for all assertion types.
    """

    # The detection-mechanism class itself (not an instance) is stored here.
    source_type: Type[_DetectionMechanismTypes]
    # Dataset platform name, or the literal "all" wildcard.
    platform: str
    # models.AssertionTypeClass value, or the literal "all" wildcard.
    assertion_type: Union[models.AssertionTypeClass, str]
785
+
786
+
787
# Deny-list consulted by _is_source_type_valid. "all" in platform or
# assertion_type acts as a wildcard.
INVALID_SOURCE_TYPES = {
    # Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
    DatasetSourceType(
        source_type=_InformationSchema,
        platform="databricks",
        assertion_type="all",
    )
}
795
+
796
+
797
def _is_source_type_valid(
    dataset_source_type: DatasetSourceType,
    invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
) -> bool:
    """Check a (source type, platform, assertion type) combination against the deny-list.

    An entry in ``invalid_source_types`` matches when its source type equals the
    candidate's and each of its platform / assertion type fields is either the
    literal wildcard "all" or equal to the candidate's corresponding field.

    Args:
        dataset_source_type: The combination being validated.
        invalid_source_types: The deny-list to consult (defaults to the
            module-level INVALID_SOURCE_TYPES).

    Returns:
        False if any deny-list entry matches; True otherwise.
    """
    candidate = dataset_source_type
    for denied in invalid_source_types:
        if denied.source_type != candidate.source_type:
            continue
        # "all" acts as a wildcard on either axis.
        platform_hit = denied.platform in ("all", candidate.platform)
        assertion_hit = denied.assertion_type in ("all", candidate.assertion_type)
        if platform_hit and assertion_hit:
            return False
    return True
825
+
826
+
827
class _HasSmartAssertionInputs:
    """
    A class that contains the common inputs for smart assertions.
    This is used to avoid code duplication in the smart assertion inputs.

    Args:
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
        training_data_lookback_days: The training data lookback days to be applied to the assertion.
    """

    def __init__(
        self,
        *,
        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
        training_data_lookback_days: Optional[int] = None,
    ):
        # Each raw input is normalized immediately so downstream converters can
        # assume validated values.
        self.sensitivity = InferenceSensitivity.parse(sensitivity)
        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
            training_data_lookback_days
        )

    def _convert_exclusion_windows(
        self,
    ) -> list[models.AssertionExclusionWindowClass]:
        """
        Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.

        Returns:
            A list of AssertionExclusionWindowClass objects.

        Raises:
            SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
        """
        exclusion_windows: list[models.AssertionExclusionWindowClass] = []
        if self.exclusion_windows:
            for window in self.exclusion_windows:
                if not isinstance(window, FixedRangeExclusionWindow):
                    raise SDKUsageErrorWithExamples(
                        msg=f"Invalid exclusion window type: {window}",
                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
                    )
                # To match the UI, we generate a display name for the exclusion window.
                # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
                # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
                # NOTE(review): `%-d` / `%-I` are glibc strftime extensions and
                # raise on Windows — confirm this code only runs on POSIX hosts.
                generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
                exclusion_windows.append(
                    models.AssertionExclusionWindowClass(
                        type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
                        displayName=generated_display_name,
                        fixedRange=models.AbsoluteTimeWindowClass(
                            startTimeMillis=make_ts_millis(window.start),
                            endTimeMillis=make_ts_millis(window.end),
                        ),
                    )
                )
        return exclusion_windows

    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
        """
        Convert sensitivity into an AssertionMonitorSensitivityClass.

        Returns:
            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
        """
        return models.AssertionMonitorSensitivityClass(
            level=InferenceSensitivity.to_int(self.sensitivity),
        )
897
+
898
+
899
class _AssertionInput(ABC):
    """Abstract base for assertion inputs.

    Validates raw caller inputs on construction and converts them into
    Assertion and Monitor entities. Subclasses supply the assertion-type
    specific pieces via the abstract methods at the bottom of this class.
    """

    def __init__(
        self,
        *,
        # Required fields
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
        # Optional fields
        urn: Optional[
            Union[str, AssertionUrn]
        ] = None,  # Can be None if the assertion is not yet created
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        detection_mechanism: DetectionMechanismInputTypes = None,
        incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
        tags: Optional[TagsInputType] = None,
        source_type: str = models.AssertionSourceTypeClass.NATIVE,  # Verified on init to be a valid enum value
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
        default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
    ):
        """
        Create an AssertionInput object.

        Args:
            dataset_urn: The urn of the dataset to be monitored.
            entity_client: The entity client to be used for creating the assertion.
            urn: The urn of the assertion. If not provided, a random urn will be generated.
            display_name: The display name of the assertion. If not provided, a random display name will be generated.
            enabled: Whether the assertion is enabled. Defaults to True.
            detection_mechanism: The detection mechanism to be used for the assertion.
            incident_behavior: The incident behavior to be applied to the assertion. Accepts:
                - String values: "raise_on_fail", "resolve_on_pass"
                - Enum values: AssertionIncidentBehavior.RAISE_ON_FAIL, AssertionIncidentBehavior.RESOLVE_ON_PASS
                - Lists of any of the above values
                - None (default behavior)
            tags: The tags to be applied to the assertion.
            source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
            created_by: The actor that created the assertion.
            created_at: The timestamp of the assertion creation.
            updated_by: The actor that last updated the assertion.
            updated_at: The timestamp of the assertion last update.
        """
        self.dataset_urn = DatasetUrn.from_string(dataset_urn)
        self.entity_client = entity_client
        self.urn = AssertionUrn(urn) if urn else None
        self.display_name = (
            display_name
            if display_name is not None
            else _generate_default_name(DEFAULT_NAME_PREFIX, DEFAULT_NAME_SUFFIX_LENGTH)
        )
        self.enabled = enabled
        self.schedule = _try_parse_schedule(schedule)
        self.detection_mechanism = DetectionMechanism.parse(
            detection_mechanism, default_detection_mechanism
        )
        # Reject detection mechanisms that are on the deny-list for this
        # platform / assertion-type combination.
        if not _is_source_type_valid(
            DatasetSourceType(
                source_type=type(self.detection_mechanism),
                platform=self.dataset_urn.platform,
                assertion_type=self._assertion_type(),
            )
        ):
            raise SDKUsageError(
                f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
            )
        self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
        self.tags = tags
        if source_type not in get_enum_options(models.AssertionSourceTypeClass):
            raise SDKUsageError(
                msg=f"Invalid source type: {source_type}, valid options are {get_enum_options(models.AssertionSourceTypeClass)}",
            )
        self.source_type = source_type
        self.created_by = created_by
        self.created_at = created_at
        self.updated_by = updated_by
        self.updated_at = updated_at
        # Lazily populated by _get_schema_field_spec to avoid refetching.
        self.cached_dataset: Optional[Dataset] = None

    def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
        """
        Convert the assertion input to an assertion and monitor entity.

        Returns:
            A tuple of (assertion, monitor) entities.
        """
        assertion = self.to_assertion_entity()
        monitor = self.to_monitor_entity(assertion.urn)
        return assertion, monitor

    def to_assertion_entity(self) -> Assertion:
        """
        Convert the assertion input to an assertion entity.

        Returns:
            The created assertion entity.
        """
        on_success, on_failure = self._convert_incident_behavior()
        filter = self._create_filter_from_detection_mechanism()

        return Assertion(
            id=self.urn,
            info=self._create_assertion_info(filter),
            description=self.display_name,
            on_success=on_success,
            on_failure=on_failure,
            tags=self._convert_tags(),
            source=self._convert_source(),
            last_updated=self._convert_last_updated(),
        )

    def _convert_incident_behavior(
        self,
    ) -> tuple[
        Optional[AssertionActionsInputType],
        Optional[AssertionActionsInputType],
    ]:
        """
        Convert incident behavior to on_success and on_failure actions.

        Returns:
            A tuple of (on_success, on_failure) actions.
        """
        if not self.incident_behavior:
            return None, None

        behaviors = (
            [self.incident_behavior]
            if isinstance(self.incident_behavior, AssertionIncidentBehavior)
            else self.incident_behavior
        )

        # `or None` collapses an empty action list to None so the entity
        # carries no action rather than an empty one.
        on_success: Optional[AssertionActionsInputType] = [
            models.AssertionActionClass(
                type=models.AssertionActionTypeClass.RESOLVE_INCIDENT
            )
            for behavior in behaviors
            if behavior == AssertionIncidentBehavior.RESOLVE_ON_PASS
        ] or None

        on_failure: Optional[AssertionActionsInputType] = [
            models.AssertionActionClass(
                type=models.AssertionActionTypeClass.RAISE_INCIDENT
            )
            for behavior in behaviors
            if behavior == AssertionIncidentBehavior.RAISE_ON_FAIL
        ] or None

        return on_success, on_failure

    def _create_filter_from_detection_mechanism(
        self,
    ) -> Optional[models.DatasetFilterClass]:
        """
        Create a filter from the detection mechanism if it has an additional filter.

        Returns:
            A DatasetFilterClass if the detection mechanism has an additional filter, None otherwise.
        """
        if not isinstance(
            self.detection_mechanism,
            _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER,
        ):
            return None

        additional_filter = self.detection_mechanism.additional_filter
        if not additional_filter:
            return None

        return models.DatasetFilterClass(
            type=models.DatasetFilterTypeClass.SQL,
            sql=additional_filter,
        )

    def _convert_tags(self) -> Optional[TagsInputType]:
        """
        Convert the tags input into a standardized format.

        Returns:
            A list of tags or None if no tags are provided.

        Raises:
            SDKUsageErrorWithExamples: If the tags input is invalid.
        """
        if not self.tags:
            return None

        if isinstance(self.tags, str):
            return [self.tags]
        elif isinstance(self.tags, list):
            return self.tags
        else:
            raise SDKUsageErrorWithExamples(
                msg=f"Invalid tags: {self.tags}",
                examples={
                    "Tags from string": "urn:li:tag:my_tag_1",
                    "Tags from list": [
                        "urn:li:tag:my_tag_1",
                        "urn:li:tag:my_tag_2",
                    ],
                },
            )

    def _convert_source(self) -> models.AssertionSourceClass:
        """
        Convert the source input into a models.AssertionSourceClass.
        """
        return models.AssertionSourceClass(
            type=self.source_type,
            created=models.AuditStampClass(
                time=make_ts_millis(self.created_at),
                actor=str(self.created_by),
            ),
        )

    def _convert_last_updated(self) -> tuple[datetime, str]:
        """
        Convert the last updated input into a tuple of (datetime, str).

        Validation is handled in the Assertion entity constructor.
        """
        return (self.updated_at, str(self.updated_by))

    def to_monitor_entity(self, assertion_urn: AssertionUrn) -> Monitor:
        """
        Convert the assertion input to a monitor entity.

        Args:
            assertion_urn: The URN of the assertion to monitor.

        Returns:
            A Monitor entity configured with the assertion input parameters.
        """
        return Monitor(
            id=(self.dataset_urn, assertion_urn),
            info=self._create_monitor_info(
                assertion_urn=assertion_urn,
                status=self._convert_monitor_status(),
                schedule=self._convert_schedule(),
            ),
        )

    def _convert_monitor_status(self) -> models.MonitorStatusClass:
        """
        Convert the enabled flag into a MonitorStatusClass.

        Returns:
            A MonitorStatusClass with ACTIVE or INACTIVE mode based on the enabled flag.
        """
        return models.MonitorStatusClass(
            mode=models.MonitorModeClass.ACTIVE
            if self.enabled
            else models.MonitorModeClass.INACTIVE,
        )

    def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
        """
        Get the schema field spec for the detection mechanism if needed.

        Args:
            column_name: The column to look up in the dataset's schema.

        Returns:
            A SchemaFieldSpecClass for the column.

        Raises:
            SDKUsageError: If the dataset or the column cannot be found.
        """
        # Only fetch the dataset if it's not already cached.
        # Also we only fetch the dataset if it's needed for the detection mechanism.
        if self.cached_dataset is None:
            self.cached_dataset = self.entity_client.get(self.dataset_urn)

        # Handle case where dataset doesn't exist
        if self.cached_dataset is None:
            raise SDKUsageError(
                f"Dataset {self.dataset_urn} not found. Cannot validate column {column_name}."
            )

        # TODO: Make a public accessor for _schema_dict in the SDK
        schema_fields = self.cached_dataset._schema_dict()
        field = schema_fields.get(column_name)
        if field:
            return models.SchemaFieldSpecClass(
                path=field.fieldPath,
                type=field.type.type.__class__.__name__,
                nativeType=field.nativeDataType,
            )
        else:
            raise SDKUsageError(
                msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
            )

    def _validate_field_type(
        self,
        field_spec: models.SchemaFieldSpecClass,
        column_name: str,
        allowed_types: list[DictWrapper],
        field_type_name: str,
    ) -> None:
        """
        Validate that a field has an allowed type.

        Args:
            field_spec: The field specification to validate
            column_name: The name of the column for error messages
            allowed_types: List of allowed field types
            field_type_name: Human-readable name of the field type for error messages

        Raises:
            SDKUsageError: If the field has an invalid type
        """
        allowed_type_names = [t.__class__.__name__ for t in allowed_types]
        if field_spec.type not in allowed_type_names:
            raise SDKUsageError(
                msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
                f"Allowed types are {allowed_type_names}.",
            )

    @abstractmethod
    def _create_monitor_info(
        self,
        assertion_urn: AssertionUrn,
        status: models.MonitorStatusClass,
        schedule: models.CronScheduleClass,
    ) -> models.MonitorInfoClass:
        """
        Create a MonitorInfoClass with all the necessary components.

        Args:
            status: The monitor status.
            schedule: The monitor schedule.
        Returns:
            A MonitorInfoClass configured with all the provided components.
        """
        pass

    @abstractmethod
    def _assertion_type(self) -> str:
        """Get the assertion type."""
        pass

    @abstractmethod
    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """Create assertion info specific to the assertion type."""
        pass

    @abstractmethod
    def _convert_schedule(self) -> models.CronScheduleClass:
        """Convert schedule to appropriate format for the assertion type."""
        pass

    @abstractmethod
    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Get evaluation parameters specific to the assertion type."""
        pass

    @abstractmethod
    def _convert_assertion_source_type_and_field(
        self,
    ) -> tuple[str, Optional[FieldSpecType]]:
        """Convert detection mechanism to source type and field spec."""
        pass
1260
+
1261
+
1262
class _HasFreshnessFeatures:
    """Mixin providing freshness-specific field-spec construction."""

    def _create_field_spec(
        self,
        column_name: str,
        allowed_types: list[DictWrapper],  # TODO: Use the type from the PDL
        field_type_name: str,
        kind: str,
        get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
        validate_field_type: Callable[
            [models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
        ],
    ) -> models.FreshnessFieldSpecClass:
        """
        Create a field specification for a column, validating its type.

        Args:
            column_name: The name of the column to create a spec for
            allowed_types: List of allowed field types
            field_type_name: Human-readable name of the field type for error messages
            kind: The kind of field to create
            get_schema_field_spec: Callback that resolves a column name to its schema field spec
            validate_field_type: Callback that validates the field spec against allowed_types

        Returns:
            A FreshnessFieldSpecClass for the column

        Raises:
            SDKUsageError: If the column is not found or has an invalid type
        """
        SUPPORTED_KINDS = [
            models.FreshnessFieldKindClass.LAST_MODIFIED,
            models.FreshnessFieldKindClass.HIGH_WATERMARK,
        ]
        if kind not in SUPPORTED_KINDS:
            raise SDKUsageError(
                msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
            )

        field_spec = get_schema_field_spec(column_name)
        validate_field_type(field_spec, column_name, allowed_types, field_type_name)
        return models.FreshnessFieldSpecClass(
            path=field_spec.path,
            type=field_spec.type,
            nativeType=field_spec.nativeType,
            kind=kind,
        )
1306
+
1307
+
1308
class _SmartFreshnessAssertionInput(
    _AssertionInput, _HasSmartAssertionInputs, _HasFreshnessFeatures
):
    """Input for a smart (AI-inferred) freshness assertion.

    Combines the common assertion input handling with smart-assertion
    settings (sensitivity, exclusion windows, training lookback) and
    freshness-specific field-spec construction.
    """

    def __init__(
        self,
        *,
        # Required fields
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
        # Optional fields
        urn: Optional[Union[str, AssertionUrn]] = None,
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        detection_mechanism: DetectionMechanismInputTypes = None,
        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
        training_data_lookback_days: Optional[int] = None,
        incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
        tags: Optional[TagsInputType] = None,
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
    ):
        # Explicit base __init__ calls (rather than super()) because the two
        # bases take disjoint keyword sets.
        _AssertionInput.__init__(
            self,
            dataset_urn=dataset_urn,
            entity_client=entity_client,
            urn=urn,
            display_name=display_name,
            enabled=enabled,
            schedule=schedule
            if schedule is not None
            else DEFAULT_HOURLY_SCHEDULE,  # Use provided schedule or default for create case
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            source_type=models.AssertionSourceTypeClass.INFERRED,  # Smart assertions are of type inferred, not native
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        _HasSmartAssertionInputs.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )

    def _assertion_type(self) -> str:
        """Get the assertion type."""
        return models.AssertionTypeClass.FRESHNESS

    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """
        Create a FreshnessAssertionInfoClass for a smart freshness assertion.

        Args:
            filter: Optional filter to apply to the assertion.

        Returns:
            A FreshnessAssertionInfoClass configured for smart freshness.
        """
        return models.FreshnessAssertionInfoClass(
            type=models.FreshnessAssertionTypeClass.DATASET_CHANGE,  # Currently only dataset change is supported
            entity=str(self.dataset_urn),
            # schedule (optional, must be left empty for smart freshness assertions - managed by the AI inference engine)
            filter=filter,
        )

    def _convert_schedule(self) -> models.CronScheduleClass:
        """Create a schedule for a smart freshness assertion.

        For create case, uses DEFAULT_HOURLY_SCHEDULE. For update case, preserves existing schedule.

        Returns:
            A CronScheduleClass with appropriate schedule settings.
        """
        assert self.schedule is not None, (
            "Schedule should never be None due to constructor logic"
        )
        return self.schedule

    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Build the DATASET_FRESHNESS evaluation parameters for the monitor.

        Raises:
            SDKUsageError: If a field is supplied that is not a FreshnessFieldSpecClass.
        """
        # Ensure field is either None or FreshnessFieldSpecClass
        freshness_field = None
        if field is not None:
            if not isinstance(field, models.FreshnessFieldSpecClass):
                raise SDKUsageError(
                    f"Expected FreshnessFieldSpecClass for freshness assertion, got {type(field).__name__}"
                )
            freshness_field = field

        return models.AssertionEvaluationParametersClass(
            type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
            datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
                sourceType=source_type, field=freshness_field
            ),
        )

    def _convert_assertion_source_type_and_field(
        self,
    ) -> tuple[str, Optional[FieldSpecType]]:
        """
        Convert detection mechanism into source type and field specification for freshness assertions.

        Returns:
            A tuple of (source_type, field) where field may be None.
            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
            the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.

        Raises:
            SDKNotYetSupportedError: If the detection mechanism is not supported.
            SDKUsageError: If the field (column) is not found in the dataset,
                and the detection mechanism requires a field. Also if the field
                is not an allowed type for the detection mechanism.
        """
        source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
        field = None

        if isinstance(self.detection_mechanism, _LastModifiedColumn):
            source_type = models.DatasetFreshnessSourceTypeClass.FIELD_VALUE
            field = self._create_field_spec(
                self.detection_mechanism.column_name,
                LAST_MODIFIED_ALLOWED_FIELD_TYPES,
                "last modified column",
                models.FreshnessFieldKindClass.LAST_MODIFIED,
                self._get_schema_field_spec,
                self._validate_field_type,
            )
        elif isinstance(self.detection_mechanism, _InformationSchema):
            source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
        elif isinstance(self.detection_mechanism, _DataHubOperation):
            source_type = models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION
        elif isinstance(self.detection_mechanism, _AuditLog):
            source_type = models.DatasetFreshnessSourceTypeClass.AUDIT_LOG
        else:
            raise SDKNotYetSupportedError(
                f"Detection mechanism {self.detection_mechanism} not yet supported for smart freshness assertions"
            )

        return source_type, field

    def _create_monitor_info(
        self,
        assertion_urn: AssertionUrn,
        status: models.MonitorStatusClass,
        schedule: models.CronScheduleClass,
    ) -> models.MonitorInfoClass:
        """
        Create a MonitorInfoClass with all the necessary components.
        """
        source_type, field = self._convert_assertion_source_type_and_field()
        return models.MonitorInfoClass(
            type=models.MonitorTypeClass.ASSERTION,
            status=status,
            assertionMonitor=models.AssertionMonitorClass(
                assertions=[
                    models.AssertionEvaluationSpecClass(
                        assertion=str(assertion_urn),
                        schedule=schedule,
                        parameters=self._get_assertion_evaluation_parameters(
                            str(source_type), field
                        ),
                    ),
                ],
                settings=models.AssertionMonitorSettingsClass(
                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
                        sensitivity=self._convert_sensitivity(),
                        exclusionWindows=self._convert_exclusion_windows(),
                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
                    ),
                ),
            ),
        )
1489
+
1490
+
1491
+ class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
1492
+ def __init__(
1493
+ self,
1494
+ *,
1495
+ # Required fields
1496
+ dataset_urn: Union[str, DatasetUrn],
1497
+ entity_client: EntityClient, # Needed to get the schema field spec for the detection mechanism if needed
1498
+ # Optional fields
1499
+ urn: Optional[Union[str, AssertionUrn]] = None,
1500
+ display_name: Optional[str] = None,
1501
+ enabled: bool = True,
1502
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1503
+ detection_mechanism: DetectionMechanismInputTypes = None,
1504
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
1505
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
1506
+ training_data_lookback_days: Optional[int] = None,
1507
+ incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
1508
+ tags: Optional[TagsInputType] = None,
1509
+ created_by: Union[str, CorpUserUrn],
1510
+ created_at: datetime,
1511
+ updated_by: Union[str, CorpUserUrn],
1512
+ updated_at: datetime,
1513
+ ):
1514
+ _AssertionInput.__init__(
1515
+ self,
1516
+ dataset_urn=dataset_urn,
1517
+ entity_client=entity_client,
1518
+ urn=urn,
1519
+ display_name=display_name,
1520
+ enabled=enabled,
1521
+ schedule=schedule,
1522
+ detection_mechanism=detection_mechanism,
1523
+ incident_behavior=incident_behavior,
1524
+ tags=tags,
1525
+ source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
1526
+ created_by=created_by,
1527
+ created_at=created_at,
1528
+ updated_by=updated_by,
1529
+ updated_at=updated_at,
1530
+ )
1531
+ _HasSmartAssertionInputs.__init__(
1532
+ self,
1533
+ sensitivity=sensitivity,
1534
+ exclusion_windows=exclusion_windows,
1535
+ training_data_lookback_days=training_data_lookback_days,
1536
+ )
1537
+
1538
+ def _create_assertion_info(
1539
+ self, filter: Optional[models.DatasetFilterClass]
1540
+ ) -> AssertionInfoInputType:
1541
+ """
1542
+ Create a VolumeAssertionInfoClass for a smart volume assertion.
1543
+
1544
+ Args:
1545
+ filter: Optional filter to apply to the assertion.
1546
+
1547
+ Returns:
1548
+ A VolumeAssertionInfoClass configured for smart volume.
1549
+ """
1550
+ return models.VolumeAssertionInfoClass(
1551
+ type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL, # Currently only ROW_COUNT_TOTAL is supported for smart volume
1552
+ entity=str(self.dataset_urn),
1553
+ filter=filter,
1554
+ )
1555
+
1556
+ def _convert_schedule(self) -> models.CronScheduleClass:
1557
+ """Create a schedule for a smart volume assertion.
1558
+
1559
+ Returns:
1560
+ A CronScheduleClass with appropriate schedule settings.
1561
+ """
1562
+ if self.schedule is None:
1563
+ return DEFAULT_HOURLY_SCHEDULE
1564
+
1565
+ return models.CronScheduleClass(
1566
+ cron=self.schedule.cron,
1567
+ timezone=self.schedule.timezone,
1568
+ )
1569
+
1570
+ def _get_assertion_evaluation_parameters(
1571
+ self, source_type: str, field: Optional[FieldSpecType]
1572
+ ) -> models.AssertionEvaluationParametersClass:
1573
+ return models.AssertionEvaluationParametersClass(
1574
+ type=models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
1575
+ datasetVolumeParameters=models.DatasetVolumeAssertionParametersClass(
1576
+ sourceType=source_type,
1577
+ ),
1578
+ )
1579
+
1580
+ def _convert_assertion_source_type_and_field(
1581
+ self,
1582
+ ) -> tuple[str, Optional[FieldSpecType]]:
1583
+ """
1584
+ Convert detection mechanism into source type and field specification for volume assertions.
1585
+
1586
+ Returns:
1587
+ A tuple of (source_type, field) where field may be None.
1588
+ Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
1589
+ the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
1590
+
1591
+ Raises:
1592
+ SDKNotYetSupportedError: If the detection mechanism is not supported.
1593
+ SDKUsageError: If the field (column) is not found in the dataset,
1594
+ and the detection mechanism requires a field. Also if the field
1595
+ is not an allowed type for the detection mechanism.
1596
+ """
1597
+ source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
1598
+ field = None
1599
+
1600
+ if isinstance(self.detection_mechanism, _Query):
1601
+ source_type = models.DatasetVolumeSourceTypeClass.QUERY
1602
+ elif isinstance(self.detection_mechanism, _InformationSchema):
1603
+ source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
1604
+ elif isinstance(self.detection_mechanism, _DatasetProfile):
1605
+ source_type = models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE
1606
+ else:
1607
+ raise SDKNotYetSupportedError(
1608
+ f"Detection mechanism {self.detection_mechanism} not yet supported for smart volume assertions"
1609
+ )
1610
+
1611
+ return source_type, field
1612
+
1613
+ def _create_monitor_info(
1614
+ self,
1615
+ assertion_urn: AssertionUrn,
1616
+ status: models.MonitorStatusClass,
1617
+ schedule: models.CronScheduleClass,
1618
+ ) -> models.MonitorInfoClass:
1619
+ """
1620
+ Create a MonitorInfoClass with all the necessary components.
1621
+ """
1622
+ source_type, field = self._convert_assertion_source_type_and_field()
1623
+ return models.MonitorInfoClass(
1624
+ type=models.MonitorTypeClass.ASSERTION,
1625
+ status=status,
1626
+ assertionMonitor=models.AssertionMonitorClass(
1627
+ assertions=[
1628
+ models.AssertionEvaluationSpecClass(
1629
+ assertion=str(assertion_urn),
1630
+ schedule=schedule,
1631
+ parameters=self._get_assertion_evaluation_parameters(
1632
+ str(source_type), field
1633
+ ),
1634
+ ),
1635
+ ],
1636
+ settings=models.AssertionMonitorSettingsClass(
1637
+ adjustmentSettings=models.AssertionAdjustmentSettingsClass(
1638
+ sensitivity=self._convert_sensitivity(),
1639
+ exclusionWindows=self._convert_exclusion_windows(),
1640
+ trainingDataLookbackWindowDays=self.training_data_lookback_days,
1641
+ ),
1642
+ ),
1643
+ ),
1644
+ )
1645
+
1646
+ def _assertion_type(self) -> str:
1647
+ """Get the assertion type."""
1648
+ return models.AssertionTypeClass.VOLUME