acryl-datahub-cloud 0.3.11.1rc8__py3-none-any.whl → 0.3.12rc1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub-cloud might be problematic.

Files changed (37)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/_sdk_extras/__init__.py +17 -2
  3. acryl_datahub_cloud/_sdk_extras/assertion.py +603 -8
  4. acryl_datahub_cloud/_sdk_extras/assertion_input.py +1074 -0
  5. acryl_datahub_cloud/_sdk_extras/assertions_client.py +705 -11
  6. acryl_datahub_cloud/_sdk_extras/entities/__init__.py +0 -0
  7. acryl_datahub_cloud/_sdk_extras/entities/assertion.py +425 -0
  8. acryl_datahub_cloud/_sdk_extras/entities/monitor.py +291 -0
  9. acryl_datahub_cloud/_sdk_extras/entities/subscription.py +84 -0
  10. acryl_datahub_cloud/_sdk_extras/errors.py +34 -0
  11. acryl_datahub_cloud/_sdk_extras/resolver_client.py +39 -0
  12. acryl_datahub_cloud/_sdk_extras/subscription_client.py +565 -0
  13. acryl_datahub_cloud/action_request/action_request_owner_source.py +36 -6
  14. acryl_datahub_cloud/metadata/_urns/urn_defs.py +2023 -2023
  15. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/assertion/__init__.py +2 -2
  16. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
  17. acryl_datahub_cloud/metadata/schema.avsc +24889 -25252
  18. acryl_datahub_cloud/metadata/schema_classes.py +1133 -1008
  19. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +189 -201
  20. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +9 -1
  21. acryl_datahub_cloud/metadata/schemas/AssertionKey.avsc +1 -1
  22. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +9 -1
  23. acryl_datahub_cloud/metadata/schemas/{AssertionSummary.avsc → AssertionRunSummary.avsc} +2 -2
  24. acryl_datahub_cloud/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  25. acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
  26. acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +27 -0
  27. acryl_datahub_cloud/metadata/schemas/MonitorAnomalyEvent.avsc +21 -9
  28. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +12 -4
  29. acryl_datahub_cloud/metadata/schemas/MonitorSuiteInfo.avsc +1 -1
  30. acryl_datahub_cloud/metadata/schemas/Operation.avsc +17 -0
  31. acryl_datahub_cloud/metadata/schemas/SubscriptionInfo.avsc +3 -3
  32. acryl_datahub_cloud/metadata/schemas/__init__.py +3 -3
  33. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/METADATA +46 -46
  34. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/RECORD +37 -28
  35. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/WHEEL +1 -1
  36. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/entry_points.txt +0 -0
  37. {acryl_datahub_cloud-0.3.11.1rc8.dist-info → acryl_datahub_cloud-0.3.12rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1074 @@
+"""
+This file contains the AssertionInput class and related classes, which are used to
+validate and represent the input for creating an Assertion in DataHub.
+"""
+
+import random
+import string
+from abc import ABC, abstractmethod
+from datetime import datetime
+from enum import Enum
+from typing import Literal, Optional, TypeAlias, Union
+
+import pydantic
+from avrogen.dict_wrapper import DictWrapper
+from pydantic import BaseModel, Extra, ValidationError
+
+from acryl_datahub_cloud._sdk_extras.entities.assertion import (
+    Assertion,
+    AssertionActionsInputType,
+    AssertionInfoInputType,
+    TagsInputType,
+)
+from acryl_datahub_cloud._sdk_extras.entities.monitor import Monitor
+from acryl_datahub_cloud._sdk_extras.errors import (
+    SDKNotYetSupportedError,
+    SDKUsageError,
+    SDKUsageErrorWithExamples,
+)
+from datahub.emitter.enum_helpers import get_enum_options
+from datahub.emitter.mce_builder import make_ts_millis, parse_ts_millis
+from datahub.metadata import schema_classes as models
+from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
+from datahub.sdk import Dataset
+from datahub.sdk.entity_client import EntityClient
+
+# TODO: Import ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS from datahub_executor.config
+ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60
+
+DEFAULT_NAME_PREFIX = "New Assertion"
+DEFAULT_NAME_SUFFIX_LENGTH = 8
+
+
+class AbstractDetectionMechanism(BaseModel, ABC):
+    type: str
+
+    class Config:
+        extra = Extra.forbid
+
+
+class _InformationSchema(AbstractDetectionMechanism):
+    type: Literal["information_schema"] = "information_schema"
+
+
+class _AuditLog(AbstractDetectionMechanism):
+    type: Literal["audit_log"] = "audit_log"
+
+
+# Keep this in sync with the allowed field types in the UI, currently in
+# datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertion/builder/constants.ts: LAST_MODIFIED_FIELD_TYPES
+LAST_MODIFIED_ALLOWED_FIELD_TYPES = [models.DateTypeClass(), models.TimeTypeClass()]
+
+
+class _LastModifiedColumn(AbstractDetectionMechanism):
+    type: Literal["last_modified_column"] = "last_modified_column"
+    column_name: str
+    additional_filter: Optional[str] = None
+
+
+# Keep this in sync with the allowed field types in the UI, currently in
+# datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/assertion/builder/constants.ts: HIGH_WATERMARK_FIELD_TYPES
+HIGH_WATERMARK_ALLOWED_FIELD_TYPES = [
+    models.NumberTypeClass(),
+    models.DateTypeClass(),
+    models.TimeTypeClass(),
+]
+
+
+class _HighWatermarkColumn(AbstractDetectionMechanism):
+    type: Literal["high_watermark_column"] = "high_watermark_column"
+    column_name: str
+    additional_filter: Optional[str] = None
+
+
+class _DataHubOperation(AbstractDetectionMechanism):
+    type: Literal["datahub_operation"] = "datahub_operation"
+
+
+# Keep these two lists in sync:
+_DETECTION_MECHANISM_CONCRETE_TYPES = (
+    _InformationSchema,
+    _AuditLog,
+    _LastModifiedColumn,
+    _HighWatermarkColumn,
+    _DataHubOperation,
+)
+_DetectionMechanismTypes = Union[
+    _InformationSchema,
+    _AuditLog,
+    _LastModifiedColumn,
+    _HighWatermarkColumn,
+    _DataHubOperation,
+]
+
+
+class DetectionMechanism:
+    # To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
+    # The options with sub parameters are the classes themselves so that parameters can be applied, and the rest are already instantiated instances of the classes.
+    INFORMATION_SCHEMA = _InformationSchema()
+    AUDIT_LOG = _AuditLog()
+    LAST_MODIFIED_COLUMN = _LastModifiedColumn
+    HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
+    DATAHUB_OPERATION = _DataHubOperation()
+
+    _DETECTION_MECHANISM_EXAMPLES = {
+        "Information Schema from string": "information_schema",
+        "Information Schema from DetectionMechanism": "DetectionMechanism.INFORMATION_SCHEMA",
+        "Audit Log from string": "audit_log",
+        "Audit Log from DetectionMechanism": "DetectionMechanism.AUDIT_LOG",
+        "Last Modified Column from dict": {
+            "type": "last_modified_column",
+            "column_name": "last_modified",
+            "additional_filter": "last_modified > '2021-01-01'",
+        },
+        "Last Modified Column from DetectionMechanism": "DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified', additional_filter='last_modified > 2021-01-01')",
+        "High Watermark Column from dict": {
+            "type": "high_watermark_column",
+            "column_name": "id",
+            "additional_filter": "id > 1000",
+        },
+        "High Watermark Column from DetectionMechanism": "DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id', additional_filter='id > 1000')",
+        "DataHub Operation from string": "datahub_operation",
+        "DataHub Operation from DetectionMechanism": "DetectionMechanism.DATAHUB_OPERATION",
+    }
+
+    @staticmethod
+    def parse(
+        detection_mechanism_config: Optional[
+            Union[str, dict[str, str], _DetectionMechanismTypes]
+        ] = None,
+    ) -> _DetectionMechanismTypes:
+        if detection_mechanism_config is None:
+            return DEFAULT_DETECTION_MECHANISM
+        if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
+            return detection_mechanism_config
+        elif isinstance(detection_mechanism_config, str):
+            return DetectionMechanism._try_parse_from_string(detection_mechanism_config)
+        elif isinstance(detection_mechanism_config, dict):
+            return DetectionMechanism._try_parse_from_dict(detection_mechanism_config)
+        else:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid detection mechanism: {detection_mechanism_config}",
+                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
+            )
+
+    @staticmethod
+    def _try_parse_from_string(
+        detection_mechanism_config: str,
+    ) -> _DetectionMechanismTypes:
+        try:
+            return_value = getattr(
+                DetectionMechanism, detection_mechanism_config.upper()
+            )
+            if isinstance(return_value, pydantic.main.ModelMetaclass):
+                try:
+                    # We try to instantiate here to let pydantic raise a helpful error
+                    # about which parameters are missing
+                    return_value = return_value()
+                except ValidationError as e:
+                    raise SDKUsageErrorWithExamples(
+                        msg=f"Detection mechanism type '{detection_mechanism_config}' requires additional parameters: {e}",
+                        examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
+                    ) from e
+            return return_value
+        except AttributeError as e:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid detection mechanism type: {detection_mechanism_config}",
+                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
+            ) from e
+
+    @staticmethod
+    def _try_parse_from_dict(
+        detection_mechanism_config: dict[str, str],
+    ) -> _DetectionMechanismTypes:
+        try:
+            detection_mechanism_type = detection_mechanism_config.pop("type")
+        except KeyError as e:
+            raise SDKUsageErrorWithExamples(
+                msg="Detection mechanism type is required if using a dict to create a DetectionMechanism",
+                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
+            ) from e
+        try:
+            detection_mechanism_obj = getattr(
+                DetectionMechanism, detection_mechanism_type.upper()
+            )
+        except AttributeError as e:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid detection mechanism type: {detection_mechanism_type}",
+                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
+            ) from e
+
+        try:
+            return detection_mechanism_obj(**detection_mechanism_config)
+        except TypeError as e:
+            if "object is not callable" not in e.args[0]:
+                raise e
+            if detection_mechanism_config:
+                # If we are here in the TypeError case, the detection mechanism is an instance of a class,
+                # not a class itself, so we can't instantiate it with the config dict.
+                # In this case, the config dict should be empty after the type is popped.
+                # If it is not empty, we raise an error.
+                raise SDKUsageErrorWithExamples(
+                    msg=f"Invalid additional fields specified for detection mechanism '{detection_mechanism_type}': {detection_mechanism_config}",
+                    examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
+                ) from e
+            return detection_mechanism_obj
+        except ValidationError as e:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid detection mechanism type '{detection_mechanism_type}': {detection_mechanism_config} {e}",
+                examples=DetectionMechanism._DETECTION_MECHANISM_EXAMPLES,
+            ) from e
+
+
+DEFAULT_DETECTION_MECHANISM = DetectionMechanism.INFORMATION_SCHEMA
+
+DetectionMechanismInputTypes: TypeAlias = Union[
+    str, dict[str, str], _DetectionMechanismTypes, None
+]
+
+
+class InferenceSensitivity(Enum):
+    HIGH = "high"
+    MEDIUM = "medium"
+    LOW = "low"
+
+    @staticmethod
+    def parse(
+        sensitivity: Optional[
+            Union[
+                str,
+                int,
+                "InferenceSensitivity",
+                models.AssertionMonitorSensitivityClass,
+            ]
+        ],
+    ) -> "InferenceSensitivity":
+        if sensitivity is None:
+            return DEFAULT_SENSITIVITY
+        EXAMPLES = {
+            "High sensitivity from string": "high",
+            "High sensitivity from enum": "InferenceSensitivity.HIGH",
+            "Medium sensitivity from string": "medium",
+            "Medium sensitivity from enum": "InferenceSensitivity.MEDIUM",
+            "Low sensitivity from string": "low",
+            "Low sensitivity from enum": "InferenceSensitivity.LOW",
+            "Sensitivity from int (1-3: low, 4-6: medium, 7-10: high)": "10",
+        }
+
+        if isinstance(sensitivity, InferenceSensitivity):
+            return sensitivity
+        if isinstance(sensitivity, models.AssertionMonitorSensitivityClass):
+            sensitivity = sensitivity.level
+        if isinstance(sensitivity, int):
+            if (sensitivity < 1) or (sensitivity > 10):
+                raise SDKUsageErrorWithExamples(
+                    msg=f"Invalid inference sensitivity: {sensitivity}",
+                    examples=EXAMPLES,
+                )
+            elif sensitivity < 4:
+                return InferenceSensitivity.LOW
+            elif sensitivity < 7:
+                return InferenceSensitivity.MEDIUM
+            else:
+                return InferenceSensitivity.HIGH
+        try:
+            return InferenceSensitivity(sensitivity)
+        except ValueError as e:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid inference sensitivity: {sensitivity}",
+                examples=EXAMPLES,
+            ) from e
+
+    @staticmethod
+    def to_int(sensitivity: "InferenceSensitivity") -> int:
+        return {
+            InferenceSensitivity.HIGH: 10,
+            InferenceSensitivity.MEDIUM: 5,
+            InferenceSensitivity.LOW: 1,
+        }[sensitivity]
+
+
+DEFAULT_SENSITIVITY = InferenceSensitivity.MEDIUM
+
+
+class FixedRangeExclusionWindow(BaseModel):
+    type: Literal["fixed_range_exclusion_window"] = "fixed_range_exclusion_window"
+    start: datetime
+    end: datetime
+
+
+ExclusionWindowTypes: TypeAlias = Union[
+    FixedRangeExclusionWindow,
+    # Add other exclusion window types here as they are added to the SDK.
+]
+
+FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES = {
+    "Exclusion Window from datetimes": {
+        "start": "datetime(2025, 1, 1, 0, 0, 0)",
+        "end": "datetime(2025, 1, 2, 0, 0, 0)",
+    },
+    "Exclusion Window from strings": {
+        "start": "2025-01-01T00:00:00",
+        "end": "2025-01-02T00:00:00",
+    },
+    "Exclusion Window from object": "ExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0), end=datetime(2025, 1, 2, 0, 0, 0))",
+}
+FixedRangeExclusionWindowInputTypes: TypeAlias = Union[
+    dict[str, datetime],
+    dict[str, str],
+    list[dict[str, datetime]],
+    list[dict[str, str]],
+    FixedRangeExclusionWindow,
+    list[FixedRangeExclusionWindow],
+]
+
+ExclusionWindowInputTypes: TypeAlias = Union[
+    models.AssertionExclusionWindowClass,
+    list[models.AssertionExclusionWindowClass],
+    FixedRangeExclusionWindowInputTypes,
+    # Add other exclusion window types here as they are added to the SDK.
+]
+
+IterableExclusionWindowInputTypes: TypeAlias = Union[
+    list[dict[str, datetime]],
+    list[dict[str, str]],
+    list[FixedRangeExclusionWindow],
+    list[models.AssertionExclusionWindowClass],
+]
+
+
+def _try_parse_exclusion_window(
+    config: Optional[ExclusionWindowInputTypes],
+) -> Union[FixedRangeExclusionWindow, list[FixedRangeExclusionWindow], None]:
+    if config is None:
+        return []
+    if isinstance(config, dict):
+        return [FixedRangeExclusionWindow(**config)]
+    if isinstance(config, FixedRangeExclusionWindow):
+        return [config]
+    elif isinstance(config, models.AssertionExclusionWindowClass):
+        assert config.fixedRange is not None
+        return [
+            FixedRangeExclusionWindow(
+                start=parse_ts_millis(config.fixedRange.startTimeMillis),
+                end=parse_ts_millis(config.fixedRange.endTimeMillis),
+            )
+        ]
+    elif isinstance(config, list):
+        return _try_parse_list_of_exclusion_windows(config)
+    else:
+        raise SDKUsageErrorWithExamples(
+            msg=f"Invalid exclusion window: {config}",
+            examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
+        )
+
+
+def _try_parse_list_of_exclusion_windows(
+    config: IterableExclusionWindowInputTypes,
+) -> Union[list[FixedRangeExclusionWindow], None]:
+    if all(isinstance(item, models.AssertionExclusionWindowClass) for item in config):
+        exclusion_windows = []
+        for item in config:
+            assert isinstance(item, models.AssertionExclusionWindowClass)
+            assert item.fixedRange is not None
+            exclusion_windows.append(
+                FixedRangeExclusionWindow(
+                    start=parse_ts_millis(item.fixedRange.startTimeMillis),
+                    end=parse_ts_millis(item.fixedRange.endTimeMillis),
+                )
+            )
+        return exclusion_windows
+    else:
+        exclusion_windows = []
+        for item in config:
+            if isinstance(item, dict):
+                try:
+                    exclusion_windows.append(FixedRangeExclusionWindow(**item))
+                except ValidationError as e:
+                    raise SDKUsageErrorWithExamples(
+                        msg=f"Invalid exclusion window: {item}",
+                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
+                    ) from e
+            elif isinstance(item, FixedRangeExclusionWindow):
+                exclusion_windows.append(item)
+            elif item is None:
+                pass
+            else:
+                raise SDKUsageErrorWithExamples(
+                    msg=f"Invalid exclusion window: {item}",
+                    examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
+                )
+        return exclusion_windows
+
+
+class AssertionIncidentBehavior(Enum):
+    RAISE_ON_FAIL = "raise_on_fail"
+    RESOLVE_ON_PASS = "resolve_on_pass"
+
+
+ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES = {
+    "Raise on fail from string": "raise_on_fail",
+    "Raise on fail from enum": "AssertionIncidentBehavior.RAISE_ON_FAIL",
+    "Resolve on pass from string": "resolve_on_pass",
+    "Resolve on pass from enum": "AssertionIncidentBehavior.RESOLVE_ON_PASS",
+}
+
+AssertionIncidentBehaviorInputTypes: TypeAlias = Union[
+    str,
+    list[str],
+    AssertionIncidentBehavior,
+    list[AssertionIncidentBehavior],
+    None,
+]
+
+
+def _try_parse_incident_behavior(
+    config: AssertionIncidentBehaviorInputTypes,
+) -> Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior], None]:
+    if config is None:
+        return []
+    if isinstance(config, str):
+        try:
+            return [AssertionIncidentBehavior(config)]
+        except ValueError as e:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid incident behavior: {config}",
+                examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
+            ) from e
+    if isinstance(config, AssertionIncidentBehavior):
+        return [config]
+    elif isinstance(config, list):
+        incident_behaviors = []
+        for item in config:
+            if isinstance(item, str):
+                try:
+                    incident_behaviors.append(AssertionIncidentBehavior(item))
+                except ValueError as e:
+                    raise SDKUsageErrorWithExamples(
+                        msg=f"Invalid incident behavior: {item}",
+                        examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
+                    ) from e
+            elif isinstance(item, AssertionIncidentBehavior):
+                incident_behaviors.append(item)
+            else:
+                raise SDKUsageErrorWithExamples(
+                    msg=f"Invalid incident behavior: {item}",
+                    examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
+                )
+        return incident_behaviors
+    else:
+        raise SDKUsageErrorWithExamples(
+            msg=f"Invalid incident behavior: {config}",
+            examples=ASSERTION_INCIDENT_BEHAVIOR_EXAMPLES,
+        )
+
+
+def _generate_default_name(prefix: str, suffix_length: int) -> str:
+    return f"{prefix}-{''.join(random.choices(string.ascii_letters + string.digits, k=suffix_length))}"
+
+
+TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES = {
+    "Training data lookback days from int": ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
+    f"Training data lookback days from None (uses default of {ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS} days)": None,
+}
+
+
+def _try_parse_training_data_lookback_days(
+    training_data_lookback_days: Optional[int],
+) -> int:
+    if training_data_lookback_days is None:
+        return ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS
+    if isinstance(training_data_lookback_days, str):
+        try:
+            training_data_lookback_days = int(training_data_lookback_days)
+        except ValueError as e:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid training data lookback days: {training_data_lookback_days}",
+                examples=TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES,
+            ) from e
+    if not isinstance(training_data_lookback_days, int):
+        raise SDKUsageErrorWithExamples(
+            msg=f"Invalid training data lookback days: {training_data_lookback_days}",
+            examples=TRAINING_DATA_LOOKBACK_DAYS_EXAMPLES,
+        )
+    if training_data_lookback_days < 0:
+        raise SDKUsageError("Training data lookback days must be non-negative")
+    return training_data_lookback_days
+
+
+class _AssertionInput(ABC):
+    def __init__(
+        self,
+        *,
+        # Required fields
+        dataset_urn: Union[str, DatasetUrn],
+        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
+        # Optional fields
+        urn: Optional[
+            Union[str, AssertionUrn]
+        ] = None,  # Can be None if the assertion is not yet created
+        display_name: Optional[str] = None,
+        enabled: bool = True,
+        detection_mechanism: DetectionMechanismInputTypes = None,
+        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
+        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
+        training_data_lookback_days: Optional[int] = None,
+        incident_behavior: Optional[
+            Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
+        ] = None,
+        tags: Optional[TagsInputType] = None,
+        source_type: str = models.AssertionSourceTypeClass.NATIVE,  # Verified on init to be a valid enum value
+        created_by: Union[str, CorpUserUrn],
+        created_at: datetime,
+        updated_by: Union[str, CorpUserUrn],
+        updated_at: datetime,
+    ):
+        """
+        Create an AssertionInput object.
+
+        Args:
+            dataset_urn: The urn of the dataset to be monitored.
+            entity_client: The entity client to be used for creating the assertion.
+            urn: The urn of the assertion. If not provided, a random urn will be generated.
+            display_name: The display name of the assertion. If not provided, a random display name will be generated.
+            enabled: Whether the assertion is enabled. Defaults to True.
+            detection_mechanism: The detection mechanism to be used for the assertion.
+            sensitivity: The sensitivity to be applied to the assertion.
+            exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
+            training_data_lookback_days: The training data lookback days to be applied to the assertion.
+            incident_behavior: The incident behavior to be applied to the assertion.
+            tags: The tags to be applied to the assertion.
+            source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
+            created_by: The actor that created the assertion.
+            created_at: The timestamp of the assertion creation.
+            updated_by: The actor that last updated the assertion.
+            updated_at: The timestamp of the assertion last update.
+        """
+        self.dataset_urn = DatasetUrn.from_string(dataset_urn)
+        self.entity_client = entity_client
+        self.urn = AssertionUrn(urn) if urn else None
+        self.display_name = (
+            display_name
+            if display_name is not None
+            else _generate_default_name(DEFAULT_NAME_PREFIX, DEFAULT_NAME_SUFFIX_LENGTH)
+        )
+        self.enabled = enabled
+
+        self.detection_mechanism = DetectionMechanism.parse(detection_mechanism)
+        self.sensitivity = InferenceSensitivity.parse(sensitivity)
+        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
+        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
+            training_data_lookback_days
+        )
+        self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
+        self.tags = tags
+        if source_type not in get_enum_options(models.AssertionSourceTypeClass):
+            raise SDKUsageError(
+                msg=f"Invalid source type: {source_type}, valid options are {get_enum_options(models.AssertionSourceTypeClass)}",
+            )
+        self.source_type = source_type
+        self.created_by = created_by
+        self.created_at = created_at
+        self.updated_by = updated_by
+        self.updated_at = updated_at
+
+        self.cached_dataset: Optional[Dataset] = None
+
+    def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
+        """
+        Convert the assertion input to an assertion and monitor entity.
+
+        Returns:
+            A tuple of (assertion, monitor) entities.
+        """
+        assertion = self.to_assertion_entity()
+        monitor = self.to_monitor_entity(assertion.urn)
+        return assertion, monitor
+
+    def to_assertion_entity(self) -> Assertion:
+        """
+        Convert the assertion input to an assertion entity.
+
+        Returns:
+            The created assertion entity.
+        """
+        on_success, on_failure = self._convert_incident_behavior()
+        filter = self._create_filter_from_detection_mechanism()
+
+        return Assertion(
+            id=self.urn,
+            info=self._create_assertion_info(filter),
+            description=self.display_name,
+            on_success=on_success,
+            on_failure=on_failure,
+            tags=self._convert_tags(),
+            source=self._convert_source(),
+            last_updated=self._convert_last_updated(),
+        )
+
+    def _convert_incident_behavior(
+        self,
+    ) -> tuple[
+        Optional[AssertionActionsInputType],
+        Optional[AssertionActionsInputType],
+    ]:
+        """
+        Convert incident behavior to on_success and on_failure actions.
+
+        Returns:
+            A tuple of (on_success, on_failure) actions.
+        """
+        if not self.incident_behavior:
+            return None, None
+
+        behaviors = (
+            [self.incident_behavior]
+            if isinstance(self.incident_behavior, AssertionIncidentBehavior)
+            else self.incident_behavior
+        )
+
+        on_success: Optional[AssertionActionsInputType] = [
+            models.AssertionActionClass(
+                type=models.AssertionActionTypeClass.RESOLVE_INCIDENT
+            )
+            for behavior in behaviors
+            if behavior == AssertionIncidentBehavior.RESOLVE_ON_PASS
+        ] or None
+
+        on_failure: Optional[AssertionActionsInputType] = [
+            models.AssertionActionClass(
+                type=models.AssertionActionTypeClass.RAISE_INCIDENT
+            )
+            for behavior in behaviors
+            if behavior == AssertionIncidentBehavior.RAISE_ON_FAIL
+        ] or None
+
+        return on_success, on_failure
+
+    def _create_filter_from_detection_mechanism(
+        self,
+    ) -> Optional[models.DatasetFilterClass]:
+        """
+        Create a filter from the detection mechanism if it has an additional filter.
+
+        Returns:
+            A DatasetFilterClass if the detection mechanism has an additional filter, None otherwise.
+        """
+        if not isinstance(
+            self.detection_mechanism,
+            (
+                DetectionMechanism.LAST_MODIFIED_COLUMN,
+                DetectionMechanism.HIGH_WATERMARK_COLUMN,
+            ),
+        ):
+            return None
+
+        additional_filter = self.detection_mechanism.additional_filter
+        if not additional_filter:
+            return None
+
+        return models.DatasetFilterClass(
+            type=models.DatasetFilterTypeClass.SQL,
+            sql=additional_filter,
+        )
+
+    @abstractmethod
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        pass
+
+    def _convert_tags(self) -> Optional[TagsInputType]:
+        """
+        Convert the tags input into a standardized format.
+
+        Returns:
+            A list of tags or None if no tags are provided.
+
+        Raises:
+            SDKUsageErrorWithExamples: If the tags input is invalid.
+        """
+        if not self.tags:
+            return None
+
+        if isinstance(self.tags, str):
+            return [self.tags]
+        elif isinstance(self.tags, list):
+            return self.tags
+        else:
+            raise SDKUsageErrorWithExamples(
+                msg=f"Invalid tags: {self.tags}",
+                examples={
+                    "Tags from string": "urn:li:tag:my_tag_1",
+                    "Tags from list": [
+                        "urn:li:tag:my_tag_1",
+                        "urn:li:tag:my_tag_2",
+                    ],
+                },
+            )
+
+    def _convert_source(self) -> models.AssertionSourceClass:
+        """
+        Convert the source input into a models.AssertionSourceClass.
+        """
+        return models.AssertionSourceClass(
+            type=self.source_type,
+            created=models.AuditStampClass(
+                time=make_ts_millis(self.created_at),
+                actor=str(self.created_by),
+            ),
+        )
+
+    def _convert_last_updated(self) -> tuple[datetime, str]:
+        """
+        Convert the last updated input into a tuple of (datetime, str).
+
+        Validation is handled in the Assertion entity constructor.
+        """
+        return (self.updated_at, str(self.updated_by))
+
+    def to_monitor_entity(self, assertion_urn: AssertionUrn) -> Monitor:
+        """
+        Convert the assertion input to a monitor entity.
+
+        Args:
+            assertion_urn: The URN of the assertion to monitor.
+
+        Returns:
+            A Monitor entity configured with the assertion input parameters.
+        """
+        source_type, field = self._convert_assertion_source_type_and_field()
+        return Monitor(
+            id=(self.dataset_urn, assertion_urn),
+            info=self._create_monitor_info(
+                assertion_urn=assertion_urn,
+                status=self._convert_monitor_status(),
+                schedule=self._convert_schedule(),
+                source_type=source_type,
+                field=field,
+                sensitivity=self._convert_sensitivity(),
+                exclusion_windows=self._convert_exclusion_windows(),
+            ),
+        )
+
+    def _convert_monitor_status(self) -> models.MonitorStatusClass:
+        """
+        Convert the enabled flag into a MonitorStatusClass.
+
+        Returns:
+            A MonitorStatusClass with ACTIVE or INACTIVE mode based on the enabled flag.
+        """
+        return models.MonitorStatusClass(
+            mode=models.MonitorModeClass.ACTIVE
+            if self.enabled
+            else models.MonitorModeClass.INACTIVE,
+        )
+
+    def _convert_exclusion_windows(
+        self,
+    ) -> list[models.AssertionExclusionWindowClass]:
+        """
+        Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
+
+        Returns:
+            A list of AssertionExclusionWindowClass objects.
+
+        Raises:
+            SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
+        """
+        exclusion_windows: list[models.AssertionExclusionWindowClass] = []
+        if self.exclusion_windows:
+            for window in self.exclusion_windows:
+                if not isinstance(window, FixedRangeExclusionWindow):
+                    raise SDKUsageErrorWithExamples(
+                        msg=f"Invalid exclusion window type: {window}",
+                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
+                    )
+                # To match the UI, we generate a display name for the exclusion window.
+                # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
+                # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
+                generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
+                exclusion_windows.append(
+                    models.AssertionExclusionWindowClass(
+                        type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
+                        displayName=generated_display_name,
+                        fixedRange=models.AbsoluteTimeWindowClass(
+                            startTimeMillis=make_ts_millis(window.start),
+                            endTimeMillis=make_ts_millis(window.end),
+                        ),
+                    )
+                )
+        return exclusion_windows
+
+    @abstractmethod
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[models.FreshnessFieldSpecClass]]:
+        """
+        Convert detection mechanism into source type and field specification for freshness assertions.
+
+        Returns:
+            A tuple of (source_type, field) where field may be None.
+            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass since
+            the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
+
+        Raises:
+            SDKNotYetSupportedError: If the detection mechanism is not supported.
+            SDKUsageError: If the field (column) is not found in the dataset,
+                and the detection mechanism requires a field. Also if the field
+                is not an allowed type for the detection mechanism.
+        """
+        pass
+
+    @abstractmethod
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        pass
+
+    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
+        """
+        Convert sensitivity into an AssertionMonitorSensitivityClass.
+
+        Returns:
+            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
+        """
+        return models.AssertionMonitorSensitivityClass(
+            level=InferenceSensitivity.to_int(self.sensitivity),
+        )
+
+    def _create_monitor_info(
+        self,
+        assertion_urn: AssertionUrn,
+        status: models.MonitorStatusClass,
+        schedule: models.CronScheduleClass,
+        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
+        field: Optional[models.FreshnessFieldSpecClass],
+        sensitivity: models.AssertionMonitorSensitivityClass,
+        exclusion_windows: list[models.AssertionExclusionWindowClass],
+    ) -> models.MonitorInfoClass:
+        """
+        Create a MonitorInfoClass with all the necessary components.
+
+        Args:
+            status: The monitor status.
+            schedule: The monitor schedule.
+            source_type: The freshness source type.
+            field: Optional field specification.
+            sensitivity: The monitor sensitivity.
+            exclusion_windows: List of exclusion windows.
+
+        Returns:
+            A MonitorInfoClass configured with all the provided components.
+        """
+        return models.MonitorInfoClass(
+            type=models.MonitorTypeClass.ASSERTION,
+            status=status,
+            assertionMonitor=models.AssertionMonitorClass(
+                assertions=[
+                    models.AssertionEvaluationSpecClass(
+                        assertion=str(assertion_urn),
+                        schedule=schedule,
+                        parameters=models.AssertionEvaluationParametersClass(
+                            type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
+                            datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
+                                sourceType=source_type,
+                                field=field,
+                            ),
+                        ),
+                    )
+                ],
+                settings=models.AssertionMonitorSettingsClass(
+                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
+                        sensitivity=sensitivity,
+                        exclusionWindows=exclusion_windows,
+                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
+                    ),
+                ),
+            ),
+        )
+
+    def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
+        """
+        Get the schema field spec for the detection mechanism if needed.
+        """
+        # Only fetch the dataset if it's not already cached.
+        # Also we only fetch the dataset if it's needed for the detection mechanism.
+        if self.cached_dataset is None:
+            self.cached_dataset = self.entity_client.get(self.dataset_urn)
+
+        # TODO: Make a public accessor for _schema_dict in the SDK
+        schema_fields = self.cached_dataset._schema_dict()
+        field = schema_fields.get(column_name)
+        if field:
+            return models.SchemaFieldSpecClass(
+                path=field.fieldPath,
+                type=field.type.type.__class__.__name__,
+                nativeType=field.nativeDataType,
+            )
+        else:
+            raise SDKUsageError(
+                msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
+            )
+
+
+class _SmartFreshnessAssertionInput(_AssertionInput):
+    DEFAULT_SCHEDULE = models.CronScheduleClass(
+        cron="0 0 * * *",
+        timezone="UTC",
+    )
+
+    def __init__(
+        self,
+        *,
+        # Required fields
+        dataset_urn: Union[str, DatasetUrn],
+        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
+        # Optional fields
+        urn: Optional[Union[str, AssertionUrn]] = None,
+        display_name: Optional[str] = None,
+        enabled: bool = True,
+        detection_mechanism: DetectionMechanismInputTypes = None,
+        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
+        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
+        training_data_lookback_days: Optional[int] = None,
+        incident_behavior: Optional[
+            Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
+        ] = None,
+        tags: Optional[TagsInputType] = None,
+        created_by: Union[str, CorpUserUrn],
+        created_at: datetime,
+        updated_by: Union[str, CorpUserUrn],
+        updated_at: datetime,
+    ):
+        super().__init__(
+            dataset_urn=dataset_urn,
+            entity_client=entity_client,
+            urn=urn,
+            display_name=display_name,
+            enabled=enabled,
+            detection_mechanism=detection_mechanism,
+            sensitivity=sensitivity,
+            exclusion_windows=exclusion_windows,
+            training_data_lookback_days=training_data_lookback_days,
+            incident_behavior=incident_behavior,
+            tags=tags,
+            source_type=models.AssertionSourceTypeClass.INFERRED,  # Smart assertions are of type inferred, not native
+            created_by=created_by,
+            created_at=created_at,
+            updated_by=updated_by,
+            updated_at=updated_at,
+        )
+
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        """
+        Create a FreshnessAssertionInfoClass for a smart freshness assertion.
+
+        Args:
+            filter: Optional filter to apply to the assertion.
+
+        Returns:
+            A FreshnessAssertionInfoClass configured for smart freshness.
+        """
+        return models.FreshnessAssertionInfoClass(
+            type=models.FreshnessAssertionTypeClass.DATASET_CHANGE,  # Currently only dataset change is supported
+            entity=str(self.dataset_urn),
+            # schedule (optional, not used for smart freshness assertions)
+            filter=filter,
+        )
+
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        """Create a schedule for a smart freshness assertion.
+
+        Since the schedule is not used for smart freshness assertions, we return a default schedule.
+
+        Returns:
+            A CronScheduleClass with appropriate schedule settings.
+        """
+        return self.DEFAULT_SCHEDULE
+
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[models.FreshnessFieldSpecClass]]:
+        """
+        Convert detection mechanism into source type and field specification for freshness assertions.
+
+        Returns:
+            A tuple of (source_type, field) where field may be None.
+            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass since
+            the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
+
+        Raises:
+            SDKNotYetSupportedError: If the detection mechanism is not supported.
+            SDKUsageError: If the field (column) is not found in the dataset,
+                and the detection mechanism requires a field. Also if the field
+                is not an allowed type for the detection mechanism.
+        """
+        source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
+        field = None
+
+        if isinstance(self.detection_mechanism, _LastModifiedColumn):
+            source_type = models.DatasetFreshnessSourceTypeClass.FIELD_VALUE
+            field = self._create_field_spec(
+                self.detection_mechanism.column_name,
+                LAST_MODIFIED_ALLOWED_FIELD_TYPES,
+                "last modified column",
+                models.FreshnessFieldKindClass.LAST_MODIFIED,
+            )
+        elif isinstance(self.detection_mechanism, _InformationSchema):
+            source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
+        elif isinstance(self.detection_mechanism, _DataHubOperation):
+            source_type = models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION
+        elif isinstance(self.detection_mechanism, _AuditLog):
+            source_type = models.DatasetFreshnessSourceTypeClass.AUDIT_LOG
+        else:
+            raise SDKNotYetSupportedError(
+                f"Detection mechanism {self.detection_mechanism} not yet supported for smart freshness assertions"
+            )
+
+        return source_type, field
+
+    def _create_field_spec(
+        self,
+        column_name: str,
+        allowed_types: list[DictWrapper],  # TODO: Use the type from the PDL
+        field_type_name: str,
+        kind: str,
+    ) -> models.FreshnessFieldSpecClass:
+        """
+        Create a field specification for a column, validating its type.
+
+        Args:
+            column_name: The name of the column to create a spec for
+            allowed_types: List of allowed field types
+            field_type_name: Human-readable name of the field type for error messages
+            kind: The kind of field to create
+
+        Returns:
+            A FreshnessFieldSpecClass for the column
+
+        Raises:
+            SDKUsageError: If the column is not found or has an invalid type
+        """
+        SUPPORTED_KINDS = [
+            models.FreshnessFieldKindClass.LAST_MODIFIED,
+            models.FreshnessFieldKindClass.HIGH_WATERMARK,
+        ]
+        if kind not in SUPPORTED_KINDS:
+            raise SDKUsageError(
+                msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
+            )
+
+        field_spec = self._get_schema_field_spec(column_name)
+        allowed_type_names = [t.__class__.__name__ for t in allowed_types]
+        if field_spec.type not in allowed_type_names:
+            raise SDKUsageError(
+                msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
+                f"Allowed types are {allowed_type_names}.",
+            )
+        return models.FreshnessFieldSpecClass(
+            path=field_spec.path,
+            type=field_spec.type,
+            nativeType=field_spec.nativeType,
+            kind=kind,
+        )
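
For orientation, below is a minimal usage sketch of the input-parsing helpers that this new assertion_input.py module adds. It is based only on the code visible in the diff above; how the assertions client ultimately consumes these values is not shown in this diff, and the sketch exercises underscore-prefixed internal helpers purely for illustration.

    from datetime import datetime

    from acryl_datahub_cloud._sdk_extras.assertion_input import (
        DetectionMechanism,
        InferenceSensitivity,
        _try_parse_exclusion_window,
        _try_parse_incident_behavior,
    )

    # Strings and dicts are normalized to the concrete detection mechanism types;
    # the class attributes (e.g. DetectionMechanism.INFORMATION_SCHEMA) are accepted as-is.
    dm_default = DetectionMechanism.parse("information_schema")
    dm_column = DetectionMechanism.parse(
        {"type": "last_modified_column", "column_name": "last_modified"}
    )

    # Sensitivity accepts strings, enum members, or integers 1-10
    # (1-3 low, 4-6 medium, 7-10 high); 8 maps to InferenceSensitivity.HIGH.
    sensitivity = InferenceSensitivity.parse(8)

    # Exclusion windows accept dicts, FixedRangeExclusionWindow objects, or lists of either.
    windows = _try_parse_exclusion_window(
        {"start": datetime(2025, 1, 1), "end": datetime(2025, 1, 2)}
    )

    # Incident behavior accepts strings or AssertionIncidentBehavior members.
    behaviors = _try_parse_incident_behavior(["raise_on_fail", "resolve_on_pass"])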