acryl-datahub-cloud 0.3.12rc1__py3-none-any.whl → 0.3.12rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +559 -0
- acryl_datahub_cloud/datahub_forms_notifications/get_search_results_total.gql +14 -0
- acryl_datahub_cloud/datahub_forms_notifications/query.py +17 -0
- acryl_datahub_cloud/datahub_forms_notifications/scroll_forms_for_notification.gql +29 -0
- acryl_datahub_cloud/datahub_forms_notifications/send_form_notification_request.gql +5 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +48 -8
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +49 -40
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1842 -1786
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/form/__init__.py +4 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/notification/__init__.py +19 -0
- acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
- acryl_datahub_cloud/metadata/schema.avsc +24861 -24050
- acryl_datahub_cloud/metadata/schema_classes.py +1031 -631
- acryl_datahub_cloud/metadata/schemas/ApplicationKey.avsc +31 -0
- acryl_datahub_cloud/metadata/schemas/ApplicationProperties.avsc +72 -0
- acryl_datahub_cloud/metadata/schemas/Applications.avsc +38 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +40 -7
- acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +27 -6
- acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +31 -7
- acryl_datahub_cloud/metadata/schemas/AssertionsSummary.avsc +14 -0
- acryl_datahub_cloud/metadata/schemas/ChartKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/ConstraintInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/ContainerKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/CorpGroupKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/CorpUserKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/DashboardKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataFlowKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/DataJobKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataProductKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/DataProductProperties.avsc +1 -1
- acryl_datahub_cloud/metadata/schemas/DatasetKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/FormAssignmentStatus.avsc +36 -0
- acryl_datahub_cloud/metadata/schemas/FormInfo.avsc +6 -0
- acryl_datahub_cloud/metadata/schemas/FormKey.avsc +2 -1
- acryl_datahub_cloud/metadata/schemas/FormNotifications.avsc +69 -0
- acryl_datahub_cloud/metadata/schemas/FormSettings.avsc +3 -0
- acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +22 -0
- acryl_datahub_cloud/metadata/schemas/GlossaryTermKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLFeatureTableKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLModelGroupKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLModelKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +12 -1
- acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +27 -6
- acryl_datahub_cloud/metadata/schemas/NotebookKey.avsc +1 -0
- acryl_datahub_cloud/metadata/schemas/NotificationRequest.avsc +1 -0
- acryl_datahub_cloud/notifications/__init__.py +0 -0
- acryl_datahub_cloud/notifications/notification_recipient_builder.py +399 -0
- acryl_datahub_cloud/sdk/__init__.py +29 -0
- acryl_datahub_cloud/{_sdk_extras → sdk}/assertion.py +501 -193
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/{_sdk_extras → sdk/assertion_input}/assertion_input.py +733 -189
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +947 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1639 -0
- acryl_datahub_cloud/sdk/entities/__init__.py +0 -0
- acryl_datahub_cloud/{_sdk_extras → sdk}/entities/assertion.py +5 -2
- acryl_datahub_cloud/{_sdk_extras → sdk}/subscription_client.py +146 -33
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/METADATA +48 -43
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/RECORD +72 -54
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/entry_points.txt +1 -0
- acryl_datahub_cloud/_sdk_extras/__init__.py +0 -19
- acryl_datahub_cloud/_sdk_extras/assertions_client.py +0 -717
- /acryl_datahub_cloud/{_sdk_extras/entities → datahub_forms_notifications}/__init__.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/monitor.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/entities/subscription.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/errors.py +0 -0
- /acryl_datahub_cloud/{_sdk_extras → sdk}/resolver_client.py +0 -0
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc1.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/top_level.txt +0 -0
acryl_datahub_cloud/{_sdk_extras → sdk/assertion_input}/assertion_input.py

@@ -6,22 +6,26 @@ validate and represent the input for creating an Assertion in DataHub.
 import random
 import string
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Literal, Optional, TypeAlias, Union
+from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
 
 import pydantic
+import pytz
+import tzlocal
 from avrogen.dict_wrapper import DictWrapper
+from croniter import croniter
 from pydantic import BaseModel, Extra, ValidationError
 
-from acryl_datahub_cloud.
+from acryl_datahub_cloud.sdk.entities.assertion import (
     Assertion,
     AssertionActionsInputType,
     AssertionInfoInputType,
     TagsInputType,
 )
-from acryl_datahub_cloud.
-from acryl_datahub_cloud.
+from acryl_datahub_cloud.sdk.entities.monitor import Monitor
+from acryl_datahub_cloud.sdk.errors import (
     SDKNotYetSupportedError,
     SDKUsageError,
     SDKUsageErrorWithExamples,
@@ -40,6 +44,22 @@ DEFAULT_NAME_PREFIX = "New Assertion"
 DEFAULT_NAME_SUFFIX_LENGTH = 8
 
 
+DEFAULT_HOURLY_SCHEDULE = models.CronScheduleClass(
+    cron="0 * * * *",  # Every hour, matches the UI default
+    timezone=str(
+        tzlocal.get_localzone()
+    ),  # User local timezone, matches the UI default
+)
+DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_HOURLY_SCHEDULE
+
+DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
+    cron="0 0 * * *",  # Every day at midnight, matches the UI default
+    timezone=str(
+        tzlocal.get_localzone()
+    ),  # User local timezone, matches the UI default
+)
+
+
 class AbstractDetectionMechanism(BaseModel, ABC):
     type: str
 
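A minimal sketch (illustrative, not from the package) of what the two new schedule defaults evaluate to, assuming `models` in this module resolves to the generated schema_classes module shipped with the wheel:

import tzlocal
from acryl_datahub_cloud.metadata import schema_classes as models

# Hourly default used when no schedule is supplied; timezone follows the local machine.
hourly = models.CronScheduleClass(cron="0 * * * *", timezone=str(tzlocal.get_localzone()))
# Daily default: midnight in the local timezone.
daily = models.CronScheduleClass(cron="0 0 * * *", timezone=str(tzlocal.get_localzone()))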
@@ -85,6 +105,36 @@ class _DataHubOperation(AbstractDetectionMechanism):
     type: Literal["datahub_operation"] = "datahub_operation"
 
 
+class _Query(AbstractDetectionMechanism):
+    # COUNT(*) query
+    type: Literal["query"] = "query"
+    additional_filter: Optional[str] = None
+
+
+class _AllRowsQuery(AbstractDetectionMechanism):
+    # For column-based assertions, this is the default detection mechanism.
+    type: Literal["all_rows_query"] = "all_rows_query"
+    additional_filter: Optional[str] = None
+
+
+class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
+    # Used for column-based assertions.
+    type: Literal["all_rows_query_datahub_dataset_profile"] = (
+        "all_rows_query_datahub_dataset_profile"
+    )
+
+
+class _ChangedRowsQuery(AbstractDetectionMechanism):
+    # Used for column-based assertions.
+    type: Literal["changed_rows_query"] = "changed_rows_query"
+    column_name: str
+    additional_filter: Optional[str] = None
+
+
+class _DatasetProfile(AbstractDetectionMechanism):
+    type: Literal["dataset_profile"] = "dataset_profile"
+
+
 # Keep these two lists in sync:
 _DETECTION_MECHANISM_CONCRETE_TYPES = (
     _InformationSchema,
@@ -92,6 +142,11 @@ _DETECTION_MECHANISM_CONCRETE_TYPES = (
     _LastModifiedColumn,
     _HighWatermarkColumn,
     _DataHubOperation,
+    _Query,
+    _DatasetProfile,
+    _AllRowsQuery,
+    _ChangedRowsQuery,
+    _AllRowsQueryDataHubDatasetProfile,
 )
 _DetectionMechanismTypes = Union[
     _InformationSchema,
@@ -99,8 +154,23 @@ _DetectionMechanismTypes = Union[
     _LastModifiedColumn,
     _HighWatermarkColumn,
     _DataHubOperation,
+    _Query,
+    _DatasetProfile,
+    _AllRowsQuery,
+    _ChangedRowsQuery,
+    _AllRowsQueryDataHubDatasetProfile,
 ]
 
+_DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
+    _LastModifiedColumn,
+    _HighWatermarkColumn,
+    _Query,
+    _AllRowsQuery,
+    _ChangedRowsQuery,
+)
+
+DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
+
 
 class DetectionMechanism:
     # To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
@@ -110,6 +180,11 @@ class DetectionMechanism:
     LAST_MODIFIED_COLUMN = _LastModifiedColumn
     HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
     DATAHUB_OPERATION = _DataHubOperation()
+    QUERY = _Query
+    ALL_ROWS_QUERY = _AllRowsQuery()
+    CHANGED_ROWS_QUERY = _ChangedRowsQuery
+    ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
+    DATASET_PROFILE = _DatasetProfile()
 
     _DETECTION_MECHANISM_EXAMPLES = {
         "Information Schema from string": "information_schema",
@@ -130,6 +205,26 @@ class DetectionMechanism:
         "High Watermark Column from DetectionMechanism": "DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id', additional_filter='id > 1000')",
         "DataHub Operation from string": "datahub_operation",
         "DataHub Operation from DetectionMechanism": "DetectionMechanism.DATAHUB_OPERATION",
+        "Query from string": "query",
+        "Query from dict": {
+            "type": "query",
+            "additional_filter": "id > 1000",
+        },
+        "Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
+        "Dataset Profile from string": "dataset_profile",
+        "Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
+        "All Rows Query from string": "all_rows_query",
+        "All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
+        "All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
+        "Changed Rows Query from dict (with optional additional filter)": {
+            "type": "changed_rows_query",
+            "column_name": "id",
+            "additional_filter": "id > 1000",
+        },
+        "Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
+        "Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
+        "All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
+        "All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
     }
 
     @staticmethod
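The new examples above describe equivalent ways to select one of the added detection mechanisms. A hedged sketch, assuming the module's new import path in this release (acryl_datahub_cloud.sdk.assertion_input.assertion_input); the string form takes defaults, while the dict and attribute forms carry the optional SQL filter:

from acryl_datahub_cloud.sdk.assertion_input.assertion_input import DetectionMechanism

from_string = DetectionMechanism.parse("query")  # plain COUNT(*) query mechanism
from_dict = DetectionMechanism.parse({"type": "query", "additional_filter": "id > 1000"})
from_attr = DetectionMechanism.QUERY(additional_filter="id > 1000")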
@@ -137,9 +232,10 @@ class DetectionMechanism:
         detection_mechanism_config: Optional[
             Union[str, dict[str, str], _DetectionMechanismTypes]
         ] = None,
+        default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
     ) -> _DetectionMechanismTypes:
         if detection_mechanism_config is None:
-            return
+            return default_detection_mechanism
         if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
             return detection_mechanism_config
         elif isinstance(detection_mechanism_config, str):
@@ -220,8 +316,6 @@ class DetectionMechanism:
             ) from e
 
 
-DEFAULT_DETECTION_MECHANISM = DetectionMechanism.INFORMATION_SCHEMA
-
 DetectionMechanismInputTypes: TypeAlias = Union[
     str, dict[str, str], _DetectionMechanismTypes, None
 ]
@@ -288,7 +382,59 @@ class InferenceSensitivity(Enum):
         }[sensitivity]
 
 
-DEFAULT_SENSITIVITY = InferenceSensitivity.MEDIUM
+DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
+
+TIME_WINDOW_SIZE_EXAMPLES = {
+    "Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
+    "Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
+}
+
+
+class CalendarInterval(Enum):
+    MINUTE = "MINUTE"
+    HOUR = "HOUR"
+    DAY = "DAY"
+
+
+class TimeWindowSize(BaseModel):
+    unit: Union[CalendarInterval, str]
+    multiple: int
+
+
+TimeWindowSizeInputTypes: TypeAlias = Union[
+    models.TimeWindowSizeClass,
+    models.FixedIntervalScheduleClass,
+    TimeWindowSize,
+]
+
+
+def _try_parse_time_window_size(
+    config: TimeWindowSizeInputTypes,
+) -> models.TimeWindowSizeClass:
+    if isinstance(config, models.TimeWindowSizeClass):
+        return config
+    elif isinstance(config, models.FixedIntervalScheduleClass):
+        return models.TimeWindowSizeClass(
+            unit=_try_parse_and_validate_schema_classes_enum(
+                config.unit, models.CalendarIntervalClass
+            ),
+            multiple=config.multiple,
+        )
+    elif isinstance(config, TimeWindowSize):
+        return models.TimeWindowSizeClass(
+            unit=_try_parse_and_validate_schema_classes_enum(
+                _try_parse_and_validate_schema_classes_enum(
+                    config.unit, CalendarInterval
+                ).value,
+                models.CalendarIntervalClass,
+            ),
+            multiple=config.multiple,
+        )
+    else:
+        raise SDKUsageErrorWithExamples(
+            msg=f"Invalid time window size: {config}",
+            examples=TIME_WINDOW_SIZE_EXAMPLES,
+        )
 
 
 class FixedRangeExclusionWindow(BaseModel):
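A small usage sketch (illustrative, not from the package) of the new time-window parsing helper, assuming the same module path as above; both spellings listed in TIME_WINDOW_SIZE_EXAMPLES normalize to a models.TimeWindowSizeClass:

from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
    TimeWindowSize,
    _try_parse_time_window_size,
)

# Equivalent to models.TimeWindowSizeClass(unit="MINUTE", multiple=10).
window = _try_parse_time_window_size(TimeWindowSize(unit="MINUTE", multiple=10))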
@@ -496,6 +642,219 @@ def _try_parse_training_data_lookback_days(
     return training_data_lookback_days
 
 
+def _validate_cron_schedule(schedule: str, timezone: str) -> None:
+    """We are using the POSIX.1-2017 standard for cron expressions.
+
+    Note: We are using the croniter library for cron parsing which is different from executor, which uses apscheduler, so there is a risk of mismatch here.
+    """
+    try:
+        # Validate timezone - pytz.timezone() raises UnknownTimeZoneError for invalid timezones
+        # Skip timezone validation when empty
+        if timezone:
+            pytz.timezone(timezone)
+
+        # Validate 5-field cron expression only (POSIX.1-2017 standard)
+        fields = schedule.strip().split()
+        if len(fields) != 5:
+            raise ValueError("POSIX.1-2017 requires exactly 5 fields")
+
+        # POSIX.1-2017 specific validation: Sunday must be 0, not 7
+        # However croniter accepts 7 as Sunday, so custom check is needed here.
+        # Check the day-of-week field (5th field, index 4)
+        dow_field = fields[4]
+        if "7" in dow_field:
+            # Check if 7 appears as a standalone value or in ranges
+            import re
+
+            # Match 7 as standalone, in lists, or in ranges
+            if re.search(r"\b7\b|7-|,7,|^7,|,7$|-7\b", dow_field):
+                raise ValueError(
+                    "POSIX.1-2017 standard: Sunday must be represented as 0, not 7"
+                )
+
+        # Validate cron expression - croniter constructor validates the expression
+        croniter(schedule)
+
+    except Exception as e:
+        raise SDKUsageError(
+            f"Invalid cron expression or timezone: {schedule} {timezone}, please use a POSIX.1-2017 compatible cron expression and timezone."
+        ) from e
+
+
+def _try_parse_schedule(
+    schedule: Optional[Union[str, models.CronScheduleClass]],
+) -> Optional[models.CronScheduleClass]:
+    if schedule is None:
+        return None
+    if isinstance(schedule, str):
+        _validate_cron_schedule(schedule, "UTC")
+        return models.CronScheduleClass(
+            cron=schedule,
+            timezone="UTC",
+        )
+    if isinstance(schedule, models.CronScheduleClass):
+        _validate_cron_schedule(schedule.cron, schedule.timezone)
+        return schedule
+
+
+FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
+
+
+T = TypeVar("T")
+
+
+def _try_parse_and_validate_schema_classes_enum(
+    value: Union[str, T],
+    enum_class: Type[T],
+) -> T:
+    if isinstance(value, enum_class):
+        return value
+    assert isinstance(value, str)
+    if value not in get_enum_options(enum_class):
+        raise SDKUsageError(
+            f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
+        )
+    return getattr(enum_class, value.upper())
+
+
+@dataclass(frozen=True)
+class DatasetSourceType:
+    """
+    DatasetSourceType is used to represent a dataset source type.
+    It is used to check if a source type is valid for a dataset type and assertion type.
+
+    Args:
+        source_type: The source type (e.g. information schema, field value, etc. aka detection mechanism)
+        platform: The platform of the dataset as a string OR "all" for all platforms.
+        assertion_type: The assertion type as a models.AssertionTypeClass string e.g. models.AssertionTypeClass.FRESHNESS OR "all" for all assertion types.
+
+    Example:
+        DatasetSourceType(
+            source_type=_InformationSchema,
+            platform="databricks",
+            assertion_type="all",
+        )
+        This means that the source type _InformationSchema is invalid for the dataset type "databricks" and assertion type "all".
+        "all" in this example means that the source type is invalid for all assertion types.
+    """
+
+    source_type: Type[_DetectionMechanismTypes]
+    platform: str
+    assertion_type: Union[models.AssertionTypeClass, str]
+
+
+INVALID_SOURCE_TYPES = {
+    # Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
+    DatasetSourceType(
+        source_type=_InformationSchema,
+        platform="databricks",
+        assertion_type="all",
+    )
+}
+
+
+def _is_source_type_valid(
+    dataset_source_type: DatasetSourceType,
+    invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
+) -> bool:
+    for invalid in invalid_source_types:
+        if invalid.source_type == dataset_source_type.source_type:
+            # If both platform and assertion type are "all", the source type is invalid for all combinations
+            if invalid.platform == "all" and invalid.assertion_type == "all":
+                return False
+            # If platform matches and assertion type is "all", the source type is invalid for all assertion types on that platform
+            if (
+                invalid.platform == dataset_source_type.platform
+                and invalid.assertion_type == "all"
+            ):
+                return False
+            # If platform is "all" and assertion type matches, the source type is invalid for all platforms for that assertion type
+            if (
+                invalid.platform == "all"
+                and invalid.assertion_type == dataset_source_type.assertion_type
+            ):
+                return False
+            # If both platform and assertion type match exactly, the source type is invalid
+            if (
+                invalid.platform == dataset_source_type.platform
+                and invalid.assertion_type == dataset_source_type.assertion_type
+            ):
+                return False
+    return True
+
+
+class _HasSmartAssertionInputs:
+    """
+    A class that contains the common inputs for smart assertions.
+    This is used to avoid code duplication in the smart assertion inputs.
+
+    Args:
+        sensitivity: The sensitivity to be applied to the assertion.
+        exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
+        training_data_lookback_days: The training data lookback days to be applied to the assertion.
+    """
+
+    def __init__(
+        self,
+        *,
+        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
+        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
+        training_data_lookback_days: Optional[int] = None,
+    ):
+        self.sensitivity = InferenceSensitivity.parse(sensitivity)
+        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
+        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
+            training_data_lookback_days
+        )
+
+    def _convert_exclusion_windows(
+        self,
+    ) -> list[models.AssertionExclusionWindowClass]:
+        """
+        Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
+
+        Returns:
+            A list of AssertionExclusionWindowClass objects.
+
+        Raises:
+            SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
+        """
+        exclusion_windows: list[models.AssertionExclusionWindowClass] = []
+        if self.exclusion_windows:
+            for window in self.exclusion_windows:
+                if not isinstance(window, FixedRangeExclusionWindow):
+                    raise SDKUsageErrorWithExamples(
+                        msg=f"Invalid exclusion window type: {window}",
+                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
+                    )
+                # To match the UI, we generate a display name for the exclusion window.
+                # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
+                # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
+                generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
+                exclusion_windows.append(
+                    models.AssertionExclusionWindowClass(
+                        type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
+                        displayName=generated_display_name,
+                        fixedRange=models.AbsoluteTimeWindowClass(
+                            startTimeMillis=make_ts_millis(window.start),
+                            endTimeMillis=make_ts_millis(window.end),
+                        ),
+                    )
+                )
+        return exclusion_windows
+
+    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
+        """
+        Convert sensitivity into an AssertionMonitorSensitivityClass.
+
+        Returns:
+            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
+        """
+        return models.AssertionMonitorSensitivityClass(
+            level=InferenceSensitivity.to_int(self.sensitivity),
+        )
+
+
 class _AssertionInput(ABC):
     def __init__(
         self,
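An illustration (not from the package) of the new schedule validation, assuming the same module path as above; _validate_cron_schedule accepts only 5-field POSIX.1-2017 expressions and rejects Sunday written as 7:

from acryl_datahub_cloud.sdk.assertion_input.assertion_input import _try_parse_schedule

# Valid: five fields, Sunday as 0; plain strings default to a UTC timezone.
schedule = _try_parse_schedule("0 0 * * 0")

# Both of these raise SDKUsageError:
# _try_parse_schedule("0 0 * * * *")  # six fields
# _try_parse_schedule("0 0 * * 7")    # Sunday written as 7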
@@ -509,10 +868,8 @@ class _AssertionInput(ABC):
         ] = None,  # Can be None if the assertion is not yet created
         display_name: Optional[str] = None,
         enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
         detection_mechanism: DetectionMechanismInputTypes = None,
-        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
-        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
-        training_data_lookback_days: Optional[int] = None,
         incident_behavior: Optional[
             Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
         ] = None,
@@ -522,6 +879,7 @@ class _AssertionInput(ABC):
         created_at: datetime,
         updated_by: Union[str, CorpUserUrn],
         updated_at: datetime,
+        default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
     ):
         """
         Create an AssertionInput object.
@@ -533,9 +891,6 @@ class _AssertionInput(ABC):
             display_name: The display name of the assertion. If not provided, a random display name will be generated.
             enabled: Whether the assertion is enabled. Defaults to True.
             detection_mechanism: The detection mechanism to be used for the assertion.
-            sensitivity: The sensitivity to be applied to the assertion.
-            exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
-            training_data_lookback_days: The training data lookback days to be applied to the assertion.
             incident_behavior: The incident behavior to be applied to the assertion.
             tags: The tags to be applied to the assertion.
             source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
@@ -553,13 +908,20 @@ class _AssertionInput(ABC):
             else _generate_default_name(DEFAULT_NAME_PREFIX, DEFAULT_NAME_SUFFIX_LENGTH)
         )
         self.enabled = enabled
-
-        self.detection_mechanism = DetectionMechanism.parse(
-
-        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
-        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
-            training_data_lookback_days
+        self.schedule = _try_parse_schedule(schedule)
+        self.detection_mechanism = DetectionMechanism.parse(
+            detection_mechanism, default_detection_mechanism
         )
+        if not _is_source_type_valid(
+            DatasetSourceType(
+                source_type=type(self.detection_mechanism),
+                platform=self.dataset_urn.platform,
+                assertion_type=self._assertion_type(),
+            )
+        ):
+            raise SDKUsageError(
+                f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
+            )
         self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
         self.tags = tags
         if source_type not in get_enum_options(models.AssertionSourceTypeClass):
@@ -571,7 +933,6 @@ class _AssertionInput(ABC):
         self.created_at = created_at
         self.updated_by = updated_by
         self.updated_at = updated_at
-
         self.cached_dataset: Optional[Dataset] = None
 
     def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
@@ -656,10 +1017,7 @@ class _AssertionInput(ABC):
         """
         if not isinstance(
             self.detection_mechanism,
-
-            DetectionMechanism.LAST_MODIFIED_COLUMN,
-            DetectionMechanism.HIGH_WATERMARK_COLUMN,
-            ),
+            _DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER,
         ):
             return None
 
@@ -672,12 +1030,6 @@ class _AssertionInput(ABC):
             sql=additional_filter,
         )
 
-    @abstractmethod
-    def _create_assertion_info(
-        self, filter: Optional[models.DatasetFilterClass]
-    ) -> AssertionInfoInputType:
-        pass
-
     def _convert_tags(self) -> Optional[TagsInputType]:
         """
         Convert the tags input into a standardized format.
@@ -746,8 +1098,6 @@ class _AssertionInput(ABC):
                 schedule=self._convert_schedule(),
                 source_type=source_type,
                 field=field,
-                sensitivity=self._convert_sensitivity(),
-                exclusion_windows=self._convert_exclusion_windows(),
             ),
         )
 
@@ -764,86 +1114,69 @@ class _AssertionInput(ABC):
             else models.MonitorModeClass.INACTIVE,
         )
 
-    def
-        self,
-    ) -> list[models.AssertionExclusionWindowClass]:
+    def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
         """
-
+        Get the schema field spec for the detection mechanism if needed.
+        """
+        # Only fetch the dataset if it's not already cached.
+        # Also we only fetch the dataset if it's needed for the detection mechanism.
+        if self.cached_dataset is None:
+            self.cached_dataset = self.entity_client.get(self.dataset_urn)
 
-
-
+        # Handle case where dataset doesn't exist
+        if self.cached_dataset is None:
+            raise SDKUsageError(
+                f"Dataset {self.dataset_urn} not found. Cannot validate column {column_name}."
+            )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-            # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
-            generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
-            exclusion_windows.append(
-                models.AssertionExclusionWindowClass(
-                    type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
-                    displayName=generated_display_name,
-                    fixedRange=models.AbsoluteTimeWindowClass(
-                        startTimeMillis=make_ts_millis(window.start),
-                        endTimeMillis=make_ts_millis(window.end),
-                    ),
-                )
-            )
-        return exclusion_windows
+        # TODO: Make a public accessor for _schema_dict in the SDK
+        schema_fields = self.cached_dataset._schema_dict()
+        field = schema_fields.get(column_name)
+        if field:
+            return models.SchemaFieldSpecClass(
+                path=field.fieldPath,
+                type=field.type.type.__class__.__name__,
+                nativeType=field.nativeDataType,
+            )
+        else:
+            raise SDKUsageError(
+                msg=f"Column {column_name} not found in dataset {self.dataset_urn}",
+            )
 
-
-    def _convert_assertion_source_type_and_field(
+    def _validate_field_type(
         self,
-
+        field_spec: models.SchemaFieldSpecClass,
+        column_name: str,
+        allowed_types: list[DictWrapper],
+        field_type_name: str,
+    ) -> None:
         """
-
+        Validate that a field has an allowed type.
 
-
-
-
-
+        Args:
+            field_spec: The field specification to validate
+            column_name: The name of the column for error messages
+            allowed_types: List of allowed field types
+            field_type_name: Human-readable name of the field type for error messages
 
         Raises:
-
-            SDKUsageError: If the field (column) is not found in the dataset,
-            and the detection mechanism requires a field. Also if the field
-            is not an allowed type for the detection mechanism.
+            SDKUsageError: If the field has an invalid type
         """
-
+        allowed_type_names = [t.__class__.__name__ for t in allowed_types]
+        if field_spec.type not in allowed_type_names:
+            raise SDKUsageError(
+                msg=f"Column {column_name} with type {field_spec.type} does not have an allowed type for a {field_type_name} in dataset {self.dataset_urn}. "
+                f"Allowed types are {allowed_type_names}.",
+            )
 
     @abstractmethod
-    def _convert_schedule(self) -> models.CronScheduleClass:
-        pass
-
-    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
-        """
-        Convert sensitivity into an AssertionMonitorSensitivityClass.
-
-        Returns:
-            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
-        """
-        return models.AssertionMonitorSensitivityClass(
-            level=InferenceSensitivity.to_int(self.sensitivity),
-        )
-
     def _create_monitor_info(
         self,
         assertion_urn: AssertionUrn,
         status: models.MonitorStatusClass,
         schedule: models.CronScheduleClass,
         source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
-        field: Optional[
-        sensitivity: models.AssertionMonitorSensitivityClass,
-        exclusion_windows: list[models.AssertionExclusionWindowClass],
+        field: Optional[FieldSpecType],
     ) -> models.MonitorInfoClass:
         """
         Create a MonitorInfoClass with all the necessary components.
@@ -851,71 +1184,94 @@ class _AssertionInput(ABC):
         Args:
             status: The monitor status.
             schedule: The monitor schedule.
-            source_type: The
+            source_type: The source type.
             field: Optional field specification.
-            sensitivity: The monitor sensitivity.
-            exclusion_windows: List of exclusion windows.
-
         Returns:
             A MonitorInfoClass configured with all the provided components.
         """
-
-            type=models.MonitorTypeClass.ASSERTION,
-            status=status,
-            assertionMonitor=models.AssertionMonitorClass(
-                assertions=[
-                    models.AssertionEvaluationSpecClass(
-                        assertion=str(assertion_urn),
-                        schedule=schedule,
-                        parameters=models.AssertionEvaluationParametersClass(
-                            type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
-                            datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
-                                sourceType=source_type,
-                                field=field,
-                            ),
-                        ),
-                    )
-                ],
-                settings=models.AssertionMonitorSettingsClass(
-                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
-                        sensitivity=sensitivity,
-                        exclusionWindows=exclusion_windows,
-                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
-                    ),
-                ),
-            ),
-        )
+        pass
 
-
-
-        Get the
+    @abstractmethod
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        pass
+
+    @abstractmethod
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        """Create assertion info specific to the assertion type."""
+        pass
+
+    @abstractmethod
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        """Convert schedule to appropriate format for the assertion type."""
+        pass
+
+    @abstractmethod
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        """Get evaluation parameters specific to the assertion type."""
+        pass
+
+    @abstractmethod
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[FieldSpecType]]:
+        """Convert detection mechanism to source type and field spec."""
+        pass
+
+
+class _HasFreshnessFeatures:
+    def _create_field_spec(
+        self,
+        column_name: str,
+        allowed_types: list[DictWrapper],  # TODO: Use the type from the PDL
+        field_type_name: str,
+        kind: str,
+        get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
+        validate_field_type: Callable[
+            [models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
+        ],
+    ) -> models.FreshnessFieldSpecClass:
         """
-
-        # Also we only fetch the dataset if it's needed for the detection mechanism.
-        if self.cached_dataset is None:
-            self.cached_dataset = self.entity_client.get(self.dataset_urn)
+        Create a field specification for a column, validating its type.
 
-
-
-
-
-
-
-
-
-
-
+        Args:
+            column_name: The name of the column to create a spec for
+            allowed_types: List of allowed field types
+            field_type_name: Human-readable name of the field type for error messages
+            kind: The kind of field to create
+
+        Returns:
+            A FreshnessFieldSpecClass for the column
+
+        Raises:
+            SDKUsageError: If the column is not found or has an invalid type
+        """
+        SUPPORTED_KINDS = [
+            models.FreshnessFieldKindClass.LAST_MODIFIED,
+            models.FreshnessFieldKindClass.HIGH_WATERMARK,
+        ]
+        if kind not in SUPPORTED_KINDS:
             raise SDKUsageError(
-                msg=f"
+                msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
             )
 
+        field_spec = get_schema_field_spec(column_name)
+        validate_field_type(field_spec, column_name, allowed_types, field_type_name)
+        return models.FreshnessFieldSpecClass(
+            path=field_spec.path,
+            type=field_spec.type,
+            nativeType=field_spec.nativeType,
+            kind=kind,
+        )
 
-class _SmartFreshnessAssertionInput(_AssertionInput):
-    DEFAULT_SCHEDULE = models.CronScheduleClass(
-        cron="0 0 * * *",
-        timezone="UTC",
-    )
 
+class _SmartFreshnessAssertionInput(
+    _AssertionInput, _HasSmartAssertionInputs, _HasFreshnessFeatures
+):
     def __init__(
         self,
         *,
@@ -926,6 +1282,7 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
         urn: Optional[Union[str, AssertionUrn]] = None,
         display_name: Optional[str] = None,
         enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
         detection_mechanism: DetectionMechanismInputTypes = None,
         sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
         exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
@@ -939,16 +1296,17 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
         updated_by: Union[str, CorpUserUrn],
         updated_at: datetime,
     ):
-
+        _AssertionInput.__init__(
+            self,
             dataset_urn=dataset_urn,
             entity_client=entity_client,
             urn=urn,
             display_name=display_name,
             enabled=enabled,
+            schedule=schedule
+            if schedule is not None
+            else DEFAULT_HOURLY_SCHEDULE,  # Use provided schedule or default for create case
             detection_mechanism=detection_mechanism,
-            sensitivity=sensitivity,
-            exclusion_windows=exclusion_windows,
-            training_data_lookback_days=training_data_lookback_days,
             incident_behavior=incident_behavior,
             tags=tags,
             source_type=models.AssertionSourceTypeClass.INFERRED,  # Smart assertions are of type inferred, not native
@@ -957,6 +1315,16 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
             updated_by=updated_by,
             updated_at=updated_at,
         )
+        _HasSmartAssertionInputs.__init__(
+            self,
+            sensitivity=sensitivity,
+            exclusion_windows=exclusion_windows,
+            training_data_lookback_days=training_data_lookback_days,
+        )
+
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        return models.AssertionTypeClass.FRESHNESS
 
     def _create_assertion_info(
         self, filter: Optional[models.DatasetFilterClass]
@@ -973,29 +1341,51 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
         return models.FreshnessAssertionInfoClass(
             type=models.FreshnessAssertionTypeClass.DATASET_CHANGE,  # Currently only dataset change is supported
             entity=str(self.dataset_urn),
-            # schedule (optional,
+            # schedule (optional, must be left empty for smart freshness assertions - managed by the AI inference engine)
             filter=filter,
         )
 
     def _convert_schedule(self) -> models.CronScheduleClass:
         """Create a schedule for a smart freshness assertion.
 
-
+        For create case, uses DEFAULT_HOURLY_SCHEDULE. For update case, preserves existing schedule.
 
         Returns:
             A CronScheduleClass with appropriate schedule settings.
         """
-
+        assert self.schedule is not None, (
+            "Schedule should never be None due to constructor logic"
+        )
+        return self.schedule
+
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        # Ensure field is either None or FreshnessFieldSpecClass
+        freshness_field = None
+        if field is not None:
+            if not isinstance(field, models.FreshnessFieldSpecClass):
+                raise SDKUsageError(
+                    f"Expected FreshnessFieldSpecClass for freshness assertion, got {type(field).__name__}"
+                )
+            freshness_field = field
+
+        return models.AssertionEvaluationParametersClass(
+            type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
+            datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
+                sourceType=source_type, field=freshness_field
+            ),
+        )
 
     def _convert_assertion_source_type_and_field(
         self,
-    ) -> tuple[str, Optional[
+    ) -> tuple[str, Optional[FieldSpecType]]:
         """
         Convert detection mechanism into source type and field specification for freshness assertions.
 
         Returns:
             A tuple of (source_type, field) where field may be None.
-            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass since
+            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
             the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
 
         Raises:
@@ -1014,6 +1404,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
                 LAST_MODIFIED_ALLOWED_FIELD_TYPES,
                 "last modified column",
                 models.FreshnessFieldKindClass.LAST_MODIFIED,
+                self._get_schema_field_spec,
+                self._validate_field_type,
             )
         elif isinstance(self.detection_mechanism, _InformationSchema):
             source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
@@ -1028,47 +1420,199 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
 
         return source_type, field
 
-    def
+    def _create_monitor_info(
         self,
-
-
-
-
-
+        assertion_urn: AssertionUrn,
+        status: models.MonitorStatusClass,
+        schedule: models.CronScheduleClass,
+        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
+        field: Optional[FieldSpecType],
+    ) -> models.MonitorInfoClass:
         """
-        Create a
+        Create a MonitorInfoClass with all the necessary components.
+        """
+        return models.MonitorInfoClass(
+            type=models.MonitorTypeClass.ASSERTION,
+            status=status,
+            assertionMonitor=models.AssertionMonitorClass(
+                assertions=[
+                    models.AssertionEvaluationSpecClass(
+                        assertion=str(assertion_urn),
+                        schedule=schedule,
+                        parameters=self._get_assertion_evaluation_parameters(
+                            str(source_type), field
+                        ),
+                    ),
+                ],
+                settings=models.AssertionMonitorSettingsClass(
+                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
+                        sensitivity=self._convert_sensitivity(),
+                        exclusionWindows=self._convert_exclusion_windows(),
+                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
+                    ),
+                ),
+            ),
+        )
+
+
+class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
+    def __init__(
+        self,
+        *,
+        # Required fields
+        dataset_urn: Union[str, DatasetUrn],
+        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
+        # Optional fields
+        urn: Optional[Union[str, AssertionUrn]] = None,
+        display_name: Optional[str] = None,
+        enabled: bool = True,
+        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
+        detection_mechanism: DetectionMechanismInputTypes = None,
+        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
+        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
+        training_data_lookback_days: Optional[int] = None,
+        incident_behavior: Optional[
+            Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
+        ] = None,
+        tags: Optional[TagsInputType] = None,
+        created_by: Union[str, CorpUserUrn],
+        created_at: datetime,
+        updated_by: Union[str, CorpUserUrn],
+        updated_at: datetime,
+    ):
+        _AssertionInput.__init__(
+            self,
+            dataset_urn=dataset_urn,
+            entity_client=entity_client,
+            urn=urn,
+            display_name=display_name,
+            enabled=enabled,
+            schedule=schedule,
+            detection_mechanism=detection_mechanism,
+            incident_behavior=incident_behavior,
+            tags=tags,
+            source_type=models.AssertionSourceTypeClass.INFERRED,  # Smart assertions are of type inferred, not native
+            created_by=created_by,
+            created_at=created_at,
+            updated_by=updated_by,
+            updated_at=updated_at,
+        )
+        _HasSmartAssertionInputs.__init__(
+            self,
+            sensitivity=sensitivity,
+            exclusion_windows=exclusion_windows,
+            training_data_lookback_days=training_data_lookback_days,
+        )
+
+    def _create_assertion_info(
+        self, filter: Optional[models.DatasetFilterClass]
+    ) -> AssertionInfoInputType:
+        """
+        Create a VolumeAssertionInfoClass for a smart volume assertion.
 
         Args:
-
-            allowed_types: List of allowed field types
-            field_type_name: Human-readable name of the field type for error messages
-            kind: The kind of field to create
+            filter: Optional filter to apply to the assertion.
 
         Returns:
-            A
+            A VolumeAssertionInfoClass configured for smart volume.
+        """
+        return models.VolumeAssertionInfoClass(
+            type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL,  # Currently only ROW_COUNT_TOTAL is supported for smart volume
+            entity=str(self.dataset_urn),
+            filter=filter,
+        )
+
+    def _convert_schedule(self) -> models.CronScheduleClass:
+        """Create a schedule for a smart volume assertion.
+
+        Returns:
+            A CronScheduleClass with appropriate schedule settings.
+        """
+        if self.schedule is None:
+            return DEFAULT_HOURLY_SCHEDULE
+
+        return models.CronScheduleClass(
+            cron=self.schedule.cron,
+            timezone=self.schedule.timezone,
+        )
+
+    def _get_assertion_evaluation_parameters(
+        self, source_type: str, field: Optional[FieldSpecType]
+    ) -> models.AssertionEvaluationParametersClass:
+        return models.AssertionEvaluationParametersClass(
+            type=models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
+            datasetVolumeParameters=models.DatasetVolumeAssertionParametersClass(
+                sourceType=source_type,
+            ),
+        )
+
+    def _convert_assertion_source_type_and_field(
+        self,
+    ) -> tuple[str, Optional[FieldSpecType]]:
+        """
+        Convert detection mechanism into source type and field specification for volume assertions.
+
+        Returns:
+            A tuple of (source_type, field) where field may be None.
+            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
+            the source type is not a enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.
 
         Raises:
-
+            SDKNotYetSupportedError: If the detection mechanism is not supported.
+            SDKUsageError: If the field (column) is not found in the dataset,
+            and the detection mechanism requires a field. Also if the field
+            is not an allowed type for the detection mechanism.
         """
-
-
-            models.FreshnessFieldKindClass.HIGH_WATERMARK,
-        ]
-        if kind not in SUPPORTED_KINDS:
-            raise SDKUsageError(
-                msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
-            )
+        source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
+        field = None
 
-
-
-
-
-
-
+        if isinstance(self.detection_mechanism, _Query):
+            source_type = models.DatasetVolumeSourceTypeClass.QUERY
+        elif isinstance(self.detection_mechanism, _InformationSchema):
+            source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
+        elif isinstance(self.detection_mechanism, _DatasetProfile):
+            source_type = models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE
+        else:
+            raise SDKNotYetSupportedError(
+                f"Detection mechanism {self.detection_mechanism} not yet supported for smart volume assertions"
             )
-
-
-
-
-
+
+        return source_type, field
+
+    def _create_monitor_info(
+        self,
+        assertion_urn: AssertionUrn,
+        status: models.MonitorStatusClass,
+        schedule: models.CronScheduleClass,
+        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
+        field: Optional[FieldSpecType],
+    ) -> models.MonitorInfoClass:
+        """
+        Create a MonitorInfoClass with all the necessary components.
+        """
+        return models.MonitorInfoClass(
+            type=models.MonitorTypeClass.ASSERTION,
+            status=status,
+            assertionMonitor=models.AssertionMonitorClass(
+                assertions=[
+                    models.AssertionEvaluationSpecClass(
+                        assertion=str(assertion_urn),
+                        schedule=schedule,
+                        parameters=self._get_assertion_evaluation_parameters(
+                            str(source_type), field
+                        ),
+                    ),
+                ],
+                settings=models.AssertionMonitorSettingsClass(
+                    adjustmentSettings=models.AssertionAdjustmentSettingsClass(
+                        sensitivity=self._convert_sensitivity(),
+                        exclusionWindows=self._convert_exclusion_windows(),
+                        trainingDataLookbackWindowDays=self.training_data_lookback_days,
+                    ),
+                ),
+            ),
         )
+
+    def _assertion_type(self) -> str:
+        """Get the assertion type."""
+        return models.AssertionTypeClass.VOLUME