acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
- acryl_datahub_cloud/metadata/schema.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
- acryl_datahub_cloud/sdk/__init__.py +10 -2
- acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
- acryl_datahub_cloud/sdk/assertion/types.py +18 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
- acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
|
@@ -6,9 +6,10 @@ validate and represent the input for creating an Assertion in DataHub.
|
|
|
6
6
|
import random
|
|
7
7
|
import string
|
|
8
8
|
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
9
10
|
from datetime import datetime
|
|
10
11
|
from enum import Enum
|
|
11
|
-
from typing import Literal, Optional, TypeAlias, Union
|
|
12
|
+
from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
|
|
12
13
|
|
|
13
14
|
import pydantic
|
|
14
15
|
import pytz
|
|
@@ -42,12 +43,28 @@ ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60
|
|
|
42
43
|
DEFAULT_NAME_PREFIX = "New Assertion"
|
|
43
44
|
DEFAULT_NAME_SUFFIX_LENGTH = 8
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
|
|
47
|
+
DEFAULT_HOURLY_SCHEDULE: models.CronScheduleClass = models.CronScheduleClass(
|
|
46
48
|
cron="0 * * * *", # Every hour, matches the UI default
|
|
47
49
|
timezone=str(
|
|
48
50
|
tzlocal.get_localzone()
|
|
49
51
|
), # User local timezone, matches the UI default
|
|
50
52
|
)
|
|
53
|
+
DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_HOURLY_SCHEDULE
|
|
54
|
+
|
|
55
|
+
DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
|
|
56
|
+
cron="0 0 * * *", # Every day at midnight, matches the UI default
|
|
57
|
+
timezone=str(
|
|
58
|
+
tzlocal.get_localzone()
|
|
59
|
+
), # User local timezone, matches the UI default
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE = models.CronScheduleClass(
|
|
63
|
+
cron="0 */6 * * *", # Every 6 hours, matches the UI default
|
|
64
|
+
timezone=str(
|
|
65
|
+
tzlocal.get_localzone()
|
|
66
|
+
), # User local timezone, matches the UI default
|
|
67
|
+
)
|
|
51
68
|
|
|
52
69
|
|
|
53
70
|
class AbstractDetectionMechanism(BaseModel, ABC):
|
|
@@ -101,6 +118,26 @@ class _Query(AbstractDetectionMechanism):
|
|
|
101
118
|
additional_filter: Optional[str] = None
|
|
102
119
|
|
|
103
120
|
|
|
121
|
+
class _AllRowsQuery(AbstractDetectionMechanism):
|
|
122
|
+
# For column-based assertions, this is the default detection mechanism.
|
|
123
|
+
type: Literal["all_rows_query"] = "all_rows_query"
|
|
124
|
+
additional_filter: Optional[str] = None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
|
|
128
|
+
# Used for column-based assertions.
|
|
129
|
+
type: Literal["all_rows_query_datahub_dataset_profile"] = (
|
|
130
|
+
"all_rows_query_datahub_dataset_profile"
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class _ChangedRowsQuery(AbstractDetectionMechanism):
|
|
135
|
+
# Used for column-based assertions.
|
|
136
|
+
type: Literal["changed_rows_query"] = "changed_rows_query"
|
|
137
|
+
column_name: str
|
|
138
|
+
additional_filter: Optional[str] = None
|
|
139
|
+
|
|
140
|
+
|
|
104
141
|
class _DatasetProfile(AbstractDetectionMechanism):
|
|
105
142
|
type: Literal["dataset_profile"] = "dataset_profile"
|
|
106
143
|
|
|
@@ -114,6 +151,9 @@ _DETECTION_MECHANISM_CONCRETE_TYPES = (
|
|
|
114
151
|
_DataHubOperation,
|
|
115
152
|
_Query,
|
|
116
153
|
_DatasetProfile,
|
|
154
|
+
_AllRowsQuery,
|
|
155
|
+
_ChangedRowsQuery,
|
|
156
|
+
_AllRowsQueryDataHubDatasetProfile,
|
|
117
157
|
)
|
|
118
158
|
_DetectionMechanismTypes = Union[
|
|
119
159
|
_InformationSchema,
|
|
@@ -123,14 +163,21 @@ _DetectionMechanismTypes = Union[
|
|
|
123
163
|
_DataHubOperation,
|
|
124
164
|
_Query,
|
|
125
165
|
_DatasetProfile,
|
|
166
|
+
_AllRowsQuery,
|
|
167
|
+
_ChangedRowsQuery,
|
|
168
|
+
_AllRowsQueryDataHubDatasetProfile,
|
|
126
169
|
]
|
|
127
170
|
|
|
128
171
|
_DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
|
|
129
172
|
_LastModifiedColumn,
|
|
130
173
|
_HighWatermarkColumn,
|
|
131
174
|
_Query,
|
|
175
|
+
_AllRowsQuery,
|
|
176
|
+
_ChangedRowsQuery,
|
|
132
177
|
)
|
|
133
178
|
|
|
179
|
+
DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
|
|
180
|
+
|
|
134
181
|
|
|
135
182
|
class DetectionMechanism:
|
|
136
183
|
# To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
|
|
@@ -141,6 +188,9 @@ class DetectionMechanism:
|
|
|
141
188
|
HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
|
|
142
189
|
DATAHUB_OPERATION = _DataHubOperation()
|
|
143
190
|
QUERY = _Query
|
|
191
|
+
ALL_ROWS_QUERY = _AllRowsQuery
|
|
192
|
+
CHANGED_ROWS_QUERY = _ChangedRowsQuery
|
|
193
|
+
ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
|
|
144
194
|
DATASET_PROFILE = _DatasetProfile()
|
|
145
195
|
|
|
146
196
|
_DETECTION_MECHANISM_EXAMPLES = {
|
|
@@ -170,6 +220,18 @@ class DetectionMechanism:
|
|
|
170
220
|
"Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
|
|
171
221
|
"Dataset Profile from string": "dataset_profile",
|
|
172
222
|
"Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
|
|
223
|
+
"All Rows Query from string": "all_rows_query",
|
|
224
|
+
"All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
|
|
225
|
+
"All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
|
|
226
|
+
"Changed Rows Query from dict (with optional additional filter)": {
|
|
227
|
+
"type": "changed_rows_query",
|
|
228
|
+
"column_name": "id",
|
|
229
|
+
"additional_filter": "id > 1000",
|
|
230
|
+
},
|
|
231
|
+
"Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
|
|
232
|
+
"Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
|
|
233
|
+
"All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
|
|
234
|
+
"All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
|
|
173
235
|
}
|
|
174
236
|
|
|
175
237
|
@staticmethod
|
|
@@ -177,9 +239,10 @@ class DetectionMechanism:
|
|
|
177
239
|
detection_mechanism_config: Optional[
|
|
178
240
|
Union[str, dict[str, str], _DetectionMechanismTypes]
|
|
179
241
|
] = None,
|
|
242
|
+
default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
|
|
180
243
|
) -> _DetectionMechanismTypes:
|
|
181
244
|
if detection_mechanism_config is None:
|
|
182
|
-
return
|
|
245
|
+
return default_detection_mechanism
|
|
183
246
|
if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
|
|
184
247
|
return detection_mechanism_config
|
|
185
248
|
elif isinstance(detection_mechanism_config, str):
|
|
@@ -260,8 +323,6 @@ class DetectionMechanism:
|
|
|
260
323
|
) from e
|
|
261
324
|
|
|
262
325
|
|
|
263
|
-
DEFAULT_DETECTION_MECHANISM = DetectionMechanism.INFORMATION_SCHEMA
|
|
264
|
-
|
|
265
326
|
DetectionMechanismInputTypes: TypeAlias = Union[
|
|
266
327
|
str, dict[str, str], _DetectionMechanismTypes, None
|
|
267
328
|
]
|
|
@@ -328,7 +389,59 @@ class InferenceSensitivity(Enum):
|
|
|
328
389
|
}[sensitivity]
|
|
329
390
|
|
|
330
391
|
|
|
331
|
-
DEFAULT_SENSITIVITY = InferenceSensitivity.MEDIUM
|
|
392
|
+
DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
|
|
393
|
+
|
|
394
|
+
TIME_WINDOW_SIZE_EXAMPLES = {
|
|
395
|
+
"Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
|
|
396
|
+
"Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
class CalendarInterval(Enum):
|
|
401
|
+
MINUTE = "MINUTE"
|
|
402
|
+
HOUR = "HOUR"
|
|
403
|
+
DAY = "DAY"
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
class TimeWindowSize(BaseModel):
|
|
407
|
+
unit: Union[CalendarInterval, str]
|
|
408
|
+
multiple: int
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
TimeWindowSizeInputTypes: TypeAlias = Union[
|
|
412
|
+
models.TimeWindowSizeClass,
|
|
413
|
+
models.FixedIntervalScheduleClass,
|
|
414
|
+
TimeWindowSize,
|
|
415
|
+
]
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def _try_parse_time_window_size(
|
|
419
|
+
config: TimeWindowSizeInputTypes,
|
|
420
|
+
) -> models.TimeWindowSizeClass:
|
|
421
|
+
if isinstance(config, models.TimeWindowSizeClass):
|
|
422
|
+
return config
|
|
423
|
+
elif isinstance(config, models.FixedIntervalScheduleClass):
|
|
424
|
+
return models.TimeWindowSizeClass(
|
|
425
|
+
unit=_try_parse_and_validate_schema_classes_enum(
|
|
426
|
+
config.unit, models.CalendarIntervalClass
|
|
427
|
+
),
|
|
428
|
+
multiple=config.multiple,
|
|
429
|
+
)
|
|
430
|
+
elif isinstance(config, TimeWindowSize):
|
|
431
|
+
return models.TimeWindowSizeClass(
|
|
432
|
+
unit=_try_parse_and_validate_schema_classes_enum(
|
|
433
|
+
_try_parse_and_validate_schema_classes_enum(
|
|
434
|
+
config.unit, CalendarInterval
|
|
435
|
+
).value,
|
|
436
|
+
models.CalendarIntervalClass,
|
|
437
|
+
),
|
|
438
|
+
multiple=config.multiple,
|
|
439
|
+
)
|
|
440
|
+
else:
|
|
441
|
+
raise SDKUsageErrorWithExamples(
|
|
442
|
+
msg=f"Invalid time window size: {config}",
|
|
443
|
+
examples=TIME_WINDOW_SIZE_EXAMPLES,
|
|
444
|
+
)
|
|
332
445
|
|
|
333
446
|
|
|
334
447
|
class FixedRangeExclusionWindow(BaseModel):
|
|
@@ -594,6 +707,161 @@ def _try_parse_schedule(
|
|
|
594
707
|
FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
|
|
595
708
|
|
|
596
709
|
|
|
710
|
+
T = TypeVar("T")
|
|
711
|
+
|
|
712
|
+
|
|
713
|
+
def _try_parse_and_validate_schema_classes_enum(
|
|
714
|
+
value: Union[str, T],
|
|
715
|
+
enum_class: Type[T],
|
|
716
|
+
) -> T:
|
|
717
|
+
if isinstance(value, enum_class):
|
|
718
|
+
return value
|
|
719
|
+
assert isinstance(value, str)
|
|
720
|
+
if value.upper() not in get_enum_options(enum_class):
|
|
721
|
+
raise SDKUsageError(
|
|
722
|
+
f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
|
|
723
|
+
)
|
|
724
|
+
return getattr(enum_class, value.upper())
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
@dataclass(frozen=True)
|
|
728
|
+
class DatasetSourceType:
|
|
729
|
+
"""
|
|
730
|
+
DatasetSourceType is used to represent a dataset source type.
|
|
731
|
+
It is used to check if a source type is valid for a dataset type and assertion type.
|
|
732
|
+
|
|
733
|
+
Args:
|
|
734
|
+
source_type: The source type (e.g. information schema, field value, etc. aka detection mechanism)
|
|
735
|
+
platform: The platform of the dataset as a string OR "all" for all platforms.
|
|
736
|
+
assertion_type: The assertion type as a models.AssertionTypeClass string e.g. models.AssertionTypeClass.FRESHNESS OR "all" for all assertion types.
|
|
737
|
+
|
|
738
|
+
Example:
|
|
739
|
+
DatasetSourceType(
|
|
740
|
+
source_type=_InformationSchema,
|
|
741
|
+
platform="databricks",
|
|
742
|
+
assertion_type="all",
|
|
743
|
+
)
|
|
744
|
+
This means that the source type _InformationSchema is invalid for the dataset type "databricks" and assertion type "all".
|
|
745
|
+
"all" in this example means that the source type is invalid for all assertion types.
|
|
746
|
+
"""
|
|
747
|
+
|
|
748
|
+
source_type: Type[_DetectionMechanismTypes]
|
|
749
|
+
platform: str
|
|
750
|
+
assertion_type: Union[models.AssertionTypeClass, str]
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
INVALID_SOURCE_TYPES = {
|
|
754
|
+
# Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
|
|
755
|
+
DatasetSourceType(
|
|
756
|
+
source_type=_InformationSchema,
|
|
757
|
+
platform="databricks",
|
|
758
|
+
assertion_type="all",
|
|
759
|
+
)
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def _is_source_type_valid(
|
|
764
|
+
dataset_source_type: DatasetSourceType,
|
|
765
|
+
invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
|
|
766
|
+
) -> bool:
|
|
767
|
+
for invalid in invalid_source_types:
|
|
768
|
+
if invalid.source_type == dataset_source_type.source_type:
|
|
769
|
+
# If both platform and assertion type are "all", the source type is invalid for all combinations
|
|
770
|
+
if invalid.platform == "all" and invalid.assertion_type == "all":
|
|
771
|
+
return False
|
|
772
|
+
# If platform matches and assertion type is "all", the source type is invalid for all assertion types on that platform
|
|
773
|
+
if (
|
|
774
|
+
invalid.platform == dataset_source_type.platform
|
|
775
|
+
and invalid.assertion_type == "all"
|
|
776
|
+
):
|
|
777
|
+
return False
|
|
778
|
+
# If platform is "all" and assertion type matches, the source type is invalid for all platforms for that assertion type
|
|
779
|
+
if (
|
|
780
|
+
invalid.platform == "all"
|
|
781
|
+
and invalid.assertion_type == dataset_source_type.assertion_type
|
|
782
|
+
):
|
|
783
|
+
return False
|
|
784
|
+
# If both platform and assertion type match exactly, the source type is invalid
|
|
785
|
+
if (
|
|
786
|
+
invalid.platform == dataset_source_type.platform
|
|
787
|
+
and invalid.assertion_type == dataset_source_type.assertion_type
|
|
788
|
+
):
|
|
789
|
+
return False
|
|
790
|
+
return True
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
class _HasSmartAssertionInputs:
|
|
794
|
+
"""
|
|
795
|
+
A class that contains the common inputs for smart assertions.
|
|
796
|
+
This is used to avoid code duplication in the smart assertion inputs.
|
|
797
|
+
|
|
798
|
+
Args:
|
|
799
|
+
sensitivity: The sensitivity to be applied to the assertion.
|
|
800
|
+
exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
|
|
801
|
+
training_data_lookback_days: The training data lookback days to be applied to the assertion.
|
|
802
|
+
"""
|
|
803
|
+
|
|
804
|
+
def __init__(
|
|
805
|
+
self,
|
|
806
|
+
*,
|
|
807
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
808
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
809
|
+
training_data_lookback_days: Optional[int] = None,
|
|
810
|
+
):
|
|
811
|
+
self.sensitivity = InferenceSensitivity.parse(sensitivity)
|
|
812
|
+
self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
|
|
813
|
+
self.training_data_lookback_days = _try_parse_training_data_lookback_days(
|
|
814
|
+
training_data_lookback_days
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
def _convert_exclusion_windows(
|
|
818
|
+
self,
|
|
819
|
+
) -> list[models.AssertionExclusionWindowClass]:
|
|
820
|
+
"""
|
|
821
|
+
Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
|
|
822
|
+
|
|
823
|
+
Returns:
|
|
824
|
+
A list of AssertionExclusionWindowClass objects.
|
|
825
|
+
|
|
826
|
+
Raises:
|
|
827
|
+
SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
|
|
828
|
+
"""
|
|
829
|
+
exclusion_windows: list[models.AssertionExclusionWindowClass] = []
|
|
830
|
+
if self.exclusion_windows:
|
|
831
|
+
for window in self.exclusion_windows:
|
|
832
|
+
if not isinstance(window, FixedRangeExclusionWindow):
|
|
833
|
+
raise SDKUsageErrorWithExamples(
|
|
834
|
+
msg=f"Invalid exclusion window type: {window}",
|
|
835
|
+
examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
|
|
836
|
+
)
|
|
837
|
+
# To match the UI, we generate a display name for the exclusion window.
|
|
838
|
+
# See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
|
|
839
|
+
# Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
|
|
840
|
+
generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
|
|
841
|
+
exclusion_windows.append(
|
|
842
|
+
models.AssertionExclusionWindowClass(
|
|
843
|
+
type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE, # Currently only fixed range is supported
|
|
844
|
+
displayName=generated_display_name,
|
|
845
|
+
fixedRange=models.AbsoluteTimeWindowClass(
|
|
846
|
+
startTimeMillis=make_ts_millis(window.start),
|
|
847
|
+
endTimeMillis=make_ts_millis(window.end),
|
|
848
|
+
),
|
|
849
|
+
)
|
|
850
|
+
)
|
|
851
|
+
return exclusion_windows
|
|
852
|
+
|
|
853
|
+
def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
|
|
854
|
+
"""
|
|
855
|
+
Convert sensitivity into an AssertionMonitorSensitivityClass.
|
|
856
|
+
|
|
857
|
+
Returns:
|
|
858
|
+
An AssertionMonitorSensitivityClass with the appropriate sensitivity.
|
|
859
|
+
"""
|
|
860
|
+
return models.AssertionMonitorSensitivityClass(
|
|
861
|
+
level=InferenceSensitivity.to_int(self.sensitivity),
|
|
862
|
+
)
|
|
863
|
+
|
|
864
|
+
|
|
597
865
|
class _AssertionInput(ABC):
|
|
598
866
|
def __init__(
|
|
599
867
|
self,
|
|
@@ -609,9 +877,6 @@ class _AssertionInput(ABC):
|
|
|
609
877
|
enabled: bool = True,
|
|
610
878
|
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
611
879
|
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
612
|
-
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
613
|
-
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
614
|
-
training_data_lookback_days: Optional[int] = None,
|
|
615
880
|
incident_behavior: Optional[
|
|
616
881
|
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
617
882
|
] = None,
|
|
@@ -621,6 +886,7 @@ class _AssertionInput(ABC):
|
|
|
621
886
|
created_at: datetime,
|
|
622
887
|
updated_by: Union[str, CorpUserUrn],
|
|
623
888
|
updated_at: datetime,
|
|
889
|
+
default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
|
|
624
890
|
):
|
|
625
891
|
"""
|
|
626
892
|
Create an AssertionInput object.
|
|
@@ -632,9 +898,6 @@ class _AssertionInput(ABC):
|
|
|
632
898
|
display_name: The display name of the assertion. If not provided, a random display name will be generated.
|
|
633
899
|
enabled: Whether the assertion is enabled. Defaults to True.
|
|
634
900
|
detection_mechanism: The detection mechanism to be used for the assertion.
|
|
635
|
-
sensitivity: The sensitivity to be applied to the assertion.
|
|
636
|
-
exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
|
|
637
|
-
training_data_lookback_days: The training data lookback days to be applied to the assertion.
|
|
638
901
|
incident_behavior: The incident behavior to be applied to the assertion.
|
|
639
902
|
tags: The tags to be applied to the assertion.
|
|
640
903
|
source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
|
|
@@ -653,12 +916,19 @@ class _AssertionInput(ABC):
|
|
|
653
916
|
)
|
|
654
917
|
self.enabled = enabled
|
|
655
918
|
self.schedule = _try_parse_schedule(schedule)
|
|
656
|
-
self.detection_mechanism = DetectionMechanism.parse(
|
|
657
|
-
|
|
658
|
-
self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
|
|
659
|
-
self.training_data_lookback_days = _try_parse_training_data_lookback_days(
|
|
660
|
-
training_data_lookback_days
|
|
919
|
+
self.detection_mechanism = DetectionMechanism.parse(
|
|
920
|
+
detection_mechanism, default_detection_mechanism
|
|
661
921
|
)
|
|
922
|
+
if not _is_source_type_valid(
|
|
923
|
+
DatasetSourceType(
|
|
924
|
+
source_type=type(self.detection_mechanism),
|
|
925
|
+
platform=self.dataset_urn.platform,
|
|
926
|
+
assertion_type=self._assertion_type(),
|
|
927
|
+
)
|
|
928
|
+
):
|
|
929
|
+
raise SDKUsageError(
|
|
930
|
+
f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
|
|
931
|
+
)
|
|
662
932
|
self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
|
|
663
933
|
self.tags = tags
|
|
664
934
|
if source_type not in get_enum_options(models.AssertionSourceTypeClass):
|
|
@@ -670,7 +940,6 @@ class _AssertionInput(ABC):
|
|
|
670
940
|
self.created_at = created_at
|
|
671
941
|
self.updated_by = updated_by
|
|
672
942
|
self.updated_at = updated_at
|
|
673
|
-
|
|
674
943
|
self.cached_dataset: Optional[Dataset] = None
|
|
675
944
|
|
|
676
945
|
def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
|
|
@@ -836,8 +1105,6 @@ class _AssertionInput(ABC):
|
|
|
836
1105
|
schedule=self._convert_schedule(),
|
|
837
1106
|
source_type=source_type,
|
|
838
1107
|
field=field,
|
|
839
|
-
sensitivity=self._convert_sensitivity(),
|
|
840
|
-
exclusion_windows=self._convert_exclusion_windows(),
|
|
841
1108
|
),
|
|
842
1109
|
)
|
|
843
1110
|
|
|
@@ -854,53 +1121,6 @@ class _AssertionInput(ABC):
|
|
|
854
1121
|
else models.MonitorModeClass.INACTIVE,
|
|
855
1122
|
)
|
|
856
1123
|
|
|
857
|
-
def _convert_exclusion_windows(
|
|
858
|
-
self,
|
|
859
|
-
) -> list[models.AssertionExclusionWindowClass]:
|
|
860
|
-
"""
|
|
861
|
-
Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
|
|
862
|
-
|
|
863
|
-
Returns:
|
|
864
|
-
A list of AssertionExclusionWindowClass objects.
|
|
865
|
-
|
|
866
|
-
Raises:
|
|
867
|
-
SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
|
|
868
|
-
"""
|
|
869
|
-
exclusion_windows: list[models.AssertionExclusionWindowClass] = []
|
|
870
|
-
if self.exclusion_windows:
|
|
871
|
-
for window in self.exclusion_windows:
|
|
872
|
-
if not isinstance(window, FixedRangeExclusionWindow):
|
|
873
|
-
raise SDKUsageErrorWithExamples(
|
|
874
|
-
msg=f"Invalid exclusion window type: {window}",
|
|
875
|
-
examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
|
|
876
|
-
)
|
|
877
|
-
# To match the UI, we generate a display name for the exclusion window.
|
|
878
|
-
# See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
|
|
879
|
-
# Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
|
|
880
|
-
generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
|
|
881
|
-
exclusion_windows.append(
|
|
882
|
-
models.AssertionExclusionWindowClass(
|
|
883
|
-
type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE, # Currently only fixed range is supported
|
|
884
|
-
displayName=generated_display_name,
|
|
885
|
-
fixedRange=models.AbsoluteTimeWindowClass(
|
|
886
|
-
startTimeMillis=make_ts_millis(window.start),
|
|
887
|
-
endTimeMillis=make_ts_millis(window.end),
|
|
888
|
-
),
|
|
889
|
-
)
|
|
890
|
-
)
|
|
891
|
-
return exclusion_windows
|
|
892
|
-
|
|
893
|
-
def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
|
|
894
|
-
"""
|
|
895
|
-
Convert sensitivity into an AssertionMonitorSensitivityClass.
|
|
896
|
-
|
|
897
|
-
Returns:
|
|
898
|
-
An AssertionMonitorSensitivityClass with the appropriate sensitivity.
|
|
899
|
-
"""
|
|
900
|
-
return models.AssertionMonitorSensitivityClass(
|
|
901
|
-
level=InferenceSensitivity.to_int(self.sensitivity),
|
|
902
|
-
)
|
|
903
|
-
|
|
904
1124
|
def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
|
|
905
1125
|
"""
|
|
906
1126
|
Get the schema field spec for the detection mechanism if needed.
|
|
@@ -956,6 +1176,7 @@ class _AssertionInput(ABC):
|
|
|
956
1176
|
f"Allowed types are {allowed_type_names}.",
|
|
957
1177
|
)
|
|
958
1178
|
|
|
1179
|
+
@abstractmethod
|
|
959
1180
|
def _create_monitor_info(
|
|
960
1181
|
self,
|
|
961
1182
|
assertion_urn: AssertionUrn,
|
|
@@ -963,8 +1184,6 @@ class _AssertionInput(ABC):
|
|
|
963
1184
|
schedule: models.CronScheduleClass,
|
|
964
1185
|
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
965
1186
|
field: Optional[FieldSpecType],
|
|
966
|
-
sensitivity: models.AssertionMonitorSensitivityClass,
|
|
967
|
-
exclusion_windows: list[models.AssertionExclusionWindowClass],
|
|
968
1187
|
) -> models.MonitorInfoClass:
|
|
969
1188
|
"""
|
|
970
1189
|
Create a MonitorInfoClass with all the necessary components.
|
|
@@ -974,34 +1193,15 @@ class _AssertionInput(ABC):
|
|
|
974
1193
|
schedule: The monitor schedule.
|
|
975
1194
|
source_type: The source type.
|
|
976
1195
|
field: Optional field specification.
|
|
977
|
-
sensitivity: The monitor sensitivity.
|
|
978
|
-
exclusion_windows: List of exclusion windows.
|
|
979
|
-
|
|
980
1196
|
Returns:
|
|
981
1197
|
A MonitorInfoClass configured with all the provided components.
|
|
982
1198
|
"""
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
assertion=str(assertion_urn),
|
|
990
|
-
schedule=schedule,
|
|
991
|
-
parameters=self._get_assertion_evaluation_parameters(
|
|
992
|
-
str(source_type), field
|
|
993
|
-
),
|
|
994
|
-
)
|
|
995
|
-
],
|
|
996
|
-
settings=models.AssertionMonitorSettingsClass(
|
|
997
|
-
adjustmentSettings=models.AssertionAdjustmentSettingsClass(
|
|
998
|
-
sensitivity=sensitivity,
|
|
999
|
-
exclusionWindows=exclusion_windows,
|
|
1000
|
-
trainingDataLookbackWindowDays=self.training_data_lookback_days,
|
|
1001
|
-
),
|
|
1002
|
-
),
|
|
1003
|
-
),
|
|
1004
|
-
)
|
|
1199
|
+
pass
|
|
1200
|
+
|
|
1201
|
+
@abstractmethod
|
|
1202
|
+
def _assertion_type(self) -> str:
|
|
1203
|
+
"""Get the assertion type."""
|
|
1204
|
+
pass
|
|
1005
1205
|
|
|
1006
1206
|
@abstractmethod
|
|
1007
1207
|
def _create_assertion_info(
|
|
@@ -1030,7 +1230,55 @@ class _AssertionInput(ABC):
|
|
|
1030
1230
|
pass
|
|
1031
1231
|
|
|
1032
1232
|
|
|
1033
|
-
class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
1233
|
+
class _HasFreshnessFeatures:
|
|
1234
|
+
def _create_field_spec(
|
|
1235
|
+
self,
|
|
1236
|
+
column_name: str,
|
|
1237
|
+
allowed_types: list[DictWrapper], # TODO: Use the type from the PDL
|
|
1238
|
+
field_type_name: str,
|
|
1239
|
+
kind: str,
|
|
1240
|
+
get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
|
|
1241
|
+
validate_field_type: Callable[
|
|
1242
|
+
[models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
|
|
1243
|
+
],
|
|
1244
|
+
) -> models.FreshnessFieldSpecClass:
|
|
1245
|
+
"""
|
|
1246
|
+
Create a field specification for a column, validating its type.
|
|
1247
|
+
|
|
1248
|
+
Args:
|
|
1249
|
+
column_name: The name of the column to create a spec for
|
|
1250
|
+
allowed_types: List of allowed field types
|
|
1251
|
+
field_type_name: Human-readable name of the field type for error messages
|
|
1252
|
+
kind: The kind of field to create
|
|
1253
|
+
|
|
1254
|
+
Returns:
|
|
1255
|
+
A FreshnessFieldSpecClass for the column
|
|
1256
|
+
|
|
1257
|
+
Raises:
|
|
1258
|
+
SDKUsageError: If the column is not found or has an invalid type
|
|
1259
|
+
"""
|
|
1260
|
+
SUPPORTED_KINDS = [
|
|
1261
|
+
models.FreshnessFieldKindClass.LAST_MODIFIED,
|
|
1262
|
+
models.FreshnessFieldKindClass.HIGH_WATERMARK,
|
|
1263
|
+
]
|
|
1264
|
+
if kind not in SUPPORTED_KINDS:
|
|
1265
|
+
raise SDKUsageError(
|
|
1266
|
+
msg=f"Invalid kind: {kind}. Must be one of {SUPPORTED_KINDS}",
|
|
1267
|
+
)
|
|
1268
|
+
|
|
1269
|
+
field_spec = get_schema_field_spec(column_name)
|
|
1270
|
+
validate_field_type(field_spec, column_name, allowed_types, field_type_name)
|
|
1271
|
+
return models.FreshnessFieldSpecClass(
|
|
1272
|
+
path=field_spec.path,
|
|
1273
|
+
type=field_spec.type,
|
|
1274
|
+
nativeType=field_spec.nativeType,
|
|
1275
|
+
kind=kind,
|
|
1276
|
+
)
|
|
1277
|
+
|
|
1278
|
+
|
|
1279
|
+
class _SmartFreshnessAssertionInput(
|
|
1280
|
+
_AssertionInput, _HasSmartAssertionInputs, _HasFreshnessFeatures
|
|
1281
|
+
):
|
|
1034
1282
|
def __init__(
|
|
1035
1283
|
self,
|
|
1036
1284
|
*,
|
|
@@ -1055,7 +1303,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1055
1303
|
updated_by: Union[str, CorpUserUrn],
|
|
1056
1304
|
updated_at: datetime,
|
|
1057
1305
|
):
|
|
1058
|
-
|
|
1306
|
+
_AssertionInput.__init__(
|
|
1307
|
+
self,
|
|
1059
1308
|
dataset_urn=dataset_urn,
|
|
1060
1309
|
entity_client=entity_client,
|
|
1061
1310
|
urn=urn,
|
|
@@ -1063,11 +1312,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1063
1312
|
enabled=enabled,
|
|
1064
1313
|
schedule=schedule
|
|
1065
1314
|
if schedule is not None
|
|
1066
|
-
else
|
|
1315
|
+
else DEFAULT_HOURLY_SCHEDULE, # Use provided schedule or default for create case
|
|
1067
1316
|
detection_mechanism=detection_mechanism,
|
|
1068
|
-
sensitivity=sensitivity,
|
|
1069
|
-
exclusion_windows=exclusion_windows,
|
|
1070
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
1071
1317
|
incident_behavior=incident_behavior,
|
|
1072
1318
|
tags=tags,
|
|
1073
1319
|
source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
|
|
@@ -1076,6 +1322,16 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1076
1322
|
updated_by=updated_by,
|
|
1077
1323
|
updated_at=updated_at,
|
|
1078
1324
|
)
|
|
1325
|
+
_HasSmartAssertionInputs.__init__(
|
|
1326
|
+
self,
|
|
1327
|
+
sensitivity=sensitivity,
|
|
1328
|
+
exclusion_windows=exclusion_windows,
|
|
1329
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
1330
|
+
)
|
|
1331
|
+
|
|
1332
|
+
def _assertion_type(self) -> str:
|
|
1333
|
+
"""Get the assertion type."""
|
|
1334
|
+
return models.AssertionTypeClass.FRESHNESS
|
|
1079
1335
|
|
|
1080
1336
|
def _create_assertion_info(
|
|
1081
1337
|
self, filter: Optional[models.DatasetFilterClass]
|
|
@@ -1099,7 +1355,7 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1099
1355
|
def _convert_schedule(self) -> models.CronScheduleClass:
|
|
1100
1356
|
"""Create a schedule for a smart freshness assertion.
|
|
1101
1357
|
|
|
1102
|
-
For create case, uses
|
|
1358
|
+
For create case, uses DEFAULT_HOURLY_SCHEDULE. For update case, preserves existing schedule.
|
|
1103
1359
|
|
|
1104
1360
|
Returns:
|
|
1105
1361
|
A CronScheduleClass with appropriate schedule settings.
|
|
@@ -1155,6 +1411,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1155
1411
|
LAST_MODIFIED_ALLOWED_FIELD_TYPES,
|
|
1156
1412
|
"last modified column",
|
|
1157
1413
|
models.FreshnessFieldKindClass.LAST_MODIFIED,
|
|
1414
|
+
self._get_schema_field_spec,
|
|
1415
|
+
self._validate_field_type,
|
|
1158
1416
|
)
|
|
1159
1417
|
elif isinstance(self.detection_mechanism, _InformationSchema):
|
|
1160
1418
|
source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
|
|
@@ -1169,50 +1427,42 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1169
1427
|
|
|
1170
1428
|
return source_type, field
|
|
1171
1429
|
|
|
1172
|
-
def
|
|
1430
|
+
def _create_monitor_info(
|
|
1173
1431
|
self,
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1432
|
+
assertion_urn: AssertionUrn,
|
|
1433
|
+
status: models.MonitorStatusClass,
|
|
1434
|
+
schedule: models.CronScheduleClass,
|
|
1435
|
+
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1436
|
+
field: Optional[FieldSpecType],
|
|
1437
|
+
) -> models.MonitorInfoClass:
|
|
1179
1438
|
"""
|
|
1180
|
-
Create a
|
|
1181
|
-
|
|
1182
|
-
Args:
|
|
1183
|
-
column_name: The name of the column to create a spec for
|
|
1184
|
-
allowed_types: List of allowed field types
|
|
1185
|
-
field_type_name: Human-readable name of the field type for error messages
|
|
1186
|
-
kind: The kind of field to create
|
|
1187
|
-
|
|
1188
|
-
Returns:
|
|
1189
|
-
A FreshnessFieldSpecClass for the column
|
|
1190
|
-
|
|
1191
|
-
Raises:
|
|
1192
|
-
SDKUsageError: If the column is not found or has an invalid type
|
|
1439
|
+
Create a MonitorInfoClass with all the necessary components.
|
|
1193
1440
|
"""
|
|
1194
|
-
|
|
1195
|
-
models.
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1441
|
+
return models.MonitorInfoClass(
|
|
1442
|
+
type=models.MonitorTypeClass.ASSERTION,
|
|
1443
|
+
status=status,
|
|
1444
|
+
assertionMonitor=models.AssertionMonitorClass(
|
|
1445
|
+
assertions=[
|
|
1446
|
+
models.AssertionEvaluationSpecClass(
|
|
1447
|
+
assertion=str(assertion_urn),
|
|
1448
|
+
schedule=schedule,
|
|
1449
|
+
parameters=self._get_assertion_evaluation_parameters(
|
|
1450
|
+
str(source_type), field
|
|
1451
|
+
),
|
|
1452
|
+
),
|
|
1453
|
+
],
|
|
1454
|
+
settings=models.AssertionMonitorSettingsClass(
|
|
1455
|
+
adjustmentSettings=models.AssertionAdjustmentSettingsClass(
|
|
1456
|
+
sensitivity=self._convert_sensitivity(),
|
|
1457
|
+
exclusionWindows=self._convert_exclusion_windows(),
|
|
1458
|
+
trainingDataLookbackWindowDays=self.training_data_lookback_days,
|
|
1459
|
+
),
|
|
1460
|
+
),
|
|
1461
|
+
),
|
|
1212
1462
|
)
|
|
1213
1463
|
|
|
1214
1464
|
|
|
1215
|
-
class _SmartVolumeAssertionInput(_AssertionInput):
|
|
1465
|
+
class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
|
|
1216
1466
|
def __init__(
|
|
1217
1467
|
self,
|
|
1218
1468
|
*,
|
|
@@ -1237,7 +1487,8 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1237
1487
|
updated_by: Union[str, CorpUserUrn],
|
|
1238
1488
|
updated_at: datetime,
|
|
1239
1489
|
):
|
|
1240
|
-
|
|
1490
|
+
_AssertionInput.__init__(
|
|
1491
|
+
self,
|
|
1241
1492
|
dataset_urn=dataset_urn,
|
|
1242
1493
|
entity_client=entity_client,
|
|
1243
1494
|
urn=urn,
|
|
@@ -1245,9 +1496,6 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1245
1496
|
enabled=enabled,
|
|
1246
1497
|
schedule=schedule,
|
|
1247
1498
|
detection_mechanism=detection_mechanism,
|
|
1248
|
-
sensitivity=sensitivity,
|
|
1249
|
-
exclusion_windows=exclusion_windows,
|
|
1250
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
1251
1499
|
incident_behavior=incident_behavior,
|
|
1252
1500
|
tags=tags,
|
|
1253
1501
|
source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
|
|
@@ -1256,6 +1504,12 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1256
1504
|
updated_by=updated_by,
|
|
1257
1505
|
updated_at=updated_at,
|
|
1258
1506
|
)
|
|
1507
|
+
_HasSmartAssertionInputs.__init__(
|
|
1508
|
+
self,
|
|
1509
|
+
sensitivity=sensitivity,
|
|
1510
|
+
exclusion_windows=exclusion_windows,
|
|
1511
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
1512
|
+
)
|
|
1259
1513
|
|
|
1260
1514
|
def _create_assertion_info(
|
|
1261
1515
|
self, filter: Optional[models.DatasetFilterClass]
|
|
@@ -1276,15 +1530,13 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1276
1530
|
)
|
|
1277
1531
|
|
|
1278
1532
|
def _convert_schedule(self) -> models.CronScheduleClass:
|
|
1279
|
-
"""Create a schedule for a smart
|
|
1280
|
-
|
|
1281
|
-
Since the schedule is not used for smart freshness assertions, we return a default schedule.
|
|
1533
|
+
"""Create a schedule for a smart volume assertion.
|
|
1282
1534
|
|
|
1283
1535
|
Returns:
|
|
1284
1536
|
A CronScheduleClass with appropriate schedule settings.
|
|
1285
1537
|
"""
|
|
1286
1538
|
if self.schedule is None:
|
|
1287
|
-
return
|
|
1539
|
+
return DEFAULT_HOURLY_SCHEDULE
|
|
1288
1540
|
|
|
1289
1541
|
return models.CronScheduleClass(
|
|
1290
1542
|
cron=self.schedule.cron,
|
|
@@ -1333,3 +1585,41 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1333
1585
|
)
|
|
1334
1586
|
|
|
1335
1587
|
return source_type, field
|
|
1588
|
+
|
|
1589
|
+
def _create_monitor_info(
|
|
1590
|
+
self,
|
|
1591
|
+
assertion_urn: AssertionUrn,
|
|
1592
|
+
status: models.MonitorStatusClass,
|
|
1593
|
+
schedule: models.CronScheduleClass,
|
|
1594
|
+
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1595
|
+
field: Optional[FieldSpecType],
|
|
1596
|
+
) -> models.MonitorInfoClass:
|
|
1597
|
+
"""
|
|
1598
|
+
Create a MonitorInfoClass with all the necessary components.
|
|
1599
|
+
"""
|
|
1600
|
+
return models.MonitorInfoClass(
|
|
1601
|
+
type=models.MonitorTypeClass.ASSERTION,
|
|
1602
|
+
status=status,
|
|
1603
|
+
assertionMonitor=models.AssertionMonitorClass(
|
|
1604
|
+
assertions=[
|
|
1605
|
+
models.AssertionEvaluationSpecClass(
|
|
1606
|
+
assertion=str(assertion_urn),
|
|
1607
|
+
schedule=schedule,
|
|
1608
|
+
parameters=self._get_assertion_evaluation_parameters(
|
|
1609
|
+
str(source_type), field
|
|
1610
|
+
),
|
|
1611
|
+
),
|
|
1612
|
+
],
|
|
1613
|
+
settings=models.AssertionMonitorSettingsClass(
|
|
1614
|
+
adjustmentSettings=models.AssertionAdjustmentSettingsClass(
|
|
1615
|
+
sensitivity=self._convert_sensitivity(),
|
|
1616
|
+
exclusionWindows=self._convert_exclusion_windows(),
|
|
1617
|
+
trainingDataLookbackWindowDays=self.training_data_lookback_days,
|
|
1618
|
+
),
|
|
1619
|
+
),
|
|
1620
|
+
),
|
|
1621
|
+
)
|
|
1622
|
+
|
|
1623
|
+
def _assertion_type(self) -> str:
|
|
1624
|
+
"""Get the assertion type."""
|
|
1625
|
+
return models.AssertionTypeClass.VOLUME
|