acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
- acryl_datahub_cloud/metadata/schema.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
- acryl_datahub_cloud/sdk/__init__.py +5 -1
- acryl_datahub_cloud/sdk/assertion.py +313 -162
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +430 -147
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +947 -0
- acryl_datahub_cloud/sdk/assertions_client.py +493 -7
- acryl_datahub_cloud/sdk/entities/assertion.py +4 -1
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/METADATA +38 -38
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/RECORD +17 -14
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc4.dist-info}/top_level.txt +0 -0
|
@@ -6,9 +6,10 @@ validate and represent the input for creating an Assertion in DataHub.
|
|
|
6
6
|
import random
|
|
7
7
|
import string
|
|
8
8
|
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
9
10
|
from datetime import datetime
|
|
10
11
|
from enum import Enum
|
|
11
|
-
from typing import Literal, Optional, TypeAlias, Union
|
|
12
|
+
from typing import Callable, Literal, Optional, Type, TypeAlias, TypeVar, Union
|
|
12
13
|
|
|
13
14
|
import pydantic
|
|
14
15
|
import pytz
|
|
@@ -42,12 +43,21 @@ ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS = 60
|
|
|
42
43
|
DEFAULT_NAME_PREFIX = "New Assertion"
|
|
43
44
|
DEFAULT_NAME_SUFFIX_LENGTH = 8
|
|
44
45
|
|
|
45
|
-
|
|
46
|
+
|
|
47
|
+
DEFAULT_HOURLY_SCHEDULE = models.CronScheduleClass(
|
|
46
48
|
cron="0 * * * *", # Every hour, matches the UI default
|
|
47
49
|
timezone=str(
|
|
48
50
|
tzlocal.get_localzone()
|
|
49
51
|
), # User local timezone, matches the UI default
|
|
50
52
|
)
|
|
53
|
+
DEFAULT_SCHEDULE: models.CronScheduleClass = DEFAULT_HOURLY_SCHEDULE
|
|
54
|
+
|
|
55
|
+
DEFAULT_DAILY_SCHEDULE = models.CronScheduleClass(
|
|
56
|
+
cron="0 0 * * *", # Every day at midnight, matches the UI default
|
|
57
|
+
timezone=str(
|
|
58
|
+
tzlocal.get_localzone()
|
|
59
|
+
), # User local timezone, matches the UI default
|
|
60
|
+
)
|
|
51
61
|
|
|
52
62
|
|
|
53
63
|
class AbstractDetectionMechanism(BaseModel, ABC):
|
|
@@ -101,6 +111,26 @@ class _Query(AbstractDetectionMechanism):
|
|
|
101
111
|
additional_filter: Optional[str] = None
|
|
102
112
|
|
|
103
113
|
|
|
114
|
+
class _AllRowsQuery(AbstractDetectionMechanism):
|
|
115
|
+
# For column-based assertions, this is the default detection mechanism.
|
|
116
|
+
type: Literal["all_rows_query"] = "all_rows_query"
|
|
117
|
+
additional_filter: Optional[str] = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class _AllRowsQueryDataHubDatasetProfile(AbstractDetectionMechanism):
|
|
121
|
+
# Used for column-based assertions.
|
|
122
|
+
type: Literal["all_rows_query_datahub_dataset_profile"] = (
|
|
123
|
+
"all_rows_query_datahub_dataset_profile"
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class _ChangedRowsQuery(AbstractDetectionMechanism):
|
|
128
|
+
# Used for column-based assertions.
|
|
129
|
+
type: Literal["changed_rows_query"] = "changed_rows_query"
|
|
130
|
+
column_name: str
|
|
131
|
+
additional_filter: Optional[str] = None
|
|
132
|
+
|
|
133
|
+
|
|
104
134
|
class _DatasetProfile(AbstractDetectionMechanism):
|
|
105
135
|
type: Literal["dataset_profile"] = "dataset_profile"
|
|
106
136
|
|
|
@@ -114,6 +144,9 @@ _DETECTION_MECHANISM_CONCRETE_TYPES = (
|
|
|
114
144
|
_DataHubOperation,
|
|
115
145
|
_Query,
|
|
116
146
|
_DatasetProfile,
|
|
147
|
+
_AllRowsQuery,
|
|
148
|
+
_ChangedRowsQuery,
|
|
149
|
+
_AllRowsQueryDataHubDatasetProfile,
|
|
117
150
|
)
|
|
118
151
|
_DetectionMechanismTypes = Union[
|
|
119
152
|
_InformationSchema,
|
|
@@ -123,14 +156,21 @@ _DetectionMechanismTypes = Union[
|
|
|
123
156
|
_DataHubOperation,
|
|
124
157
|
_Query,
|
|
125
158
|
_DatasetProfile,
|
|
159
|
+
_AllRowsQuery,
|
|
160
|
+
_ChangedRowsQuery,
|
|
161
|
+
_AllRowsQueryDataHubDatasetProfile,
|
|
126
162
|
]
|
|
127
163
|
|
|
128
164
|
_DETECTION_MECHANISM_TYPES_WITH_ADDITIONAL_FILTER = (
|
|
129
165
|
_LastModifiedColumn,
|
|
130
166
|
_HighWatermarkColumn,
|
|
131
167
|
_Query,
|
|
168
|
+
_AllRowsQuery,
|
|
169
|
+
_ChangedRowsQuery,
|
|
132
170
|
)
|
|
133
171
|
|
|
172
|
+
DEFAULT_DETECTION_MECHANISM: _DetectionMechanismTypes = _InformationSchema()
|
|
173
|
+
|
|
134
174
|
|
|
135
175
|
class DetectionMechanism:
|
|
136
176
|
# To have a more enum-like user experience even with sub parameters, we define the detection mechanisms as class attributes.
|
|
@@ -141,6 +181,9 @@ class DetectionMechanism:
|
|
|
141
181
|
HIGH_WATERMARK_COLUMN = _HighWatermarkColumn
|
|
142
182
|
DATAHUB_OPERATION = _DataHubOperation()
|
|
143
183
|
QUERY = _Query
|
|
184
|
+
ALL_ROWS_QUERY = _AllRowsQuery()
|
|
185
|
+
CHANGED_ROWS_QUERY = _ChangedRowsQuery
|
|
186
|
+
ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE = _AllRowsQueryDataHubDatasetProfile()
|
|
144
187
|
DATASET_PROFILE = _DatasetProfile()
|
|
145
188
|
|
|
146
189
|
_DETECTION_MECHANISM_EXAMPLES = {
|
|
@@ -170,6 +213,18 @@ class DetectionMechanism:
|
|
|
170
213
|
"Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.QUERY(additional_filter='id > 1000')",
|
|
171
214
|
"Dataset Profile from string": "dataset_profile",
|
|
172
215
|
"Dataset Profile from DetectionMechanism": "DetectionMechanism.DATASET_PROFILE",
|
|
216
|
+
"All Rows Query from string": "all_rows_query",
|
|
217
|
+
"All Rows Query from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY",
|
|
218
|
+
"All Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.ALL_ROWS_QUERY(additional_filter='id > 1000')",
|
|
219
|
+
"Changed Rows Query from dict (with optional additional filter)": {
|
|
220
|
+
"type": "changed_rows_query",
|
|
221
|
+
"column_name": "id",
|
|
222
|
+
"additional_filter": "id > 1000",
|
|
223
|
+
},
|
|
224
|
+
"Changed Rows Query from DetectionMechanism": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id')",
|
|
225
|
+
"Changed Rows Query from DetectionMechanism (with optional additional filter)": "DetectionMechanism.CHANGED_ROWS_QUERY(column_name='id', additional_filter='id > 1000')",
|
|
226
|
+
"All Rows Query DataHub Dataset Profile from string": "all_rows_query_datahub_dataset_profile",
|
|
227
|
+
"All Rows Query DataHub Dataset Profile from DetectionMechanism": "DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE",
|
|
173
228
|
}
|
|
174
229
|
|
|
175
230
|
@staticmethod
|
|
@@ -177,9 +232,10 @@ class DetectionMechanism:
|
|
|
177
232
|
detection_mechanism_config: Optional[
|
|
178
233
|
Union[str, dict[str, str], _DetectionMechanismTypes]
|
|
179
234
|
] = None,
|
|
235
|
+
default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
|
|
180
236
|
) -> _DetectionMechanismTypes:
|
|
181
237
|
if detection_mechanism_config is None:
|
|
182
|
-
return
|
|
238
|
+
return default_detection_mechanism
|
|
183
239
|
if isinstance(detection_mechanism_config, _DETECTION_MECHANISM_CONCRETE_TYPES):
|
|
184
240
|
return detection_mechanism_config
|
|
185
241
|
elif isinstance(detection_mechanism_config, str):
|
|
@@ -260,8 +316,6 @@ class DetectionMechanism:
|
|
|
260
316
|
) from e
|
|
261
317
|
|
|
262
318
|
|
|
263
|
-
DEFAULT_DETECTION_MECHANISM = DetectionMechanism.INFORMATION_SCHEMA
|
|
264
|
-
|
|
265
319
|
DetectionMechanismInputTypes: TypeAlias = Union[
|
|
266
320
|
str, dict[str, str], _DetectionMechanismTypes, None
|
|
267
321
|
]
|
|
@@ -328,7 +382,59 @@ class InferenceSensitivity(Enum):
|
|
|
328
382
|
}[sensitivity]
|
|
329
383
|
|
|
330
384
|
|
|
331
|
-
DEFAULT_SENSITIVITY = InferenceSensitivity.MEDIUM
|
|
385
|
+
DEFAULT_SENSITIVITY: InferenceSensitivity = InferenceSensitivity.MEDIUM
|
|
386
|
+
|
|
387
|
+
TIME_WINDOW_SIZE_EXAMPLES = {
|
|
388
|
+
"Time window size from models.TimeWindowSizeClass": "models.TimeWindowSizeClass(unit='MINUTE', multiple=10)",
|
|
389
|
+
"Time window size from object": "TimeWindowSize(unit='MINUTE', multiple=10)",
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
class CalendarInterval(Enum):
|
|
394
|
+
MINUTE = "MINUTE"
|
|
395
|
+
HOUR = "HOUR"
|
|
396
|
+
DAY = "DAY"
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
class TimeWindowSize(BaseModel):
|
|
400
|
+
unit: Union[CalendarInterval, str]
|
|
401
|
+
multiple: int
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
TimeWindowSizeInputTypes: TypeAlias = Union[
|
|
405
|
+
models.TimeWindowSizeClass,
|
|
406
|
+
models.FixedIntervalScheduleClass,
|
|
407
|
+
TimeWindowSize,
|
|
408
|
+
]
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def _try_parse_time_window_size(
    config: TimeWindowSizeInputTypes,
) -> models.TimeWindowSizeClass:
    """
    Normalize a time window size input into a models.TimeWindowSizeClass.

    Args:
        config: One of models.TimeWindowSizeClass (returned unchanged),
            models.FixedIntervalScheduleClass, or the SDK-level TimeWindowSize.

    Returns:
        A models.TimeWindowSizeClass carrying the same unit and multiple.

    Raises:
        SDKUsageErrorWithExamples: If config is not one of the supported types.
            Errors raised by _try_parse_and_validate_schema_classes_enum for an
            unrecognized unit propagate unchanged.
    """
    # Already in the target representation: hand it back untouched.
    if isinstance(config, models.TimeWindowSizeClass):
        return config

    if isinstance(config, models.FixedIntervalScheduleClass):
        schedule_unit = _try_parse_and_validate_schema_classes_enum(
            config.unit, models.CalendarIntervalClass
        )
        return models.TimeWindowSizeClass(
            unit=schedule_unit,
            multiple=config.multiple,
        )

    if isinstance(config, TimeWindowSize):
        # Two-step validation: first against the SDK-level CalendarInterval,
        # then its value against the schema-level CalendarIntervalClass.
        sdk_interval = _try_parse_and_validate_schema_classes_enum(
            config.unit, CalendarInterval
        )
        model_unit = _try_parse_and_validate_schema_classes_enum(
            sdk_interval.value,
            models.CalendarIntervalClass,
        )
        return models.TimeWindowSizeClass(
            unit=model_unit,
            multiple=config.multiple,
        )

    raise SDKUsageErrorWithExamples(
        msg=f"Invalid time window size: {config}",
        examples=TIME_WINDOW_SIZE_EXAMPLES,
    )
|
|
332
438
|
|
|
333
439
|
|
|
334
440
|
class FixedRangeExclusionWindow(BaseModel):
|
|
@@ -594,6 +700,161 @@ def _try_parse_schedule(
|
|
|
594
700
|
FieldSpecType = Union[models.FreshnessFieldSpecClass, models.SchemaFieldSpecClass]
|
|
595
701
|
|
|
596
702
|
|
|
703
|
+
T = TypeVar("T")
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
def _try_parse_and_validate_schema_classes_enum(
|
|
707
|
+
value: Union[str, T],
|
|
708
|
+
enum_class: Type[T],
|
|
709
|
+
) -> T:
|
|
710
|
+
if isinstance(value, enum_class):
|
|
711
|
+
return value
|
|
712
|
+
assert isinstance(value, str)
|
|
713
|
+
if value not in get_enum_options(enum_class):
|
|
714
|
+
raise SDKUsageError(
|
|
715
|
+
f"Invalid value for {enum_class.__name__}: {value}, valid options are {get_enum_options(enum_class)}"
|
|
716
|
+
)
|
|
717
|
+
return getattr(enum_class, value.upper())
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
@dataclass(frozen=True)
class DatasetSourceType:
    """
    A (source type, platform, assertion type) combination.

    Instances are used to describe combinations for which a source type
    (detection mechanism) is NOT valid, and to look a concrete combination up
    against those rules. The dataclass is frozen, so instances are hashable
    and can be stored in a set.

    Args:
        source_type: The detection mechanism class (e.g. information schema, field value, etc.).
        platform: The dataset platform as a string, or "all" to cover every platform.
        assertion_type: A models.AssertionTypeClass string such as
            models.AssertionTypeClass.FRESHNESS, or "all" to cover every assertion type.

    Example:
        DatasetSourceType(
            source_type=_InformationSchema,
            platform="databricks",
            assertion_type="all",
        )
        Used as an invalid-combination rule, this says that _InformationSchema
        is invalid on the "databricks" platform for all assertion types.
    """

    source_type: Type[_DetectionMechanismTypes]
    platform: str
    assertion_type: Union[models.AssertionTypeClass, str]
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
INVALID_SOURCE_TYPES = {
|
|
747
|
+
# Add exceptions here if a source type (detection mechanism) is invalid for a dataset type and assertion type.
|
|
748
|
+
DatasetSourceType(
|
|
749
|
+
source_type=_InformationSchema,
|
|
750
|
+
platform="databricks",
|
|
751
|
+
assertion_type="all",
|
|
752
|
+
)
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def _is_source_type_valid(
    dataset_source_type: DatasetSourceType,
    invalid_source_types: set[DatasetSourceType] = INVALID_SOURCE_TYPES,
) -> bool:
    """
    Check a source type / platform / assertion type combination against the invalid rules.

    A rule rejects the combination when its source type is the same and both
    of the following hold:
      - the rule's platform is "all" or equals the combination's platform;
      - the rule's assertion type is "all" or equals the combination's assertion type.

    Args:
        dataset_source_type: The combination to check.
        invalid_source_types: The rules describing invalid combinations.

    Returns:
        False if any rule rejects the combination, True otherwise.
    """
    candidate = dataset_source_type

    def _rejects(rule: DatasetSourceType) -> bool:
        # "all" acts as a wildcard on the rule side only.
        platform_hit = rule.platform == "all" or rule.platform == candidate.platform
        assertion_hit = (
            rule.assertion_type == "all"
            or rule.assertion_type == candidate.assertion_type
        )
        return (
            rule.source_type == candidate.source_type
            and platform_hit
            and assertion_hit
        )

    return not any(_rejects(rule) for rule in invalid_source_types)
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
class _HasSmartAssertionInputs:
    """
    A class that contains the common inputs for smart assertions.
    This is used to avoid code duplication in the smart assertion inputs.

    Args:
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
        training_data_lookback_days: The training data lookback days to be applied to the assertion.
    """

    def __init__(
        self,
        *,
        sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
        exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
        training_data_lookback_days: Optional[int] = None,
    ):
        # Each raw input is normalized by its dedicated parser before being stored.
        self.sensitivity = InferenceSensitivity.parse(sensitivity)
        self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
        self.training_data_lookback_days = _try_parse_training_data_lookback_days(
            training_data_lookback_days
        )

    @staticmethod
    def _format_exclusion_window_time(moment: datetime) -> str:
        """
        Format a datetime like dayjs' 'MMM D, h:mm A' (e.g. "Jan 5, 3:07 PM").

        The non-padded day and 12-hour values are computed by hand instead of
        using the '%-d' / '%-I' strftime directives: those are platform-specific
        extensions and are not accepted by every C library (e.g. on Windows).
        """
        hour_12 = moment.hour % 12 or 12  # 0 and 12 both display as 12
        return f"{moment.strftime('%b')} {moment.day}, {hour_12}:{moment.strftime('%M %p')}"

    def _convert_exclusion_windows(
        self,
    ) -> list[models.AssertionExclusionWindowClass]:
        """
        Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.

        Returns:
            A list of AssertionExclusionWindowClass objects.

        Raises:
            SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
        """
        exclusion_windows: list[models.AssertionExclusionWindowClass] = []
        if self.exclusion_windows:
            for window in self.exclusion_windows:
                if not isinstance(window, FixedRangeExclusionWindow):
                    raise SDKUsageErrorWithExamples(
                        msg=f"Invalid exclusion window type: {window}",
                        examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
                    )
                # To match the UI, we generate a display name for the exclusion window.
                # See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
                # Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
                generated_display_name = (
                    f"{self._format_exclusion_window_time(window.start)}"
                    f" - {self._format_exclusion_window_time(window.end)}"
                )
                exclusion_windows.append(
                    models.AssertionExclusionWindowClass(
                        type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,  # Currently only fixed range is supported
                        displayName=generated_display_name,
                        fixedRange=models.AbsoluteTimeWindowClass(
                            startTimeMillis=make_ts_millis(window.start),
                            endTimeMillis=make_ts_millis(window.end),
                        ),
                    )
                )
        return exclusion_windows

    def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
        """
        Convert sensitivity into an AssertionMonitorSensitivityClass.

        Returns:
            An AssertionMonitorSensitivityClass with the appropriate sensitivity.
        """
        return models.AssertionMonitorSensitivityClass(
            level=InferenceSensitivity.to_int(self.sensitivity),
        )
|
|
856
|
+
|
|
857
|
+
|
|
597
858
|
class _AssertionInput(ABC):
|
|
598
859
|
def __init__(
|
|
599
860
|
self,
|
|
@@ -609,9 +870,6 @@ class _AssertionInput(ABC):
|
|
|
609
870
|
enabled: bool = True,
|
|
610
871
|
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
611
872
|
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
612
|
-
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
613
|
-
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
614
|
-
training_data_lookback_days: Optional[int] = None,
|
|
615
873
|
incident_behavior: Optional[
|
|
616
874
|
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
617
875
|
] = None,
|
|
@@ -621,6 +879,7 @@ class _AssertionInput(ABC):
|
|
|
621
879
|
created_at: datetime,
|
|
622
880
|
updated_by: Union[str, CorpUserUrn],
|
|
623
881
|
updated_at: datetime,
|
|
882
|
+
default_detection_mechanism: _DetectionMechanismTypes = DEFAULT_DETECTION_MECHANISM,
|
|
624
883
|
):
|
|
625
884
|
"""
|
|
626
885
|
Create an AssertionInput object.
|
|
@@ -632,9 +891,6 @@ class _AssertionInput(ABC):
|
|
|
632
891
|
display_name: The display name of the assertion. If not provided, a random display name will be generated.
|
|
633
892
|
enabled: Whether the assertion is enabled. Defaults to True.
|
|
634
893
|
detection_mechanism: The detection mechanism to be used for the assertion.
|
|
635
|
-
sensitivity: The sensitivity to be applied to the assertion.
|
|
636
|
-
exclusion_windows: The exclusion windows to be applied to the assertion. If not provided, no exclusion windows will be applied.
|
|
637
|
-
training_data_lookback_days: The training data lookback days to be applied to the assertion.
|
|
638
894
|
incident_behavior: The incident behavior to be applied to the assertion.
|
|
639
895
|
tags: The tags to be applied to the assertion.
|
|
640
896
|
source_type: The source type of the assertion. Defaults to models.AssertionSourceTypeClass.NATIVE.
|
|
@@ -653,12 +909,19 @@ class _AssertionInput(ABC):
|
|
|
653
909
|
)
|
|
654
910
|
self.enabled = enabled
|
|
655
911
|
self.schedule = _try_parse_schedule(schedule)
|
|
656
|
-
self.detection_mechanism = DetectionMechanism.parse(
|
|
657
|
-
|
|
658
|
-
self.exclusion_windows = _try_parse_exclusion_window(exclusion_windows)
|
|
659
|
-
self.training_data_lookback_days = _try_parse_training_data_lookback_days(
|
|
660
|
-
training_data_lookback_days
|
|
912
|
+
self.detection_mechanism = DetectionMechanism.parse(
|
|
913
|
+
detection_mechanism, default_detection_mechanism
|
|
661
914
|
)
|
|
915
|
+
if not _is_source_type_valid(
|
|
916
|
+
DatasetSourceType(
|
|
917
|
+
source_type=type(self.detection_mechanism),
|
|
918
|
+
platform=self.dataset_urn.platform,
|
|
919
|
+
assertion_type=self._assertion_type(),
|
|
920
|
+
)
|
|
921
|
+
):
|
|
922
|
+
raise SDKUsageError(
|
|
923
|
+
f"Invalid source type: {self.detection_mechanism} for dataset type: {self.dataset_urn.platform} and assertion type: {self._assertion_type()}"
|
|
924
|
+
)
|
|
662
925
|
self.incident_behavior = _try_parse_incident_behavior(incident_behavior)
|
|
663
926
|
self.tags = tags
|
|
664
927
|
if source_type not in get_enum_options(models.AssertionSourceTypeClass):
|
|
@@ -670,7 +933,6 @@ class _AssertionInput(ABC):
|
|
|
670
933
|
self.created_at = created_at
|
|
671
934
|
self.updated_by = updated_by
|
|
672
935
|
self.updated_at = updated_at
|
|
673
|
-
|
|
674
936
|
self.cached_dataset: Optional[Dataset] = None
|
|
675
937
|
|
|
676
938
|
def to_assertion_and_monitor_entities(self) -> tuple[Assertion, Monitor]:
|
|
@@ -836,8 +1098,6 @@ class _AssertionInput(ABC):
|
|
|
836
1098
|
schedule=self._convert_schedule(),
|
|
837
1099
|
source_type=source_type,
|
|
838
1100
|
field=field,
|
|
839
|
-
sensitivity=self._convert_sensitivity(),
|
|
840
|
-
exclusion_windows=self._convert_exclusion_windows(),
|
|
841
1101
|
),
|
|
842
1102
|
)
|
|
843
1103
|
|
|
@@ -854,53 +1114,6 @@ class _AssertionInput(ABC):
|
|
|
854
1114
|
else models.MonitorModeClass.INACTIVE,
|
|
855
1115
|
)
|
|
856
1116
|
|
|
857
|
-
def _convert_exclusion_windows(
|
|
858
|
-
self,
|
|
859
|
-
) -> list[models.AssertionExclusionWindowClass]:
|
|
860
|
-
"""
|
|
861
|
-
Convert exclusion windows into AssertionExclusionWindowClass objects including generating display names for them.
|
|
862
|
-
|
|
863
|
-
Returns:
|
|
864
|
-
A list of AssertionExclusionWindowClass objects.
|
|
865
|
-
|
|
866
|
-
Raises:
|
|
867
|
-
SDKUsageErrorWithExamples: If an exclusion window is of an invalid type.
|
|
868
|
-
"""
|
|
869
|
-
exclusion_windows: list[models.AssertionExclusionWindowClass] = []
|
|
870
|
-
if self.exclusion_windows:
|
|
871
|
-
for window in self.exclusion_windows:
|
|
872
|
-
if not isinstance(window, FixedRangeExclusionWindow):
|
|
873
|
-
raise SDKUsageErrorWithExamples(
|
|
874
|
-
msg=f"Invalid exclusion window type: {window}",
|
|
875
|
-
examples=FIXED_RANGE_EXCLUSION_WINDOW_EXAMPLES,
|
|
876
|
-
)
|
|
877
|
-
# To match the UI, we generate a display name for the exclusion window.
|
|
878
|
-
# See here for the UI code: https://github.com/acryldata/datahub-fork/blob/acryl-main/datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/inferred/common/ExclusionWindowAdjuster.tsx#L31
|
|
879
|
-
# Copied here for reference: displayName: `${dayjs(startTime).format('MMM D, h:mm A')} - ${dayjs(endTime).format('MMM D, h:mm A')}`,
|
|
880
|
-
generated_display_name = f"{window.start.strftime('%b %-d, %-I:%M %p')} - {window.end.strftime('%b %-d, %-I:%M %p')}"
|
|
881
|
-
exclusion_windows.append(
|
|
882
|
-
models.AssertionExclusionWindowClass(
|
|
883
|
-
type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE, # Currently only fixed range is supported
|
|
884
|
-
displayName=generated_display_name,
|
|
885
|
-
fixedRange=models.AbsoluteTimeWindowClass(
|
|
886
|
-
startTimeMillis=make_ts_millis(window.start),
|
|
887
|
-
endTimeMillis=make_ts_millis(window.end),
|
|
888
|
-
),
|
|
889
|
-
)
|
|
890
|
-
)
|
|
891
|
-
return exclusion_windows
|
|
892
|
-
|
|
893
|
-
def _convert_sensitivity(self) -> models.AssertionMonitorSensitivityClass:
|
|
894
|
-
"""
|
|
895
|
-
Convert sensitivity into an AssertionMonitorSensitivityClass.
|
|
896
|
-
|
|
897
|
-
Returns:
|
|
898
|
-
An AssertionMonitorSensitivityClass with the appropriate sensitivity.
|
|
899
|
-
"""
|
|
900
|
-
return models.AssertionMonitorSensitivityClass(
|
|
901
|
-
level=InferenceSensitivity.to_int(self.sensitivity),
|
|
902
|
-
)
|
|
903
|
-
|
|
904
1117
|
def _get_schema_field_spec(self, column_name: str) -> models.SchemaFieldSpecClass:
|
|
905
1118
|
"""
|
|
906
1119
|
Get the schema field spec for the detection mechanism if needed.
|
|
@@ -956,6 +1169,7 @@ class _AssertionInput(ABC):
|
|
|
956
1169
|
f"Allowed types are {allowed_type_names}.",
|
|
957
1170
|
)
|
|
958
1171
|
|
|
1172
|
+
@abstractmethod
|
|
959
1173
|
def _create_monitor_info(
|
|
960
1174
|
self,
|
|
961
1175
|
assertion_urn: AssertionUrn,
|
|
@@ -963,8 +1177,6 @@ class _AssertionInput(ABC):
|
|
|
963
1177
|
schedule: models.CronScheduleClass,
|
|
964
1178
|
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
965
1179
|
field: Optional[FieldSpecType],
|
|
966
|
-
sensitivity: models.AssertionMonitorSensitivityClass,
|
|
967
|
-
exclusion_windows: list[models.AssertionExclusionWindowClass],
|
|
968
1180
|
) -> models.MonitorInfoClass:
|
|
969
1181
|
"""
|
|
970
1182
|
Create a MonitorInfoClass with all the necessary components.
|
|
@@ -974,34 +1186,15 @@ class _AssertionInput(ABC):
|
|
|
974
1186
|
schedule: The monitor schedule.
|
|
975
1187
|
source_type: The source type.
|
|
976
1188
|
field: Optional field specification.
|
|
977
|
-
sensitivity: The monitor sensitivity.
|
|
978
|
-
exclusion_windows: List of exclusion windows.
|
|
979
|
-
|
|
980
1189
|
Returns:
|
|
981
1190
|
A MonitorInfoClass configured with all the provided components.
|
|
982
1191
|
"""
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
assertion=str(assertion_urn),
|
|
990
|
-
schedule=schedule,
|
|
991
|
-
parameters=self._get_assertion_evaluation_parameters(
|
|
992
|
-
str(source_type), field
|
|
993
|
-
),
|
|
994
|
-
)
|
|
995
|
-
],
|
|
996
|
-
settings=models.AssertionMonitorSettingsClass(
|
|
997
|
-
adjustmentSettings=models.AssertionAdjustmentSettingsClass(
|
|
998
|
-
sensitivity=sensitivity,
|
|
999
|
-
exclusionWindows=exclusion_windows,
|
|
1000
|
-
trainingDataLookbackWindowDays=self.training_data_lookback_days,
|
|
1001
|
-
),
|
|
1002
|
-
),
|
|
1003
|
-
),
|
|
1004
|
-
)
|
|
1192
|
+
pass
|
|
1193
|
+
|
|
1194
|
+
@abstractmethod
|
|
1195
|
+
def _assertion_type(self) -> str:
|
|
1196
|
+
"""Get the assertion type."""
|
|
1197
|
+
pass
|
|
1005
1198
|
|
|
1006
1199
|
@abstractmethod
|
|
1007
1200
|
def _create_assertion_info(
|
|
@@ -1030,7 +1223,55 @@ class _AssertionInput(ABC):
|
|
|
1030
1223
|
pass
|
|
1031
1224
|
|
|
1032
1225
|
|
|
1033
|
-
class
|
|
1226
|
+
class _HasFreshnessFeatures:
    """Helpers shared by assertion inputs that build freshness field specs."""

    def _create_field_spec(
        self,
        column_name: str,
        allowed_types: list[DictWrapper],  # TODO: Use the type from the PDL
        field_type_name: str,
        kind: str,
        get_schema_field_spec: Callable[[str], models.SchemaFieldSpecClass],
        validate_field_type: Callable[
            [models.SchemaFieldSpecClass, str, list[DictWrapper], str], None
        ],
    ) -> models.FreshnessFieldSpecClass:
        """
        Build a freshness field spec for a column after validating its type.

        Args:
            column_name: Name of the column the spec is built for.
            allowed_types: Field types the column is allowed to have.
            field_type_name: Human-readable field type name, passed to the validator for its messages.
            kind: The freshness field kind; must be LAST_MODIFIED or HIGH_WATERMARK.
            get_schema_field_spec: Callback that returns the schema field spec for a column name.
            validate_field_type: Callback invoked with (field spec, column name,
                allowed types, field type name) before the result is built.

        Returns:
            A FreshnessFieldSpecClass copying path, type and nativeType from the
            schema field spec, with the requested kind.

        Raises:
            SDKUsageError: If kind is not one of the supported kinds. Errors
                raised by either callback propagate unchanged.
        """
        supported_kinds = [
            models.FreshnessFieldKindClass.LAST_MODIFIED,
            models.FreshnessFieldKindClass.HIGH_WATERMARK,
        ]
        if kind not in supported_kinds:
            raise SDKUsageError(
                msg=f"Invalid kind: {kind}. Must be one of {supported_kinds}",
            )

        # Resolve the column, then let the caller-supplied validator reject bad types.
        schema_spec = get_schema_field_spec(column_name)
        validate_field_type(schema_spec, column_name, allowed_types, field_type_name)

        return models.FreshnessFieldSpecClass(
            path=schema_spec.path,
            type=schema_spec.type,
            nativeType=schema_spec.nativeType,
            kind=kind,
        )
|
|
1270
|
+
|
|
1271
|
+
|
|
1272
|
+
class _SmartFreshnessAssertionInput(
|
|
1273
|
+
_AssertionInput, _HasSmartAssertionInputs, _HasFreshnessFeatures
|
|
1274
|
+
):
|
|
1034
1275
|
def __init__(
|
|
1035
1276
|
self,
|
|
1036
1277
|
*,
|
|
@@ -1055,7 +1296,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1055
1296
|
updated_by: Union[str, CorpUserUrn],
|
|
1056
1297
|
updated_at: datetime,
|
|
1057
1298
|
):
|
|
1058
|
-
|
|
1299
|
+
_AssertionInput.__init__(
|
|
1300
|
+
self,
|
|
1059
1301
|
dataset_urn=dataset_urn,
|
|
1060
1302
|
entity_client=entity_client,
|
|
1061
1303
|
urn=urn,
|
|
@@ -1063,11 +1305,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1063
1305
|
enabled=enabled,
|
|
1064
1306
|
schedule=schedule
|
|
1065
1307
|
if schedule is not None
|
|
1066
|
-
else
|
|
1308
|
+
else DEFAULT_HOURLY_SCHEDULE, # Use provided schedule or default for create case
|
|
1067
1309
|
detection_mechanism=detection_mechanism,
|
|
1068
|
-
sensitivity=sensitivity,
|
|
1069
|
-
exclusion_windows=exclusion_windows,
|
|
1070
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
1071
1310
|
incident_behavior=incident_behavior,
|
|
1072
1311
|
tags=tags,
|
|
1073
1312
|
source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
|
|
@@ -1076,6 +1315,16 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1076
1315
|
updated_by=updated_by,
|
|
1077
1316
|
updated_at=updated_at,
|
|
1078
1317
|
)
|
|
1318
|
+
_HasSmartAssertionInputs.__init__(
|
|
1319
|
+
self,
|
|
1320
|
+
sensitivity=sensitivity,
|
|
1321
|
+
exclusion_windows=exclusion_windows,
|
|
1322
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
1323
|
+
)
|
|
1324
|
+
|
|
1325
|
+
def _assertion_type(self) -> str:
|
|
1326
|
+
"""Get the assertion type."""
|
|
1327
|
+
return models.AssertionTypeClass.FRESHNESS
|
|
1079
1328
|
|
|
1080
1329
|
def _create_assertion_info(
|
|
1081
1330
|
self, filter: Optional[models.DatasetFilterClass]
|
|
@@ -1099,7 +1348,7 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1099
1348
|
def _convert_schedule(self) -> models.CronScheduleClass:
|
|
1100
1349
|
"""Create a schedule for a smart freshness assertion.
|
|
1101
1350
|
|
|
1102
|
-
For create case, uses
|
|
1351
|
+
For create case, uses DEFAULT_HOURLY_SCHEDULE. For update case, preserves existing schedule.
|
|
1103
1352
|
|
|
1104
1353
|
Returns:
|
|
1105
1354
|
A CronScheduleClass with appropriate schedule settings.
|
|
@@ -1155,6 +1404,8 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1155
1404
|
LAST_MODIFIED_ALLOWED_FIELD_TYPES,
|
|
1156
1405
|
"last modified column",
|
|
1157
1406
|
models.FreshnessFieldKindClass.LAST_MODIFIED,
|
|
1407
|
+
self._get_schema_field_spec,
|
|
1408
|
+
self._validate_field_type,
|
|
1158
1409
|
)
|
|
1159
1410
|
elif isinstance(self.detection_mechanism, _InformationSchema):
|
|
1160
1411
|
source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
|
|
@@ -1169,50 +1420,42 @@ class _SmartFreshnessAssertionInput(_AssertionInput):
|
|
|
1169
1420
|
|
|
1170
1421
|
return source_type, field
|
|
1171
1422
|
|
|
1172
|
-
def
|
|
1423
|
+
def _create_monitor_info(
|
|
1173
1424
|
self,
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1425
|
+
assertion_urn: AssertionUrn,
|
|
1426
|
+
status: models.MonitorStatusClass,
|
|
1427
|
+
schedule: models.CronScheduleClass,
|
|
1428
|
+
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1429
|
+
field: Optional[FieldSpecType],
|
|
1430
|
+
) -> models.MonitorInfoClass:
|
|
1179
1431
|
"""
|
|
1180
|
-
Create a
|
|
1181
|
-
|
|
1182
|
-
Args:
|
|
1183
|
-
column_name: The name of the column to create a spec for
|
|
1184
|
-
allowed_types: List of allowed field types
|
|
1185
|
-
field_type_name: Human-readable name of the field type for error messages
|
|
1186
|
-
kind: The kind of field to create
|
|
1187
|
-
|
|
1188
|
-
Returns:
|
|
1189
|
-
A FreshnessFieldSpecClass for the column
|
|
1190
|
-
|
|
1191
|
-
Raises:
|
|
1192
|
-
SDKUsageError: If the column is not found or has an invalid type
|
|
1432
|
+
Create a MonitorInfoClass with all the necessary components.
|
|
1193
1433
|
"""
|
|
1194
|
-
|
|
1195
|
-
models.
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1434
|
+
return models.MonitorInfoClass(
|
|
1435
|
+
type=models.MonitorTypeClass.ASSERTION,
|
|
1436
|
+
status=status,
|
|
1437
|
+
assertionMonitor=models.AssertionMonitorClass(
|
|
1438
|
+
assertions=[
|
|
1439
|
+
models.AssertionEvaluationSpecClass(
|
|
1440
|
+
assertion=str(assertion_urn),
|
|
1441
|
+
schedule=schedule,
|
|
1442
|
+
parameters=self._get_assertion_evaluation_parameters(
|
|
1443
|
+
str(source_type), field
|
|
1444
|
+
),
|
|
1445
|
+
),
|
|
1446
|
+
],
|
|
1447
|
+
settings=models.AssertionMonitorSettingsClass(
|
|
1448
|
+
adjustmentSettings=models.AssertionAdjustmentSettingsClass(
|
|
1449
|
+
sensitivity=self._convert_sensitivity(),
|
|
1450
|
+
exclusionWindows=self._convert_exclusion_windows(),
|
|
1451
|
+
trainingDataLookbackWindowDays=self.training_data_lookback_days,
|
|
1452
|
+
),
|
|
1453
|
+
),
|
|
1454
|
+
),
|
|
1212
1455
|
)
|
|
1213
1456
|
|
|
1214
1457
|
|
|
1215
|
-
class _SmartVolumeAssertionInput(_AssertionInput):
|
|
1458
|
+
class _SmartVolumeAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
|
|
1216
1459
|
def __init__(
|
|
1217
1460
|
self,
|
|
1218
1461
|
*,
|
|
@@ -1237,7 +1480,8 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1237
1480
|
updated_by: Union[str, CorpUserUrn],
|
|
1238
1481
|
updated_at: datetime,
|
|
1239
1482
|
):
|
|
1240
|
-
|
|
1483
|
+
_AssertionInput.__init__(
|
|
1484
|
+
self,
|
|
1241
1485
|
dataset_urn=dataset_urn,
|
|
1242
1486
|
entity_client=entity_client,
|
|
1243
1487
|
urn=urn,
|
|
@@ -1245,9 +1489,6 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1245
1489
|
enabled=enabled,
|
|
1246
1490
|
schedule=schedule,
|
|
1247
1491
|
detection_mechanism=detection_mechanism,
|
|
1248
|
-
sensitivity=sensitivity,
|
|
1249
|
-
exclusion_windows=exclusion_windows,
|
|
1250
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
1251
1492
|
incident_behavior=incident_behavior,
|
|
1252
1493
|
tags=tags,
|
|
1253
1494
|
source_type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
|
|
@@ -1256,6 +1497,12 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1256
1497
|
updated_by=updated_by,
|
|
1257
1498
|
updated_at=updated_at,
|
|
1258
1499
|
)
|
|
1500
|
+
_HasSmartAssertionInputs.__init__(
|
|
1501
|
+
self,
|
|
1502
|
+
sensitivity=sensitivity,
|
|
1503
|
+
exclusion_windows=exclusion_windows,
|
|
1504
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
1505
|
+
)
|
|
1259
1506
|
|
|
1260
1507
|
def _create_assertion_info(
|
|
1261
1508
|
self, filter: Optional[models.DatasetFilterClass]
|
|
@@ -1276,15 +1523,13 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1276
1523
|
)
|
|
1277
1524
|
|
|
1278
1525
|
def _convert_schedule(self) -> models.CronScheduleClass:
|
|
1279
|
-
"""Create a schedule for a smart
|
|
1280
|
-
|
|
1281
|
-
Since the schedule is not used for smart freshness assertions, we return a default schedule.
|
|
1526
|
+
"""Create a schedule for a smart volume assertion.
|
|
1282
1527
|
|
|
1283
1528
|
Returns:
|
|
1284
1529
|
A CronScheduleClass with appropriate schedule settings.
|
|
1285
1530
|
"""
|
|
1286
1531
|
if self.schedule is None:
|
|
1287
|
-
return
|
|
1532
|
+
return DEFAULT_HOURLY_SCHEDULE
|
|
1288
1533
|
|
|
1289
1534
|
return models.CronScheduleClass(
|
|
1290
1535
|
cron=self.schedule.cron,
|
|
@@ -1333,3 +1578,41 @@ class _SmartVolumeAssertionInput(_AssertionInput):
|
|
|
1333
1578
|
)
|
|
1334
1579
|
|
|
1335
1580
|
return source_type, field
|
|
1581
|
+
|
|
1582
|
+
def _create_monitor_info(
|
|
1583
|
+
self,
|
|
1584
|
+
assertion_urn: AssertionUrn,
|
|
1585
|
+
status: models.MonitorStatusClass,
|
|
1586
|
+
schedule: models.CronScheduleClass,
|
|
1587
|
+
source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
|
|
1588
|
+
field: Optional[FieldSpecType],
|
|
1589
|
+
) -> models.MonitorInfoClass:
|
|
1590
|
+
"""
|
|
1591
|
+
Create a MonitorInfoClass with all the necessary components.
|
|
1592
|
+
"""
|
|
1593
|
+
return models.MonitorInfoClass(
|
|
1594
|
+
type=models.MonitorTypeClass.ASSERTION,
|
|
1595
|
+
status=status,
|
|
1596
|
+
assertionMonitor=models.AssertionMonitorClass(
|
|
1597
|
+
assertions=[
|
|
1598
|
+
models.AssertionEvaluationSpecClass(
|
|
1599
|
+
assertion=str(assertion_urn),
|
|
1600
|
+
schedule=schedule,
|
|
1601
|
+
parameters=self._get_assertion_evaluation_parameters(
|
|
1602
|
+
str(source_type), field
|
|
1603
|
+
),
|
|
1604
|
+
),
|
|
1605
|
+
],
|
|
1606
|
+
settings=models.AssertionMonitorSettingsClass(
|
|
1607
|
+
adjustmentSettings=models.AssertionAdjustmentSettingsClass(
|
|
1608
|
+
sensitivity=self._convert_sensitivity(),
|
|
1609
|
+
exclusionWindows=self._convert_exclusion_windows(),
|
|
1610
|
+
trainingDataLookbackWindowDays=self.training_data_lookback_days,
|
|
1611
|
+
),
|
|
1612
|
+
),
|
|
1613
|
+
),
|
|
1614
|
+
)
|
|
1615
|
+
|
|
1616
|
+
def _assertion_type(self) -> str:
|
|
1617
|
+
"""Get the assertion type."""
|
|
1618
|
+
return models.AssertionTypeClass.VOLUME
|