acryl-datahub-cloud 0.3.12.1rc3__py3-none-any.whl → 0.3.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/sdk/__init__.py +20 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +146 -97
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +10 -22
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +99 -19
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +965 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +191 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +60 -11
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +438 -347
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +105 -61
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +381 -392
- acryl_datahub_cloud/sdk/assertions_client.py +993 -314
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/METADATA +47 -47
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/RECORD +18 -15
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared constants for column metric assertions (both smart and non-smart).
|
|
3
|
+
|
|
4
|
+
This module contains constants that are used by both smart and non-smart column metric assertions
|
|
5
|
+
to ensure consistency and avoid duplication.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
11
|
+
from datahub.metadata import schema_classes as models
|
|
12
|
+
|
|
13
|
+
# Instances of the schema field type classes allowed for column metric assertions.
# Keep this in sync with the frontend in getEligibleFieldColumns
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION = [
    # One fresh instance per allowed type class, in the original order.
    type_class()
    for type_class in (
        models.StringTypeClass,
        models.NumberTypeClass,
        models.BooleanTypeClass,
        models.DateTypeClass,
        models.TimeTypeClass,
        models.NullTypeClass,
    )
]
|
|
23
|
+
|
|
24
|
+
# Operators listed per column type name for field-values assertions.
# Keep this in sync with FIELD_VALUES_OPERATOR_CONFIG in the frontend
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
FIELD_VALUES_OPERATOR_CONFIG = {
    # NOTE(review): STRING lists ordering operators (GREATER_THAN, LESS_THAN,
    # BETWEEN, ...) - presumably mirrors the frontend table; confirm there.
    "STRING": [
        models.AssertionStdOperatorClass.NULL,
        models.AssertionStdOperatorClass.NOT_NULL,
        models.AssertionStdOperatorClass.EQUAL_TO,
        models.AssertionStdOperatorClass.IN,
        models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
        models.AssertionStdOperatorClass.REGEX_MATCH,
        models.AssertionStdOperatorClass.GREATER_THAN,
        models.AssertionStdOperatorClass.LESS_THAN,
        models.AssertionStdOperatorClass.BETWEEN,
    ],
    # NOTE(review): NUMBER has GREATER_THAN_OR_EQUAL_TO but no
    # LESS_THAN_OR_EQUAL_TO - verify against the frontend before changing.
    "NUMBER": [
        models.AssertionStdOperatorClass.GREATER_THAN,
        models.AssertionStdOperatorClass.LESS_THAN,
        models.AssertionStdOperatorClass.BETWEEN,
        models.AssertionStdOperatorClass.NULL,
        models.AssertionStdOperatorClass.NOT_NULL,
        models.AssertionStdOperatorClass.EQUAL_TO,
        models.AssertionStdOperatorClass.IN,
        models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
        models.AssertionStdOperatorClass.NOT_EQUAL_TO,
    ],
    "BOOLEAN": [
        models.AssertionStdOperatorClass.IS_TRUE,
        models.AssertionStdOperatorClass.IS_FALSE,
        models.AssertionStdOperatorClass.NULL,
        models.AssertionStdOperatorClass.NOT_NULL,
    ],
    # DATE, TIME and NULL columns only list the null / not-null checks.
    "DATE": [
        models.AssertionStdOperatorClass.NULL,
        models.AssertionStdOperatorClass.NOT_NULL,
    ],
    "TIME": [
        models.AssertionStdOperatorClass.NULL,
        models.AssertionStdOperatorClass.NOT_NULL,
    ],
    "NULL": [
        models.AssertionStdOperatorClass.NULL,
        models.AssertionStdOperatorClass.NOT_NULL,
    ],
}
|
|
68
|
+
|
|
69
|
+
# Field metric types listed per column type name.
# Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
FIELD_METRIC_TYPE_CONFIG = {
    # Null/unique metrics plus length and emptiness metrics.
    "STRING": [
        models.FieldMetricTypeClass.NULL_COUNT,
        models.FieldMetricTypeClass.NULL_PERCENTAGE,
        models.FieldMetricTypeClass.UNIQUE_COUNT,
        models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
        models.FieldMetricTypeClass.MAX_LENGTH,
        models.FieldMetricTypeClass.MIN_LENGTH,
        models.FieldMetricTypeClass.EMPTY_COUNT,
        models.FieldMetricTypeClass.EMPTY_PERCENTAGE,
    ],
    # Null/unique metrics plus statistical, negative and zero metrics.
    "NUMBER": [
        models.FieldMetricTypeClass.NULL_COUNT,
        models.FieldMetricTypeClass.NULL_PERCENTAGE,
        models.FieldMetricTypeClass.UNIQUE_COUNT,
        models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
        models.FieldMetricTypeClass.MAX,
        models.FieldMetricTypeClass.MIN,
        models.FieldMetricTypeClass.MEAN,
        models.FieldMetricTypeClass.MEDIAN,
        models.FieldMetricTypeClass.STDDEV,
        models.FieldMetricTypeClass.NEGATIVE_COUNT,
        models.FieldMetricTypeClass.NEGATIVE_PERCENTAGE,
        models.FieldMetricTypeClass.ZERO_COUNT,
        models.FieldMetricTypeClass.ZERO_PERCENTAGE,
    ],
    # The remaining types only list the null/unique metrics.
    "BOOLEAN": [
        models.FieldMetricTypeClass.NULL_COUNT,
        models.FieldMetricTypeClass.NULL_PERCENTAGE,
        models.FieldMetricTypeClass.UNIQUE_COUNT,
        models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
    ],
    "DATE": [
        models.FieldMetricTypeClass.NULL_COUNT,
        models.FieldMetricTypeClass.NULL_PERCENTAGE,
        models.FieldMetricTypeClass.UNIQUE_COUNT,
        models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
    ],
    "TIME": [
        models.FieldMetricTypeClass.NULL_COUNT,
        models.FieldMetricTypeClass.NULL_PERCENTAGE,
        models.FieldMetricTypeClass.UNIQUE_COUNT,
        models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
    ],
    "NULL": [
        models.FieldMetricTypeClass.NULL_COUNT,
        models.FieldMetricTypeClass.NULL_PERCENTAGE,
        models.FieldMetricTypeClass.UNIQUE_COUNT,
        models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
    ],
}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class MetricType(str, Enum):
    """Field metric types accepted by column metric assertions.

    Every member takes its value from the identically named attribute of
    ``models.FieldMetricTypeClass``.
    """

    # Null and uniqueness metrics
    NULL_COUNT = models.FieldMetricTypeClass.NULL_COUNT
    NULL_PERCENTAGE = models.FieldMetricTypeClass.NULL_PERCENTAGE
    UNIQUE_COUNT = models.FieldMetricTypeClass.UNIQUE_COUNT
    UNIQUE_PERCENTAGE = models.FieldMetricTypeClass.UNIQUE_PERCENTAGE

    # Length and emptiness metrics
    MAX_LENGTH = models.FieldMetricTypeClass.MAX_LENGTH
    MIN_LENGTH = models.FieldMetricTypeClass.MIN_LENGTH
    EMPTY_COUNT = models.FieldMetricTypeClass.EMPTY_COUNT
    EMPTY_PERCENTAGE = models.FieldMetricTypeClass.EMPTY_PERCENTAGE

    # Statistical metrics
    MIN = models.FieldMetricTypeClass.MIN
    MAX = models.FieldMetricTypeClass.MAX
    MEAN = models.FieldMetricTypeClass.MEAN
    MEDIAN = models.FieldMetricTypeClass.MEDIAN
    STDDEV = models.FieldMetricTypeClass.STDDEV

    # Negative and zero value metrics
    NEGATIVE_COUNT = models.FieldMetricTypeClass.NEGATIVE_COUNT
    NEGATIVE_PERCENTAGE = models.FieldMetricTypeClass.NEGATIVE_PERCENTAGE
    ZERO_COUNT = models.FieldMetricTypeClass.ZERO_COUNT
    ZERO_PERCENTAGE = models.FieldMetricTypeClass.ZERO_PERCENTAGE
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class OperatorType(str, Enum):
    """Assertion operators accepted by column metric assertions.

    Every member takes its value from the identically named attribute of
    ``models.AssertionStdOperatorClass``.
    """

    # Equality and ordering comparisons
    EQUAL_TO = models.AssertionStdOperatorClass.EQUAL_TO
    NOT_EQUAL_TO = models.AssertionStdOperatorClass.NOT_EQUAL_TO
    GREATER_THAN = models.AssertionStdOperatorClass.GREATER_THAN
    GREATER_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
    LESS_THAN = models.AssertionStdOperatorClass.LESS_THAN
    LESS_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
    BETWEEN = models.AssertionStdOperatorClass.BETWEEN

    # Membership checks
    IN = models.AssertionStdOperatorClass.IN
    NOT_IN = models.AssertionStdOperatorClass.NOT_IN

    # Null and boolean checks
    NULL = models.AssertionStdOperatorClass.NULL
    NOT_NULL = models.AssertionStdOperatorClass.NOT_NULL
    IS_TRUE = models.AssertionStdOperatorClass.IS_TRUE
    IS_FALSE = models.AssertionStdOperatorClass.IS_FALSE

    # String matching
    CONTAIN = models.AssertionStdOperatorClass.CONTAIN
    END_WITH = models.AssertionStdOperatorClass.END_WITH
    START_WITH = models.AssertionStdOperatorClass.START_WITH
    REGEX_MATCH = models.AssertionStdOperatorClass.REGEX_MATCH
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class ValueType(str, Enum):
    """Parameter value types accepted for assertion values.

    Every member takes its value from the identically named attribute of
    ``models.AssertionStdParameterTypeClass``.
    """

    STRING = models.AssertionStdParameterTypeClass.STRING
    NUMBER = models.AssertionStdParameterTypeClass.NUMBER
    UNKNOWN = models.AssertionStdParameterTypeClass.UNKNOWN
    # NOTE: LIST and SET from AssertionStdParameterTypeClass are intentionally
    # left out because they are not yet supported.
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# Type aliases

# A metric given as the SDK enum, the schema class, or a plain string.
MetricInputType = Union[MetricType, models.FieldMetricTypeClass, str]

# A single assertion parameter value.
ValueInputType = Union[str, int, float]

# A value type given as the SDK enum, the schema class, or a plain string.
ValueTypeInputType = Union[ValueType, models.AssertionStdParameterTypeClass, str]

# A two-element tuple of assertion parameter values.
RangeInputType = tuple[ValueInputType, ValueInputType]

# Value type(s) for a range: either one type or a two-element tuple of types.
RangeTypeInputType = Union[
    str,
    tuple[str, str],
    ValueTypeInputType,
    tuple[ValueTypeInputType, ValueTypeInputType],
]

# Range value types expressed as a two-element tuple.
RangeTypeParsedType = tuple[ValueTypeInputType, ValueTypeInputType]

# An operator given as the SDK enum, the schema class, or a plain string.
OperatorInputType = Union[OperatorType, models.AssertionStdOperatorClass, str]
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
|
+
from enum import Enum
|
|
2
3
|
from typing import Optional, Union
|
|
3
4
|
|
|
4
5
|
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
@@ -32,6 +33,48 @@ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
|
|
|
32
33
|
from datahub.sdk.entity_client import EntityClient
|
|
33
34
|
|
|
34
35
|
|
|
36
|
+
class FreshnessAssertionScheduleCheckType(str, Enum):
    """String-valued enum of freshness assertion schedule check types.

    Lookup by value is case-insensitive for strings, so
    ``FreshnessAssertionScheduleCheckType("fixed_interval")`` resolves to
    ``FIXED_INTERVAL``. Values that match no member still raise ``ValueError``.
    """

    FIXED_INTERVAL = "FIXED_INTERVAL"
    SINCE_THE_LAST_CHECK = "SINCE_THE_LAST_CHECK"

    @classmethod
    def _missing_(
        cls, value: object
    ) -> Optional["FreshnessAssertionScheduleCheckType"]:
        """Resolve a string that differs from a member value only by case.

        Enum calls this hook only after the exact-value lookup has failed.

        Returns:
            The matching member, or ``None`` so that Enum raises its usual
            ``ValueError`` for unknown values.
        """
        if isinstance(value, str):
            wanted = value.upper()
            for member in cls:
                if member.value == wanted:
                    return member
        return None


# Check type used when the caller does not supply one.
DEFAULT_FRESHNESS_SCHEDULE_CHECK_TYPE = (
    FreshnessAssertionScheduleCheckType.SINCE_THE_LAST_CHECK
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_freshness_schedule_check_type(
    schedule_check_type: Optional[
        Union[
            str,
            FreshnessAssertionScheduleCheckType,
            models.FreshnessAssertionScheduleTypeClass,
        ]
    ],
) -> FreshnessAssertionScheduleCheckType:
    """Normalize a schedule check type input to the SDK enum.

    Args:
        schedule_check_type: An enum member, a schema class instance, a
            string (matched case-insensitively), or a falsy value.

    Returns:
        The matching ``FreshnessAssertionScheduleCheckType`` member;
        ``DEFAULT_FRESHNESS_SCHEDULE_CHECK_TYPE`` when the input is falsy.

    Raises:
        ValueError: From the enum constructor when the value matches no member.
    """
    # Already the SDK enum: nothing to convert.
    if isinstance(schedule_check_type, FreshnessAssertionScheduleCheckType):
        return schedule_check_type

    # Schema class instance: validate through the shared helper, then convert.
    if isinstance(schedule_check_type, models.FreshnessAssertionScheduleTypeClass):
        validated = _try_parse_and_validate_schema_classes_enum(
            schedule_check_type, models.FreshnessAssertionScheduleTypeClass
        )
        return FreshnessAssertionScheduleCheckType(validated)

    # None or empty input selects the default.
    if not schedule_check_type:
        return DEFAULT_FRESHNESS_SCHEDULE_CHECK_TYPE

    # Strings are compared case-insensitively against the member values.
    if isinstance(schedule_check_type, str):
        wanted = schedule_check_type.upper()
        matched = next(
            (
                candidate
                for candidate in FreshnessAssertionScheduleCheckType
                if candidate.value.upper() == wanted
            ),
            None,
        )
        if matched is not None:
            return matched

    # No match: let the enum constructor produce the error.
    return FreshnessAssertionScheduleCheckType(schedule_check_type)
|
|
76
|
+
|
|
77
|
+
|
|
35
78
|
class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
|
|
36
79
|
def _assertion_type(self) -> str:
|
|
37
80
|
"""Get the assertion type."""
|
|
@@ -56,7 +99,11 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
|
|
|
56
99
|
updated_by: Union[str, CorpUserUrn],
|
|
57
100
|
updated_at: datetime,
|
|
58
101
|
freshness_schedule_check_type: Optional[
|
|
59
|
-
Union[
|
|
102
|
+
Union[
|
|
103
|
+
str,
|
|
104
|
+
FreshnessAssertionScheduleCheckType,
|
|
105
|
+
models.FreshnessAssertionScheduleTypeClass,
|
|
106
|
+
]
|
|
60
107
|
] = None,
|
|
61
108
|
lookback_window: Optional[TimeWindowSizeInputTypes] = None,
|
|
62
109
|
):
|
|
@@ -78,28 +125,30 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
|
|
|
78
125
|
updated_at=updated_at,
|
|
79
126
|
)
|
|
80
127
|
|
|
81
|
-
self.freshness_schedule_check_type = (
|
|
82
|
-
|
|
83
|
-
freshness_schedule_check_type
|
|
84
|
-
or models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK,
|
|
85
|
-
models.FreshnessAssertionScheduleTypeClass,
|
|
86
|
-
)
|
|
128
|
+
self.freshness_schedule_check_type = _parse_freshness_schedule_check_type(
|
|
129
|
+
freshness_schedule_check_type
|
|
87
130
|
)
|
|
88
131
|
self.lookback_window = (
|
|
89
132
|
_try_parse_time_window_size(lookback_window) if lookback_window else None
|
|
90
133
|
)
|
|
134
|
+
self._validate_schedule_check_type()
|
|
135
|
+
|
|
136
|
+
def _validate_schedule_check_type(self) -> None:
|
|
137
|
+
"""Validate the schedule check type."""
|
|
138
|
+
if self.freshness_schedule_check_type is None:
|
|
139
|
+
raise SDKUsageError("Freshness schedule check type is required.")
|
|
91
140
|
if (
|
|
92
141
|
self.freshness_schedule_check_type
|
|
93
|
-
|
|
94
|
-
and lookback_window is None
|
|
142
|
+
== FreshnessAssertionScheduleCheckType.FIXED_INTERVAL
|
|
143
|
+
and self.lookback_window is None
|
|
95
144
|
):
|
|
96
145
|
raise SDKUsageError(
|
|
97
146
|
"Fixed interval freshness assertions must have a lookback_window provided."
|
|
98
147
|
)
|
|
99
148
|
if (
|
|
100
149
|
self.freshness_schedule_check_type
|
|
101
|
-
|
|
102
|
-
and lookback_window is not None
|
|
150
|
+
== FreshnessAssertionScheduleCheckType.SINCE_THE_LAST_CHECK
|
|
151
|
+
and self.lookback_window is not None
|
|
103
152
|
):
|
|
104
153
|
raise SDKUsageError(
|
|
105
154
|
"Since the last check freshness assertions cannot have a lookback_window provided."
|