acryl-datahub-cloud 0.3.12.1rc2__py3-none-any.whl → 0.3.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -0,0 +1,191 @@
1
+ """
2
+ Shared constants for column metric assertions (both smart and non-smart).
3
+
4
+ This module contains constants that are used by both smart and non-smart column metric assertions
5
+ to ensure consistency and avoid duplication.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import Union
10
+
11
+ from datahub.metadata import schema_classes as models
12
+
13
# Schema field types accepted by column metric assertions, one instance per type.
# Keep this in sync with the frontend in getEligibleFieldColumns
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION = [
    column_type_class()
    for column_type_class in (
        models.StringTypeClass,
        models.NumberTypeClass,
        models.BooleanTypeClass,
        models.DateTypeClass,
        models.TimeTypeClass,
        models.NullTypeClass,
    )
]
23
+
24
# Operators permitted for field-value assertions, keyed by column type name.
# The order inside each list is preserved exactly as declared.
# Keep this in sync with FIELD_VALUES_OPERATOR_CONFIG in the frontend
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
_operator = models.AssertionStdOperatorClass
# Null checks appear in every column type's list; unpacked below so each
# entry still gets its own independent list object.
_null_checks = (_operator.NULL, _operator.NOT_NULL)
FIELD_VALUES_OPERATOR_CONFIG = {
    "STRING": [
        *_null_checks,
        _operator.EQUAL_TO,
        _operator.IN,
        _operator.GREATER_THAN_OR_EQUAL_TO,
        _operator.REGEX_MATCH,
        _operator.GREATER_THAN,
        _operator.LESS_THAN,
        _operator.BETWEEN,
    ],
    "NUMBER": [
        _operator.GREATER_THAN,
        _operator.LESS_THAN,
        _operator.BETWEEN,
        *_null_checks,
        _operator.EQUAL_TO,
        _operator.IN,
        _operator.GREATER_THAN_OR_EQUAL_TO,
        _operator.NOT_EQUAL_TO,
    ],
    "BOOLEAN": [
        _operator.IS_TRUE,
        _operator.IS_FALSE,
        *_null_checks,
    ],
    "DATE": list(_null_checks),
    "TIME": list(_null_checks),
    "NULL": list(_null_checks),
}
# The helpers above exist only to build the mapping; do not leave them in the
# module namespace.
del _operator, _null_checks
68
+
69
# Metrics that can be computed for a column, keyed by column type name.
# The order inside each list is preserved exactly as declared.
# Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
_metric = models.FieldMetricTypeClass
# Every column type starts with these four metrics, in this order.
_shared_metrics = (
    _metric.NULL_COUNT,
    _metric.NULL_PERCENTAGE,
    _metric.UNIQUE_COUNT,
    _metric.UNIQUE_PERCENTAGE,
)
FIELD_METRIC_TYPE_CONFIG = {
    "STRING": [
        *_shared_metrics,
        _metric.MAX_LENGTH,
        _metric.MIN_LENGTH,
        _metric.EMPTY_COUNT,
        _metric.EMPTY_PERCENTAGE,
    ],
    "NUMBER": [
        *_shared_metrics,
        _metric.MAX,
        _metric.MIN,
        _metric.MEAN,
        _metric.MEDIAN,
        _metric.STDDEV,
        _metric.NEGATIVE_COUNT,
        _metric.NEGATIVE_PERCENTAGE,
        _metric.ZERO_COUNT,
        _metric.ZERO_PERCENTAGE,
    ],
    # The remaining types support only the shared metrics; each gets its own
    # list object so mutating one entry never affects another.
    "BOOLEAN": list(_shared_metrics),
    "DATE": list(_shared_metrics),
    "TIME": list(_shared_metrics),
    "NULL": list(_shared_metrics),
}
# The helpers above exist only to build the mapping; do not leave them in the
# module namespace.
del _metric, _shared_metrics
122
+
123
+
124
class MetricType(str, Enum):
    """Field metric types accepted by column metric assertions.

    Each member takes its value from the identically named attribute of
    ``models.FieldMetricTypeClass``; because the enum also derives from
    ``str``, members can be used wherever a plain string is expected.
    """

    # Null / uniqueness metrics
    NULL_COUNT = models.FieldMetricTypeClass.NULL_COUNT
    NULL_PERCENTAGE = models.FieldMetricTypeClass.NULL_PERCENTAGE
    UNIQUE_COUNT = models.FieldMetricTypeClass.UNIQUE_COUNT
    UNIQUE_PERCENTAGE = models.FieldMetricTypeClass.UNIQUE_PERCENTAGE

    # Length / emptiness metrics
    MAX_LENGTH = models.FieldMetricTypeClass.MAX_LENGTH
    MIN_LENGTH = models.FieldMetricTypeClass.MIN_LENGTH
    EMPTY_COUNT = models.FieldMetricTypeClass.EMPTY_COUNT
    EMPTY_PERCENTAGE = models.FieldMetricTypeClass.EMPTY_PERCENTAGE

    # Statistical metrics
    MIN = models.FieldMetricTypeClass.MIN
    MAX = models.FieldMetricTypeClass.MAX
    MEAN = models.FieldMetricTypeClass.MEAN
    MEDIAN = models.FieldMetricTypeClass.MEDIAN
    STDDEV = models.FieldMetricTypeClass.STDDEV

    # Sign / zero metrics
    NEGATIVE_COUNT = models.FieldMetricTypeClass.NEGATIVE_COUNT
    NEGATIVE_PERCENTAGE = models.FieldMetricTypeClass.NEGATIVE_PERCENTAGE
    ZERO_COUNT = models.FieldMetricTypeClass.ZERO_COUNT
    ZERO_PERCENTAGE = models.FieldMetricTypeClass.ZERO_PERCENTAGE
144
+
145
+
146
class OperatorType(str, Enum):
    """Assertion operators accepted by column metric assertions.

    Each member takes its value from the identically named attribute of
    ``models.AssertionStdOperatorClass``; because the enum also derives from
    ``str``, members can be used wherever a plain string is expected.
    """

    # Comparison operators
    EQUAL_TO = models.AssertionStdOperatorClass.EQUAL_TO
    NOT_EQUAL_TO = models.AssertionStdOperatorClass.NOT_EQUAL_TO
    GREATER_THAN = models.AssertionStdOperatorClass.GREATER_THAN
    GREATER_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
    LESS_THAN = models.AssertionStdOperatorClass.LESS_THAN
    LESS_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
    BETWEEN = models.AssertionStdOperatorClass.BETWEEN

    # Membership operators
    IN = models.AssertionStdOperatorClass.IN
    NOT_IN = models.AssertionStdOperatorClass.NOT_IN

    # Null and boolean checks
    NULL = models.AssertionStdOperatorClass.NULL
    NOT_NULL = models.AssertionStdOperatorClass.NOT_NULL
    IS_TRUE = models.AssertionStdOperatorClass.IS_TRUE
    IS_FALSE = models.AssertionStdOperatorClass.IS_FALSE

    # Text matching operators
    CONTAIN = models.AssertionStdOperatorClass.CONTAIN
    END_WITH = models.AssertionStdOperatorClass.END_WITH
    START_WITH = models.AssertionStdOperatorClass.START_WITH
    REGEX_MATCH = models.AssertionStdOperatorClass.REGEX_MATCH
166
+
167
+
168
class ValueType(str, Enum):
    """Parameter value types accepted for assertion parameters.

    Each member takes its value from the identically named attribute of
    ``models.AssertionStdParameterTypeClass``.
    """

    STRING = models.AssertionStdParameterTypeClass.STRING
    NUMBER = models.AssertionStdParameterTypeClass.NUMBER
    UNKNOWN = models.AssertionStdParameterTypeClass.UNKNOWN
    # Note: the LIST and SET values of models.AssertionStdParameterTypeClass are
    # intentionally excluded as they are not yet supported.
177
+
178
+
179
# Type aliases describing the input forms accepted for each assertion field.

# A metric: the SDK enum, the schema-class constant, or a plain string.
MetricInputType = Union[MetricType, models.FieldMetricTypeClass, str]

# A single comparison value.
ValueInputType = Union[str, int, float]

# The declared type of a comparison value.
ValueTypeInputType = Union[ValueType, models.AssertionStdParameterTypeClass, str]

# A pair of comparison values.
RangeInputType = tuple[ValueInputType, ValueInputType]

# The declared type(s) of a range: either one type or a pair of types.
RangeTypeInputType = Union[
    str,
    tuple[str, str],
    ValueTypeInputType,
    tuple[ValueTypeInputType, ValueTypeInputType],
]

# The pair form of a range type.
RangeTypeParsedType = tuple[ValueTypeInputType, ValueTypeInputType]

# An operator: the SDK enum, the schema-class constant, or a plain string.
OperatorInputType = Union[OperatorType, models.AssertionStdOperatorClass, str]
@@ -1,4 +1,5 @@
1
1
  from datetime import datetime
2
+ from enum import Enum
2
3
  from typing import Optional, Union
3
4
 
4
5
  from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
@@ -32,6 +33,48 @@ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
32
33
  from datahub.sdk.entity_client import EntityClient
33
34
 
34
35
 
36
class FreshnessAssertionScheduleCheckType(str, Enum):
    """Schedule check types accepted for freshness assertions.

    Members are also ``str`` instances, so each one compares equal to its
    string value.
    """

    # Requires a lookback_window on the assertion input.
    FIXED_INTERVAL = "FIXED_INTERVAL"
    # Must not be combined with a lookback_window.
    SINCE_THE_LAST_CHECK = "SINCE_THE_LAST_CHECK"
39
+
40
+
41
# Schedule check type used when the caller does not supply one.
DEFAULT_FRESHNESS_SCHEDULE_CHECK_TYPE = FreshnessAssertionScheduleCheckType(
    "SINCE_THE_LAST_CHECK"
)
44
+
45
+
46
def _parse_freshness_schedule_check_type(
    schedule_check_type: Optional[
        Union[
            str,
            FreshnessAssertionScheduleCheckType,
            models.FreshnessAssertionScheduleTypeClass,
        ]
    ],
) -> FreshnessAssertionScheduleCheckType:
    """Normalize a schedule check type to ``FreshnessAssertionScheduleCheckType``.

    Args:
        schedule_check_type: An SDK enum member, a
            ``models.FreshnessAssertionScheduleTypeClass`` instance, a string
            (matched case-insensitively against the enum values), or a falsy
            value such as ``None`` or ``""``.

    Returns:
        The matching enum member, or ``DEFAULT_FRESHNESS_SCHEDULE_CHECK_TYPE``
        when the input is falsy.

    Raises:
        ValueError: When the input matches no member of the enum.
    """
    # Already the SDK enum: nothing to convert.
    if isinstance(schedule_check_type, FreshnessAssertionScheduleCheckType):
        return schedule_check_type

    # Schema-class instance: validate it first, then map onto the SDK enum.
    # NOTE(review): the helper may raise its own error type — confirm.
    if isinstance(schedule_check_type, models.FreshnessAssertionScheduleTypeClass):
        validated = _try_parse_and_validate_schema_classes_enum(
            schedule_check_type, models.FreshnessAssertionScheduleTypeClass
        )
        return FreshnessAssertionScheduleCheckType(validated)

    # None and empty string both select the default.
    if not schedule_check_type:
        return DEFAULT_FRESHNESS_SCHEDULE_CHECK_TYPE

    # Strings are compared case-insensitively against the enum values.
    if isinstance(schedule_check_type, str):
        wanted = schedule_check_type.upper()
        matched = next(
            (
                candidate
                for candidate in FreshnessAssertionScheduleCheckType
                if candidate.value.upper() == wanted
            ),
            None,
        )
        if matched is not None:
            return matched

    # No match: let the enum constructor raise its standard error.
    return FreshnessAssertionScheduleCheckType(schedule_check_type)
76
+
77
+
35
78
  class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
36
79
  def _assertion_type(self) -> str:
37
80
  """Get the assertion type."""
@@ -56,7 +99,11 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
56
99
  updated_by: Union[str, CorpUserUrn],
57
100
  updated_at: datetime,
58
101
  freshness_schedule_check_type: Optional[
59
- Union[str, models.FreshnessAssertionScheduleTypeClass]
102
+ Union[
103
+ str,
104
+ FreshnessAssertionScheduleCheckType,
105
+ models.FreshnessAssertionScheduleTypeClass,
106
+ ]
60
107
  ] = None,
61
108
  lookback_window: Optional[TimeWindowSizeInputTypes] = None,
62
109
  ):
@@ -78,28 +125,30 @@ class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
78
125
  updated_at=updated_at,
79
126
  )
80
127
 
81
- self.freshness_schedule_check_type = (
82
- _try_parse_and_validate_schema_classes_enum(
83
- freshness_schedule_check_type
84
- or models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK,
85
- models.FreshnessAssertionScheduleTypeClass,
86
- )
128
+ self.freshness_schedule_check_type = _parse_freshness_schedule_check_type(
129
+ freshness_schedule_check_type
87
130
  )
88
131
  self.lookback_window = (
89
132
  _try_parse_time_window_size(lookback_window) if lookback_window else None
90
133
  )
134
+ self._validate_schedule_check_type()
135
+
136
+ def _validate_schedule_check_type(self) -> None:
137
+ """Validate the schedule check type."""
138
+ if self.freshness_schedule_check_type is None:
139
+ raise SDKUsageError("Freshness schedule check type is required.")
91
140
  if (
92
141
  self.freshness_schedule_check_type
93
- is models.FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL
94
- and lookback_window is None
142
+ == FreshnessAssertionScheduleCheckType.FIXED_INTERVAL
143
+ and self.lookback_window is None
95
144
  ):
96
145
  raise SDKUsageError(
97
146
  "Fixed interval freshness assertions must have a lookback_window provided."
98
147
  )
99
148
  if (
100
149
  self.freshness_schedule_check_type
101
- is models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK
102
- and lookback_window is not None
150
+ == FreshnessAssertionScheduleCheckType.SINCE_THE_LAST_CHECK
151
+ and self.lookback_window is not None
103
152
  ):
104
153
  raise SDKUsageError(
105
154
  "Since the last check freshness assertions cannot have a lookback_window provided."