acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
- acryl_datahub_cloud/metadata/schema.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
- acryl_datahub_cloud/sdk/__init__.py +10 -2
- acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
- acryl_datahub_cloud/sdk/assertion/types.py +18 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
- acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,943 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
6
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
|
|
7
|
+
HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
|
|
8
|
+
AssertionIncidentBehavior,
|
|
9
|
+
AssertionInfoInputType,
|
|
10
|
+
DetectionMechanismInputTypes,
|
|
11
|
+
ExclusionWindowInputTypes,
|
|
12
|
+
FieldSpecType,
|
|
13
|
+
InferenceSensitivity,
|
|
14
|
+
_AllRowsQuery,
|
|
15
|
+
_AllRowsQueryDataHubDatasetProfile,
|
|
16
|
+
_AssertionInput,
|
|
17
|
+
_ChangedRowsQuery,
|
|
18
|
+
_DatasetProfile,
|
|
19
|
+
_HasSmartAssertionInputs,
|
|
20
|
+
_try_parse_and_validate_schema_classes_enum,
|
|
21
|
+
)
|
|
22
|
+
from acryl_datahub_cloud.sdk.entities.assertion import TagsInputType
|
|
23
|
+
from acryl_datahub_cloud.sdk.errors import (
|
|
24
|
+
SDKNotYetSupportedError,
|
|
25
|
+
SDKUsageError,
|
|
26
|
+
)
|
|
27
|
+
from datahub.emitter.enum_helpers import get_enum_options
|
|
28
|
+
from datahub.metadata import schema_classes as models
|
|
29
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
|
|
30
|
+
from datahub.sdk.entity_client import EntityClient
|
|
31
|
+
|
|
32
|
+
# Column types accepted for the column targeted by a smart column metric assertion.
# One instance of each allowed schema type class is created, in the order listed.
# Keep this in sync with the frontend in getEligibleFieldColumns
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
ALLOWED_COLUMN_TYPES_FOR_SMART_COLUMN_METRIC_ASSERTION = [
    column_type_class()
    for column_type_class in (
        models.StringTypeClass,
        models.NumberTypeClass,
        models.BooleanTypeClass,
        models.DateTypeClass,
        models.TimeTypeClass,
        models.NullTypeClass,
    )
]
|
|
42
|
+
|
|
43
|
+
# Short alias for the generated operator enum; used only to keep the tables below readable.
_StdOperator = models.AssertionStdOperatorClass

# Operators permitted per column type, keyed by the schema type class name.
# Keep this in sync with FIELD_VALUES_OPERATOR_CONFIG in the frontend
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
FIELD_VALUES_OPERATOR_CONFIG = {
    "StringTypeClass": [
        _StdOperator.NULL,
        _StdOperator.NOT_NULL,
        _StdOperator.EQUAL_TO,
        _StdOperator.IN,
        _StdOperator.GREATER_THAN_OR_EQUAL_TO,
        _StdOperator.REGEX_MATCH,
        _StdOperator.GREATER_THAN,
        _StdOperator.LESS_THAN,
        _StdOperator.BETWEEN,
    ],
    "NumberTypeClass": [
        _StdOperator.GREATER_THAN,
        _StdOperator.LESS_THAN,
        _StdOperator.BETWEEN,
        _StdOperator.NULL,
        _StdOperator.NOT_NULL,
        _StdOperator.EQUAL_TO,
        _StdOperator.IN,
        _StdOperator.GREATER_THAN_OR_EQUAL_TO,
        _StdOperator.NOT_EQUAL_TO,
    ],
    "BooleanTypeClass": [
        _StdOperator.IS_TRUE,
        _StdOperator.IS_FALSE,
        _StdOperator.NULL,
        _StdOperator.NOT_NULL,
    ],
    # Date, time and null columns share the same two null-check operators.
    # Each key still gets its own list object, as in a hand-written literal.
    **{
        type_class_name: [_StdOperator.NULL, _StdOperator.NOT_NULL]
        for type_class_name in ("DateTypeClass", "TimeTypeClass", "NullTypeClass")
    },
}
|
|
87
|
+
|
|
88
|
+
# Short alias for the generated operator enum; used only to keep the lists below readable.
_StdOperator = models.AssertionStdOperatorClass

# Operators that require a single value parameter
SINGLE_VALUE_OPERATORS = [
    _StdOperator.EQUAL_TO,
    _StdOperator.NOT_EQUAL_TO,
    _StdOperator.GREATER_THAN,
    _StdOperator.LESS_THAN,
    _StdOperator.GREATER_THAN_OR_EQUAL_TO,
    _StdOperator.LESS_THAN_OR_EQUAL_TO,
    _StdOperator.CONTAIN,
    _StdOperator.END_WITH,
    _StdOperator.START_WITH,
    _StdOperator.REGEX_MATCH,
    _StdOperator.IN,
    _StdOperator.NOT_IN,
]

# Operators that require a range parameter
RANGE_OPERATORS = [_StdOperator.BETWEEN]

# Operators that require no parameters
NO_PARAMETER_OPERATORS = [
    _StdOperator.NULL,
    _StdOperator.NOT_NULL,
    _StdOperator.IS_TRUE,
    _StdOperator.IS_FALSE,
]
|
|
116
|
+
|
|
117
|
+
# Short alias for the generated metric enum; used only to keep the table below readable.
_FieldMetric = models.FieldMetricTypeClass

# Metrics listed for every column type in the table below, in this order.
_METRICS_FOR_EVERY_COLUMN_TYPE = (
    _FieldMetric.NULL_COUNT,
    _FieldMetric.NULL_PERCENTAGE,
    _FieldMetric.UNIQUE_COUNT,
    _FieldMetric.UNIQUE_PERCENTAGE,
)

# Metrics permitted per column type, keyed by the schema type class name.
# Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
# datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
FIELD_METRIC_TYPE_CONFIG = {
    "StringTypeClass": [
        *_METRICS_FOR_EVERY_COLUMN_TYPE,
        _FieldMetric.MAX_LENGTH,
        _FieldMetric.MIN_LENGTH,
        _FieldMetric.EMPTY_COUNT,
        _FieldMetric.EMPTY_PERCENTAGE,
    ],
    "NumberTypeClass": [
        *_METRICS_FOR_EVERY_COLUMN_TYPE,
        _FieldMetric.MAX,
        _FieldMetric.MIN,
        _FieldMetric.MEAN,
        _FieldMetric.MEDIAN,
        _FieldMetric.STDDEV,
        _FieldMetric.NEGATIVE_COUNT,
        _FieldMetric.NEGATIVE_PERCENTAGE,
        _FieldMetric.ZERO_COUNT,
        _FieldMetric.ZERO_PERCENTAGE,
    ],
    # The remaining types support only the shared metrics; each key gets its own list.
    "BooleanTypeClass": list(_METRICS_FOR_EVERY_COLUMN_TYPE),
    "DateTypeClass": list(_METRICS_FOR_EVERY_COLUMN_TYPE),
    "TimeTypeClass": list(_METRICS_FOR_EVERY_COLUMN_TYPE),
    "NullTypeClass": list(_METRICS_FOR_EVERY_COLUMN_TYPE),
}
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# Enum-like inputs: accepted either as the generated schema class or as a plain string.
MetricInputType = Union[models.FieldMetricTypeClass, str]
OperatorInputType = Union[str, models.AssertionStdOperatorClass]
ValueTypeInputType = Union[str, models.AssertionStdParameterTypeClass]

# A single comparison value, and a (first, second) pair of values for range operators.
ValueInputType = Union[str, int, float]
RangeInputType = tuple[ValueInputType, ValueInputType]

# The range type may be given once or as one type per range bound.
RangeTypeInputType = Union[
    str,
    tuple[str, str],
    ValueTypeInputType,
    tuple[ValueTypeInputType, ValueTypeInputType],
]
# After parsing, the range type is always a pair.
RangeTypeParsedType = tuple[ValueTypeInputType, ValueTypeInputType]

# Passed to the parent initializer as `default_detection_mechanism`.
DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION: _AllRowsQuery = (
    _AllRowsQuery()
)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs):
|
|
191
|
+
"""
|
|
192
|
+
Input used to create a smart column metric assertion.
|
|
193
|
+
|
|
194
|
+
This assertion is used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
|
|
195
|
+
min, max, median, and more. It uses AI to infer the assertion parameters.
|
|
196
|
+
|
|
197
|
+
Example using the entity models, not comprehensive for all options:
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
models.AssertionInfoClass(
|
|
201
|
+
type=models.AssertionTypeClass.FIELD,
|
|
202
|
+
fieldAssertion=FieldAssertionInfoClass(
|
|
203
|
+
type=models.FieldAssertionTypeClass.FIELD_METRIC,
|
|
204
|
+
entity=str(self.dataset_urn),
|
|
205
|
+
filter=DatasetFilterClass(
|
|
206
|
+
type=models.DatasetFilterTypeClass.SQL,
|
|
207
|
+
sql="SELECT * FROM dataset WHERE column_name = 'value'", # Example filter
|
|
208
|
+
),
|
|
209
|
+
fieldMetricAssertion=FieldMetricAssertionClass(
|
|
210
|
+
field=SchemaFieldSpecClass(
|
|
211
|
+
path="column_name", # The column name to validate
|
|
212
|
+
type="string", # The type of the column
|
|
213
|
+
nativeType="string", # The native type of the column
|
|
214
|
+
),
|
|
215
|
+
metric=models.FieldMetricTypeClass.NULL_PERCENTAGE, # The metric to validate
|
|
216
|
+
operator=models.AssertionStdOperatorClass.GREATER_THAN, # The operator to use
|
|
217
|
+
parameters=models.AssertionStdParametersClass(
|
|
218
|
+
value=models.AssertionStdParameterClass(
|
|
219
|
+
value=10, # The value to validate
|
|
220
|
+
type=models.AssertionStdParameterTypeClass.NUMBER, # The type of the value
|
|
221
|
+
),
|
|
222
|
+
),
|
|
223
|
+
),
|
|
224
|
+
),
|
|
225
|
+
source=models.AssertionSourceClass(
|
|
226
|
+
type=models.AssertionSourceTypeClass.INFERRED, # Smart assertions are of type inferred, not native
|
|
227
|
+
created=AuditStampClass(
|
|
228
|
+
time=1717929600,
|
|
229
|
+
actor="urn:li:corpuser:jdoe", # The actor who created the assertion
|
|
230
|
+
),
|
|
231
|
+
),
|
|
232
|
+
lastUpdated=AuditStampClass(
|
|
233
|
+
time=1717929600,
|
|
234
|
+
actor="urn:li:corpuser:jdoe", # The actor who last updated the assertion
|
|
235
|
+
),
|
|
236
|
+
description="This assertion validates the null count percentage of the column 'column_name' is greater than 10.", # Optional description of the assertion
|
|
237
|
+
)
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
```python
|
|
241
|
+
models.MonitorInfoClass(
|
|
242
|
+
type=models.MonitorTypeClass.ASSERTION,
|
|
243
|
+
status=models.MonitorStatusClass(
|
|
244
|
+
mode=models.MonitorModeClass.ACTIVE, # Active or Inactive
|
|
245
|
+
),
|
|
246
|
+
assertionMonitor=AssertionMonitorClass(
|
|
247
|
+
assertions=AssertionEvaluationSpecClass(
|
|
248
|
+
assertion="urn:li:assertion:123", # The assertion to monitor
|
|
249
|
+
schedule=models.CronScheduleClass(
|
|
250
|
+
cron="0 0 * * *", # The cron schedule
|
|
251
|
+
timezone="America/New_York", # The timezone
|
|
252
|
+
),
|
|
253
|
+
parameters=models.AssertionEvaluationParametersClass(
|
|
254
|
+
type=models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
|
|
255
|
+
datasetFieldParameters=models.DatasetFieldAssertionParametersClass(
|
|
256
|
+
sourceType=models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY, # This can be ALL_ROWS_QUERY, CHANGED_ROWS_QUERY or DATAHUB_DATASET_PROFILE
|
|
257
|
+
changedRowsField=models.FreshnessFieldSpecClass(
|
|
258
|
+
path="column_name",
|
|
259
|
+
type="string",
|
|
260
|
+
nativeType="string",
|
|
261
|
+
kind=models.FreshnessFieldKindClass.HIGH_WATERMARK, # This can be LAST_MODIFIED or HIGH_WATERMARK
|
|
262
|
+
),
|
|
263
|
+
),
|
|
264
|
+
),
|
|
265
|
+
),
|
|
266
|
+
settings=models.AssertionMonitorSettingsClass(
|
|
267
|
+
adjustmentSettings=models.AssertionAdjustmentSettingsClass(
|
|
268
|
+
algorithm=models.AdjustmentAlgorithmClass.CUSTOM, # TODO: Do we need to set this in the SDK?
|
|
269
|
+
algorithmName="stddev", # TODO: Do we need to set this in the SDK? What are acceptable values?
|
|
270
|
+
context={
|
|
271
|
+
"stdDev": "1.0", # TODO: Do we need to set this in the SDK? What are acceptable values?
|
|
272
|
+
},
|
|
273
|
+
exclusionWindows=[models.AssertionExclusionWindowClass(
|
|
274
|
+
type=models.AssertionExclusionWindowTypeClass.FIXED_RANGE,
|
|
275
|
+
start=1717929600,
|
|
276
|
+
end=1717929600,
|
|
277
|
+
)],
|
|
278
|
+
trainingDataLookbackWindowDays=10, # The number of days to look back for training data
|
|
279
|
+
sensitivity=models.AssertionMonitorSensitivityClass(
|
|
280
|
+
level=1, # The sensitivity level
|
|
281
|
+
),
|
|
282
|
+
),
|
|
283
|
+
),
|
|
284
|
+
),
|
|
285
|
+
)
|
|
286
|
+
```
|
|
287
|
+
"""
|
|
288
|
+
|
|
289
|
+
def __init__(
    self,
    *,
    # Required parameters
    dataset_urn: Union[str, DatasetUrn],
    entity_client: EntityClient,
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    # Optional parameters
    value: Optional[ValueInputType] = None,
    value_type: Optional[ValueTypeInputType] = None,
    range: Optional[RangeInputType] = None,
    range_type: Optional[RangeTypeInputType] = None,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: bool = True,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Union[str, CorpUserUrn],
    created_at: datetime,
    updated_by: Union[str, CorpUserUrn],
    updated_at: datetime,
):
    """
    Build and validate the input for a smart column metric assertion.

    All arguments are keyword-only. Common arguments are forwarded to the two
    parent initializers; the column, metric, operator and value/range arguments
    are parsed and cross-validated here.

    Args:
        dataset_urn: The dataset urn.
        entity_client: The entity client.
        column_name: The name of the column to validate.
        metric_type: The metric type to validate.
        operator: The operator to use.
        value: The value to validate.
        value_type: The type of the value.
        range: The range to validate.
        range_type: The type of the range. If single value, we assume the same type for start and end.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        schedule: The schedule of the assertion.
        detection_mechanism: The detection mechanism of the assertion.
        sensitivity: The sensitivity of the assertion.
        exclusion_windows: The exclusion windows of the assertion.
        training_data_lookback_days: The training data lookback days of the assertion.
        incident_behavior: The incident behavior of the assertion.
        tags: The tags of the assertion.
        created_by: The creator of the assertion.
        created_at: The creation time of the assertion.
        updated_by: The updater of the assertion.
        updated_at: The update time of the assertion.
    """
    # Both parents are initialized explicitly; each receives only its own arguments.
    _AssertionInput.__init__(
        self,
        dataset_urn=dataset_urn,
        entity_client=entity_client,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        schedule=schedule,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        # Smart assertions are of type inferred, not native
        source_type=models.AssertionSourceTypeClass.INFERRED,
        created_by=created_by,
        created_at=created_at,
        updated_by=updated_by,
        updated_at=updated_at,
        default_detection_mechanism=DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION,
    )
    _HasSmartAssertionInputs.__init__(
        self,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
    )

    # Parse the assertion-specific inputs: metric first, then column, then operator.
    self.metric_type = _try_parse_and_validate_schema_classes_enum(
        metric_type, models.FieldMetricTypeClass
    )
    self.column_name = self._try_parse_and_validate_column_name_is_valid_type(
        column_name
    )
    self.operator = _try_parse_and_validate_schema_classes_enum(
        operator, models.AssertionStdOperatorClass
    )

    # The attributes below hold either parsed values or the caller's raw input.
    self.value_type: Optional[ValueTypeInputType] = None
    self.value: Optional[ValueInputType] = None
    if not _is_value_required_for_operator(self.operator):
        # Not required: keep the raw input so the validation below can reject it.
        self.value_type = value_type
        self.value = value
    else:
        self.value_type = _try_parse_and_validate_value_type(value_type)
        self.value = _try_parse_and_validate_value(value, self.value_type)

    self.range_type: Optional[Union[RangeTypeInputType, RangeTypeParsedType]] = None
    self.range: Optional[RangeInputType] = None
    if not _is_range_required_for_operator(self.operator):
        # Not required: keep the raw input so the validation below can reject it.
        self.range_type = range_type
        self.range = range
    else:
        self.range_type = _try_parse_and_validate_range_type(range_type)
        self.range = _try_parse_and_validate_range(
            range, self.range_type, self.operator
        )

    def _parsed_value_type_or_none():
        # Parse the stored value type, passing None through untouched.
        if self.value_type is None:
            return None
        return _try_parse_and_validate_value_type(self.value_type)

    def _parsed_range_type_or_none():
        # Parse the stored range type, passing None through untouched.
        if self.range_type is None:
            return None
        return _try_parse_and_validate_range_type(self.range_type)

    _validate_operator_and_input_parameters(
        operator=self.operator,
        value=self.value,
        value_type=_parsed_value_type_or_none(),
        range=self.range,
        range_type=_parsed_range_type_or_none(),
    )

    # Validate compatibility:
    self._validate_field_type_and_operator_compatibility(
        self.column_name, self.operator
    )
    self._validate_field_type_and_metric_type_compatibility(
        self.column_name, self.metric_type
    )
    self._validate_operator_and_range_or_value_compatibility(
        self.operator,
        self.value,
        _parsed_value_type_or_none(),
        self.range,
        _parsed_range_type_or_none(),
    )
|
|
439
|
+
|
|
440
|
+
def _create_monitor_info(
    self,
    assertion_urn: AssertionUrn,
    status: models.MonitorStatusClass,
    schedule: models.CronScheduleClass,
    source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
    field: Optional[FieldSpecType],
) -> models.MonitorInfoClass:
    """
    Assemble the MonitorInfoClass for this assertion.

    Args:
        assertion_urn: Urn of the assertion to monitor; stored as a string.
        status: Monitor status to set.
        schedule: Schedule attached to the evaluation spec.
        source_type: Source type; converted with str() before building the
            evaluation parameters.
        field: Optional field spec forwarded to the evaluation parameters.

    Returns:
        A monitor of type ASSERTION with one evaluation spec and the smart
        assertion adjustment settings (sensitivity, exclusion windows and
        training data lookback window).
    """
    # NOTE(review): the annotation names DatasetFreshnessSourceTypeClass although
    # this class deals with dataset field source types - confirm the intended type.
    evaluation_spec = models.AssertionEvaluationSpecClass(
        assertion=str(assertion_urn),
        schedule=schedule,
        parameters=self._get_assertion_evaluation_parameters(
            str(source_type), field
        ),
    )
    adjustment_settings = models.AssertionAdjustmentSettingsClass(
        sensitivity=self._convert_sensitivity(),
        exclusionWindows=self._convert_exclusion_windows(),
        trainingDataLookbackWindowDays=self.training_data_lookback_days,
    )
    assertion_monitor = models.AssertionMonitorClass(
        assertions=[evaluation_spec],
        settings=models.AssertionMonitorSettingsClass(
            adjustmentSettings=adjustment_settings,
        ),
    )
    return models.MonitorInfoClass(
        type=models.MonitorTypeClass.ASSERTION,
        status=status,
        assertionMonitor=assertion_monitor,
    )
|
|
473
|
+
|
|
474
|
+
def _create_assertion_info(
    self, filter: Optional[models.DatasetFilterClass]
) -> AssertionInfoInputType:
    """
    Build the FieldAssertionInfoClass describing this column metric assertion.

    Args:
        filter: Optional dataset filter, passed through unchanged.

    Returns:
        A FieldAssertionInfoClass of type FIELD_METRIC for this dataset, carrying
        the column's field spec, the metric, the operator and the parameters.
    """
    # Resolve the column name to its schema field spec.
    column_field_spec = self._get_schema_field_spec(self.column_name)

    metric_assertion = models.FieldMetricAssertionClass(
        field=column_field_spec,
        metric=self.metric_type,
        operator=self.operator,
        parameters=self._create_assertion_parameters(),
    )

    return models.FieldAssertionInfoClass(
        type=models.FieldAssertionTypeClass.FIELD_METRIC,
        entity=str(self.dataset_urn),
        filter=filter,
        fieldMetricAssertion=metric_assertion,
        # Explicitly None: this is a field metric assertion, not a field values one.
        fieldValuesAssertion=None,
    )
|
|
505
|
+
|
|
506
|
+
def _convert_schedule(self) -> models.CronScheduleClass:
    """
    Return the cron schedule to use for this assertion.

    Returns:
        A new CronScheduleClass copying the cron expression and timezone of
        `self.schedule`, or DEFAULT_EVERY_SIX_HOURS_SCHEDULE when no schedule is set.
    """
    if self.schedule is not None:
        return models.CronScheduleClass(
            cron=self.schedule.cron,
            timezone=self.schedule.timezone,
        )

    # No schedule set: fall back to the module-level default.
    return DEFAULT_EVERY_SIX_HOURS_SCHEDULE
|
|
520
|
+
|
|
521
|
+
def _convert_schema_field_spec_to_freshness_field_spec(
    self, field_spec: models.SchemaFieldSpecClass
) -> models.FreshnessFieldSpecClass:
    """
    Build a FreshnessFieldSpecClass from a SchemaFieldSpecClass.

    The path, type and native type are copied over; the kind is always set to
    HIGH_WATERMARK.
    """
    freshness_field_spec = models.FreshnessFieldSpecClass(
        path=field_spec.path,
        type=field_spec.type,
        nativeType=field_spec.nativeType,
        kind=models.FreshnessFieldKindClass.HIGH_WATERMARK,
    )
    return freshness_field_spec
|
|
533
|
+
|
|
534
|
+
def _get_assertion_evaluation_parameters(
    self, source_type: str, field: Optional[FieldSpecType]
) -> models.AssertionEvaluationParametersClass:
    """
    Build the DATASET_FIELD evaluation parameters for this assertion.

    Args:
        source_type: Source type stored on the dataset field parameters.
        field: Optional field spec. A SchemaFieldSpecClass is converted to a
            FreshnessFieldSpecClass first; None is passed through.

    Returns:
        Evaluation parameters whose changedRowsField is the (converted) field.
    """
    changed_rows_field = field
    if isinstance(changed_rows_field, models.SchemaFieldSpecClass):
        changed_rows_field = self._convert_schema_field_spec_to_freshness_field_spec(
            changed_rows_field
        )
    if changed_rows_field is not None:
        # Any field that is set must be a FreshnessFieldSpecClass by this point.
        assert isinstance(changed_rows_field, models.FreshnessFieldSpecClass), (
            "Field must be FreshnessFieldSpecClass for monitor info"
        )

    field_parameters = models.DatasetFieldAssertionParametersClass(
        sourceType=source_type,
        changedRowsField=changed_rows_field,
    )
    return models.AssertionEvaluationParametersClass(
        type=models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
        datasetFieldParameters=field_parameters,
    )
|
|
554
|
+
|
|
555
|
+
def _convert_assertion_source_type_and_field(
    self,
) -> tuple[str, Optional[FieldSpecType]]:
    """
    Map the detection mechanism to a source type and an optional field spec.

    Returns:
        A tuple of (source_type, field). The field is only set for the changed
        rows query, where it is the spec of the high watermark column.
        The source_type is a string rather than an enum, because the code
        generated from the DatasetFieldSourceType enum in the PDL is not an enum.

    Raises:
        SDKNotYetSupportedError: If the detection mechanism is not supported.
        SDKUsageError: If the field (column) is not found in the dataset,
            and the detection mechanism requires a field. Also if the field
            is not an allowed type for the detection mechanism.
    """
    # NOTE(review): the branch order matters if _AllRowsQueryDataHubDatasetProfile
    # subclasses _AllRowsQuery (the earlier branch would then win) - confirm.
    if isinstance(self.detection_mechanism, _ChangedRowsQuery):
        source_type = models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY
        high_watermark_column = self._try_parse_and_validate_column_name_is_valid_type(
            self.detection_mechanism.column_name,  # The high watermark column name
            allowed_column_types=HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
        )
        return source_type, self._get_schema_field_spec(high_watermark_column)

    if isinstance(self.detection_mechanism, _AllRowsQuery):
        # For query-based detection, we don't need a field specification
        # as the query itself defines what data to analyze
        return models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY, None

    if isinstance(
        self.detection_mechanism,
        (_AllRowsQueryDataHubDatasetProfile, _DatasetProfile),
    ):
        # Note: This is only valid on the all rows query
        return (
            models.DatasetFieldAssertionSourceTypeClass.DATAHUB_DATASET_PROFILE,
            None,
        )

    raise SDKNotYetSupportedError(
        f"Detection mechanism {self.detection_mechanism} is not supported"
    )
|
|
600
|
+
|
|
601
|
+
def _validate_single_value_operator(
    self,
    operator: models.AssertionStdOperatorClass,
    value: Optional[ValueInputType],
    value_type: Optional[models.AssertionStdParameterTypeClass],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeParsedType],
) -> None:
    """
    Check the inputs of an operator that takes exactly one value.

    Raises:
        SDKUsageError: If the value or its type is missing, or if any range
            input was supplied. The checks run in that order.
    """
    if value is None:
        raise SDKUsageError(f"Value is required for operator {operator}")
    if value_type is None:
        raise SDKUsageError(f"Value type is required for operator {operator}")

    range_inputs_absent = range is None and range_type is None
    if not range_inputs_absent:
        raise SDKUsageError(
            f"Range parameters should not be provided for operator {operator}"
        )
|
|
618
|
+
|
|
619
|
+
def _validate_range_operator(
    self,
    operator: models.AssertionStdOperatorClass,
    value: Optional[ValueInputType],
    value_type: Optional[models.AssertionStdParameterTypeClass],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeParsedType],
) -> None:
    """
    Check the inputs of an operator that takes a range.

    Raises:
        SDKUsageError: If the range or its type is missing, or if any single
            value input was supplied. The checks run in that order.
    """
    if range is None:
        raise SDKUsageError(f"Range is required for operator {operator}")
    if range_type is None:
        raise SDKUsageError(f"Range type is required for operator {operator}")

    value_inputs_absent = value is None and value_type is None
    if not value_inputs_absent:
        raise SDKUsageError(
            f"Value parameters should not be provided for operator {operator}"
        )
|
|
636
|
+
|
|
637
|
+
def _validate_no_parameter_operator(
    self,
    operator: models.AssertionStdOperatorClass,
    value: Optional[ValueInputType],
    value_type: Optional[models.AssertionStdParameterTypeClass],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeParsedType],
) -> None:
    """
    Check the inputs of an operator that takes no parameters.

    Raises:
        SDKUsageError: If any value input or any range input was supplied.
            Value inputs are checked before range inputs.
    """
    value_inputs_absent = value is None and value_type is None
    if not value_inputs_absent:
        raise SDKUsageError(
            f"Value parameters should not be provided for operator {operator}"
        )

    range_inputs_absent = range is None and range_type is None
    if not range_inputs_absent:
        raise SDKUsageError(
            f"Range parameters should not be provided for operator {operator}"
        )
|
|
654
|
+
|
|
655
|
+
def _validate_operator_and_range_or_value_compatibility(
    self,
    operator: models.AssertionStdOperatorClass,
    value: Optional[ValueInputType] = None,
    value_type: Optional[models.AssertionStdParameterTypeClass] = None,
    range: Optional[RangeInputType] = None,
    range_type: Optional[RangeTypeParsedType] = None,
) -> None:
    """
    Route the operator to the validator matching its category.

    The operator is looked up, in order, in the single-value, range and
    no-parameter operator groups; the first group containing it decides which
    validator receives the value/range arguments.

    Args:
        operator: The operator to validate.
        value: Optional value parameter.
        value_type: Optional value type parameter.
        range: Optional range parameter.
        range_type: Optional range type parameter.

    Raises:
        SDKUsageError: If the operator belongs to none of the known groups, or
            if the selected validator rejects the supplied parameters.
    """
    # (operator group, validator) pairs, tried in the listed order.
    validators_by_group = (
        (SINGLE_VALUE_OPERATORS, self._validate_single_value_operator),
        (RANGE_OPERATORS, self._validate_range_operator),
        (NO_PARAMETER_OPERATORS, self._validate_no_parameter_operator),
    )
    for operator_group, validate in validators_by_group:
        if operator in operator_group:
            validate(operator, value, value_type, range, range_type)
            return

    raise SDKUsageError(f"Unsupported operator type: {operator}")
|
|
690
|
+
|
|
691
|
+
def _create_assertion_parameters(self) -> models.AssertionStdParametersClass:
    """
    Build the standard assertion parameters for ``self.operator``.

    Single-value operators yield a ``value`` parameter, range operators yield
    ``minValue``/``maxValue`` parameters, and no-parameter operators yield an
    empty parameters object. Values are stored as their ``str()`` form.

    Returns:
        An AssertionStdParametersClass populated for the operator category.

    Raises:
        SDKUsageError: If a required value/range (or its type) is missing, or
            if the operator belongs to none of the known categories.
    """
    if self.operator in SINGLE_VALUE_OPERATORS:
        if self.value is None:
            raise SDKUsageError(f"Value is required for operator {self.operator}")
        if self.value_type is None:
            raise SDKUsageError(
                f"Value type is required for operator {self.operator}"
            )
        single_parameter = models.AssertionStdParameterClass(
            value=str(self.value),
            type=self.value_type,
        )
        return models.AssertionStdParametersClass(value=single_parameter)

    if self.operator in RANGE_OPERATORS:
        if self.range is None:
            raise SDKUsageError(f"Range is required for operator {self.operator}")
        if self.range_type is None:
            raise SDKUsageError(
                f"Range type is required for operator {self.operator}"
            )
        # Re-run the range type parser so we always index a (start, end) pair.
        bound_types = _try_parse_and_validate_range_type(self.range_type)
        lower_parameter = models.AssertionStdParameterClass(
            value=str(self.range[0]),
            type=bound_types[0],
        )
        upper_parameter = models.AssertionStdParameterClass(
            value=str(self.range[1]),
            type=bound_types[1],
        )
        return models.AssertionStdParametersClass(
            minValue=lower_parameter,
            maxValue=upper_parameter,
        )

    if self.operator in NO_PARAMETER_OPERATORS:
        return models.AssertionStdParametersClass()

    raise SDKUsageError(f"Unsupported operator type: {self.operator}")
|
|
737
|
+
|
|
738
|
+
def _try_parse_and_validate_column_name_is_valid_type(
    self,
    column_name: str,
    allowed_column_types: list[
        models.DictWrapper
    ] = ALLOWED_COLUMN_TYPES_FOR_SMART_COLUMN_METRIC_ASSERTION,
) -> str:
    """
    Validate a column name against the allowed column types and return it unchanged.

    The column's field spec is fetched via ``_get_schema_field_spec`` and handed to
    ``_validate_field_type`` together with the allowed types; any error those helpers
    raise propagates to the caller. See also getEligibleFieldColumns and related
    functions in the frontend.
    """
    self._validate_field_type(
        self._get_schema_field_spec(column_name),
        column_name,
        allowed_column_types,
        "smart column metric assertion",
    )
    return column_name
|
|
757
|
+
|
|
758
|
+
def _assertion_type(self) -> str:
    """Return the assertion type constant for this input (``FIELD``)."""
    assertion_type = models.AssertionTypeClass.FIELD
    return assertion_type
|
|
761
|
+
|
|
762
|
+
def _validate_field_type_and_operator_compatibility(
    self, column_name: str, operator: models.AssertionStdOperatorClass
) -> None:
    """Ensure the operator is permitted for the column's field type.

    The permitted operators are read from FIELD_VALUES_OPERATOR_CONFIG, keyed by
    the field type of the column; a field type missing from that mapping is
    treated as permitting no operators. See FIELD_VALUES_OPERATOR_CONFIG in the
    frontend for the corresponding configuration.

    Args:
        column_name: The name of the column to validate.
        operator: The operator to validate against.

    Raises:
        SDKUsageError: If the operator is not among those permitted for the
            column's field type.
    """
    field_spec = self._get_schema_field_spec(column_name)
    allowed_operators = FIELD_VALUES_OPERATOR_CONFIG.get(field_spec.type, [])

    if operator in allowed_operators:
        return

    raise SDKUsageError(
        f"Operator {operator} is not allowed for field type {field_spec.type} for column '{column_name}'. Allowed operators: {', '.join(str(op) for op in allowed_operators)}"
    )
|
|
782
|
+
|
|
783
|
+
def _validate_field_type_and_metric_type_compatibility(
    self, column_name: str, metric_type: models.FieldMetricTypeClass
) -> None:
    """Ensure the metric type is permitted for the column's field type.

    The permitted metric types are read from FIELD_METRIC_TYPE_CONFIG, keyed by
    the field type of the column. See FIELD_METRIC_TYPE_CONFIG in the frontend
    for the corresponding configuration.

    Args:
        column_name: The name of the column to validate.
        metric_type: The metric type to validate.

    Raises:
        SDKUsageError: If the column's field type has no entry in
            FIELD_METRIC_TYPE_CONFIG, or if the metric type is not among those
            permitted for that field type.
    """
    field_type = self._get_schema_field_spec(column_name).type

    # Field types without a configuration entry are rejected outright.
    if field_type not in FIELD_METRIC_TYPE_CONFIG:
        raise SDKUsageError(
            f"Column {column_name} is of type {field_type}, which is not supported for smart column metric assertions"
        )

    allowed_metric_types = FIELD_METRIC_TYPE_CONFIG[field_type]
    if metric_type in allowed_metric_types:
        return

    raise SDKUsageError(
        f"Metric type {metric_type} is not allowed for field type {field_type}. Allowed metric types: {', '.join(str(mt) for mt in allowed_metric_types)}"
    )
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def _try_parse_and_validate_value_type(
    value_type: Optional[ValueTypeInputType],
) -> models.AssertionStdParameterTypeClass:
    """Parse a value type, rejecting a missing one.

    Non-``None`` input is delegated to ``_try_parse_and_validate_schema_classes_enum``
    with ``AssertionStdParameterTypeClass`` as the target enum.

    Raises:
        SDKUsageError: If ``value_type`` is ``None``.
    """
    if value_type is not None:
        return _try_parse_and_validate_schema_classes_enum(
            value_type, models.AssertionStdParameterTypeClass
        )
    raise SDKUsageError("Value type is required")
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
def _try_parse_and_validate_value(
    value: Optional[ValueInputType],
    value_type: ValueTypeInputType,
) -> ValueInputType:
    """Parse a value and check that it matches the declared value type.

    String input is first tried as JSON; if it is not valid JSON it is kept as
    a raw string. Non-string input is used as-is.

    Args:
        value: The value to validate. Must not be ``None``.
        value_type: The declared type of the value, compared against the
            ``AssertionStdParameterTypeClass`` constants.

    Returns:
        The parsed value. For the STRING type, a string input that happens to
        be valid non-string JSON (e.g. ``"123"`` or ``"true"``) is returned
        verbatim instead of being rejected.

    Raises:
        SDKUsageError: If the value is missing, does not match the declared
            type, or the value type is not recognised.
        SDKNotYetSupportedError: If the value type is LIST or SET.
    """
    if value is None:
        raise SDKUsageError("Value parameter is required for the chosen operator")

    # Accept both Python types and JSON strings
    if isinstance(value, str):
        # Try to parse as JSON, but if it fails, treat as a raw string
        try:
            deserialized_value = json.loads(value)
        except json.JSONDecodeError:
            deserialized_value = value
    else:
        deserialized_value = value

    # Validate that the value is of the correct type
    if value_type == models.AssertionStdParameterTypeClass.NUMBER:
        # bool is a subclass of int, so it has to be excluded explicitly;
        # otherwise True/False (or the JSON strings "true"/"false") would be
        # accepted as numbers.
        if isinstance(deserialized_value, bool) or not isinstance(
            deserialized_value, (int, float)
        ):
            raise SDKUsageError(f"Invalid value: {value}, must be a number")
    elif value_type == models.AssertionStdParameterTypeClass.STRING:
        if not isinstance(deserialized_value, str):
            if not isinstance(value, str):
                raise SDKUsageError(f"Invalid value: {value}, must be a string")
            # The caller passed a genuine string whose text merely happens to
            # be valid JSON of another type (e.g. "123", "true", "null").
            # It is still a valid STRING value, so keep the original text.
            deserialized_value = value
    elif (
        value_type == models.AssertionStdParameterTypeClass.LIST
        or value_type == models.AssertionStdParameterTypeClass.SET
    ):
        raise SDKNotYetSupportedError(
            "List and set value types are not supported for smart column metric assertions"
        )
    elif value_type == models.AssertionStdParameterTypeClass.UNKNOWN:
        pass  # TODO: What to do with unknown?
    else:
        raise SDKUsageError(
            f"Invalid value type: {value_type}, valid options are {get_enum_options(models.AssertionStdParameterTypeClass)}"
        )
    return deserialized_value
|
|
859
|
+
|
|
860
|
+
|
|
861
|
+
def _is_range_required_for_operator(operator: models.AssertionStdOperatorClass) -> bool:
    """Return whether the operator is a member of RANGE_OPERATORS."""
    needs_range = operator in RANGE_OPERATORS
    return needs_range
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
def _is_value_required_for_operator(operator: models.AssertionStdOperatorClass) -> bool:
    """Return whether the operator is a member of SINGLE_VALUE_OPERATORS."""
    needs_value = operator in SINGLE_VALUE_OPERATORS
    return needs_value
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
def _is_no_parameter_operator(operator: models.AssertionStdOperatorClass) -> bool:
    """Return whether the operator is a member of NO_PARAMETER_OPERATORS."""
    takes_no_parameters = operator in NO_PARAMETER_OPERATORS
    return takes_no_parameters
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def _validate_operator_and_input_parameters(
    operator: models.AssertionStdOperatorClass,
    value: Optional[ValueInputType] = None,
    value_type: Optional[models.AssertionStdParameterTypeClass] = None,
    range: Optional[RangeInputType] = None,
    range_type: Optional[RangeTypeParsedType] = None,
) -> None:
    """Check that the supplied parameters fit the operator's category.

    - Value operators require ``value`` and ``value_type``.
    - Range operators require ``range`` and ``range_type``.
    - No-parameter operators must receive none of the four parameters.

    Unlike the no-parameter case, the value and range cases only check that
    their own parameters are present; the other pair is not inspected here.

    Raises:
        SDKUsageError: If a required parameter is missing, a forbidden one is
            present, or the operator belongs to no known category.
    """
    if _is_value_required_for_operator(operator):
        if value is None:
            raise SDKUsageError(f"Value is required for operator {operator}")
        if value_type is None:
            raise SDKUsageError(f"Value type is required for operator {operator}")
        return

    if _is_range_required_for_operator(operator):
        if range is None:
            raise SDKUsageError(f"Range is required for operator {operator}")
        if range_type is None:
            raise SDKUsageError(f"Range type is required for operator {operator}")
        return

    if not _is_no_parameter_operator(operator):
        raise SDKUsageError(f"Unsupported operator type: {operator}")

    # From here on the operator takes no parameters at all.
    if not (value is None and value_type is None):
        raise SDKUsageError(
            f"Value parameters should not be provided for operator {operator}"
        )
    if not (range is None and range_type is None):
        raise SDKUsageError(
            f"Range parameters should not be provided for operator {operator}"
        )
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
def _try_parse_and_validate_range_type(
    range_type: Optional[RangeTypeInputType] = None,
) -> RangeTypeParsedType:
    """Normalise a range type into a ``(start_type, end_type)`` pair.

    - ``None`` yields ``(UNKNOWN, UNKNOWN)``.
    - A tuple has its first two elements parsed individually.
    - Any other input is parsed once and used for both ends of the range.

    Parsing is delegated to ``_try_parse_and_validate_schema_classes_enum``
    with ``AssertionStdParameterTypeClass`` as the target enum.
    """
    parameter_type_enum = models.AssertionStdParameterTypeClass

    if range_type is None:
        return (parameter_type_enum.UNKNOWN, parameter_type_enum.UNKNOWN)

    if not isinstance(range_type, tuple):
        # Single value, we assume the same type for start and end:
        shared_type = _try_parse_and_validate_schema_classes_enum(
            range_type, parameter_type_enum
        )
        return shared_type, shared_type

    start_type = _try_parse_and_validate_schema_classes_enum(
        range_type[0], parameter_type_enum
    )
    end_type = _try_parse_and_validate_schema_classes_enum(
        range_type[1], parameter_type_enum
    )
    return (start_type, end_type)
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
def _try_parse_and_validate_range(
    range: Optional[RangeInputType],
    range_type: RangeTypeParsedType,
    operator: models.AssertionStdOperatorClass,
) -> RangeInputType:
    """Validate both ends of a range against their declared types.

    Each end of the range is passed to ``_try_parse_and_validate_value`` along
    with the matching element of ``range_type``.

    Args:
        range: The range; elements 0 and 1 are used as start and end.
        range_type: The types for start (element 0) and end (element 1).
        operator: The operator the range belongs to (used for the
            required-range check and in error messages).

    Returns:
        A ``(start, end)`` tuple of the validated values.

    Raises:
        SDKUsageError: If ``range`` is ``None``, or if ``range_type`` is
            ``None`` while the operator requires a range.
    """
    something_missing = range is None or range_type is None
    if something_missing and _is_range_required_for_operator(operator):
        raise SDKUsageError(f"Range is required for operator {operator}")

    # A missing range is rejected even when the operator does not require one.
    if range is None:
        raise SDKUsageError(f"Range is required for operator {operator}")

    start_value, end_value = range[0], range[1]
    start_type, end_type = range_type[0], range_type[1]

    range_start = _try_parse_and_validate_value(start_value, start_type)
    range_end = _try_parse_and_validate_value(end_value, end_type)
    return (range_start, range_end)
|