acryl-datahub-cloud 0.3.12rc6__py3-none-any.whl → 0.3.12rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1934 -1934
- acryl_datahub_cloud/metadata/schema.avsc +23968 -23968
- acryl_datahub_cloud/metadata/schema_classes.py +658 -658
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +157 -24
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +630 -0
- acryl_datahub_cloud/sdk/assertions_client.py +529 -2
- {acryl_datahub_cloud-0.3.12rc6.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/METADATA +41 -41
- {acryl_datahub_cloud-0.3.12rc6.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/RECORD +12 -11
- {acryl_datahub_cloud-0.3.12rc6.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc6.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc6.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
from abc import ABC
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any, Optional, Tuple, Union
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Extra
|
|
7
|
+
from typing_extensions import Literal
|
|
8
|
+
|
|
9
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
10
|
+
DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
|
|
11
|
+
AssertionIncidentBehavior,
|
|
12
|
+
DetectionMechanismInputTypes,
|
|
13
|
+
FieldSpecType,
|
|
14
|
+
_AssertionInput,
|
|
15
|
+
)
|
|
16
|
+
from acryl_datahub_cloud.sdk.entities.assertion import (
|
|
17
|
+
Assertion,
|
|
18
|
+
AssertionInfoInputType,
|
|
19
|
+
TagsInputType,
|
|
20
|
+
)
|
|
21
|
+
from acryl_datahub_cloud.sdk.errors import SDKNotYetSupportedError, SDKUsageError
|
|
22
|
+
from datahub.metadata import schema_classes as models
|
|
23
|
+
from datahub.metadata.urns import (
|
|
24
|
+
AssertionUrn,
|
|
25
|
+
CorpUserUrn,
|
|
26
|
+
DatasetUrn,
|
|
27
|
+
)
|
|
28
|
+
from datahub.sdk.entity_client import EntityClient
|
|
29
|
+
|
|
30
|
+
# TODO: better naming for "volume assertion definition"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class VolumeAssertionOperator(str, Enum):
    """Operators that a volume assertion definition may use.

    Each member's value is the matching constant on
    ``models.AssertionStdOperatorClass``. Members are also ``str`` instances
    because of the ``str`` mixin. The declaration order is significant: it is
    the order in which operators are listed in "invalid operator" error
    messages built by iterating over this enum.
    """

    # Single-threshold operators: used with one numeric parameter.
    LESS_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
    GREATER_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
    # Range operator: used with a two-element tuple parameter.
    BETWEEN = models.AssertionStdOperatorClass.BETWEEN
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class _AbstractVolumeAssertionDefinition(BaseModel, ABC):
    """Common pydantic base for the concrete volume assertion definitions.

    Subclasses narrow ``type`` to a literal string that identifies the kind of
    volume assertion they describe.
    """

    # Discriminator naming the concrete definition kind.
    type: str

    class Config:
        # Reject any field that is not declared on the model.
        extra = Extra.forbid
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class RowCountTotal(_AbstractVolumeAssertionDefinition):
    """Volume assertion definition of type ``"row_count_total"``.

    The model itself does not check that ``parameters`` has the right shape
    for ``operator``; that cross-field check is done by
    ``VolumeAssertionDefinition.parse``.
    """

    # Fixed discriminator; defaults to the only permitted value.
    type: Literal["row_count_total"] = "row_count_total"
    # Comparison operator to apply.
    operator: VolumeAssertionOperator
    # A single number, or a pair of numbers (used with BETWEEN).
    parameters: Union[float, Tuple[float, float]]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class RowCountChange(_AbstractVolumeAssertionDefinition):
    """Volume assertion definition of type ``"row_count_change"``.

    The model itself does not check that ``parameters`` has the right shape
    for ``operator``; that cross-field check is done by
    ``VolumeAssertionDefinition.parse``.
    """

    # Fixed discriminator; defaults to the only permitted value.
    type: Literal["row_count_change"] = "row_count_change"
    # How the change is expressed: "absolute" or "percent".
    kind: Literal["absolute", "percent"]
    # Comparison operator to apply.
    operator: VolumeAssertionOperator
    # A single number, or a pair of numbers (used with BETWEEN).
    parameters: Union[float, Tuple[float, float]]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Tuple form of the concrete definition classes, suitable as the second
# argument to isinstance().
_VOLUME_ASSERTION_DEFINITION_CONCRETE_TYPES = (RowCountTotal, RowCountChange)

# Static-typing counterpart of the tuple above.
_VolumeAssertionDefinitionTypes = Union[RowCountTotal, RowCountChange]

# What callers may pass as a definition: a plain dict or an instantiated model.
VolumeAssertionDefinitionInputTypes = Union[
    dict[str, Any], _VolumeAssertionDefinitionTypes
]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class VolumeAssertionDefinition:
    """Helpers for parsing, validating and converting volume assertion definitions.

    The class is used as a namespace: every method is a ``@staticmethod``.
    ``ROW_COUNT_TOTAL`` and ``ROW_COUNT_CHANGE`` expose the concrete
    definition models so callers can construct them through this class.
    """

    ROW_COUNT_TOTAL = RowCountTotal
    ROW_COUNT_CHANGE = RowCountChange

    @staticmethod
    def _validate_between_parameters(
        parameters: Union[float, Tuple[float, float]], assertion_type: str
    ) -> None:
        """Validate parameters for BETWEEN operator.

        Raises:
            SDKUsageError: If ``parameters`` is not a tuple of length two.
        """
        if not isinstance(parameters, tuple) or len(parameters) != 2:
            raise SDKUsageError(
                f"For BETWEEN operator in {assertion_type}, parameters must be a tuple of two numbers (min_value, max_value)."
            )

    @staticmethod
    def _validate_single_value_parameters(
        parameters: Union[float, Tuple[float, float]],
        operator_enum: VolumeAssertionOperator,
        assertion_type: str,
    ) -> None:
        """Validate parameters for single-value operators.

        Raises:
            SDKUsageError: If ``parameters`` is not an ``int`` or ``float``.
                Tuples get a more specific message.
        """
        if isinstance(parameters, (int, float)):
            return
        if isinstance(parameters, tuple):
            raise SDKUsageError(
                f"For {operator_enum.value} operator in {assertion_type}, parameters must be a single number, not a tuple."
            )
        raise SDKUsageError(
            f"For {operator_enum.value} operator in {assertion_type}, parameters must be a single number."
        )

    @staticmethod
    def _parse_operator(
        operator: Union[str, VolumeAssertionOperator],
    ) -> VolumeAssertionOperator:
        """Parse and validate operator input, converting string to enum if needed.

        Args:
            operator: An enum member or the string value of one.

        Returns:
            The matching ``VolumeAssertionOperator`` member.

        Raises:
            SDKUsageError: If ``operator`` does not correspond to a member.
                This includes non-string values, which would otherwise be
                returned unvalidated and fail later in less obvious ways.
        """
        try:
            # Lookup by value; an existing member resolves to itself.
            return VolumeAssertionOperator(operator)
        except (ValueError, TypeError) as e:
            valid_operators = ", ".join(op.value for op in VolumeAssertionOperator)
            raise SDKUsageError(
                f"Invalid operator '{operator}'. Valid operators: {valid_operators}"
            ) from e

    @staticmethod
    def _validate_operator_and_parameters(
        operator: Union[str, VolumeAssertionOperator],
        parameters: Union[float, Tuple[float, float]],
        assertion_type: str,
    ) -> None:
        """Validate that operator and parameters are compatible for volume assertions.

        Raises:
            SDKUsageError: If the operator is invalid, or if the shape of
                ``parameters`` does not match what the operator requires.
        """
        operator_enum = VolumeAssertionDefinition._parse_operator(operator)

        # BETWEEN needs a pair; every other operator needs a single number.
        if operator_enum == VolumeAssertionOperator.BETWEEN:
            VolumeAssertionDefinition._validate_between_parameters(
                parameters, assertion_type
            )
        else:
            VolumeAssertionDefinition._validate_single_value_parameters(
                parameters, operator_enum, assertion_type
            )

    @staticmethod
    def _parse_instantiated_object(
        definition: _VolumeAssertionDefinitionTypes,
    ) -> _VolumeAssertionDefinitionTypes:
        """Parse and validate already instantiated volume assertion objects.

        The model classes do not cross-check operator against parameters,
        so that check is applied here. The same object is returned.
        """
        VolumeAssertionDefinition._validate_operator_and_parameters(
            definition.operator, definition.parameters, definition.type
        )
        return definition

    @staticmethod
    def _parse_dict_definition(
        definition_dict: dict[str, Any],
    ) -> _VolumeAssertionDefinitionTypes:
        """Parse and validate dictionary-based volume assertion definitions.

        Args:
            definition_dict: Mapping with ``type``, ``operator``,
                ``parameters`` and, for ``row_count_change``, ``kind``.
                The mapping is not modified.

        Returns:
            A ``RowCountTotal`` or ``RowCountChange`` instance.

        Raises:
            SDKUsageError: For a missing or unknown ``type``, a missing
                ``operator`` or ``parameters``, an operator/parameter mismatch,
                or a failure while constructing the model.
        """
        # Shallow copy so popping "type" and normalising "operator" never leak
        # into the caller's dict.
        fields = dict(definition_dict)

        try:
            assertion_type = fields.pop("type")
        except KeyError as e:
            raise SDKUsageError(
                "Volume assertion definition must include a 'type' field"
            ) from e

        # Check for valid assertion type first
        model_by_type = {
            "row_count_total": RowCountTotal,
            "row_count_change": RowCountChange,
        }
        # The isinstance guard keeps unhashable values from raising TypeError
        # in the dict lookup.
        model_cls = (
            model_by_type.get(assertion_type)
            if isinstance(assertion_type, str)
            else None
        )
        if model_cls is None:
            raise SDKUsageError(
                f"Unknown volume assertion type: {assertion_type}. Supported types: row_count_total, row_count_change"
            )

        # Extract operator and parameters for validation
        operator = fields.get("operator")
        parameters = fields.get("parameters")

        if operator is None:
            raise SDKUsageError(
                f"Missing required 'operator' field for {assertion_type}"
            )
        if parameters is None:
            raise SDKUsageError(
                f"Missing required 'parameters' field for {assertion_type}"
            )

        # Validate basic parameter type first
        if not isinstance(parameters, (int, float, tuple)):
            raise SDKUsageError(
                f"For {assertion_type}, parameters must be a number or a tuple of two numbers, got: {type(parameters)}"
            )

        # Validate operator and parameters before object creation
        VolumeAssertionDefinition._validate_operator_and_parameters(
            operator, parameters, assertion_type
        )

        # Hand the model an enum member rather than the raw input value.
        fields["operator"] = VolumeAssertionDefinition._parse_operator(operator)

        try:
            return model_cls(**fields)
        except Exception as e:
            # Deliberately broad: any construction failure (extra fields,
            # missing "kind", bad element types) becomes a usage error.
            raise SDKUsageError(
                f"Failed to create {assertion_type} volume assertion: {str(e)}"
            ) from e

    @staticmethod
    def parse(
        definition: VolumeAssertionDefinitionInputTypes,
    ) -> _VolumeAssertionDefinitionTypes:
        """Parse and validate a volume assertion definition.

        This method converts dictionary-based volume assertion definitions into typed volume
        assertion objects, or validates already instantiated volume assertion objects. It
        supports two volume assertion types: row_count_total and row_count_change.

        Args:
            definition: A volume assertion definition that can be either:
                - A dictionary containing volume assertion configuration with keys:
                    - type: Must be "row_count_total" or "row_count_change"
                    - operator: Must be "LESS_THAN_OR_EQUAL_TO", "GREATER_THAN_OR_EQUAL_TO", or "BETWEEN"
                    - parameters: Number for single-value operators, tuple of two numbers for BETWEEN
                    - kind: Required for "row_count_change", must be "absolute" or "percent"
                - An already instantiated RowCountTotal or RowCountChange object

        Returns:
            A validated volume assertion definition object (RowCountTotal or RowCountChange).

        Raises:
            SDKUsageError: If the definition is invalid, including:
                - Invalid input type (not dict or volume assertion object)
                - Missing required fields (type, operator, parameters, kind for row_count_change)
                - Unknown assertion type (not row_count_total or row_count_change)
                - Invalid operator (not in allowed operators)
                - Invalid parameter structure for operator:
                    - Single-value operators require number parameters
                    - BETWEEN operator requires tuple of two numbers
                - Object construction failures (extra fields, validation errors)

        Examples:
            Parse a row count total assertion:
            >>> definition = {
            ...     "type": "row_count_total",
            ...     "operator": "GREATER_THAN_OR_EQUAL_TO",
            ...     "parameters": 100
            ... }
            >>> result = VolumeAssertionDefinition.parse(definition)
            >>> isinstance(result, RowCountTotal)
            True

            Parse a row count change assertion with BETWEEN operator:
            >>> definition = {
            ...     "type": "row_count_change",
            ...     "kind": "absolute",
            ...     "operator": "BETWEEN",
            ...     "parameters": (10, 50)
            ... }
            >>> result = VolumeAssertionDefinition.parse(definition)
            >>> isinstance(result, RowCountChange)
            True

            Parse an already instantiated object:
            >>> obj = RowCountTotal(
            ...     operator=VolumeAssertionOperator.LESS_THAN_OR_EQUAL_TO,
            ...     parameters=200
            ... )
            >>> result = VolumeAssertionDefinition.parse(obj)
            >>> result == obj
            True
        """
        # If already instantiated, validate and return
        if isinstance(definition, _VOLUME_ASSERTION_DEFINITION_CONCRETE_TYPES):
            return VolumeAssertionDefinition._parse_instantiated_object(definition)

        if not isinstance(definition, dict):
            raise SDKUsageError(
                f"Volume assertion definition must be a dict or a volume assertion definition object, got: {type(definition)}"
            )

        # _parse_dict_definition copies its input, so the caller's dict is safe.
        return VolumeAssertionDefinition._parse_dict_definition(definition)

    @staticmethod
    def build_model_volume_info(
        definition: _VolumeAssertionDefinitionTypes,
        dataset_urn: str,
        filter: Optional[models.DatasetFilterClass] = None,
    ) -> models.VolumeAssertionInfoClass:
        """Build a DataHub VolumeAssertionInfoClass from a validated volume assertion definition.

        Args:
            definition: A validated volume assertion definition object (RowCountTotal or RowCountChange).
                This should be the output of VolumeAssertionDefinition.parse().
            dataset_urn: The dataset URN that this assertion applies to.
            filter: Optional filter to apply to the assertion.

        Returns:
            A VolumeAssertionInfoClass configured for the specific volume assertion type.

        Raises:
            SDKUsageError: If the definition type is not supported, or if its
                parameters do not match its operator.
        """
        if isinstance(definition, RowCountTotal):
            volume_info = models.VolumeAssertionInfoClass(
                type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL,
                entity=dataset_urn,
                rowCountTotal=models.RowCountTotalClass(
                    operator=definition.operator.value,
                    parameters=VolumeAssertionDefinition._build_assertion_parameters(
                        definition.operator, definition.parameters
                    ),
                ),
            )
        elif isinstance(definition, RowCountChange):
            # Map kind to DataHub assertion value change type
            change_type = (
                models.AssertionValueChangeTypeClass.ABSOLUTE
                if definition.kind == "absolute"
                else models.AssertionValueChangeTypeClass.PERCENTAGE
            )
            volume_info = models.VolumeAssertionInfoClass(
                type=models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE,
                entity=dataset_urn,
                rowCountChange=models.RowCountChangeClass(
                    type=change_type,
                    operator=definition.operator.value,
                    parameters=VolumeAssertionDefinition._build_assertion_parameters(
                        definition.operator, definition.parameters
                    ),
                ),
            )
        else:
            raise SDKUsageError(
                f"Unsupported volume assertion definition type: {type(definition)}"
            )

        # The filter is attached the same way for both definition kinds.
        if filter is not None:
            volume_info.filter = filter
        return volume_info

    @staticmethod
    def _format_number_value(value: Union[int, float]) -> str:
        """Format number value for DataHub parameter strings.

        Converts whole numbers to integers (100.0 -> "100") and keeps decimals (100.5 -> "100.5").
        """
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value)

    @staticmethod
    def _build_assertion_parameters(
        operator: VolumeAssertionOperator,
        parameters: Union[float, Tuple[float, float]],
    ) -> models.AssertionStdParametersClass:
        """Build assertion parameters for DataHub model classes.

        Args:
            operator: The volume assertion operator.
            parameters: A single number, or a tuple of two numbers for BETWEEN.

        Returns:
            AssertionStdParametersClass with ``minValue``/``maxValue`` set for
            BETWEEN, or ``value`` set for single-value operators.

        Raises:
            SDKUsageError: If the shape of ``parameters`` does not match
                ``operator``. An explicit raise is used instead of ``assert``
                so the check still runs under ``python -O``.
        """
        number_type = models.AssertionStdParameterTypeClass.NUMBER

        if operator == VolumeAssertionOperator.BETWEEN:
            if not (isinstance(parameters, tuple) and len(parameters) == 2):
                raise SDKUsageError(
                    f"BETWEEN operator requires tuple of two numbers, got: {parameters}"
                )
            # Sort values to ensure minValue is actually the minimum and maxValue is the maximum
            min_val, max_val = sorted(parameters)
            return models.AssertionStdParametersClass(
                minValue=models.AssertionStdParameterClass(
                    value=VolumeAssertionDefinition._format_number_value(min_val),
                    type=number_type,
                ),
                maxValue=models.AssertionStdParameterClass(
                    value=VolumeAssertionDefinition._format_number_value(max_val),
                    type=number_type,
                ),
            )

        # Single value operators
        if not isinstance(parameters, (int, float)):
            raise SDKUsageError(
                f"Single value operator {operator} requires number parameter, got: {parameters}"
            )
        return models.AssertionStdParametersClass(
            value=models.AssertionStdParameterClass(
                value=VolumeAssertionDefinition._format_number_value(parameters),
                type=number_type,
            ),
        )

    @staticmethod
    def _parse_stored_number(assertion_urn: str, raw_value: Any) -> float:
        """Convert a stored parameter value to ``float``.

        Raises:
            SDKNotYetSupportedError: If the stored value is not numeric.
        """
        try:
            return float(raw_value)
        except (TypeError, ValueError) as e:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion_urn} has non-numeric parameter value {raw_value!r}"
            ) from e

    @staticmethod
    def _parse_stored_operator(
        assertion_urn: str, raw_operator: Any
    ) -> VolumeAssertionOperator:
        """Convert a stored operator to a ``VolumeAssertionOperator``.

        Raises:
            SDKNotYetSupportedError: If the stored operator is not one of the
                members of ``VolumeAssertionOperator``.
        """
        try:
            return VolumeAssertionOperator(raw_operator)
        except (TypeError, ValueError) as e:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion_urn} has unsupported operator {raw_operator}"
            ) from e

    @staticmethod
    def _extract_volume_parameters(
        assertion_urn: str,
        operator: VolumeAssertionOperator,
        parameters: models.AssertionStdParametersClass,
    ) -> Union[float, Tuple[float, float]]:
        """Extract parameters from assertion based on operator type.

        Returns:
            ``(min, max)`` as floats for BETWEEN, otherwise a single float.

        Raises:
            SDKNotYetSupportedError: If the values the operator needs are
                missing or not numeric.
        """
        if operator == VolumeAssertionOperator.BETWEEN:
            if parameters.minValue is None or parameters.maxValue is None:
                raise SDKNotYetSupportedError(
                    f"Volume assertion {assertion_urn} has BETWEEN operator but missing min/max values"
                )
            return (
                VolumeAssertionDefinition._parse_stored_number(
                    assertion_urn, parameters.minValue.value
                ),
                VolumeAssertionDefinition._parse_stored_number(
                    assertion_urn, parameters.maxValue.value
                ),
            )

        if parameters.value is None:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion_urn} has {operator.value} operator but missing value"
            )
        return VolumeAssertionDefinition._parse_stored_number(
            assertion_urn, parameters.value.value
        )

    @staticmethod
    def _get_row_count_total(assertion: Assertion) -> RowCountTotal:
        """Extract RowCountTotal from assertion.

        Raises:
            SDKNotYetSupportedError: If ``rowCountTotal`` is missing, or its
                operator or parameters are not supported.
        """
        # Type narrowing only; from_assertion has already checked this.
        assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
        if assertion.info.rowCountTotal is None:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion.urn} has ROW_COUNT_TOTAL type but no rowCountTotal, which is not supported"
            )
        row_count_total = assertion.info.rowCountTotal
        urn_text = str(assertion.urn)
        operator = VolumeAssertionDefinition._parse_stored_operator(
            urn_text, row_count_total.operator
        )
        parameters = VolumeAssertionDefinition._extract_volume_parameters(
            urn_text, operator, row_count_total.parameters
        )
        return RowCountTotal(operator=operator, parameters=parameters)

    @staticmethod
    def _get_row_count_change(assertion: Assertion) -> RowCountChange:
        """Extract RowCountChange from assertion.

        Raises:
            SDKNotYetSupportedError: If ``rowCountChange`` is missing, or its
                operator or parameters are not supported.
        """
        # Type narrowing only; from_assertion has already checked this.
        assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
        if assertion.info.rowCountChange is None:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion.urn} has ROW_COUNT_CHANGE type but no rowCountChange, which is not supported"
            )
        row_count_change = assertion.info.rowCountChange
        urn_text = str(assertion.urn)
        operator = VolumeAssertionDefinition._parse_stored_operator(
            urn_text, row_count_change.operator
        )
        parameters = VolumeAssertionDefinition._extract_volume_parameters(
            urn_text, operator, row_count_change.parameters
        )
        # Any change type other than ABSOLUTE is reported as "percent".
        kind: Literal["absolute", "percent"] = (
            "absolute"
            if row_count_change.type == models.AssertionValueChangeTypeClass.ABSOLUTE
            else "percent"
        )
        return RowCountChange(operator=operator, parameters=parameters, kind=kind)

    @staticmethod
    def from_assertion(assertion: Assertion) -> _VolumeAssertionDefinitionTypes:
        """Create a volume assertion definition from a DataHub assertion entity.

        Args:
            assertion: The DataHub assertion entity to extract the definition from.

        Returns:
            A volume assertion definition object (RowCountTotal or RowCountChange).

        Raises:
            SDKNotYetSupportedError: If the assertion is not a volume assertion or has
                unsupported configuration.
        """
        if assertion.info is None:
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} does not have a volume assertion info, which is not supported"
            )
        if not isinstance(assertion.info, models.VolumeAssertionInfoClass):
            raise SDKNotYetSupportedError(
                f"Assertion {assertion.urn} is not a volume assertion"
            )

        if assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL:
            return VolumeAssertionDefinition._get_row_count_total(assertion)
        elif assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE:
            return VolumeAssertionDefinition._get_row_count_change(assertion)
        else:
            raise SDKNotYetSupportedError(
                f"Volume assertion {assertion.urn} has unsupported type {assertion.info.type}"
            )
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
class _VolumeAssertionInput(_AssertionInput):
    """Assertion input for volume assertions.

    Adds a validated volume assertion ``definition`` to the common fields
    handled by ``_AssertionInput`` and supplies the volume-specific hooks
    defined below.
    """

    def __init__(
        self,
        *,
        # Required fields
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
        urn: Optional[Union[str, AssertionUrn]] = None,
        # Optional fields
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        detection_mechanism: DetectionMechanismInputTypes = None,
        incident_behavior: Optional[
            Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
        ] = None,
        tags: Optional[TagsInputType] = None,
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
        # volume assertion fields
        definition: Optional[
            VolumeAssertionDefinitionInputTypes
        ] = None,  # TBC: default value does not make sense
    ):
        """Initialise the common assertion fields, then parse ``definition``.

        Raises:
            SDKUsageError: If ``definition`` is ``None`` or fails
                ``VolumeAssertionDefinition.parse``.
        """
        # The base initialiser runs first, so any error it raises takes
        # precedence over the definition check below.
        _AssertionInput.__init__(
            self,
            dataset_urn=dataset_urn,
            entity_client=entity_client,
            urn=urn,
            display_name=display_name,
            enabled=enabled,
            schedule=schedule,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            source_type=models.AssertionSourceTypeClass.NATIVE,  # Native assertions are of type native, not inferred
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )

        # The signature allows None, but a volume assertion cannot be built
        # without a definition.
        if definition is None:
            raise SDKUsageError("Volume assertion definition is required")
        self.definition = VolumeAssertionDefinition.parse(definition)

    def _assertion_type(self) -> str:
        """Return the assertion type constant for volume assertions."""
        return models.AssertionTypeClass.VOLUME

    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """Build the volume assertion info for this input.

        Args:
            filter: Optional filter to apply to the assertion.

        Returns:
            The VolumeAssertionInfoClass built from ``self.definition`` for
            ``self.dataset_urn``.
        """
        dataset_urn_text = str(self.dataset_urn)
        return VolumeAssertionDefinition.build_model_volume_info(
            self.definition, dataset_urn_text, filter
        )

    def _create_monitor_info(
        self,
        assertion_urn: AssertionUrn,
        status: models.MonitorStatusClass,
        schedule: models.CronScheduleClass,
    ) -> models.MonitorInfoClass:
        """Build a MonitorInfoClass holding one evaluation spec for ``assertion_urn``."""
        source_type, field = self._convert_assertion_source_type_and_field()
        evaluation_parameters = self._get_assertion_evaluation_parameters(
            str(source_type), field
        )
        evaluation_spec = models.AssertionEvaluationSpecClass(
            assertion=str(assertion_urn),
            schedule=schedule,
            parameters=evaluation_parameters,
        )
        return models.MonitorInfoClass(
            type=models.MonitorTypeClass.ASSERTION,
            status=status,
            assertionMonitor=models.AssertionMonitorClass(
                assertions=[evaluation_spec]
            ),
        )

    def _convert_schedule(self) -> models.CronScheduleClass:
        """Return the schedule to use for this volume assertion.

        Returns:
            ``DEFAULT_EVERY_SIX_HOURS_SCHEDULE`` when no schedule is set,
            otherwise a new CronScheduleClass carrying the configured cron
            expression and timezone.
        """
        configured = self.schedule
        if configured is None:
            return DEFAULT_EVERY_SIX_HOURS_SCHEDULE

        # NOTE(review): reads .cron and .timezone, so this presumably relies
        # on the base class having normalised string schedules; confirm.
        return models.CronScheduleClass(
            cron=configured.cron,
            timezone=configured.timezone,
        )

    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Build DATASET_VOLUME evaluation parameters for ``source_type``.

        ``field`` is accepted but not used here.
        """
        volume_parameters = models.DatasetVolumeAssertionParametersClass(
            sourceType=source_type
        )
        return models.AssertionEvaluationParametersClass(
            type=models.AssertionEvaluationParametersTypeClass.DATASET_VOLUME,
            datasetVolumeParameters=volume_parameters,
        )

    def _convert_assertion_source_type_and_field(
        self,
    ) -> tuple[str, Optional[FieldSpecType]]:
        """Convert the detection mechanism to source type and field.

        Returns:
            A ``(source_type, field)`` pair. ``field`` is always ``None``.
            ``source_type`` is OPERATION for the string "datahub_operation"
            and INFORMATION_SCHEMA for everything else, including ``None``,
            unrecognised strings and non-string mechanisms.
        """
        source_types = models.DatasetVolumeSourceTypeClass
        fallback = source_types.INFORMATION_SCHEMA
        by_name = {
            "information_schema": source_types.INFORMATION_SCHEMA,
            "datahub_operation": source_types.OPERATION,
        }

        mechanism = self.detection_mechanism
        if isinstance(mechanism, str):
            return by_name.get(mechanism, fallback), None

        # None and more complex detection mechanisms use the default;
        # they might need additional logic later.
        return fallback, None
|