acryl-datahub-cloud: 0.3.12.1rc3 (py3-none-any.whl) → 0.3.12.3 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -1,4 +1,3 @@
1
- from abc import ABC
2
1
  from datetime import datetime
3
2
  from enum import Enum
4
3
  from typing import Any, Optional, Tuple, Union
@@ -11,6 +10,9 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
11
10
  DetectionMechanismInputTypes,
12
11
  FieldSpecType,
13
12
  _AssertionInput,
13
+ _DatasetProfile,
14
+ _InformationSchema,
15
+ _Query,
14
16
  )
15
17
  from acryl_datahub_cloud.sdk.entities.assertion import (
16
18
  Assertion,
@@ -26,354 +28,426 @@ from datahub.metadata.urns import (
26
28
  )
27
29
  from datahub.sdk.entity_client import EntityClient
28
30
 
29
- # TODO: better naming for "volume assertion definition"
31
+ # Type aliases and enums for volume assertions
30
32
 
31
33
 
32
- # Type aliases and enums for volume assertions
34
+ class VolumeAssertionCondition(Enum):
35
+ """Valid conditions for volume assertions combining type, operator, and change kind."""
33
36
 
37
+ # Row count total conditions
38
+ ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO = "ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO" # models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL + models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
39
+ ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO = "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO" # models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL + models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
40
+ ROW_COUNT_IS_WITHIN_A_RANGE = "ROW_COUNT_IS_WITHIN_A_RANGE" # models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL + models.AssertionStdOperatorClass.BETWEEN
34
41
 
35
- class VolumeAssertionDefinitionType(str, Enum):
36
- ROW_COUNT_TOTAL = models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL
37
- ROW_COUNT_CHANGE = models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE
42
+ # Row count change conditions - absolute
43
+ ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE = "ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE" # models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE + models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO + models.AssertionValueChangeTypeClass.ABSOLUTE
44
+ ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE = "ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE" # models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE + models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO + models.AssertionValueChangeTypeClass.ABSOLUTE
45
+ ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE = "ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE" # models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE + models.AssertionStdOperatorClass.BETWEEN + models.AssertionValueChangeTypeClass.ABSOLUTE
38
46
 
39
-
40
- # Currently supported volume assertion definition types
41
- CURRENTLY_SUPPORTED_VOLUME_ASSERTION_DEFINITIONS = [
42
- VolumeAssertionDefinitionType.ROW_COUNT_TOTAL,
43
- VolumeAssertionDefinitionType.ROW_COUNT_CHANGE,
44
- ]
45
-
46
-
47
- class VolumeAssertionDefinitionChangeKind(str, Enum):
48
- ABSOLUTE = models.AssertionValueChangeTypeClass.ABSOLUTE
49
- PERCENTAGE = models.AssertionValueChangeTypeClass.PERCENTAGE
47
+ # Row count change conditions - percentage
48
+ ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE = "ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE" # models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE + models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO + models.AssertionValueChangeTypeClass.PERCENTAGE
49
+ ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE = "ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE" # models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE + models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO + models.AssertionValueChangeTypeClass.PERCENTAGE
50
+ ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE = "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE" # models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE + models.AssertionStdOperatorClass.BETWEEN + models.AssertionValueChangeTypeClass.PERCENTAGE
50
51
 
51
52
 
52
53
  VolumeAssertionDefinitionParameters = Union[float, Tuple[float, float]]
53
54
 
55
+ VolumeAssertionCriteriaInputTypes = Union[dict[str, Any], "VolumeAssertionCriteria"]
54
56
 
55
- class VolumeAssertionOperator(str, Enum):
56
- """Valid operators for volume assertions."""
57
-
58
- LESS_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
59
- GREATER_THAN_OR_EQUAL_TO = models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
60
- BETWEEN = models.AssertionStdOperatorClass.BETWEEN
61
57
 
62
-
63
- class _AbstractVolumeAssertionDefinition(BaseModel, ABC):
64
- type: str
58
+ class VolumeAssertionCriteria(BaseModel):
59
+ condition: VolumeAssertionCondition
60
+ parameters: VolumeAssertionDefinitionParameters
65
61
 
66
62
  class Config:
67
63
  extra = Extra.forbid
68
64
 
65
+ @staticmethod
66
+ def parse(criteria: VolumeAssertionCriteriaInputTypes) -> "VolumeAssertionCriteria":
67
+ """Parse and validate volume assertion criteria.
69
68
 
70
- class RowCountTotal(_AbstractVolumeAssertionDefinition):
71
- type: VolumeAssertionDefinitionType = VolumeAssertionDefinitionType.ROW_COUNT_TOTAL
72
- operator: VolumeAssertionOperator
73
- parameters: VolumeAssertionDefinitionParameters
74
-
75
-
76
- class RowCountChange(_AbstractVolumeAssertionDefinition):
77
- type: VolumeAssertionDefinitionType = VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
78
- kind: VolumeAssertionDefinitionChangeKind
79
- operator: VolumeAssertionOperator
80
- parameters: VolumeAssertionDefinitionParameters
69
+ This method converts dictionary-based volume assertion criteria into typed volume
70
+ assertion objects, or validates already instantiated volume assertion objects. It
71
+ supports nine volume assertion conditions covering both total row count checks and
72
+ row count change monitoring with absolute or percentage thresholds.
81
73
 
74
+ Args:
75
+ criteria: A volume assertion criteria that can be either:
76
+ - A dictionary containing volume assertion configuration with keys:
77
+ - condition: Must be one of the VolumeAssertionCondition enum values:
78
+ - "ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO": Total row count threshold (upper bound)
79
+ - "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO": Total row count threshold (lower bound)
80
+ - "ROW_COUNT_IS_WITHIN_A_RANGE": Total row count within specified range
81
+ - "ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE": Row count change upper bound (absolute)
82
+ - "ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE": Row count change lower bound (absolute)
83
+ - "ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE": Row count change within range (absolute)
84
+ - "ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE": Row count change upper bound (percentage)
85
+ - "ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE": Row count change lower bound (percentage)
86
+ - "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE": Row count change within range (percentage)
87
+ - parameters: Numeric threshold(s) for the condition:
88
+ - Single number (int/float) for single-bound conditions
89
+ - Tuple of two numbers for range conditions (WITHIN_A_RANGE)
90
+ - An already instantiated VolumeAssertionCriteria object
82
91
 
83
- _VOLUME_ASSERTION_DEFINITION_CONCRETE_TYPES = (
84
- RowCountTotal,
85
- RowCountChange,
86
- )
87
- _VolumeAssertionDefinitionTypes = Union[
88
- RowCountTotal,
89
- RowCountChange,
90
- ]
92
+ Returns:
93
+ A validated VolumeAssertionCriteria object with the specified condition and parameters.
91
94
 
92
- VolumeAssertionDefinitionInputTypes = Union[
93
- dict[str, Any], _VolumeAssertionDefinitionTypes
94
- ]
95
+ Raises:
96
+ SDKUsageError: If the criteria is invalid, including:
97
+ - Invalid input type (not dict or VolumeAssertionCriteria object)
98
+ - Missing required fields (condition or parameters)
99
+ - Invalid condition value (not in VolumeAssertionCondition enum)
100
+ - Invalid parameter structure for condition:
101
+ - Single-bound conditions require a single number
102
+ - Range conditions require a tuple of two numbers
103
+ - Parameters must be numeric (int or float)
104
+ - Parameter validation failures (negative values, invalid ranges)
95
105
 
106
+ Examples:
107
+ Parse a total row count assertion with single threshold:
108
+ >>> criteria = {
109
+ ... "condition": "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO",
110
+ ... "parameters": 100
111
+ ... }
112
+ >>> result = VolumeAssertionCriteria.parse(criteria)
113
+ >>> result.condition.value
114
+ "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO"
115
+
116
+ Parse a row count change assertion with range:
117
+ >>> criteria = {
118
+ ... "condition": "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE",
119
+ ... "parameters": (10.0, 50.0)
120
+ ... }
121
+ >>> result = VolumeAssertionCriteria.parse(criteria)
122
+ >>> result.parameters
123
+ (10.0, 50.0)
96
124
 
97
- class VolumeAssertionDefinition:
98
- ROW_COUNT_TOTAL = RowCountTotal
99
- ROW_COUNT_CHANGE = RowCountChange
125
+ Parse an already instantiated object:
126
+ >>> obj = VolumeAssertionCriteria(
127
+ ... condition=VolumeAssertionCondition.ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO,
128
+ ... parameters=200
129
+ ... )
130
+ >>> result = VolumeAssertionCriteria.parse(obj)
131
+ >>> result == obj
132
+ True
133
+ """
134
+ if isinstance(criteria, VolumeAssertionCriteria):
135
+ return criteria
100
136
 
101
- @staticmethod
102
- def _validate_between_parameters(
103
- parameters: VolumeAssertionDefinitionParameters, assertion_type: str
104
- ) -> None:
105
- """Validate parameters for BETWEEN operator."""
106
- if not isinstance(parameters, tuple) or len(parameters) != 2:
107
- raise SDKUsageError(
108
- f"For BETWEEN operator in {assertion_type}, parameters must be a tuple of two numbers (min_value, max_value)."
109
- )
137
+ if isinstance(criteria, dict):
138
+ condition = criteria.get("condition")
139
+ parameters = criteria.get("parameters")
110
140
 
111
- @staticmethod
112
- def _validate_single_value_parameters(
113
- parameters: VolumeAssertionDefinitionParameters,
114
- operator_enum: VolumeAssertionOperator,
115
- assertion_type: str,
116
- ) -> None:
117
- """Validate parameters for single-value operators."""
118
- if not isinstance(parameters, (int, float)):
119
- if isinstance(parameters, tuple):
141
+ if condition is None:
120
142
  raise SDKUsageError(
121
- f"For {operator_enum.value} operator in {assertion_type}, parameters must be a single number, not a tuple."
143
+ "Volume assertion criteria must include a 'condition' field"
122
144
  )
123
- else:
145
+ if parameters is None:
124
146
  raise SDKUsageError(
125
- f"For {operator_enum.value} operator in {assertion_type}, parameters must be a single number."
147
+ "Volume assertion criteria must include a 'parameters' field"
126
148
  )
127
149
 
150
+ # Validate condition and parameters compatibility
151
+ VolumeAssertionCriteria._validate_condition_and_parameters(
152
+ condition, parameters
153
+ )
154
+
155
+ return VolumeAssertionCriteria(condition=condition, parameters=parameters)
156
+
157
+ raise SDKUsageError(
158
+ f"Volume assertion criteria must be a dict or VolumeAssertionCriteria object, got: {type(criteria)}"
159
+ )
160
+
128
161
  @staticmethod
129
- def _parse_operator(
130
- operator: Union[str, VolumeAssertionOperator],
131
- ) -> VolumeAssertionOperator:
132
- """Parse and validate operator input, converting string to enum if needed."""
133
- if isinstance(operator, str):
134
- try:
135
- return VolumeAssertionOperator(operator)
136
- except ValueError as e:
137
- valid_operators = ", ".join(
138
- [op.value for op in VolumeAssertionOperator]
139
- )
140
- raise SDKUsageError(
141
- f"Invalid operator '{operator}'. Valid operators: {valid_operators}"
142
- ) from e
143
- return operator
162
+ def build_model_volume_info(
163
+ criteria: "VolumeAssertionCriteria",
164
+ dataset_urn: str,
165
+ filter: Optional[models.DatasetFilterClass] = None,
166
+ ) -> models.VolumeAssertionInfoClass:
167
+ """Build a DataHub VolumeAssertionInfoClass from volume assertion criteria."""
168
+ condition = criteria.condition
169
+ parameters = criteria.parameters
170
+
171
+ # Convert condition to DataHub models based on condition type
172
+ if condition.value.startswith("ROW_COUNT_IS_"):
173
+ volume_info = VolumeAssertionCriteria._build_row_count_total_info(
174
+ condition, parameters, dataset_urn
175
+ )
176
+ elif condition.value.startswith("ROW_COUNT_GROWS_"):
177
+ volume_info = VolumeAssertionCriteria._build_row_count_change_info(
178
+ condition, parameters, dataset_urn
179
+ )
180
+ else:
181
+ raise SDKUsageError(f"Unsupported volume assertion condition: {condition}")
182
+
183
+ if filter is not None:
184
+ volume_info.filter = filter
185
+ return volume_info
144
186
 
145
187
  @staticmethod
146
- def _validate_operator_and_parameters(
147
- operator: Union[str, VolumeAssertionOperator],
188
+ def _build_row_count_total_info(
189
+ condition: VolumeAssertionCondition,
148
190
  parameters: VolumeAssertionDefinitionParameters,
149
- assertion_type: str,
150
- ) -> None:
151
- """Validate that operator and parameters are compatible for volume assertions."""
152
- operator_enum = VolumeAssertionDefinition._parse_operator(operator)
191
+ dataset_urn: str,
192
+ ) -> models.VolumeAssertionInfoClass:
193
+ """Build VolumeAssertionInfoClass for row count total assertions."""
194
+ if condition.value.endswith("_WITHIN_A_RANGE"):
195
+ operator = models.AssertionStdOperatorClass.BETWEEN
196
+ elif condition.value.endswith("_LESS_THAN_OR_EQUAL_TO"):
197
+ operator = models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
198
+ elif condition.value.endswith("_GREATER_THAN_OR_EQUAL_TO"):
199
+ operator = models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
200
+ else:
201
+ raise SDKUsageError(f"Unknown row count condition: {condition}")
202
+
203
+ return models.VolumeAssertionInfoClass(
204
+ type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL,
205
+ entity=dataset_urn,
206
+ rowCountTotal=models.RowCountTotalClass(
207
+ operator=operator,
208
+ parameters=VolumeAssertionCriteria._build_assertion_parameters(
209
+ operator, parameters
210
+ ),
211
+ ),
212
+ )
153
213
 
154
- # Validate parameter structure based on operator
155
- if operator_enum == VolumeAssertionOperator.BETWEEN:
156
- VolumeAssertionDefinition._validate_between_parameters(
157
- parameters, assertion_type
158
- )
214
+ @staticmethod
215
+ def _build_row_count_change_info(
216
+ condition: VolumeAssertionCondition,
217
+ parameters: VolumeAssertionDefinitionParameters,
218
+ dataset_urn: str,
219
+ ) -> models.VolumeAssertionInfoClass:
220
+ """Build VolumeAssertionInfoClass for row count change assertions."""
221
+ # Determine operator
222
+ if condition.value.endswith(
223
+ "_WITHIN_A_RANGE_ABSOLUTE"
224
+ ) or condition.value.endswith("_WITHIN_A_RANGE_PERCENTAGE"):
225
+ operator = models.AssertionStdOperatorClass.BETWEEN
226
+ elif condition.value.endswith("_AT_MOST_ABSOLUTE") or condition.value.endswith(
227
+ "_AT_MOST_PERCENTAGE"
228
+ ):
229
+ operator = models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO
230
+ elif condition.value.endswith("_AT_LEAST_ABSOLUTE") or condition.value.endswith(
231
+ "_AT_LEAST_PERCENTAGE"
232
+ ):
233
+ operator = models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO
159
234
  else:
160
- VolumeAssertionDefinition._validate_single_value_parameters(
161
- parameters, operator_enum, assertion_type
235
+ raise SDKUsageError(f"Unknown row count change condition: {condition}")
236
+
237
+ # Determine change type
238
+ if condition.value.endswith("_ABSOLUTE") or condition.value.endswith(
239
+ "_ABSOLUTE_WITHIN_A_RANGE"
240
+ ):
241
+ change_type = models.AssertionValueChangeTypeClass.ABSOLUTE
242
+ elif condition.value.endswith("_PERCENTAGE") or condition.value.endswith(
243
+ "_PERCENTAGE_WITHIN_A_RANGE"
244
+ ):
245
+ change_type = models.AssertionValueChangeTypeClass.PERCENTAGE
246
+ else:
247
+ raise SDKUsageError(
248
+ f"Cannot determine change type for condition: {condition}"
162
249
  )
163
250
 
164
- @staticmethod
165
- def _parse_instantiated_object(
166
- definition: _VolumeAssertionDefinitionTypes,
167
- ) -> _VolumeAssertionDefinitionTypes:
168
- """Parse and validate already instantiated volume assertion objects."""
169
- VolumeAssertionDefinition._validate_operator_and_parameters(
170
- definition.operator, definition.parameters, definition.type
251
+ return models.VolumeAssertionInfoClass(
252
+ type=models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE,
253
+ entity=dataset_urn,
254
+ rowCountChange=models.RowCountChangeClass(
255
+ type=change_type,
256
+ operator=operator,
257
+ parameters=VolumeAssertionCriteria._build_assertion_parameters(
258
+ operator, parameters
259
+ ),
260
+ ),
171
261
  )
172
- return definition
173
262
 
174
263
  @staticmethod
175
- def _parse_dict_definition(
176
- definition_dict: dict[str, Any],
177
- ) -> _VolumeAssertionDefinitionTypes:
178
- """Parse and validate dictionary-based volume assertion definitions."""
179
- try:
180
- assertion_type = definition_dict.pop("type")
181
- except KeyError as e:
182
- raise SDKUsageError(
183
- "Volume assertion definition must include a 'type' field"
184
- ) from e
264
+ def from_assertion(assertion: Assertion) -> "VolumeAssertionCriteria":
265
+ """Create volume assertion criteria from a DataHub assertion entity."""
266
+ VolumeAssertionCriteria._validate_assertion_info(assertion)
185
267
 
186
- # Check for valid assertion type first
187
- if assertion_type not in CURRENTLY_SUPPORTED_VOLUME_ASSERTION_DEFINITIONS:
188
- supported_types = ", ".join(
189
- [t.value for t in CURRENTLY_SUPPORTED_VOLUME_ASSERTION_DEFINITIONS]
190
- )
191
- raise SDKUsageError(
192
- f"Unknown volume assertion type: {assertion_type}. Supported types: {supported_types}"
193
- )
268
+ # Type narrowing: we know assertion.info is VolumeAssertionInfoClass after validation
269
+ assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
194
270
 
195
- # Extract operator and parameters for validation
196
- operator = definition_dict.get("operator")
197
- parameters = definition_dict.get("parameters")
271
+ if assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL:
272
+ return VolumeAssertionCriteria._extract_row_count_total_criteria(assertion)
273
+ elif assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE:
274
+ return VolumeAssertionCriteria._extract_row_count_change_criteria(assertion)
275
+ else:
276
+ raise SDKNotYetSupportedError(
277
+ f"Unsupported volume assertion type: {assertion.info.type}"
278
+ )
198
279
 
199
- if operator is None:
200
- raise SDKUsageError(
201
- f"Missing required 'operator' field for {assertion_type}"
280
+ @staticmethod
281
+ def _validate_assertion_info(assertion: Assertion) -> None:
282
+ """Validate that assertion has valid volume assertion info."""
283
+ if assertion.info is None:
284
+ raise SDKNotYetSupportedError(
285
+ f"Assertion {assertion.urn} does not have a volume assertion info, which is not supported"
202
286
  )
203
- if parameters is None:
204
- raise SDKUsageError(
205
- f"Missing required 'parameters' field for {assertion_type}"
287
+ if not isinstance(assertion.info, models.VolumeAssertionInfoClass):
288
+ raise SDKNotYetSupportedError(
289
+ f"Assertion {assertion.urn} is not a volume assertion"
206
290
  )
207
291
 
208
- # Validate basic parameter type first
209
- if not isinstance(parameters, (int, float, tuple)):
210
- raise SDKUsageError(
211
- f"For {assertion_type}, parameters must be a number or a tuple of two numbers, got: {type(parameters)}"
292
+ @staticmethod
293
+ def _extract_row_count_total_criteria(
294
+ assertion: Assertion,
295
+ ) -> "VolumeAssertionCriteria":
296
+ """Extract criteria from row count total assertion."""
297
+ # Type narrowing: we know assertion.info is VolumeAssertionInfoClass
298
+ assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
299
+
300
+ if assertion.info.rowCountTotal is None:
301
+ raise SDKNotYetSupportedError(
302
+ f"Volume assertion {assertion.urn} has ROW_COUNT_TOTAL type but no rowCountTotal"
212
303
  )
213
304
 
214
- # Validate operator and parameters before object creation
215
- VolumeAssertionDefinition._validate_operator_and_parameters(
216
- operator, parameters, assertion_type
305
+ operator = assertion.info.rowCountTotal.operator
306
+ parameters = VolumeAssertionCriteria._extract_volume_parameters(
307
+ str(assertion.urn), str(operator), assertion.info.rowCountTotal.parameters
217
308
  )
218
309
 
219
- # Convert string operator to enum for object creation
220
- if isinstance(operator, str):
221
- definition_dict["operator"] = VolumeAssertionDefinition._parse_operator(
222
- operator
310
+ if operator == models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO:
311
+ condition = VolumeAssertionCondition.ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO
312
+ elif operator == models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO:
313
+ condition = VolumeAssertionCondition.ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO
314
+ elif operator == models.AssertionStdOperatorClass.BETWEEN:
315
+ condition = VolumeAssertionCondition.ROW_COUNT_IS_WITHIN_A_RANGE
316
+ else:
317
+ raise SDKNotYetSupportedError(
318
+ f"Unsupported operator for row count total: {operator}"
223
319
  )
224
320
 
225
- if assertion_type == VolumeAssertionDefinitionType.ROW_COUNT_TOTAL:
226
- try:
227
- return RowCountTotal(**definition_dict)
228
- except Exception as e:
229
- raise SDKUsageError(
230
- f"Failed to create {VolumeAssertionDefinitionType.ROW_COUNT_TOTAL.value} volume assertion: {str(e)}"
231
- ) from e
232
- else: # assertion_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
233
- try:
234
- return RowCountChange(**definition_dict)
235
- except Exception as e:
236
- raise SDKUsageError(
237
- f"Failed to create {VolumeAssertionDefinitionType.ROW_COUNT_CHANGE.value} volume assertion: {str(e)}"
238
- ) from e
321
+ return VolumeAssertionCriteria(condition=condition, parameters=parameters)
239
322
 
240
323
  @staticmethod
241
- def parse(
242
- definition: VolumeAssertionDefinitionInputTypes,
243
- ) -> _VolumeAssertionDefinitionTypes:
244
- """Parse and validate a volume assertion definition.
245
-
246
- This method converts dictionary-based volume assertion definitions into typed volume
247
- assertion objects, or validates already instantiated volume assertion objects. It
248
- supports two volume assertion types: row_count_total and row_count_change.
249
-
250
- Args:
251
- definition: A volume assertion definition that can be either:
252
- - A dictionary containing volume assertion configuration with keys:
253
- - type: Must be "row_count_total" or "row_count_change"
254
- - operator: Must be "LESS_THAN_OR_EQUAL_TO", "GREATER_THAN_OR_EQUAL_TO", or "BETWEEN"
255
- - parameters: Number for single-value operators, tuple of two numbers for BETWEEN
256
- - kind: Required for "row_count_change", must be "absolute" or "percent"
257
- - An already instantiated RowCountTotal or RowCountChange object
324
+ def _extract_row_count_change_criteria(
325
+ assertion: Assertion,
326
+ ) -> "VolumeAssertionCriteria":
327
+ """Extract criteria from row count change assertion."""
328
+ # Type narrowing: we know assertion.info is VolumeAssertionInfoClass
329
+ assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
258
330
 
259
- Returns:
260
- A validated volume assertion definition object (RowCountTotal or RowCountChange).
331
+ if assertion.info.rowCountChange is None:
332
+ raise SDKNotYetSupportedError(
333
+ f"Volume assertion {assertion.urn} has ROW_COUNT_CHANGE type but no rowCountChange"
334
+ )
261
335
 
262
- Raises:
263
- SDKUsageError: If the definition is invalid, including:
264
- - Invalid input type (not dict or volume assertion object)
265
- - Missing required fields (type, operator, parameters, kind for row_count_change)
266
- - Unknown assertion type (not row_count_total or row_count_change)
267
- - Invalid operator (not in allowed operators)
268
- - Invalid parameter structure for operator:
269
- - Single-value operators require number parameters
270
- - BETWEEN operator requires tuple of two numbers
271
- - Object construction failures (extra fields, validation errors)
336
+ operator = assertion.info.rowCountChange.operator
337
+ change_type = assertion.info.rowCountChange.type
338
+ parameters = VolumeAssertionCriteria._extract_volume_parameters(
339
+ str(assertion.urn), str(operator), assertion.info.rowCountChange.parameters
340
+ )
272
341
 
273
- Examples:
274
- Parse a row count total assertion:
275
- >>> definition = {
276
- ... "type": "row_count_total",
277
- ... "operator": "GREATER_THAN_OR_EQUAL_TO",
278
- ... "parameters": 100
279
- ... }
280
- >>> result = VolumeAssertionDefinition.parse(definition)
281
- >>> isinstance(result, RowCountTotal)
282
- True
342
+ # Determine condition based on operator and change type
343
+ if operator == models.AssertionStdOperatorClass.LESS_THAN_OR_EQUAL_TO:
344
+ condition = (
345
+ VolumeAssertionCondition.ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE
346
+ if change_type == models.AssertionValueChangeTypeClass.ABSOLUTE
347
+ else VolumeAssertionCondition.ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE
348
+ )
349
+ elif operator == models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO:
350
+ condition = (
351
+ VolumeAssertionCondition.ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE
352
+ if change_type == models.AssertionValueChangeTypeClass.ABSOLUTE
353
+ else VolumeAssertionCondition.ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE
354
+ )
355
+ elif operator == models.AssertionStdOperatorClass.BETWEEN:
356
+ condition = (
357
+ VolumeAssertionCondition.ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE
358
+ if change_type == models.AssertionValueChangeTypeClass.ABSOLUTE
359
+ else VolumeAssertionCondition.ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE
360
+ )
361
+ else:
362
+ raise SDKNotYetSupportedError(
363
+ f"Unsupported operator for row count change: {operator}"
364
+ )
283
365
 
284
- Parse a row count change assertion with BETWEEN operator:
285
- >>> definition = {
286
- ... "type": "row_count_change",
287
- ... "kind": "absolute",
288
- ... "operator": "BETWEEN",
289
- ... "parameters": (10, 50)
290
- ... }
291
- >>> result = VolumeAssertionDefinition.parse(definition)
292
- >>> isinstance(result, RowCountChange)
293
- True
366
+ return VolumeAssertionCriteria(condition=condition, parameters=parameters)
294
367
 
295
- Parse an already instantiated object:
296
- >>> obj = RowCountTotal(
297
- ... operator=VolumeAssertionOperator.LESS_THAN_OR_EQUAL_TO,
298
- ... parameters=200
299
- ... )
300
- >>> result = VolumeAssertionDefinition.parse(obj)
301
- >>> result == obj
302
- True
303
- """
304
- # If already instantiated, validate and return
305
- if isinstance(definition, _VOLUME_ASSERTION_DEFINITION_CONCRETE_TYPES):
306
- return VolumeAssertionDefinition._parse_instantiated_object(definition)
368
+ @staticmethod
369
+ def _extract_volume_parameters(
370
+ assertion_urn: str,
371
+ operator: str,
372
+ parameters: models.AssertionStdParametersClass,
373
+ ) -> VolumeAssertionDefinitionParameters:
374
+ """Extract parameters from assertion based on operator type."""
375
+ if operator == "BETWEEN":
376
+ if parameters.minValue is None or parameters.maxValue is None:
377
+ raise SDKNotYetSupportedError(
378
+ f"Volume assertion {assertion_urn} has BETWEEN operator but missing min/max values"
379
+ )
380
+ return (float(parameters.minValue.value), float(parameters.maxValue.value))
381
+ else:
382
+ if parameters.value is None:
383
+ raise SDKNotYetSupportedError(
384
+ f"Volume assertion {assertion_urn} has {operator} operator but missing value"
385
+ )
386
+ return float(parameters.value.value)
307
387
 
308
- if not isinstance(definition, dict):
388
+ @staticmethod
389
+ def _validate_between_parameters(
390
+ parameters: VolumeAssertionDefinitionParameters, condition: str
391
+ ) -> None:
392
+ """Validate parameters for WITHIN_A_RANGE conditions."""
393
+ if not isinstance(parameters, tuple) or len(parameters) != 2:
309
394
  raise SDKUsageError(
310
- f"Volume assertion definition must be a dict or a volume assertion definition object, got: {type(definition)}"
395
+ f"For WITHIN_A_RANGE condition {condition}, parameters must be a tuple of two numbers (min_value, max_value)."
311
396
  )
312
397
 
313
- return VolumeAssertionDefinition._parse_dict_definition(definition.copy())
314
-
315
398
  @staticmethod
316
- def build_model_volume_info(
317
- definition: _VolumeAssertionDefinitionTypes,
318
- dataset_urn: str,
319
- filter: Optional[models.DatasetFilterClass] = None,
320
- ) -> models.VolumeAssertionInfoClass:
321
- """Build a DataHub VolumeAssertionInfoClass from a validated volume assertion definition.
322
-
323
- This method converts validated volume assertion definition objects into DataHub model
324
- classes suitable for creating volume assertions in the DataHub metadata service.
325
-
326
- Args:
327
- definition: A validated volume assertion definition object (RowCountTotal or RowCountChange).
328
- This should be the output of VolumeAssertionDefinition.parse().
329
- dataset_urn: The dataset URN that this assertion applies to.
330
- filter: Optional filter to apply to the assertion.
331
-
332
- Returns:
333
- A VolumeAssertionInfoClass configured for the specific volume assertion type.
399
+ def _validate_single_value_parameters(
400
+ parameters: VolumeAssertionDefinitionParameters,
401
+ condition: str,
402
+ ) -> None:
403
+ """Validate parameters for single-value conditions."""
404
+ if not isinstance(parameters, (int, float)):
405
+ if isinstance(parameters, tuple):
406
+ raise SDKUsageError(
407
+ f"For condition {condition}, parameters must be a single number, not a tuple."
408
+ )
409
+ else:
410
+ raise SDKUsageError(
411
+ f"For condition {condition}, parameters must be a single number."
412
+ )
334
413
 
335
- Raises:
336
- SDKUsageError: If the definition type is not supported.
337
- """
338
- if isinstance(definition, RowCountTotal):
339
- volume_info = models.VolumeAssertionInfoClass(
340
- type=models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL,
341
- entity=dataset_urn,
342
- rowCountTotal=models.RowCountTotalClass(
343
- operator=definition.operator.value,
344
- parameters=VolumeAssertionDefinition._build_assertion_parameters(
345
- definition.operator, definition.parameters
346
- ),
347
- ),
348
- )
349
- if filter is not None:
350
- volume_info.filter = filter
351
- return volume_info
352
- elif isinstance(definition, RowCountChange):
353
- # Map kind to DataHub assertion value change type
354
- change_type = (
355
- models.AssertionValueChangeTypeClass.ABSOLUTE
356
- if definition.kind == VolumeAssertionDefinitionChangeKind.ABSOLUTE
357
- else models.AssertionValueChangeTypeClass.PERCENTAGE
358
- )
414
+ @staticmethod
415
+ def _parse_condition(
416
+ condition: Union[str, VolumeAssertionCondition],
417
+ ) -> VolumeAssertionCondition:
418
+ """Parse and validate condition input, converting string to enum if needed."""
419
+ if isinstance(condition, str):
420
+ try:
421
+ return VolumeAssertionCondition(condition)
422
+ except ValueError as e:
423
+ valid_conditions = ", ".join(
424
+ [cond.value for cond in VolumeAssertionCondition]
425
+ )
426
+ raise SDKUsageError(
427
+ f"Invalid condition '{condition}'. Valid conditions: {valid_conditions}"
428
+ ) from e
429
+ return condition
359
430
 
360
- volume_info = models.VolumeAssertionInfoClass(
361
- type=models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE,
362
- entity=dataset_urn,
363
- rowCountChange=models.RowCountChangeClass(
364
- type=change_type,
365
- operator=definition.operator.value,
366
- parameters=VolumeAssertionDefinition._build_assertion_parameters(
367
- definition.operator, definition.parameters
368
- ),
369
- ),
431
+ @staticmethod
432
+ def _validate_condition_and_parameters(
433
+ condition: Union[str, VolumeAssertionCondition],
434
+ parameters: VolumeAssertionDefinitionParameters,
435
+ ) -> None:
436
+ """Validate that condition and parameters are compatible for volume assertions."""
437
+ condition_enum = VolumeAssertionCriteria._parse_condition(condition)
438
+
439
+ # Validate parameter structure based on condition
440
+ if (
441
+ condition_enum.value.endswith("_WITHIN_A_RANGE")
442
+ or condition_enum.value.endswith("_WITHIN_A_RANGE_ABSOLUTE")
443
+ or condition_enum.value.endswith("_WITHIN_A_RANGE_PERCENTAGE")
444
+ ):
445
+ VolumeAssertionCriteria._validate_between_parameters(
446
+ parameters, condition_enum.value
370
447
  )
371
- if filter is not None:
372
- volume_info.filter = filter
373
- return volume_info
374
448
  else:
375
- raise SDKUsageError(
376
- f"Unsupported volume assertion definition type: {type(definition)}"
449
+ VolumeAssertionCriteria._validate_single_value_parameters(
450
+ parameters, condition_enum.value
377
451
  )
378
452
 
379
453
  @staticmethod
@@ -388,19 +462,19 @@ class VolumeAssertionDefinition:
388
462
 
389
463
  @staticmethod
390
464
  def _build_assertion_parameters(
391
- operator: VolumeAssertionOperator,
465
+ operator: str,
392
466
  parameters: VolumeAssertionDefinitionParameters,
393
467
  ) -> models.AssertionStdParametersClass:
394
468
  """Build assertion parameters for DataHub model classes.
395
469
 
396
470
  Args:
397
- operator: The volume assertion operator.
471
+ operator: The assertion operator (from models.AssertionStdOperatorClass).
398
472
  parameters: The parameters (int for single value, tuple for BETWEEN).
399
473
 
400
474
  Returns:
401
475
  AssertionStdParametersClass with appropriate parameter structure.
402
476
  """
403
- if operator == VolumeAssertionOperator.BETWEEN:
477
+ if operator == models.AssertionStdOperatorClass.BETWEEN:
404
478
  assert isinstance(parameters, tuple) and len(parameters) == 2, (
405
479
  f"BETWEEN operator requires tuple of two numbers, got: {parameters}"
406
480
  )
@@ -408,11 +482,11 @@ class VolumeAssertionDefinition:
408
482
  min_val, max_val = sorted(parameters)
409
483
  return models.AssertionStdParametersClass(
410
484
  minValue=models.AssertionStdParameterClass(
411
- value=VolumeAssertionDefinition._format_number_value(min_val),
485
+ value=VolumeAssertionCriteria._format_number_value(min_val),
412
486
  type=models.AssertionStdParameterTypeClass.NUMBER,
413
487
  ),
414
488
  maxValue=models.AssertionStdParameterClass(
415
- value=VolumeAssertionDefinition._format_number_value(max_val),
489
+ value=VolumeAssertionCriteria._format_number_value(max_val),
416
490
  type=models.AssertionStdParameterTypeClass.NUMBER,
417
491
  ),
418
492
  )
@@ -423,98 +497,11 @@ class VolumeAssertionDefinition:
423
497
  )
424
498
  return models.AssertionStdParametersClass(
425
499
  value=models.AssertionStdParameterClass(
426
- value=VolumeAssertionDefinition._format_number_value(parameters),
500
+ value=VolumeAssertionCriteria._format_number_value(parameters),
427
501
  type=models.AssertionStdParameterTypeClass.NUMBER,
428
502
  ),
429
503
  )
430
504
 
431
- @staticmethod
432
- def _extract_volume_parameters(
433
- assertion_urn: str,
434
- operator: VolumeAssertionOperator,
435
- parameters: models.AssertionStdParametersClass,
436
- ) -> VolumeAssertionDefinitionParameters:
437
- """Extract parameters from assertion based on operator type."""
438
- if operator.value == "BETWEEN":
439
- if parameters.minValue is None or parameters.maxValue is None:
440
- raise SDKNotYetSupportedError(
441
- f"Volume assertion {assertion_urn} has BETWEEN operator but missing min/max values"
442
- )
443
- return (float(parameters.minValue.value), float(parameters.maxValue.value))
444
- else:
445
- if parameters.value is None:
446
- raise SDKNotYetSupportedError(
447
- f"Volume assertion {assertion_urn} has {operator.value} operator but missing value"
448
- )
449
- return float(parameters.value.value)
450
-
451
- @staticmethod
452
- def _get_row_count_total(assertion: Assertion) -> RowCountTotal:
453
- """Extract RowCountTotal from assertion."""
454
- assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
455
- if assertion.info.rowCountTotal is None:
456
- raise SDKNotYetSupportedError(
457
- f"Volume assertion {assertion.urn} has ROW_COUNT_TOTAL type but no rowCountTotal, which is not supported"
458
- )
459
- row_count_total = assertion.info.rowCountTotal
460
- operator = VolumeAssertionOperator(row_count_total.operator)
461
- parameters = VolumeAssertionDefinition._extract_volume_parameters(
462
- str(assertion.urn), operator, row_count_total.parameters
463
- )
464
- return RowCountTotal(operator=operator, parameters=parameters)
465
-
466
- @staticmethod
467
- def _get_row_count_change(assertion: Assertion) -> RowCountChange:
468
- """Extract RowCountChange from assertion."""
469
- assert isinstance(assertion.info, models.VolumeAssertionInfoClass)
470
- if assertion.info.rowCountChange is None:
471
- raise SDKNotYetSupportedError(
472
- f"Volume assertion {assertion.urn} has ROW_COUNT_CHANGE type but no rowCountChange, which is not supported"
473
- )
474
- row_count_change = assertion.info.rowCountChange
475
- operator = VolumeAssertionOperator(row_count_change.operator)
476
- parameters = VolumeAssertionDefinition._extract_volume_parameters(
477
- str(assertion.urn), operator, row_count_change.parameters
478
- )
479
- kind: VolumeAssertionDefinitionChangeKind = (
480
- VolumeAssertionDefinitionChangeKind.ABSOLUTE
481
- if row_count_change.type == models.AssertionValueChangeTypeClass.ABSOLUTE
482
- else VolumeAssertionDefinitionChangeKind.PERCENTAGE
483
- )
484
- return RowCountChange(operator=operator, parameters=parameters, kind=kind)
485
-
486
- @staticmethod
487
- def from_assertion(assertion: Assertion) -> _VolumeAssertionDefinitionTypes:
488
- """Create a volume assertion definition from a DataHub assertion entity.
489
-
490
- Args:
491
- assertion: The DataHub assertion entity to extract the definition from.
492
-
493
- Returns:
494
- A volume assertion definition object (RowCountTotal or RowCountChange).
495
-
496
- Raises:
497
- SDKNotYetSupportedError: If the assertion is not a volume assertion or has
498
- unsupported configuration.
499
- """
500
- if assertion.info is None:
501
- raise SDKNotYetSupportedError(
502
- f"Assertion {assertion.urn} does not have a volume assertion info, which is not supported"
503
- )
504
- if not isinstance(assertion.info, models.VolumeAssertionInfoClass):
505
- raise SDKNotYetSupportedError(
506
- f"Assertion {assertion.urn} is not a volume assertion"
507
- )
508
-
509
- if assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_TOTAL:
510
- return VolumeAssertionDefinition._get_row_count_total(assertion)
511
- elif assertion.info.type == models.VolumeAssertionTypeClass.ROW_COUNT_CHANGE:
512
- return VolumeAssertionDefinition._get_row_count_change(assertion)
513
- else:
514
- raise SDKNotYetSupportedError(
515
- f"Volume assertion {assertion.urn} has unsupported type {assertion.info.type}"
516
- )
517
-
518
505
 
519
506
  class _VolumeAssertionInput(_AssertionInput):
520
507
  def __init__(
@@ -523,7 +510,7 @@ class _VolumeAssertionInput(_AssertionInput):
523
510
  # Required fields
524
511
  dataset_urn: Union[str, DatasetUrn],
525
512
  entity_client: EntityClient, # Needed to get the schema field spec for the detection mechanism if needed
526
- definition: VolumeAssertionDefinitionInputTypes,
513
+ criteria: VolumeAssertionCriteriaInputTypes,
527
514
  urn: Optional[Union[str, AssertionUrn]] = None,
528
515
  # Optional fields
529
516
  display_name: Optional[str] = None,
@@ -555,7 +542,7 @@ class _VolumeAssertionInput(_AssertionInput):
555
542
  updated_at=updated_at,
556
543
  )
557
544
 
558
- self.definition = VolumeAssertionDefinition.parse(definition)
545
+ self.criteria = VolumeAssertionCriteria.parse(criteria)
559
546
 
560
547
  def _assertion_type(self) -> str:
561
548
  return models.AssertionTypeClass.VOLUME
@@ -572,8 +559,8 @@ class _VolumeAssertionInput(_AssertionInput):
572
559
  Returns:
573
560
  A VolumeAssertionInfoClass configured for volume assertions.
574
561
  """
575
- return VolumeAssertionDefinition.build_model_volume_info(
576
- self.definition, str(self.dataset_urn), filter
562
+ return VolumeAssertionCriteria.build_model_volume_info(
563
+ self.criteria, str(self.dataset_urn), filter
577
564
  )
578
565
 
579
566
  def _create_monitor_info(
@@ -630,19 +617,21 @@ class _VolumeAssertionInput(_AssertionInput):
630
617
  self,
631
618
  ) -> tuple[str, Optional[FieldSpecType]]:
632
619
  """Convert the detection mechanism to source type and field."""
633
- default_source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
620
+ source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
621
+ field = None
634
622
 
635
623
  if self.detection_mechanism is None:
636
- return default_source_type, None
637
-
638
- # Convert detection mechanism to volume source type
639
- if isinstance(self.detection_mechanism, str):
640
- if self.detection_mechanism == "information_schema":
641
- return models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA, None
642
- elif self.detection_mechanism == "datahub_operation":
643
- return models.DatasetVolumeSourceTypeClass.OPERATION, None
644
- else:
645
- return default_source_type, None
624
+ return source_type, field
625
+
626
+ if isinstance(self.detection_mechanism, _Query):
627
+ source_type = models.DatasetVolumeSourceTypeClass.QUERY
628
+ elif isinstance(self.detection_mechanism, _InformationSchema):
629
+ source_type = models.DatasetVolumeSourceTypeClass.INFORMATION_SCHEMA
630
+ elif isinstance(self.detection_mechanism, _DatasetProfile):
631
+ source_type = models.DatasetVolumeSourceTypeClass.DATAHUB_DATASET_PROFILE
632
+ else:
633
+ raise SDKNotYetSupportedError(
634
+ f"Detection mechanism {self.detection_mechanism} not yet supported for volume assertions"
635
+ )
646
636
 
647
- # For more complex detection mechanisms, we might need additional logic
648
- return default_source_type, None
637
+ return source_type, field