acryl-datahub-cloud 0.3.12.1rc2__py3-none-any.whl → 0.3.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -1,6 +1,9 @@
1
1
  import json
2
2
  from datetime import datetime
3
- from typing import Optional, Union
3
+ from typing import TYPE_CHECKING, Optional, Union
4
+
5
+ if TYPE_CHECKING:
6
+ pass
4
7
 
5
8
  from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
6
9
  DEFAULT_EVERY_SIX_HOURS_SCHEDULE,
@@ -22,6 +25,19 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
22
25
  _HasSmartAssertionInputs,
23
26
  _try_parse_and_validate_schema_classes_enum,
24
27
  )
28
+ from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
29
+ ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION,
30
+ FIELD_METRIC_TYPE_CONFIG,
31
+ FIELD_VALUES_OPERATOR_CONFIG,
32
+ MetricInputType,
33
+ OperatorType,
34
+ RangeInputType,
35
+ RangeTypeInputType,
36
+ RangeTypeParsedType,
37
+ ValueInputType,
38
+ ValueType,
39
+ ValueTypeInputType,
40
+ )
25
41
  from acryl_datahub_cloud.sdk.entities.assertion import TagsInputType
26
42
  from acryl_datahub_cloud.sdk.errors import (
27
43
  SDKNotYetSupportedError,
@@ -32,129 +48,17 @@ from datahub.metadata import schema_classes as models
32
48
  from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
33
49
  from datahub.sdk.entity_client import EntityClient
34
50
 
35
- # Keep this in sync with the frontend in getEligibleFieldColumns
36
- # datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
37
- ALLOWED_COLUMN_TYPES_FOR_SMART_COLUMN_METRIC_ASSERTION = [
38
- models.StringTypeClass(),
39
- models.NumberTypeClass(),
40
- models.BooleanTypeClass(),
41
- models.DateTypeClass(),
42
- models.TimeTypeClass(),
43
- models.NullTypeClass(),
44
- ]
51
+ # Keep the smart-specific name for backward compatibility
52
+ ALLOWED_COLUMN_TYPES_FOR_SMART_COLUMN_METRIC_ASSERTION = (
53
+ ALLOWED_COLUMN_TYPES_FOR_COLUMN_METRIC_ASSERTION
54
+ )
45
55
 
46
- # Keep this in sync with FIELD_VALUES_OPERATOR_CONFIG in the frontend
47
- # datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
48
- FIELD_VALUES_OPERATOR_CONFIG = {
49
- "StringTypeClass": [
50
- models.AssertionStdOperatorClass.NULL,
51
- models.AssertionStdOperatorClass.NOT_NULL,
52
- models.AssertionStdOperatorClass.EQUAL_TO,
53
- models.AssertionStdOperatorClass.IN,
54
- models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
55
- models.AssertionStdOperatorClass.REGEX_MATCH,
56
- models.AssertionStdOperatorClass.GREATER_THAN,
57
- models.AssertionStdOperatorClass.LESS_THAN,
58
- models.AssertionStdOperatorClass.BETWEEN,
59
- ],
60
- "NumberTypeClass": [
61
- models.AssertionStdOperatorClass.GREATER_THAN,
62
- models.AssertionStdOperatorClass.LESS_THAN,
63
- models.AssertionStdOperatorClass.BETWEEN,
64
- models.AssertionStdOperatorClass.NULL,
65
- models.AssertionStdOperatorClass.NOT_NULL,
66
- models.AssertionStdOperatorClass.EQUAL_TO,
67
- models.AssertionStdOperatorClass.IN,
68
- models.AssertionStdOperatorClass.GREATER_THAN_OR_EQUAL_TO,
69
- models.AssertionStdOperatorClass.NOT_EQUAL_TO,
70
- ],
71
- "BooleanTypeClass": [
72
- models.AssertionStdOperatorClass.IS_TRUE,
73
- models.AssertionStdOperatorClass.IS_FALSE,
74
- models.AssertionStdOperatorClass.NULL,
75
- models.AssertionStdOperatorClass.NOT_NULL,
76
- ],
77
- "DateTypeClass": [
78
- models.AssertionStdOperatorClass.NULL,
79
- models.AssertionStdOperatorClass.NOT_NULL,
80
- ],
81
- "TimeTypeClass": [
82
- models.AssertionStdOperatorClass.NULL,
83
- models.AssertionStdOperatorClass.NOT_NULL,
84
- ],
85
- "NullTypeClass": [
86
- models.AssertionStdOperatorClass.NULL,
87
- models.AssertionStdOperatorClass.NOT_NULL,
88
- ],
89
- }
90
-
91
- # Keep this in sync with FIELD_METRIC_TYPE_CONFIG in the frontend
92
- # datahub-web-react/src/app/entityV2/shared/tabs/Dataset/Validations/assertion/builder/steps/field/utils.ts
93
- FIELD_METRIC_TYPE_CONFIG = {
94
- "StringTypeClass": [
95
- models.FieldMetricTypeClass.NULL_COUNT,
96
- models.FieldMetricTypeClass.NULL_PERCENTAGE,
97
- models.FieldMetricTypeClass.UNIQUE_COUNT,
98
- models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
99
- models.FieldMetricTypeClass.MAX_LENGTH,
100
- models.FieldMetricTypeClass.MIN_LENGTH,
101
- models.FieldMetricTypeClass.EMPTY_COUNT,
102
- models.FieldMetricTypeClass.EMPTY_PERCENTAGE,
103
- ],
104
- "NumberTypeClass": [
105
- models.FieldMetricTypeClass.NULL_COUNT,
106
- models.FieldMetricTypeClass.NULL_PERCENTAGE,
107
- models.FieldMetricTypeClass.UNIQUE_COUNT,
108
- models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
109
- models.FieldMetricTypeClass.MAX,
110
- models.FieldMetricTypeClass.MIN,
111
- models.FieldMetricTypeClass.MEAN,
112
- models.FieldMetricTypeClass.MEDIAN,
113
- models.FieldMetricTypeClass.STDDEV,
114
- models.FieldMetricTypeClass.NEGATIVE_COUNT,
115
- models.FieldMetricTypeClass.NEGATIVE_PERCENTAGE,
116
- models.FieldMetricTypeClass.ZERO_COUNT,
117
- models.FieldMetricTypeClass.ZERO_PERCENTAGE,
118
- ],
119
- "BooleanTypeClass": [
120
- models.FieldMetricTypeClass.NULL_COUNT,
121
- models.FieldMetricTypeClass.NULL_PERCENTAGE,
122
- models.FieldMetricTypeClass.UNIQUE_COUNT,
123
- models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
124
- ],
125
- "DateTypeClass": [
126
- models.FieldMetricTypeClass.NULL_COUNT,
127
- models.FieldMetricTypeClass.NULL_PERCENTAGE,
128
- models.FieldMetricTypeClass.UNIQUE_COUNT,
129
- models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
130
- ],
131
- "TimeTypeClass": [
132
- models.FieldMetricTypeClass.NULL_COUNT,
133
- models.FieldMetricTypeClass.NULL_PERCENTAGE,
134
- models.FieldMetricTypeClass.UNIQUE_COUNT,
135
- models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
136
- ],
137
- "NullTypeClass": [
138
- models.FieldMetricTypeClass.NULL_COUNT,
139
- models.FieldMetricTypeClass.NULL_PERCENTAGE,
140
- models.FieldMetricTypeClass.UNIQUE_COUNT,
141
- models.FieldMetricTypeClass.UNIQUE_PERCENTAGE,
142
- ],
143
- }
144
-
145
-
146
- MetricInputType = Union[models.FieldMetricTypeClass, str]
147
- ValueInputType = Union[str, int, float]
148
- ValueTypeInputType = Union[str, models.AssertionStdParameterTypeClass]
149
- RangeInputType = tuple[ValueInputType, ValueInputType]
150
- RangeTypeInputType = Union[
151
- str,
152
- tuple[str, str],
153
- ValueTypeInputType,
154
- tuple[ValueTypeInputType, ValueTypeInputType],
56
+ # New unified criteria parameters type
57
+ SmartColumnMetricAssertionParameters = Union[
58
+ None, # For operators that don't require parameters (NULL, NOT_NULL)
59
+ ValueInputType, # Single value
60
+ RangeInputType, # Range as tuple
155
61
  ]
156
- RangeTypeParsedType = tuple[ValueTypeInputType, ValueTypeInputType]
157
- OperatorInputType = Union[str, models.AssertionStdOperatorClass]
158
62
 
159
63
  DEFAULT_DETECTION_MECHANISM_SMART_COLUMN_METRIC_ASSERTION: _AllRowsQuery = (
160
64
  _AllRowsQuery()
@@ -166,7 +70,8 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
166
70
  Input used to create a smart column metric assertion.
167
71
 
168
72
  This assertion is used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
169
- min, max, median, and more. It uses AI to infer the assertion parameters.
73
+ min, max, median, and more. It uses AI to infer the assertion parameters. The operator is fixed to BETWEEN and criteria_parameters
74
+ are set to (0, 0) since the actual values will be inferred by AI.
170
75
 
171
76
  Example using the entity models, not comprehensive for all options:
172
77
 
@@ -187,11 +92,15 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
187
92
  nativeType="string", # The native type of the column
188
93
  ),
189
94
  metric=models.FieldMetricTypeClass.NULL_PERCENTAGE, # The metric to validate
190
- operator=models.AssertionStdOperatorClass.GREATER_THAN, # The operator to use
95
+ operator=models.AssertionStdOperatorClass.BETWEEN, # Fixed operator for smart assertions
191
96
  parameters=models.AssertionStdParametersClass(
192
- value=models.AssertionStdParameterClass(
193
- value=10, # The value to validate
194
- type=models.AssertionStdParameterTypeClass.NUMBER, # The type of the value
97
+ minValue=models.AssertionStdParameterClass(
98
+ value="0", # Fixed min value for smart assertions
99
+ type=models.AssertionStdParameterTypeClass.NUMBER,
100
+ ),
101
+ maxValue=models.AssertionStdParameterClass(
102
+ value="0", # Fixed max value for smart assertions
103
+ type=models.AssertionStdParameterTypeClass.NUMBER,
195
104
  ),
196
105
  ),
197
106
  ),
@@ -268,12 +177,6 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
268
177
  entity_client: EntityClient,
269
178
  column_name: str,
270
179
  metric_type: MetricInputType,
271
- operator: OperatorInputType,
272
- # Optional parameters
273
- value: Optional[ValueInputType] = None,
274
- value_type: Optional[ValueTypeInputType] = None,
275
- range: Optional[RangeInputType] = None,
276
- range_type: Optional[RangeTypeInputType] = None,
277
180
  urn: Optional[Union[str, AssertionUrn]] = None,
278
181
  display_name: Optional[str] = None,
279
182
  enabled: bool = True,
@@ -297,11 +200,6 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
297
200
  entity_client: The entity client.
298
201
  column_name: The name of the column to validate.
299
202
  metric_type: The metric type to validate.
300
- operator: The operator to use.
301
- value: The value to validate.
302
- value_type: The type of the value.
303
- range: The range to validate.
304
- range_type: The type of the range. If single value, we assume the same type for start and end.
305
203
  urn: The urn of the assertion.
306
204
  display_name: The display name of the assertion.
307
205
  enabled: Whether the assertion is enabled.
@@ -350,65 +248,253 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
350
248
  self.column_name = self._try_parse_and_validate_column_name_is_valid_type(
351
249
  column_name
352
250
  )
251
+
252
+ # Smart assertions use a fixed placeholder operator and criteria_parameters; the real values are inferred by AI
353
253
  self.operator = _try_parse_and_validate_schema_classes_enum(
354
- operator, models.AssertionStdOperatorClass
254
+ OperatorType.BETWEEN, models.AssertionStdOperatorClass
355
255
  )
356
256
 
357
- # Set type annotations for both raw input or parsed parameters
358
- self.value_type: Optional[ValueTypeInputType] = None
359
- self.value: Optional[ValueInputType] = None
360
- if _is_value_required_for_operator(self.operator):
361
- self.value_type = _try_parse_and_validate_value_type(value_type)
362
- self.value = _try_parse_and_validate_value(value, self.value_type)
257
+ # Initialize instance variables with fixed values for smart assertions
258
+ self.criteria_parameters: Optional[SmartColumnMetricAssertionParameters] = (
259
+ 0,
260
+ 0,
261
+ )
262
+ self.criteria_type: Optional[Union[ValueTypeInputType, RangeTypeInputType]] = (
263
+ ValueType.NUMBER,
264
+ ValueType.NUMBER,
265
+ )
266
+
267
+ # Validate compatibility:
268
+ # Skip operator validation for smart assertions since operator is a placeholder (AI inferred)
269
+ # Only validate metric type compatibility
270
+ self._validate_field_type_and_metric_type_compatibility(
271
+ self.column_name, self.metric_type
272
+ )
273
+
274
+ def _infer_criteria_type_from_parameters(
275
+ self,
276
+ criteria_parameters: Optional[SmartColumnMetricAssertionParameters],
277
+ ) -> Optional[Union[ValueTypeInputType, RangeTypeInputType]]:
278
+ """
279
+ Infer the criteria type from the parameters based on Python types.
280
+
281
+ Args:
282
+ criteria_parameters: The criteria parameters to infer type from.
283
+
284
+ Returns:
285
+ The inferred type(s) for the criteria parameters.
286
+ """
287
+ if criteria_parameters is None:
288
+ return None
289
+
290
+ if isinstance(criteria_parameters, tuple):
291
+ # Range parameters - infer type for each value
292
+ if len(criteria_parameters) != 2:
293
+ raise SDKUsageError(
294
+ "Range parameters must be a tuple of exactly 2 values"
295
+ )
296
+
297
+ type1 = self._infer_single_value_type(criteria_parameters[0])
298
+ type2 = self._infer_single_value_type(criteria_parameters[1])
299
+ return (type1, type2)
363
300
  else:
364
- # Set these to what was input for later validation, and skip parsing and validation
365
- self.value_type = value_type
366
- self.value = value
367
-
368
- # Set type annotations for both raw input or parsed parameters
369
- self.range_type: Optional[Union[RangeTypeInputType, RangeTypeParsedType]] = None
370
- self.range: Optional[RangeInputType] = None
371
- if _is_range_required_for_operator(self.operator):
372
- self.range_type = _try_parse_and_validate_range_type(range_type)
373
- self.range = _try_parse_and_validate_range(
374
- range, self.range_type, self.operator
375
- )
301
+ # Single value parameter
302
+ return self._infer_single_value_type(criteria_parameters)
303
+
304
+ def _infer_single_value_type(self, value: ValueInputType) -> ValueTypeInputType:
305
+ """
306
+ Infer the type of a single value based on its Python type.
307
+
308
+ Args:
309
+ value: The value to infer type from.
310
+
311
+ Returns:
312
+ The inferred ValueType.
313
+ """
314
+ if isinstance(value, (int, float)):
315
+ return ValueType.NUMBER
316
+ elif isinstance(value, str):
317
+ return ValueType.STRING
376
318
  else:
377
- # Set these to what was input for later validation, and skip parsing and validation
378
- self.range_type = range_type
379
- self.range = range
319
+ # Default fallback
320
+ return ValueType.UNKNOWN
380
321
 
381
- _validate_operator_and_input_parameters(
382
- operator=self.operator,
383
- value=self.value,
384
- value_type=_try_parse_and_validate_value_type(self.value_type)
385
- if self.value_type is not None
386
- else None,
387
- range=self.range,
388
- range_type=_try_parse_and_validate_range_type(self.range_type)
389
- if self.range_type is not None
390
- else None,
322
+ def _process_criteria_parameters_with_gms_type(
323
+ self,
324
+ criteria_parameters: Optional[SmartColumnMetricAssertionParameters],
325
+ gms_type_info: Optional[Union[models.AssertionStdParameterTypeClass, tuple]],
326
+ ) -> None:
327
+ """Process criteria_parameters using explicit type information from GMS."""
328
+ if criteria_parameters is None:
329
+ self._process_none_parameters()
330
+ elif isinstance(criteria_parameters, tuple):
331
+ # Range parameters with GMS types
332
+ if gms_type_info and isinstance(gms_type_info, tuple):
333
+ self._process_range_parameters_with_types(
334
+ criteria_parameters, gms_type_info
335
+ )
336
+ else:
337
+ self._process_range_parameters(criteria_parameters)
338
+ else:
339
+ # Single value with GMS type
340
+ if gms_type_info and not isinstance(gms_type_info, tuple):
341
+ self._process_single_value_parameters_with_type(
342
+ criteria_parameters, gms_type_info
343
+ )
344
+ else:
345
+ self._process_single_value_parameters(criteria_parameters)
346
+
347
+ def _process_criteria_parameters(
348
+ self,
349
+ criteria_parameters: Optional[SmartColumnMetricAssertionParameters],
350
+ ) -> None:
351
+ """Process the new consolidated criteria_parameters with automatic type inference."""
352
+ if criteria_parameters is None:
353
+ self._process_none_parameters()
354
+ elif isinstance(criteria_parameters, tuple):
355
+ self._process_range_parameters(criteria_parameters)
356
+ else:
357
+ self._process_single_value_parameters(criteria_parameters)
358
+
359
+ def _process_none_parameters(self) -> None:
360
+ """Process None criteria_parameters."""
361
+ # No parameters - validation is now handled at the client level
362
+ # This allows both creation and update scenarios to be handled appropriately
363
+ self.criteria_parameters = None
364
+ self.criteria_type = None
365
+
366
+ def _process_range_parameters(self, criteria_parameters: tuple) -> None:
367
+ """Process tuple criteria_parameters for range operators."""
368
+ # Range parameters
369
+ if not _is_range_required_for_operator(self.operator):
370
+ raise SDKUsageError(
371
+ f"Operator {self.operator} does not support range parameters. "
372
+ "Provide a single value instead of a tuple."
373
+ )
374
+
375
+ # Infer range type automatically
376
+ inferred_range_type = self._infer_criteria_type_from_parameters(
377
+ criteria_parameters
391
378
  )
392
379
 
393
- # Validate compatibility:
394
- self._validate_field_type_and_operator_compatibility(
395
- self.column_name, self.operator
380
+ # Validate and parse the range type
381
+ validated_range_type = _try_parse_and_validate_range_type(inferred_range_type)
382
+
383
+ # Validate and parse the range values
384
+ validated_range = _try_parse_and_validate_range(
385
+ criteria_parameters, validated_range_type, self.operator
396
386
  )
397
- self._validate_field_type_and_metric_type_compatibility(
398
- self.column_name, self.metric_type
387
+
388
+ # Store validated parameters
389
+ self.criteria_parameters = validated_range
390
+ self.criteria_type = validated_range_type
391
+
392
+ def _process_single_value_parameters(
393
+ self, criteria_parameters: Union[str, int, float]
394
+ ) -> None:
395
+ """Process single value criteria_parameters."""
396
+ # Single value parameters
397
+ if _is_no_parameter_operator(self.operator):
398
+ raise SDKUsageError(
399
+ f"Value parameters should not be provided for operator {self.operator}"
400
+ )
401
+ if not _is_value_required_for_operator(self.operator):
402
+ raise SDKUsageError(
403
+ f"Operator {self.operator} does not support value parameters. "
404
+ "Use criteria_parameters=None or omit criteria_parameters."
405
+ )
406
+
407
+ # Infer value type automatically
408
+ inferred_value_type = self._infer_criteria_type_from_parameters(
409
+ criteria_parameters
399
410
  )
400
- self._validate_operator_and_range_or_value_compatibility(
401
- self.operator,
402
- self.value,
403
- _try_parse_and_validate_value_type(self.value_type)
404
- if self.value_type is not None
405
- else None,
406
- self.range,
407
- _try_parse_and_validate_range_type(self.range_type)
408
- if self.range_type is not None
409
- else None,
411
+
412
+ # Validate value if required
413
+ if _is_value_required_for_operator(self.operator):
414
+ # Validate and parse the value type - make sure it's a single type, not a tuple
415
+ if isinstance(inferred_value_type, tuple):
416
+ raise SDKUsageError("Single value type expected, not a tuple type")
417
+
418
+ validated_value_type = _try_parse_and_validate_value_type(
419
+ inferred_value_type
420
+ )
421
+ validated_value = _try_parse_and_validate_value(
422
+ criteria_parameters, validated_value_type
423
+ )
424
+
425
+ # Store validated parameters
426
+ self.criteria_parameters = validated_value
427
+ self.criteria_type = validated_value_type
428
+ else:
429
+ # Store raw parameters for operators that don't require validation
430
+ self.criteria_parameters = criteria_parameters
431
+ self.criteria_type = inferred_value_type
432
+
433
+ def _process_single_value_parameters_with_type(
434
+ self,
435
+ criteria_parameters: Union[str, int, float],
436
+ gms_type: models.AssertionStdParameterTypeClass,
437
+ ) -> None:
438
+ """Process single value criteria_parameters using explicit GMS type information."""
439
+ # Single value parameters
440
+ if _is_no_parameter_operator(self.operator):
441
+ raise SDKUsageError(
442
+ f"Value parameters should not be provided for operator {self.operator}"
443
+ )
444
+ if not _is_value_required_for_operator(self.operator):
445
+ raise SDKUsageError(
446
+ f"Operator {self.operator} does not support value parameters. "
447
+ "Use criteria_parameters=None or omit criteria_parameters."
448
+ )
449
+
450
+ # Use GMS type instead of inferring
451
+ validated_value_type = _try_parse_and_validate_value_type(gms_type)
452
+ validated_value = _try_parse_and_validate_value(
453
+ criteria_parameters, validated_value_type
410
454
  )
411
455
 
456
+ # Store validated parameters
457
+ self.criteria_parameters = validated_value
458
+ self.criteria_type = validated_value_type
459
+
460
+ def _process_range_parameters_with_types(
461
+ self,
462
+ criteria_parameters: tuple,
463
+ gms_types: tuple,
464
+ ) -> None:
465
+ """Process range criteria_parameters using explicit GMS type information."""
466
+ # Range parameters with GMS types
467
+ if _is_no_parameter_operator(self.operator):
468
+ raise SDKUsageError(
469
+ f"Range parameters should not be provided for operator {self.operator}"
470
+ )
471
+ if not _is_range_required_for_operator(self.operator):
472
+ raise SDKUsageError(
473
+ f"Operator {self.operator} does not support range parameters. "
474
+ "Use a single value or criteria_parameters=None."
475
+ )
476
+
477
+ if len(criteria_parameters) != 2:
478
+ raise SDKUsageError("Range parameters must be a tuple of exactly 2 values")
479
+
480
+ min_value, max_value = criteria_parameters
481
+ min_type, max_type = gms_types
482
+
483
+ # Use GMS types instead of inferring
484
+ validated_min_type = _try_parse_and_validate_value_type(min_type)
485
+ validated_max_type = _try_parse_and_validate_value_type(max_type)
486
+
487
+ validated_min_value = _try_parse_and_validate_value(
488
+ min_value, validated_min_type
489
+ )
490
+ validated_max_value = _try_parse_and_validate_value(
491
+ max_value, validated_max_type
492
+ )
493
+
494
+ # Store validated parameters
495
+ self.criteria_parameters = (validated_min_value, validated_max_value)
496
+ self.criteria_type = (validated_min_type, validated_max_type)
497
+
412
498
  def _create_monitor_info(
413
499
  self,
414
500
  assertion_urn: AssertionUrn,
@@ -542,6 +628,11 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
542
628
  """
543
629
  source_type = models.DatasetFieldAssertionSourceTypeClass.ALL_ROWS_QUERY
544
630
  field = None
631
+ SUPPORTED_DETECTION_MECHANISMS = [
632
+ _AllRowsQuery().type,
633
+ _AllRowsQueryDataHubDatasetProfile().type,
634
+ _ChangedRowsQuery(column_name="").type,
635
+ ]
545
636
 
546
637
  if isinstance(self.detection_mechanism, _ChangedRowsQuery):
547
638
  source_type = models.DatasetFieldAssertionSourceTypeClass.CHANGED_ROWS_QUERY
@@ -564,101 +655,11 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
564
655
  # Note: This is only valid on the all rows query
565
656
  else:
566
657
  raise SDKNotYetSupportedError(
567
- f"Detection mechanism {self.detection_mechanism} is not supported"
658
+ f"Detection mechanism {self.detection_mechanism} is not supported for smart column metric assertions, please use a supported detection mechanism: {', '.join(SUPPORTED_DETECTION_MECHANISMS)}"
568
659
  )
569
660
 
570
661
  return source_type, field
571
662
 
572
- def _validate_single_value_operator(
573
- self,
574
- operator: models.AssertionStdOperatorClass,
575
- value: Optional[ValueInputType],
576
- value_type: Optional[models.AssertionStdParameterTypeClass],
577
- range: Optional[RangeInputType],
578
- range_type: Optional[RangeTypeParsedType],
579
- ) -> None:
580
- """Validate parameters for a single value operator."""
581
- if value is None:
582
- raise SDKUsageError(f"Value is required for operator {operator}")
583
- if value_type is None:
584
- raise SDKUsageError(f"Value type is required for operator {operator}")
585
- if range is not None or range_type is not None:
586
- raise SDKUsageError(
587
- f"Range parameters should not be provided for operator {operator}"
588
- )
589
-
590
- def _validate_range_operator(
591
- self,
592
- operator: models.AssertionStdOperatorClass,
593
- value: Optional[ValueInputType],
594
- value_type: Optional[models.AssertionStdParameterTypeClass],
595
- range: Optional[RangeInputType],
596
- range_type: Optional[RangeTypeParsedType],
597
- ) -> None:
598
- """Validate parameters for a range operator."""
599
- if range is None:
600
- raise SDKUsageError(f"Range is required for operator {operator}")
601
- if range_type is None:
602
- raise SDKUsageError(f"Range type is required for operator {operator}")
603
- if value is not None or value_type is not None:
604
- raise SDKUsageError(
605
- f"Value parameters should not be provided for operator {operator}"
606
- )
607
-
608
- def _validate_no_parameter_operator(
609
- self,
610
- operator: models.AssertionStdOperatorClass,
611
- value: Optional[ValueInputType],
612
- value_type: Optional[models.AssertionStdParameterTypeClass],
613
- range: Optional[RangeInputType],
614
- range_type: Optional[RangeTypeParsedType],
615
- ) -> None:
616
- """Validate parameters for a no-parameter operator."""
617
- if value is not None or value_type is not None:
618
- raise SDKUsageError(
619
- f"Value parameters should not be provided for operator {operator}"
620
- )
621
- if range is not None or range_type is not None:
622
- raise SDKUsageError(
623
- f"Range parameters should not be provided for operator {operator}"
624
- )
625
-
626
- def _validate_operator_and_range_or_value_compatibility(
627
- self,
628
- operator: models.AssertionStdOperatorClass,
629
- value: Optional[ValueInputType] = None,
630
- value_type: Optional[models.AssertionStdParameterTypeClass] = None,
631
- range: Optional[RangeInputType] = None,
632
- range_type: Optional[RangeTypeParsedType] = None,
633
- ) -> None:
634
- """
635
- Validate that the operator has the appropriate parameters (range or value) based on its type.
636
-
637
- Args:
638
- operator: The operator to validate.
639
- value: Optional value parameter.
640
- value_type: Optional value type parameter.
641
- range: Optional range parameter.
642
- range_type: Optional range type parameter.
643
-
644
- Raises:
645
- SDKUsageError: If the operator parameters are not compatible with the operator type.
646
- """
647
- if operator in SINGLE_VALUE_OPERATORS:
648
- self._validate_single_value_operator(
649
- operator, value, value_type, range, range_type
650
- )
651
- elif operator in RANGE_OPERATORS:
652
- self._validate_range_operator(
653
- operator, value, value_type, range, range_type
654
- )
655
- elif operator in NO_PARAMETER_OPERATORS:
656
- self._validate_no_parameter_operator(
657
- operator, value, value_type, range, range_type
658
- )
659
- else:
660
- raise SDKUsageError(f"Unsupported operator type: {operator}")
661
-
662
663
  def _create_assertion_parameters(self) -> models.AssertionStdParametersClass:
663
664
  """
664
665
  Create assertion parameters based on the operator type and provided values.
@@ -670,35 +671,39 @@ class _SmartColumnMetricAssertionInput(_AssertionInput, _HasSmartAssertionInputs
670
671
  SDKUsageError: If the parameters are invalid for the operator type.
671
672
  """
672
673
  if self.operator in SINGLE_VALUE_OPERATORS:
673
- if self.value is None:
674
- raise SDKUsageError(f"Value is required for operator {self.operator}")
675
- if self.value_type is None:
674
+ if self.criteria_parameters is None or isinstance(
675
+ self.criteria_parameters, tuple
676
+ ):
676
677
  raise SDKUsageError(
677
- f"Value type is required for operator {self.operator}"
678
+ f"Single value is required for operator {self.operator}"
679
+ )
680
+ if self.criteria_type is None or isinstance(self.criteria_type, tuple):
681
+ raise SDKUsageError(
682
+ f"Single value type is required for operator {self.operator}"
678
683
  )
679
684
  return models.AssertionStdParametersClass(
680
685
  value=models.AssertionStdParameterClass(
681
- value=str(self.value),
682
- type=self.value_type,
686
+ value=str(self.criteria_parameters),
687
+ type=self.criteria_type,
683
688
  ),
684
689
  )
685
690
  elif self.operator in RANGE_OPERATORS:
686
- if self.range is None:
687
- raise SDKUsageError(f"Range is required for operator {self.operator}")
688
- if self.range_type is None:
691
+ if not isinstance(self.criteria_parameters, tuple):
692
+ raise SDKUsageError(
693
+ f"Range parameters are required for operator {self.operator}"
694
+ )
695
+ if not isinstance(self.criteria_type, tuple):
689
696
  raise SDKUsageError(
690
697
  f"Range type is required for operator {self.operator}"
691
698
  )
692
- # Ensure we have the parsed range type
693
- parsed_range_type = _try_parse_and_validate_range_type(self.range_type)
694
699
  return models.AssertionStdParametersClass(
695
700
  minValue=models.AssertionStdParameterClass(
696
- value=str(self.range[0]),
697
- type=parsed_range_type[0],
701
+ value=str(self.criteria_parameters[0]),
702
+ type=self.criteria_type[0],
698
703
  ),
699
704
  maxValue=models.AssertionStdParameterClass(
700
- value=str(self.range[1]),
701
- type=parsed_range_type[1],
705
+ value=str(self.criteria_parameters[1]),
706
+ type=self.criteria_type[1],
702
707
  ),
703
708
  )
704
709
  elif self.operator in NO_PARAMETER_OPERATORS:
@@ -791,42 +796,158 @@ def _try_parse_and_validate_value_type(
791
796
  )
792
797
 
793
798
 
794
- def _try_parse_and_validate_value(
795
- value: Optional[ValueInputType],
796
- value_type: ValueTypeInputType,
797
- ) -> ValueInputType:
798
- if value is None:
799
- raise SDKUsageError("Value parameter is required for the chosen operator")
800
- # Accept both Python types and JSON strings
799
+ def _deserialize_json_value(value: ValueInputType) -> ValueInputType:
800
+ """
801
+ Deserialize a value that might be a JSON string.
802
+
803
+ Args:
804
+ value: The value to deserialize, potentially a JSON string.
805
+
806
+ Returns:
807
+ The deserialized value or the original value if not JSON.
808
+ """
801
809
  if isinstance(value, str):
802
- # Try to parse as JSON, but if it fails, treat as a raw string
803
810
  try:
804
- deserialized_value = json.loads(value)
811
+ return json.loads(value)
805
812
  except json.JSONDecodeError:
806
- deserialized_value = value
807
- else:
808
- deserialized_value = value
809
- # Validate that the value is of the correct type
810
- if value_type == models.AssertionStdParameterTypeClass.NUMBER:
811
- if not isinstance(deserialized_value, (int, float)):
812
- raise SDKUsageError(f"Invalid value: {value}, must be a number")
813
- elif value_type == models.AssertionStdParameterTypeClass.STRING:
814
- if not isinstance(deserialized_value, str):
815
- raise SDKUsageError(f"Invalid value: {value}, must be a string")
816
- elif (
817
- value_type == models.AssertionStdParameterTypeClass.LIST
818
- or value_type == models.AssertionStdParameterTypeClass.SET
813
+ return value
814
+ return value
815
+
816
+
817
+ def _convert_string_to_number(value: str) -> Union[int, float]:
818
+ """
819
+ Convert a string to a number (int or float).
820
+
821
+ Args:
822
+ value: The string value to convert.
823
+
824
+ Returns:
825
+ The converted number.
826
+
827
+ Raises:
828
+ ValueError: If the string cannot be converted to a number.
829
+ """
830
+ if "." in value:
831
+ return float(value)
832
+ return int(value)
833
+
834
+
835
+ def _validate_number_type(
836
+ value: ValueInputType, original_value: ValueInputType
837
+ ) -> ValueInputType:
838
+ """
839
+ Validate and convert a value to a number type.
840
+
841
+ Args:
842
+ value: The deserialized value to validate.
843
+ original_value: The original input value for error messages.
844
+
845
+ Returns:
846
+ The validated number value.
847
+
848
+ Raises:
849
+ SDKUsageError: If the value cannot be converted to a number.
850
+ """
851
+ if isinstance(value, (int, float)):
852
+ return value
853
+
854
+ if isinstance(value, str):
855
+ try:
856
+ return _convert_string_to_number(value)
857
+ except ValueError as e:
858
+ raise SDKUsageError(
859
+ f"Invalid value: {original_value}, must be a number"
860
+ ) from e
861
+
862
+ raise SDKUsageError(f"Invalid value: {original_value}, must be a number")
863
+
864
+
865
+ def _validate_string_type(
866
+ value: ValueInputType, original_value: ValueInputType
867
+ ) -> ValueInputType:
868
+ """
869
+ Validate that a value is a string type.
870
+
871
+ Args:
872
+ value: The deserialized value to validate.
873
+ original_value: The original input value for error messages.
874
+
875
+ Returns:
876
+ The validated string value.
877
+
878
+ Raises:
879
+ SDKUsageError: If the value is not a string.
880
+ """
881
+ if not isinstance(value, str):
882
+ raise SDKUsageError(f"Invalid value: {original_value}, must be a string")
883
+ return value
884
+
885
+
886
+ def _validate_unsupported_types(value_type: ValueTypeInputType) -> None:
887
+ """
888
+ Check for unsupported value types and raise appropriate errors.
889
+
890
+ Args:
891
+ value_type: The value type to check.
892
+
893
+ Raises:
894
+ SDKNotYetSupportedError: If the value type is LIST or SET.
895
+ SDKUsageError: If the value type is invalid.
896
+ """
897
+ if value_type in (
898
+ models.AssertionStdParameterTypeClass.LIST,
899
+ models.AssertionStdParameterTypeClass.SET,
819
900
  ):
820
901
  raise SDKNotYetSupportedError(
821
902
  "List and set value types are not supported for smart column metric assertions"
822
903
  )
823
- elif value_type == models.AssertionStdParameterTypeClass.UNKNOWN:
824
- pass # TODO: What to do with unknown?
825
- else:
904
+
905
+ valid_types = {
906
+ models.AssertionStdParameterTypeClass.NUMBER,
907
+ models.AssertionStdParameterTypeClass.STRING,
908
+ models.AssertionStdParameterTypeClass.UNKNOWN,
909
+ }
910
+
911
+ if value_type not in valid_types:
826
912
  raise SDKUsageError(
827
913
  f"Invalid value type: {value_type}, valid options are {get_enum_options(models.AssertionStdParameterTypeClass)}"
828
914
  )
829
- return deserialized_value
915
+
916
+
917
+ def _try_parse_and_validate_value(
918
+ value: Optional[ValueInputType],
919
+ value_type: ValueTypeInputType,
920
+ ) -> ValueInputType:
921
+ """
922
+ Parse and validate a value according to its expected type.
923
+
924
+ Args:
925
+ value: The value to parse and validate.
926
+ value_type: The expected type of the value.
927
+
928
+ Returns:
929
+ The validated and potentially converted value.
930
+
931
+ Raises:
932
+ SDKUsageError: If the value is None, invalid, or cannot be converted.
933
+ SDKNotYetSupportedError: If the value type is not supported.
934
+ """
935
+ if value is None:
936
+ raise SDKUsageError("Value parameter is required for the chosen operator")
937
+
938
+ # Deserialize JSON strings if applicable
939
+ deserialized_value = _deserialize_json_value(value)
940
+
941
+ # Validate based on expected type
942
+ if value_type == models.AssertionStdParameterTypeClass.NUMBER:
943
+ return _validate_number_type(deserialized_value, value)
944
+ elif value_type == models.AssertionStdParameterTypeClass.STRING:
945
+ return _validate_string_type(deserialized_value, value)
946
+ elif value_type == models.AssertionStdParameterTypeClass.UNKNOWN:
947
+ return deserialized_value # Accept any type for unknown
948
+ else:
949
+ _validate_unsupported_types(value_type)
950
+ return deserialized_value
830
951
 
831
952
 
832
953
  def _is_range_required_for_operator(operator: models.AssertionStdOperatorClass) -> bool:
@@ -841,36 +962,6 @@ def _is_no_parameter_operator(operator: models.AssertionStdOperatorClass) -> boo
841
962
  return operator in NO_PARAMETER_OPERATORS
842
963
 
843
964
 
844
- def _validate_operator_and_input_parameters(
845
- operator: models.AssertionStdOperatorClass,
846
- value: Optional[ValueInputType] = None,
847
- value_type: Optional[models.AssertionStdParameterTypeClass] = None,
848
- range: Optional[RangeInputType] = None,
849
- range_type: Optional[RangeTypeParsedType] = None,
850
- ) -> None:
851
- if _is_value_required_for_operator(operator):
852
- if value is None:
853
- raise SDKUsageError(f"Value is required for operator {operator}")
854
- if value_type is None:
855
- raise SDKUsageError(f"Value type is required for operator {operator}")
856
- elif _is_range_required_for_operator(operator):
857
- if range is None:
858
- raise SDKUsageError(f"Range is required for operator {operator}")
859
- if range_type is None:
860
- raise SDKUsageError(f"Range type is required for operator {operator}")
861
- elif _is_no_parameter_operator(operator):
862
- if value is not None or value_type is not None:
863
- raise SDKUsageError(
864
- f"Value parameters should not be provided for operator {operator}"
865
- )
866
- if range is not None or range_type is not None:
867
- raise SDKUsageError(
868
- f"Range parameters should not be provided for operator {operator}"
869
- )
870
- else:
871
- raise SDKUsageError(f"Unsupported operator type: {operator}")
872
-
873
-
874
965
  def _try_parse_and_validate_range_type(
875
966
  range_type: Optional[RangeTypeInputType] = None,
876
967
  ) -> RangeTypeParsedType: