acryl-datahub-cloud 0.3.12.1rc3__py3-none-any.whl → 0.3.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import logging
4
4
  from datetime import datetime, timezone
5
- from typing import TYPE_CHECKING, Any, Optional, Union
5
+ from typing import TYPE_CHECKING, Any, Optional, TypedDict, Union
6
6
 
7
7
  from acryl_datahub_cloud.sdk.assertion.assertion_base import (
8
8
  AssertionMode,
@@ -12,6 +12,10 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
12
12
  SqlAssertion,
13
13
  VolumeAssertion,
14
14
  _AssertionPublic,
15
+ _HasColumnMetricFunctionality,
16
+ )
17
+ from acryl_datahub_cloud.sdk.assertion.column_metric_assertion import (
18
+ ColumnMetricAssertion,
15
19
  )
16
20
  from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
17
21
  SmartColumnMetricAssertion,
@@ -26,35 +30,31 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
26
30
  _SmartFreshnessAssertionInput,
27
31
  _SmartVolumeAssertionInput,
28
32
  )
33
+ from acryl_datahub_cloud.sdk.assertion_input.column_metric_assertion_input import (
34
+ ColumnMetricAssertionParameters,
35
+ _ColumnMetricAssertionInput,
36
+ )
37
+ from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
38
+ MetricInputType,
39
+ OperatorInputType,
40
+ )
29
41
  from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
42
+ FreshnessAssertionScheduleCheckType,
30
43
  _FreshnessAssertionInput,
31
44
  )
32
45
  from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
33
- MetricInputType,
34
- OperatorInputType,
35
- RangeInputType,
36
- RangeTypeInputType,
37
- ValueInputType,
38
- ValueTypeInputType,
39
46
  _SmartColumnMetricAssertionInput,
40
47
  )
41
48
  from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
42
- SqlAssertionChangeType,
49
+ SqlAssertionCondition,
43
50
  SqlAssertionCriteria,
44
- SqlAssertionOperator,
45
- SqlAssertionType,
46
51
  _SqlAssertionInput,
47
52
  )
48
53
  from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
49
- RowCountChange,
50
- RowCountTotal,
51
- VolumeAssertionDefinition,
52
- VolumeAssertionDefinitionChangeKind,
53
- VolumeAssertionDefinitionInputTypes,
54
+ VolumeAssertionCondition,
55
+ VolumeAssertionCriteria,
56
+ VolumeAssertionCriteriaInputTypes,
54
57
  VolumeAssertionDefinitionParameters,
55
- VolumeAssertionDefinitionType,
56
- VolumeAssertionOperator,
57
- _VolumeAssertionDefinitionTypes,
58
58
  _VolumeAssertionInput,
59
59
  )
60
60
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
@@ -73,11 +73,58 @@ logger = logging.getLogger(__name__)
73
73
  DEFAULT_CREATED_BY = CorpUserUrn.from_string("urn:li:corpuser:__datahub_system")
74
74
 
75
75
 
76
+ class _AssertionLookupInfo(TypedDict):
77
+ """Minimal info needed to look up an assertion and monitor."""
78
+
79
+ dataset_urn: Union[str, DatasetUrn]
80
+ urn: Union[str, AssertionUrn]
81
+
82
+
76
83
  class AssertionsClient:
77
84
  def __init__(self, client: "DataHubClient"):
78
85
  self.client = client
79
86
  _print_experimental_warning()
80
87
 
88
+ def _validate_required_field(
89
+ self, field_value: Optional[Any], field_name: str, context: str
90
+ ) -> None:
91
+ """Validate that a required field is not None and raise SDKUsageError if it is."""
92
+ if field_value is None:
93
+ raise SDKUsageError(f"{field_name} is required {context}")
94
+
95
+ def _validate_required_smart_column_fields_for_creation(
96
+ self,
97
+ column_name: Optional[str],
98
+ metric_type: Optional[MetricInputType],
99
+ ) -> None:
100
+ """Validate required fields for smart column metric assertion creation."""
101
+ self._validate_required_field(
102
+ column_name, "column_name", "when creating a new assertion (urn is None)"
103
+ )
104
+ self._validate_required_field(
105
+ metric_type, "metric_type", "when creating a new assertion (urn is None)"
106
+ )
107
+
108
+ def _validate_required_smart_column_fields_for_update(
109
+ self,
110
+ column_name: Optional[str],
111
+ metric_type: Optional[MetricInputType],
112
+ assertion_urn: Union[str, AssertionUrn],
113
+ ) -> None:
114
+ """Validate required fields after attempting to fetch from existing assertion."""
115
+ context = f"and not found in existing assertion {assertion_urn}. The existing assertion may be invalid or corrupted."
116
+ self._validate_required_field(column_name, "column_name", context)
117
+ self._validate_required_field(metric_type, "metric_type", context)
118
+
119
+ def _validate_criteria_parameters_for_creation(
120
+ self,
121
+ urn: Optional[Union[str, AssertionUrn]],
122
+ ) -> None:
123
+ """Validate criteria_parameters for creation scenario."""
124
+ # Smart assertions always use BETWEEN operator with (0, 0) criteria_parameters
125
+ # No validation needed since these values are fixed
126
+ pass
127
+
81
128
  def sync_smart_freshness_assertion(
82
129
  self,
83
130
  *,
@@ -119,7 +166,11 @@ class AssertionsClient:
119
166
  - {"type": "last_modified_column", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
120
167
  - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
121
168
  sensitivity (Optional[Union[str, InferenceSensitivity]]): The sensitivity to be applied to the assertion. Valid values are: "low", "medium", "high".
122
- exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported.
169
+ exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported. Valid values are:
170
+ - {"start": "2025-01-01T00:00:00", "end": "2025-01-02T00:00:00"} (using ISO strings)
171
+ - {"start": datetime(2025, 1, 1, 0, 0, 0), "end": datetime(2025, 1, 2, 0, 0, 0)} (using datetime objects)
172
+ - FixedRangeExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0), end=datetime(2025, 1, 2, 0, 0, 0)) (using typed object)
173
+ - A list of any of the above formats
123
174
  training_data_lookback_days (Optional[int]): The training data lookback days to be applied to the assertion as an integer.
124
175
  incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass" or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
125
176
  tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
@@ -482,8 +533,8 @@ class AssertionsClient:
482
533
  updated_by: Optional[Union[str, CorpUserUrn]],
483
534
  now_utc: datetime,
484
535
  schedule: Optional[Union[str, models.CronScheduleClass]],
485
- definition: VolumeAssertionDefinitionInputTypes,
486
- use_backend_definition: bool = False,
536
+ criteria: VolumeAssertionCriteriaInputTypes,
537
+ use_backend_criteria: bool = False,
487
538
  ) -> Union[VolumeAssertion, _VolumeAssertionInput]:
488
539
  # 1. Retrieve any existing assertion and monitor entities:
489
540
  maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
@@ -506,7 +557,7 @@ class AssertionsClient:
506
557
  )
507
558
  # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
508
559
  elif not maybe_assertion_entity:
509
- if use_backend_definition:
560
+ if use_backend_criteria:
510
561
  raise SDKUsageError(
511
562
  f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
512
563
  )
@@ -514,8 +565,7 @@ class AssertionsClient:
514
565
  f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
515
566
  )
516
567
  # Extract criteria from definition to call the new signature
517
- parsed_definition = VolumeAssertionDefinition.parse(definition)
518
- assert isinstance(parsed_definition, (RowCountTotal, RowCountChange))
568
+ parsed_criteria = VolumeAssertionCriteria.parse(criteria)
519
569
  return self._create_volume_assertion(
520
570
  dataset_urn=dataset_urn,
521
571
  display_name=display_name,
@@ -524,12 +574,8 @@ class AssertionsClient:
524
574
  tags=tags,
525
575
  created_by=updated_by,
526
576
  schedule=schedule,
527
- criteria_type=parsed_definition.type,
528
- criteria_change_type=parsed_definition.kind
529
- if isinstance(parsed_definition, RowCountChange)
530
- else None,
531
- criteria_operator=parsed_definition.operator,
532
- criteria_parameters=parsed_definition.parameters,
577
+ criteria_condition=parsed_criteria.condition,
578
+ criteria_parameters=parsed_criteria.parameters,
533
579
  )
534
580
 
535
581
  # 3. Check for any issues e.g. different dataset urns
@@ -542,25 +588,25 @@ class AssertionsClient:
542
588
  f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
543
589
  )
544
590
 
545
- # 4. Handle definition: use backend definition if flag is set and backend has one
546
- if use_backend_definition:
591
+ # 4. Handle criteria: use backend criteria if flag is set and backend has one
592
+ if use_backend_criteria:
547
593
  if maybe_assertion_entity is not None:
548
- # Use definition from backend
549
- backend_definition = VolumeAssertionDefinition.from_assertion(
594
+ # Use criteria from backend
595
+ backend_criteria = VolumeAssertionCriteria.from_assertion(
550
596
  maybe_assertion_entity
551
597
  )
552
- # Update the assertion_input with the real definition from backend
553
- assertion_input.definition = backend_definition
554
- effective_definition = backend_definition
555
- logger.info("Using definition from backend assertion")
598
+ # Update the assertion_input with the real criteria from backend
599
+ assertion_input.criteria = backend_criteria
600
+ effective_criteria = backend_criteria
601
+ logger.info("Using criteria from backend assertion")
556
602
  else:
557
- # No backend assertion and no user-provided definition - this is an error
603
+ # No backend assertion and no user-provided criteria - this is an error
558
604
  raise SDKUsageError(
559
- f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
605
+ f"Cannot sync assertion {urn}: no existing criteria found in backend and no criteria provided in request"
560
606
  )
561
607
  else:
562
- # Use the already-parsed definition from assertion_input
563
- effective_definition = assertion_input.definition
608
+ # Use the already-parsed criteria from assertion_input
609
+ effective_criteria = assertion_input.criteria
564
610
 
565
611
  # 5. Merge the existing assertion with the validated input:
566
612
  merged_assertion_input = self._merge_volume_input(
@@ -577,7 +623,7 @@ class AssertionsClient:
577
623
  maybe_monitor_entity=maybe_monitor_entity,
578
624
  existing_assertion=existing_assertion,
579
625
  schedule=schedule,
580
- definition=effective_definition,
626
+ criteria=effective_criteria,
581
627
  )
582
628
 
583
629
  return merged_assertion_input
@@ -624,9 +670,7 @@ class AssertionsClient:
624
670
  return self._create_sql_assertion(
625
671
  dataset_urn=dataset_urn,
626
672
  display_name=display_name,
627
- criteria_type=criteria.type,
628
- criteria_change_type=criteria.change_type,
629
- criteria_operator=criteria.operator,
673
+ criteria_condition=criteria.condition,
630
674
  criteria_parameters=criteria.parameters,
631
675
  statement=statement,
632
676
  incident_behavior=incident_behavior,
@@ -666,22 +710,40 @@ class AssertionsClient:
666
710
 
667
711
  def _retrieve_assertion_and_monitor(
668
712
  self,
669
- assertion_input: _AssertionInput,
713
+ assertion_input: Union[_AssertionInput, _AssertionLookupInfo],
670
714
  ) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
671
715
  """Retrieve the assertion and monitor entities from the DataHub instance.
672
716
 
673
717
  Args:
674
- assertion_input: The validated input to the function.
718
+ assertion_input: The validated input to the function or minimal lookup info.
675
719
 
676
720
  Returns:
677
721
  The assertion and monitor entities.
678
722
  """
679
- assert assertion_input.urn is not None, "URN is required"
723
+ # Extract URN and dataset URN from input
724
+ _urn: Union[str, AssertionUrn]
725
+ _dataset_urn: Union[str, DatasetUrn]
726
+ if isinstance(assertion_input, dict):
727
+ _urn = assertion_input["urn"]
728
+ _dataset_urn = assertion_input["dataset_urn"]
729
+ else:
730
+ assert assertion_input.urn is not None, "URN is required"
731
+ _urn = assertion_input.urn
732
+ _dataset_urn = assertion_input.dataset_urn
733
+
734
+ urn: AssertionUrn = (
735
+ _urn if isinstance(_urn, AssertionUrn) else AssertionUrn.from_string(_urn)
736
+ )
737
+ dataset_urn: DatasetUrn = (
738
+ _dataset_urn
739
+ if isinstance(_dataset_urn, DatasetUrn)
740
+ else DatasetUrn.from_string(_dataset_urn)
741
+ )
680
742
 
681
743
  # Get assertion entity
682
744
  maybe_assertion_entity: Optional[Assertion] = None
683
745
  try:
684
- entity = self.client.entities.get(assertion_input.urn)
746
+ entity = self.client.entities.get(urn)
685
747
  if entity is not None:
686
748
  assert isinstance(entity, Assertion)
687
749
  maybe_assertion_entity = entity
@@ -689,9 +751,7 @@ class AssertionsClient:
689
751
  pass
690
752
 
691
753
  # Get monitor entity
692
- monitor_urn = Monitor._ensure_id(
693
- id=(assertion_input.dataset_urn, assertion_input.urn)
694
- )
754
+ monitor_urn = Monitor._ensure_id(id=(dataset_urn, urn))
695
755
  maybe_monitor_entity: Optional[Monitor] = None
696
756
  try:
697
757
  entity = self.client.entities.get(monitor_urn)
@@ -967,7 +1027,7 @@ class AssertionsClient:
967
1027
  maybe_monitor_entity: Optional[Monitor],
968
1028
  existing_assertion: VolumeAssertion,
969
1029
  schedule: Optional[Union[str, models.CronScheduleClass]],
970
- definition: Optional[_VolumeAssertionDefinitionTypes],
1030
+ criteria: Optional[VolumeAssertionCriteria],
971
1031
  ) -> _VolumeAssertionInput:
972
1032
  """Merge the input with the existing assertion and monitor entities.
973
1033
 
@@ -1044,12 +1104,12 @@ class AssertionsClient:
1044
1104
  existing_assertion,
1045
1105
  maybe_assertion_entity.tags if maybe_assertion_entity else None,
1046
1106
  ),
1047
- definition=_merge_field(
1048
- definition,
1049
- "definition",
1107
+ criteria=_merge_field(
1108
+ criteria,
1109
+ "criteria",
1050
1110
  assertion_input,
1051
1111
  existing_assertion,
1052
- existing_assertion.definition if existing_assertion else None,
1112
+ existing_assertion.criteria if existing_assertion else None,
1053
1113
  ),
1054
1114
  created_by=existing_assertion.created_by
1055
1115
  or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
@@ -1644,11 +1704,7 @@ class AssertionsClient:
1644
1704
  tags: Optional[TagsInputType] = None,
1645
1705
  created_by: Optional[Union[str, CorpUserUrn]] = None,
1646
1706
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1647
- criteria_type: Union[str, VolumeAssertionDefinitionType],
1648
- criteria_change_type: Optional[
1649
- Union[str, VolumeAssertionDefinitionChangeKind]
1650
- ] = None,
1651
- criteria_operator: Union[str, VolumeAssertionOperator],
1707
+ criteria_condition: Union[str, VolumeAssertionCondition],
1652
1708
  criteria_parameters: VolumeAssertionDefinitionParameters,
1653
1709
  ) -> VolumeAssertion:
1654
1710
  """Create a volume assertion.
@@ -1691,23 +1747,19 @@ class AssertionsClient:
1691
1747
  The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1692
1748
  Alternatively, a models.CronScheduleClass object can be provided with string parameters
1693
1749
  cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1694
- criteria_type: The type of volume assertion. Must be either VolumeAssertionDefinitionType.ROW_COUNT_TOTAL or VolumeAssertionDefinitionType.ROW_COUNT_CHANGE.
1695
- Raw string values are also accepted: "ROW_COUNT_TOTAL" or "ROW_COUNT_CHANGE".
1696
- criteria_change_type: Required when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. Must be either VolumeAssertionDefinitionChangeKind.ABSOLUTE
1697
- or VolumeAssertionDefinitionChangeKind.PERCENT. Optional (ignored) when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_TOTAL.
1698
- Raw string values are also accepted: "ABSOLUTE" or "PERCENTAGE".
1699
- criteria_operator: The comparison operator for the assertion. Must be a VolumeAssertionOperator value:
1700
- - VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO
1701
- - VolumeAssertionOperator.LESS_THAN_OR_EQUAL_TO
1702
- - VolumeAssertionOperator.BETWEEN
1703
- Raw string values are also accepted: "GREATER_THAN_OR_EQUAL_TO", "LESS_THAN_OR_EQUAL_TO", "BETWEEN".
1704
- criteria_parameters: The parameters for the assertion. For single-value operators
1705
- (GREATER_THAN_OR_EQUAL_TO, LESS_THAN_OR_EQUAL_TO), provide a single number.
1706
- For BETWEEN operator, provide a tuple of two numbers (min_value, max_value).
1707
-
1708
- Examples:
1709
- - For single value: 100 or 50.5
1710
- - For BETWEEN: (10, 100) or (5.0, 15.5)
1750
+ criteria_condition: The condition for the volume assertion. Valid values are:
1751
+ - "ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO" -> The row count is less than or equal to the threshold.
1752
+ - "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO" -> The row count is greater than or equal to the threshold.
1753
+ - "ROW_COUNT_IS_WITHIN_A_RANGE" -> The row count is within the specified range.
1754
+ - "ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE" -> The row count growth is at most the threshold (absolute change).
1755
+ - "ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE" -> The row count growth is at least the threshold (absolute change).
1756
+ - "ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The row count growth is within the specified range (absolute change).
1757
+ - "ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE" -> The row count growth is at most the threshold (percentage change).
1758
+ - "ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE" -> The row count growth is at least the threshold (percentage change).
1759
+ - "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The row count growth is within the specified range (percentage change).
1760
+ criteria_parameters: The threshold parameters to be used for the assertion. This can be a single threshold value or a tuple range.
1761
+ - If the condition is range-based (ROW_COUNT_IS_WITHIN_A_RANGE, ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE, ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
1762
+ - For other conditions, the value is a single numeric threshold value.
1711
1763
 
1712
1764
  Returns:
1713
1765
  VolumeAssertion: The created assertion.
@@ -1720,15 +1772,12 @@ class AssertionsClient:
1720
1772
  )
1721
1773
  created_by = DEFAULT_CREATED_BY
1722
1774
 
1723
- # Create definition from individual criteria parameters
1775
+ # Create criteria from criteria_condition and parameters
1724
1776
  # The dictionary object will be fully validated down in the _VolumeAssertionInput class
1725
- definition: dict[str, Any] = {
1726
- "type": criteria_type,
1727
- "operator": criteria_operator,
1777
+ criteria: dict[str, Any] = {
1778
+ "condition": criteria_condition,
1728
1779
  "parameters": criteria_parameters,
1729
1780
  }
1730
- if criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE:
1731
- definition["kind"] = criteria_change_type
1732
1781
 
1733
1782
  assertion_input = _VolumeAssertionInput(
1734
1783
  urn=None,
@@ -1744,7 +1793,7 @@ class AssertionsClient:
1744
1793
  updated_by=created_by,
1745
1794
  updated_at=now_utc,
1746
1795
  schedule=schedule,
1747
- definition=definition,
1796
+ criteria=criteria,
1748
1797
  )
1749
1798
  assertion_entity, monitor_entity = (
1750
1799
  assertion_input.to_assertion_and_monitor_entities()
@@ -1766,9 +1815,7 @@ class AssertionsClient:
1766
1815
  dataset_urn: Union[str, DatasetUrn],
1767
1816
  display_name: Optional[str] = None,
1768
1817
  enabled: bool = True,
1769
- criteria_type: Union[SqlAssertionType, str],
1770
- criteria_change_type: Optional[Union[SqlAssertionChangeType, str]] = None,
1771
- criteria_operator: Union[SqlAssertionOperator, str],
1818
+ criteria_condition: Union[SqlAssertionCondition, str],
1772
1819
  criteria_parameters: Union[
1773
1820
  Union[float, int], tuple[Union[float, int], Union[float, int]]
1774
1821
  ],
@@ -1785,23 +1832,21 @@ class AssertionsClient:
1785
1832
  display_name: The display name of the assertion. If not provided, a random display
1786
1833
  name will be generated.
1787
1834
  enabled: Whether the assertion is enabled. Defaults to True.
1788
- criteria_type: The type of sql assertion. Valid values are:
1789
- - "METRIC" -> Looks at the current value of the metric.
1790
- - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
1791
- criteria_change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
1792
- - "ABSOLUTE" -> Looks at the absolute change in the metric.
1793
- - "PERCENTAGE" -> Looks at the percentage change in the metric.
1794
- criteria_operator: The operator to be used for the assertion. Valid values are:
1795
- - "GREATER_THAN" -> The metric value is greater than the threshold.
1796
- - "LESS_THAN" -> The metric value is less than the threshold.
1797
- - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
1798
- - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
1799
- - "EQUAL_TO" -> The metric value is equal to the threshold.
1800
- - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
1801
- - "BETWEEN" -> The metric value is between the two thresholds.
1802
- criteria_parameters: The parameters to be used for the assertion. This can be a single value or a tuple range.
1803
- - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
1804
- - If the operator is not "BETWEEN", the value is a single value.
1835
+ criteria_condition: The condition for the sql assertion. Valid values are:
1836
+ - "IS_EQUAL_TO" -> The metric value equals the threshold.
1837
+ - "IS_NOT_EQUAL_TO" -> The metric value does not equal the threshold.
1838
+ - "IS_GREATER_THAN" -> The metric value is greater than the threshold.
1839
+ - "IS_LESS_THAN" -> The metric value is less than the threshold.
1840
+ - "IS_WITHIN_A_RANGE" -> The metric value is within the specified range.
1841
+ - "GROWS_AT_MOST_ABSOLUTE" -> The metric growth is at most the threshold (absolute change).
1842
+ - "GROWS_AT_MOST_PERCENTAGE" -> The metric growth is at most the threshold (percentage change).
1843
+ - "GROWS_AT_LEAST_ABSOLUTE" -> The metric growth is at least the threshold (absolute change).
1844
+ - "GROWS_AT_LEAST_PERCENTAGE" -> The metric growth is at least the threshold (percentage change).
1845
+ - "GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The metric growth is within the specified range (absolute change).
1846
+ - "GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The metric growth is within the specified range (percentage change).
1847
+ criteria_parameters: The threshold parameters to be used for the assertion. This can be a single threshold value or a tuple range.
1848
+ - If the condition is range-based (IS_WITHIN_A_RANGE, GROWS_WITHIN_A_RANGE_ABSOLUTE, GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
1849
+ - For other conditions, the value is a single numeric threshold value.
1805
1850
  statement: The statement to be used for the assertion.
1806
1851
  incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1807
1852
  - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
@@ -1831,9 +1876,7 @@ class AssertionsClient:
1831
1876
  )
1832
1877
  created_by = DEFAULT_CREATED_BY
1833
1878
  criteria = SqlAssertionCriteria(
1834
- type=criteria_type,
1835
- change_type=criteria_change_type,
1836
- operator=criteria_operator,
1879
+ condition=criteria_condition,
1837
1880
  parameters=criteria_parameters,
1838
1881
  )
1839
1882
  assertion_input = _SqlAssertionInput(
@@ -1907,7 +1950,11 @@ class AssertionsClient:
1907
1950
  - {"type": "query", "additional_filter": "value > 1000"} or DetectionMechanism.QUERY(additional_filter='value > 1000')
1908
1951
  - "dataset_profile" or DetectionMechanism.DATASET_PROFILE
1909
1952
  sensitivity (Optional[Union[str, InferenceSensitivity]]): The sensitivity to be applied to the assertion. Valid values are: "low", "medium", "high".
1910
- exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported.
1953
+ exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported. Valid values are:
1954
+ - {"start": "2025-01-01T00:00:00", "end": "2025-01-02T00:00:00"} (using ISO strings)
1955
+ - {"start": datetime(2025, 1, 1, 0, 0, 0), "end": datetime(2025, 1, 2, 0, 0, 0)} (using datetime objects)
1956
+ - FixedRangeExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0), end=datetime(2025, 1, 2, 0, 0, 0)) (using typed object)
1957
+ - A list of any of the above formats
1911
1958
  training_data_lookback_days (Optional[int]): The training data lookback days to be applied to the assertion as an integer.
1912
1959
  incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
1913
1960
  tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
@@ -2007,17 +2054,281 @@ class AssertionsClient:
2007
2054
 
2008
2055
  return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
2009
2056
 
2057
+ def sync_column_metric_assertion( # noqa: C901 # TODO: Refactor
2058
+ self,
2059
+ *,
2060
+ dataset_urn: Union[str, DatasetUrn],
2061
+ column_name: Optional[str] = None,
2062
+ metric_type: Optional[MetricInputType] = None,
2063
+ operator: Optional[OperatorInputType] = None,
2064
+ criteria_parameters: Optional[ColumnMetricAssertionParameters] = None,
2065
+ urn: Optional[Union[str, AssertionUrn]] = None,
2066
+ display_name: Optional[str] = None,
2067
+ enabled: Optional[bool] = None,
2068
+ detection_mechanism: DetectionMechanismInputTypes = None,
2069
+ incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
2070
+ tags: Optional[TagsInputType] = None,
2071
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
2072
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
2073
+ ) -> ColumnMetricAssertion:
2074
+ """Upsert and merge a column metric assertion.
2075
+
2076
+ Note:
2077
+ Keyword arguments are required.
2078
+
2079
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
2080
+ it will be created. If it does exist, it will be updated.
2081
+
2082
+ Existing assertion fields will be updated if the input value is not None. If the input value is None, the existing value
2083
+ will be preserved. If the input value can be un-set (e.g. by passing an empty list or
2084
+ empty string), it will be unset.
2085
+
2086
+ Schedule behavior:
2087
+ - Create case: Uses default schedule of every 6 hours or provided schedule
2088
+ - Update case: Uses existing schedule or provided schedule.
2089
+
2090
+ Examples:
2091
+ # Using enum values (recommended for type safety)
2092
+ from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import MetricType, OperatorType
2093
+ client.sync_column_metric_assertion(
2094
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
2095
+ column_name="user_id",
2096
+ metric_type=MetricType.NULL_COUNT,
2097
+ operator=OperatorType.GREATER_THAN,
2098
+ criteria_parameters=10
2099
+ )
2100
+
2101
+ # Using case-insensitive strings (more flexible)
2102
+ client.sync_column_metric_assertion(
2103
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
2104
+ column_name="price",
2105
+ metric_type="mean",
2106
+ operator="between",
2107
+ criteria_parameters=(100.0, 500.0)
2108
+ )
2109
+
2110
+ Args:
2111
+ dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
2112
+ column_name (Optional[str]): The name of the column to be monitored. Required for creation, optional for updates.
2113
+ metric_type (Optional[MetricInputType]): The type of the metric to be monitored. Required for creation, optional for updates. Valid values are:
2114
+ - Using MetricType enum: MetricType.NULL_COUNT, MetricType.NULL_PERCENTAGE, MetricType.UNIQUE_COUNT,
2115
+ MetricType.UNIQUE_PERCENTAGE, MetricType.MAX_LENGTH, MetricType.MIN_LENGTH, MetricType.EMPTY_COUNT,
2116
+ MetricType.EMPTY_PERCENTAGE, MetricType.MIN, MetricType.MAX, MetricType.MEAN, MetricType.MEDIAN,
2117
+ MetricType.STDDEV, MetricType.NEGATIVE_COUNT, MetricType.NEGATIVE_PERCENTAGE, MetricType.ZERO_COUNT,
2118
+ MetricType.ZERO_PERCENTAGE
2119
+ - Using case-insensitive strings: "null_count", "MEAN", "Max_Length", etc.
2120
+ - Using models enum: models.FieldMetricTypeClass.NULL_COUNT, etc. (import with: from datahub.metadata import schema_classes as models)
2121
+ operator (Optional[OperatorInputType]): The operator to be used for the assertion. Required for creation, optional for updates. Valid values are:
2122
+ - Using OperatorType enum: OperatorType.EQUAL_TO, OperatorType.NOT_EQUAL_TO, OperatorType.GREATER_THAN,
2123
+ OperatorType.GREATER_THAN_OR_EQUAL_TO, OperatorType.LESS_THAN, OperatorType.LESS_THAN_OR_EQUAL_TO,
2124
+ OperatorType.BETWEEN, OperatorType.IN, OperatorType.NOT_IN, OperatorType.NULL, OperatorType.NOT_NULL,
2125
+ OperatorType.IS_TRUE, OperatorType.IS_FALSE, OperatorType.CONTAIN, OperatorType.END_WITH,
2126
+ OperatorType.START_WITH, OperatorType.REGEX_MATCH
2127
+ - Using case-insensitive strings: "equal_to", "not_equal_to", "greater_than", "greater_than_or_equal_to",
2128
+ "less_than", "less_than_or_equal_to", "between", "in", "not_in", "null", "not_null", "is_true",
2129
+ "is_false", "contain", "end_with", "start_with", "regex_match"
2130
+ - Using models enum: models.AssertionStdOperatorClass.EQUAL_TO, models.AssertionStdOperatorClass.GREATER_THAN, etc.
2131
+ criteria_parameters (Optional[ColumnMetricAssertionParameters]): The criteria parameters for the assertion. Required for creation (except for operators that don't need parameters), optional for updates.
2132
+ - Single value operators (EQUAL_TO, NOT_EQUAL_TO, GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, CONTAIN, END_WITH, START_WITH, REGEX_MATCH): pass a single number or string
2133
+ - Range operators (BETWEEN): pass a tuple of two numbers (min_value, max_value)
2134
+ - List operators (IN, NOT_IN): pass a list of values
2135
+ - No parameter operators (NULL, NOT_NULL, IS_TRUE, IS_FALSE): pass None or omit this parameter
2136
+ urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
2137
+ display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
2138
+ enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved.
2139
+ detection_mechanism (DetectionMechanismInputTypes): The detection mechanism to be used for the assertion. Valid values are (additional_filter is optional):
2140
+ - "all_rows_query_datahub_dataset_profile" or DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
2141
+ - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY(), or with additional_filter: {"type": "all_rows_query", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.ALL_ROWS_QUERY(additional_filter='last_modified > 2021-01-01')
2142
+ - {"type": "changed_rows_query", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
2143
+ incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
2144
+ tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
2145
+ updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
2146
+ schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule of every 6 hours will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
2147
+
2148
+ Returns:
2149
+ ColumnMetricAssertion: The created or updated assertion.
2150
+ """
2151
+ now_utc = datetime.now(timezone.utc)
2152
+ gms_criteria_type_info = None
2153
+
2154
+ if updated_by is None:
2155
+ logger.warning(
2156
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
2157
+ )
2158
+ updated_by = DEFAULT_CREATED_BY
2159
+
2160
+ # 1. If urn is not set, create a new assertion
2161
+ if urn is None:
2162
+ self._validate_required_column_fields_for_creation(
2163
+ column_name, metric_type, operator
2164
+ )
2165
+ assert (
2166
+ column_name is not None
2167
+ and metric_type is not None
2168
+ and operator is not None
2169
+ ), "Fields guaranteed non-None after validation"
2170
+ logger.info("URN is not set, creating a new assertion")
2171
+ return self._create_column_metric_assertion(
2172
+ dataset_urn=dataset_urn,
2173
+ column_name=column_name,
2174
+ metric_type=metric_type,
2175
+ operator=operator,
2176
+ criteria_parameters=criteria_parameters,
2177
+ display_name=display_name,
2178
+ enabled=enabled if enabled is not None else True,
2179
+ detection_mechanism=detection_mechanism,
2180
+ incident_behavior=incident_behavior,
2181
+ tags=tags,
2182
+ created_by=updated_by,
2183
+ schedule=schedule,
2184
+ )
2185
+
2186
+ # 2.1 If urn is set, fetch missing required parameters from backend if needed:
2187
+ # NOTE: This is a tactical solution. The problem is we fetch twice (once for validation,
2188
+ # once for merge). Strategic solution would be to merge first, then validate after,
2189
+ # but that requires heavy refactor and is skipped for now.
2190
+ if urn is not None and (
2191
+ column_name is None
2192
+ or metric_type is None
2193
+ or operator is None
2194
+ or criteria_parameters is None
2195
+ ):
2196
+ # Fetch existing assertion to get missing required parameters
2197
+ maybe_assertion_entity, _, maybe_monitor_entity = (
2198
+ self._retrieve_assertion_and_monitor(
2199
+ {"dataset_urn": dataset_urn, "urn": urn}
2200
+ )
2201
+ )
2202
+
2203
+ if maybe_assertion_entity is not None:
2204
+ assertion_info = maybe_assertion_entity.info
2205
+ if (
2206
+ hasattr(assertion_info, "fieldMetricAssertion")
2207
+ and assertion_info.fieldMetricAssertion
2208
+ ):
2209
+ field_metric_assertion = assertion_info.fieldMetricAssertion
2210
+ # Use existing values for missing required parameters
2211
+ if (
2212
+ column_name is None
2213
+ and hasattr(field_metric_assertion, "field")
2214
+ and hasattr(field_metric_assertion.field, "path")
2215
+ ):
2216
+ column_name = field_metric_assertion.field.path
2217
+ if metric_type is None and hasattr(
2218
+ field_metric_assertion, "metric"
2219
+ ):
2220
+ metric_type = field_metric_assertion.metric
2221
+ if operator is None and hasattr(field_metric_assertion, "operator"):
2222
+ operator = field_metric_assertion.operator
2223
+ if criteria_parameters is None and hasattr(
2224
+ field_metric_assertion, "parameters"
2225
+ ):
2226
+ # Extract criteria_parameters from existing assertion
2227
+ # This logic should match the parameter extraction in the assertion input class
2228
+ params = field_metric_assertion.parameters
2229
+ if params and hasattr(params, "value") and params.value:
2230
+ criteria_parameters = params.value.value
2231
+ elif (
2232
+ params
2233
+ and hasattr(params, "minValue")
2234
+ and hasattr(params, "maxValue")
2235
+ and params.minValue
2236
+ and params.maxValue
2237
+ ):
2238
+ criteria_parameters = (
2239
+ params.minValue.value,
2240
+ params.maxValue.value,
2241
+ )
2242
+
2243
+ # Extract gms_criteria_type_info to preserve original parameter types
2244
+ gms_criteria_type_info = (
2245
+ _HasColumnMetricFunctionality._get_criteria_parameters_with_type(
2246
+ maybe_assertion_entity
2247
+ )
2248
+ )
2249
+
2250
+ self._validate_required_column_fields_for_update(
2251
+ column_name, metric_type, operator, urn
2252
+ )
2253
+ assert (
2254
+ column_name is not None
2255
+ and metric_type is not None
2256
+ and operator is not None
2257
+ ), "Fields guaranteed non-None after validation"
2258
+
2259
+ # 2.2 Now validate the input with all required parameters:
2260
+ assertion_input = _ColumnMetricAssertionInput(
2261
+ urn=urn,
2262
+ entity_client=self.client.entities,
2263
+ dataset_urn=dataset_urn,
2264
+ column_name=column_name,
2265
+ metric_type=metric_type,
2266
+ operator=operator,
2267
+ criteria_parameters=criteria_parameters,
2268
+ display_name=display_name,
2269
+ detection_mechanism=detection_mechanism,
2270
+ incident_behavior=incident_behavior,
2271
+ tags=tags,
2272
+ created_by=updated_by, # This will be overridden by the actual created_by
2273
+ created_at=now_utc, # This will be overridden by the actual created_at
2274
+ updated_by=updated_by,
2275
+ updated_at=now_utc,
2276
+ schedule=schedule,
2277
+ gms_criteria_type_info=gms_criteria_type_info,
2278
+ )
2279
+
2280
+ # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
2281
+ # if the assertion does not exist:
2282
+ merged_assertion_input_or_created_assertion = (
2283
+ self._retrieve_and_merge_column_metric_assertion_and_monitor(
2284
+ assertion_input=assertion_input,
2285
+ dataset_urn=dataset_urn,
2286
+ column_name=column_name,
2287
+ metric_type=metric_type,
2288
+ operator=operator,
2289
+ criteria_parameters=criteria_parameters,
2290
+ urn=urn,
2291
+ display_name=display_name,
2292
+ enabled=enabled,
2293
+ detection_mechanism=detection_mechanism,
2294
+ incident_behavior=incident_behavior,
2295
+ tags=tags,
2296
+ updated_by=updated_by,
2297
+ now_utc=now_utc,
2298
+ schedule=schedule,
2299
+ )
2300
+ )
2301
+
2302
+ # Return early if we created a new assertion in the merge:
2303
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
2304
+ # We know this is the correct type because we passed the assertion_class parameter
2305
+ assert isinstance(
2306
+ merged_assertion_input_or_created_assertion, ColumnMetricAssertion
2307
+ )
2308
+ return merged_assertion_input_or_created_assertion
2309
+
2310
+ # 4. Upsert the assertion and monitor entities:
2311
+ assertion_entity, monitor_entity = (
2312
+ merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
2313
+ )
2314
+ # If assertion upsert fails, we won't try to upsert the monitor
2315
+ self.client.entities.upsert(assertion_entity)
2316
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
2317
+ # try:
2318
+ self.client.entities.upsert(monitor_entity)
2319
+ # except Exception as e:
2320
+ # logger.error(f"Error upserting monitor: {e}")
2321
+ # self.client.entities.delete(assertion_entity)
2322
+ # raise e
2323
+
2324
+ return ColumnMetricAssertion._from_entities(assertion_entity, monitor_entity)
2325
+
2010
2326
  def sync_smart_column_metric_assertion(
2011
2327
  self,
2012
2328
  *,
2013
2329
  dataset_urn: Union[str, DatasetUrn],
2014
- column_name: str,
2015
- metric_type: MetricInputType,
2016
- operator: OperatorInputType,
2017
- value: Optional[ValueInputType] = None,
2018
- value_type: Optional[ValueTypeInputType] = None,
2019
- range: Optional[RangeInputType] = None,
2020
- range_type: Optional[RangeTypeInputType] = None,
2330
+ column_name: Optional[str] = None,
2331
+ metric_type: Optional[MetricInputType] = None,
2021
2332
  urn: Optional[Union[str, AssertionUrn]] = None,
2022
2333
  display_name: Optional[str] = None,
2023
2334
  enabled: Optional[bool] = None,
@@ -2046,15 +2357,32 @@ class AssertionsClient:
2046
2357
  - Create case: Uses default schedule of every 6 hours or provided schedule
2047
2358
  - Update case: Uses existing schedule or provided schedule.
2048
2359
 
2360
+ Examples:
2361
+ # Using enum values (recommended for type safety)
2362
+ client.sync_smart_column_metric_assertion(
2363
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
2364
+ column_name="user_id",
2365
+ metric_type=MetricType.NULL_COUNT
2366
+ )
2367
+
2368
+ # Using case-insensitive strings (more flexible)
2369
+ client.sync_smart_column_metric_assertion(
2370
+ dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
2371
+ column_name="price",
2372
+ metric_type="mean"
2373
+ )
2374
+
2049
2375
  Args:
2050
2376
  dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
2051
- column_name (str): The name of the column to be monitored.
2052
- metric_type (MetricInputType): The type of the metric to be monitored.
2053
- operator (OperatorInputType): The operator to be used for the assertion.
2054
- value (Optional[ValueInputType]): The value to be used for the assertion. Required if operator requires a value.
2055
- value_type (Optional[ValueTypeInputType]): The type of the value to be used for the assertion. Required if operator requires a value.
2056
- range (Optional[RangeInputType]): The range to be used for the assertion. Required if operator requires a range.
2057
- range_type (Optional[RangeTypeInputType]): The type of the range to be used for the assertion. Required if operator requires a range.
2377
+ column_name (Optional[str]): The name of the column to be monitored. Required for creation, optional for updates.
2378
+ metric_type (Optional[MetricInputType]): The type of the metric to be monitored. Required for creation, optional for updates. Valid values are:
2379
+ - Using MetricType enum: MetricType.NULL_COUNT, MetricType.NULL_PERCENTAGE, MetricType.UNIQUE_COUNT,
2380
+ MetricType.UNIQUE_PERCENTAGE, MetricType.MAX_LENGTH, MetricType.MIN_LENGTH, MetricType.EMPTY_COUNT,
2381
+ MetricType.EMPTY_PERCENTAGE, MetricType.MIN, MetricType.MAX, MetricType.MEAN, MetricType.MEDIAN,
2382
+ MetricType.STDDEV, MetricType.NEGATIVE_COUNT, MetricType.NEGATIVE_PERCENTAGE, MetricType.ZERO_COUNT,
2383
+ MetricType.ZERO_PERCENTAGE
2384
+ - Using case-insensitive strings: "null_count", "MEAN", "Max_Length", etc.
2385
+ - Using models enum: models.FieldMetricTypeClass.NULL_COUNT, etc. (import with: from datahub.metadata import schema_classes as models)
2058
2386
  urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
2059
2387
  display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
2060
2388
  enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved.
@@ -2084,16 +2412,17 @@ class AssertionsClient:
2084
2412
 
2085
2413
  # 1. If urn is not set, create a new assertion
2086
2414
  if urn is None:
2415
+ self._validate_required_smart_column_fields_for_creation(
2416
+ column_name, metric_type
2417
+ )
2418
+ assert column_name is not None and metric_type is not None, (
2419
+ "Fields guaranteed non-None after validation"
2420
+ )
2087
2421
  logger.info("URN is not set, creating a new assertion")
2088
2422
  return self._create_smart_column_metric_assertion(
2089
2423
  dataset_urn=dataset_urn,
2090
2424
  column_name=column_name,
2091
2425
  metric_type=metric_type,
2092
- operator=operator,
2093
- value=value,
2094
- value_type=value_type,
2095
- range=range,
2096
- range_type=range_type,
2097
2426
  display_name=display_name,
2098
2427
  enabled=enabled if enabled is not None else True,
2099
2428
  detection_mechanism=detection_mechanism,
@@ -2106,18 +2435,55 @@ class AssertionsClient:
2106
2435
  schedule=schedule,
2107
2436
  )
2108
2437
 
2109
- # 2. If urn is set, first validate the input:
2438
+ # 2.1 If urn is set, fetch missing required parameters from backend if needed:
2439
+ # NOTE: This is a tactical solution. The problem is we fetch twice (once for validation,
2440
+ # once for merge). Strategic solution would be to merge first, then validate after,
2441
+ # but that requires heavy refactor and is skipped for now.
2442
+ if urn is not None and (column_name is None or metric_type is None):
2443
+ # Fetch existing assertion to get missing required parameters
2444
+ maybe_assertion_entity, _, maybe_monitor_entity = (
2445
+ self._retrieve_assertion_and_monitor(
2446
+ {"dataset_urn": dataset_urn, "urn": urn}
2447
+ )
2448
+ )
2449
+
2450
+ if maybe_assertion_entity is not None:
2451
+ assertion_info = maybe_assertion_entity.info
2452
+ if (
2453
+ hasattr(assertion_info, "fieldMetricAssertion")
2454
+ and assertion_info.fieldMetricAssertion
2455
+ ):
2456
+ field_metric_assertion = assertion_info.fieldMetricAssertion
2457
+ # Use existing values for missing required parameters
2458
+ if (
2459
+ column_name is None
2460
+ and hasattr(field_metric_assertion, "field")
2461
+ and hasattr(field_metric_assertion.field, "path")
2462
+ ):
2463
+ column_name = field_metric_assertion.field.path
2464
+ if metric_type is None and hasattr(
2465
+ field_metric_assertion, "metric"
2466
+ ):
2467
+ metric_type = field_metric_assertion.metric
2468
+ # Smart assertions always use BETWEEN operator - no need to fetch from existing assertion
2469
+
2470
+ self._validate_required_smart_column_fields_for_update(
2471
+ column_name, metric_type, urn
2472
+ )
2473
+ assert column_name is not None and metric_type is not None, (
2474
+ "Fields guaranteed non-None after validation"
2475
+ )
2476
+
2477
+ # 2.1.1 Validate criteria_parameters for creation scenario
2478
+ self._validate_criteria_parameters_for_creation(urn)
2479
+
2480
+ # 2.2 Now validate the input with all required parameters:
2110
2481
  assertion_input = _SmartColumnMetricAssertionInput(
2111
2482
  urn=urn,
2112
2483
  entity_client=self.client.entities,
2113
2484
  dataset_urn=dataset_urn,
2114
2485
  column_name=column_name,
2115
2486
  metric_type=metric_type,
2116
- operator=operator,
2117
- value=value,
2118
- value_type=value_type,
2119
- range=range,
2120
- range_type=range_type,
2121
2487
  display_name=display_name,
2122
2488
  detection_mechanism=detection_mechanism,
2123
2489
  sensitivity=sensitivity,
@@ -2140,11 +2506,6 @@ class AssertionsClient:
2140
2506
  dataset_urn=dataset_urn,
2141
2507
  column_name=column_name,
2142
2508
  metric_type=metric_type,
2143
- operator=operator,
2144
- value=value,
2145
- value_type=value_type,
2146
- range=range,
2147
- range_type=range_type,
2148
2509
  urn=urn,
2149
2510
  display_name=display_name,
2150
2511
  enabled=enabled,
@@ -2192,11 +2553,6 @@ class AssertionsClient:
2192
2553
  dataset_urn: Union[str, DatasetUrn],
2193
2554
  column_name: str,
2194
2555
  metric_type: MetricInputType,
2195
- operator: OperatorInputType,
2196
- value: Optional[ValueInputType] = None,
2197
- value_type: Optional[ValueTypeInputType] = None,
2198
- range: Optional[RangeInputType] = None,
2199
- range_type: Optional[RangeTypeInputType] = None,
2200
2556
  display_name: Optional[str] = None,
2201
2557
  enabled: bool = True,
2202
2558
  detection_mechanism: DetectionMechanismInputTypes = None,
@@ -2216,11 +2572,6 @@ class AssertionsClient:
2216
2572
  dataset_urn: The urn of the dataset to be monitored. (Required)
2217
2573
  column_name: The name of the column to be monitored. (Required)
2218
2574
  metric_type: The type of the metric to be monitored. (Required)
2219
- operator: The operator to be used for the assertion. (Required)
2220
- value: The value to be used for the assertion. (Required if operator requires a value)
2221
- value_type: The type of the value to be used for the assertion. (Required if operator requires a value)
2222
- range: The range to be used for the assertion. (Required if operator requires a range)
2223
- range_type: The type of the range to be used for the assertion. (Required if operator requires a range)
2224
2575
  display_name: The display name of the assertion. If not provided, a random display
2225
2576
  name will be generated.
2226
2577
  enabled: Whether the assertion is enabled. Defaults to True.
@@ -2296,11 +2647,6 @@ class AssertionsClient:
2296
2647
  dataset_urn=dataset_urn,
2297
2648
  column_name=column_name,
2298
2649
  metric_type=metric_type,
2299
- operator=operator,
2300
- value=value,
2301
- value_type=value_type,
2302
- range=range,
2303
- range_type=range_type,
2304
2650
  display_name=display_name,
2305
2651
  enabled=enabled,
2306
2652
  detection_mechanism=detection_mechanism,
@@ -2337,11 +2683,6 @@ class AssertionsClient:
2337
2683
  dataset_urn: Union[str, DatasetUrn],
2338
2684
  column_name: str,
2339
2685
  metric_type: MetricInputType,
2340
- operator: OperatorInputType,
2341
- value: Optional[ValueInputType],
2342
- value_type: Optional[ValueTypeInputType],
2343
- range: Optional[RangeInputType],
2344
- range_type: Optional[RangeTypeInputType],
2345
2686
  urn: Union[str, AssertionUrn],
2346
2687
  display_name: Optional[str],
2347
2688
  enabled: Optional[bool],
@@ -2383,11 +2724,6 @@ class AssertionsClient:
2383
2724
  dataset_urn=dataset_urn,
2384
2725
  column_name=column_name,
2385
2726
  metric_type=metric_type,
2386
- operator=operator,
2387
- value=value,
2388
- value_type=value_type,
2389
- range=range,
2390
- range_type=range_type,
2391
2727
  schedule=schedule,
2392
2728
  display_name=display_name,
2393
2729
  detection_mechanism=detection_mechanism,
@@ -2409,16 +2745,14 @@ class AssertionsClient:
2409
2745
  f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
2410
2746
  )
2411
2747
 
2412
- # 4. Merge the existing assertion with the validated input:
2748
+ # 4. Smart assertions always use fixed criteria_parameters (0, 0) and BETWEEN operator
2749
+ # No GMS type info needed since values are fixed
2750
+
2751
+ # 5. Merge the existing assertion with the validated input:
2413
2752
  merged_assertion_input = self._merge_smart_column_metric_input(
2414
2753
  dataset_urn=dataset_urn,
2415
2754
  column_name=column_name,
2416
2755
  metric_type=metric_type,
2417
- operator=operator,
2418
- value=value,
2419
- value_type=value_type,
2420
- range=range,
2421
- range_type=range_type,
2422
2756
  urn=urn,
2423
2757
  display_name=display_name,
2424
2758
  enabled=enabled,
@@ -2443,11 +2777,6 @@ class AssertionsClient:
2443
2777
  dataset_urn: Union[str, DatasetUrn],
2444
2778
  column_name: str,
2445
2779
  metric_type: MetricInputType,
2446
- operator: OperatorInputType,
2447
- value: Optional[ValueInputType],
2448
- value_type: Optional[ValueTypeInputType],
2449
- range: Optional[RangeInputType],
2450
- range_type: Optional[RangeTypeInputType],
2451
2780
  urn: Union[str, AssertionUrn],
2452
2781
  display_name: Optional[str],
2453
2782
  enabled: Optional[bool],
@@ -2470,11 +2799,6 @@ class AssertionsClient:
2470
2799
  dataset_urn: The urn of the dataset to be monitored.
2471
2800
  column_name: The name of the column to be monitored.
2472
2801
  metric_type: The type of the metric to be monitored.
2473
- operator: The operator to be used for the assertion.
2474
- value: The value to be used for the assertion.
2475
- value_type: The type of the value to be used for the assertion.
2476
- range: The range to be used for the assertion.
2477
- range_type: The type of the range to be used for the assertion.
2478
2802
  urn: The urn of the assertion.
2479
2803
  display_name: The display name of the assertion.
2480
2804
  enabled: Whether the assertion is enabled.
@@ -2519,61 +2843,6 @@ class AssertionsClient:
2519
2843
  if maybe_assertion_entity
2520
2844
  else None,
2521
2845
  ),
2522
- operator=_merge_field(
2523
- input_field_value=operator,
2524
- input_field_name="operator",
2525
- validated_assertion_input=assertion_input,
2526
- validated_existing_assertion=existing_assertion,
2527
- existing_entity_value=SmartColumnMetricAssertion._get_operator(
2528
- maybe_assertion_entity
2529
- )
2530
- if maybe_assertion_entity
2531
- else None,
2532
- ),
2533
- value=_merge_field(
2534
- input_field_value=value,
2535
- input_field_name="value",
2536
- validated_assertion_input=assertion_input,
2537
- validated_existing_assertion=existing_assertion,
2538
- existing_entity_value=SmartColumnMetricAssertion._get_value(
2539
- maybe_assertion_entity
2540
- )
2541
- if maybe_assertion_entity
2542
- else None,
2543
- ),
2544
- value_type=_merge_field(
2545
- input_field_value=value_type,
2546
- input_field_name="value_type",
2547
- validated_assertion_input=assertion_input,
2548
- validated_existing_assertion=existing_assertion,
2549
- existing_entity_value=SmartColumnMetricAssertion._get_value_type(
2550
- maybe_assertion_entity
2551
- )
2552
- if maybe_assertion_entity
2553
- else None,
2554
- ),
2555
- range=_merge_field(
2556
- input_field_value=range,
2557
- input_field_name="range",
2558
- validated_assertion_input=assertion_input,
2559
- validated_existing_assertion=existing_assertion,
2560
- existing_entity_value=SmartColumnMetricAssertion._get_range(
2561
- maybe_assertion_entity
2562
- )
2563
- if maybe_assertion_entity
2564
- else None,
2565
- ),
2566
- range_type=_merge_field(
2567
- input_field_value=range_type,
2568
- input_field_name="range_type",
2569
- validated_assertion_input=assertion_input,
2570
- validated_existing_assertion=existing_assertion,
2571
- existing_entity_value=SmartColumnMetricAssertion._get_range_type(
2572
- maybe_assertion_entity
2573
- )
2574
- if maybe_assertion_entity
2575
- else None,
2576
- ),
2577
2846
  display_name=_merge_field(
2578
2847
  input_field_value=display_name,
2579
2848
  input_field_name="display_name",
@@ -2681,7 +2950,11 @@ class AssertionsClient:
2681
2950
  tags: Optional[TagsInputType] = None,
2682
2951
  updated_by: Optional[Union[str, CorpUserUrn]] = None,
2683
2952
  freshness_schedule_check_type: Optional[
2684
- Union[str, models.FreshnessAssertionScheduleTypeClass]
2953
+ Union[
2954
+ str,
2955
+ FreshnessAssertionScheduleCheckType,
2956
+ models.FreshnessAssertionScheduleTypeClass,
2957
+ ]
2685
2958
  ] = None,
2686
2959
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
2687
2960
  lookback_window: Optional[TimeWindowSizeInputTypes] = None,
@@ -2715,9 +2988,16 @@ class AssertionsClient:
2715
2988
  incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
2716
2989
  tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
2717
2990
  updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
2718
- freshness_schedule_check_type (Optional[Union[str, models.FreshnessAssertionScheduleTypeClass]]): The freshness schedule check type to be applied to the assertion. Valid values are: "since_the_last_check", "cron".
2991
+ freshness_schedule_check_type (Optional[Union[str, FreshnessAssertionScheduleCheckType, models.FreshnessAssertionScheduleTypeClass]]): The freshness schedule check type to be applied to the assertion. Valid values are: "since_the_last_check", "fixed_interval".
2719
2992
  schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
2720
- lookback_window (Optional[TimeWindowSizeInputTypes]): The lookback window to be applied to the assertion.
2993
+ lookback_window (Optional[TimeWindowSizeInputTypes]): The lookback window to be applied to the assertion. Valid values are:
2994
+ - TimeWindowSize(unit=CalendarInterval.MINUTE, multiple=10) for 10 minutes
2995
+ - TimeWindowSize(unit=CalendarInterval.HOUR, multiple=2) for 2 hours
2996
+ - TimeWindowSize(unit=CalendarInterval.DAY, multiple=1) for 1 day
2997
+ - {"unit": "MINUTE", "multiple": 30} for 30 minutes (using dict)
2998
+ - {"unit": "HOUR", "multiple": 6} for 6 hours (using dict)
2999
+ - {"unit": "DAY", "multiple": 7} for 7 days (using dict)
3000
+ Valid values for CalendarInterval are: "MINUTE", "HOUR", "DAY" and for multiple, the integer number of units.
2721
3001
 
2722
3002
  Returns:
2723
3003
  FreshnessAssertion: The created or updated assertion.
@@ -2821,11 +3101,7 @@ class AssertionsClient:
2821
3101
  tags: Optional[TagsInputType] = None,
2822
3102
  updated_by: Optional[Union[str, CorpUserUrn]] = None,
2823
3103
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
2824
- criteria_type: Optional[Union[str, VolumeAssertionDefinitionType]] = None,
2825
- criteria_change_type: Optional[
2826
- Union[str, VolumeAssertionDefinitionChangeKind]
2827
- ] = None,
2828
- criteria_operator: Optional[Union[str, VolumeAssertionOperator]] = None,
3104
+ criteria_condition: Optional[Union[str, VolumeAssertionCondition]] = None,
2829
3105
  criteria_parameters: Optional[VolumeAssertionDefinitionParameters] = None,
2830
3106
  ) -> VolumeAssertion:
2831
3107
  """Upsert and merge a volume assertion.
@@ -2856,10 +3132,21 @@ class AssertionsClient:
2856
3132
  tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
2857
3133
  updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
2858
3134
  schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
2859
- criteria_type (Optional[Union[str, VolumeAssertionDefinitionType]]): Optional type of volume assertion. Must be either VolumeAssertionDefinitionType.ROW_COUNT_TOTAL or VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. If not provided, the existing definition from the backend will be preserved (for update operations). Required when creating a new assertion (when urn is None).
2860
- criteria_change_type (Optional[Union[str, VolumeAssertionDefinitionChangeKind]]): Optional change type for row count change assertions. Required when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. Ignored when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_TOTAL. If not provided, existing value is preserved for updates.
2861
- criteria_operator (Optional[Union[str, VolumeAssertionOperator]]): Optional comparison operator for the assertion. Must be a VolumeAssertionOperator value. If not provided, existing value is preserved for updates. Required when creating a new assertion.
2862
- criteria_parameters (Optional[VolumeAssertionDefinitionParameters]): Optional parameters for the assertion. For single-value operators provide a single number. For BETWEEN operator, provide a tuple of two numbers (min_value, max_value). If not provided, existing value is preserved for updates. Required when creating a new assertion.
3135
+ criteria_condition (Optional[Union[str, VolumeAssertionCondition]]): Optional condition for the volume assertion. Valid values are:
3136
+ - "ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO" -> The row count is less than or equal to the threshold.
3137
+ - "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO" -> The row count is greater than or equal to the threshold.
3138
+ - "ROW_COUNT_IS_WITHIN_A_RANGE" -> The row count is within the specified range.
3139
+ - "ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE" -> The row count growth is at most the threshold (absolute change).
3140
+ - "ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE" -> The row count growth is at least the threshold (absolute change).
3141
+ - "ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The row count growth is within the specified range (absolute change).
3142
+ - "ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE" -> The row count growth is at most the threshold (percentage change).
3143
+ - "ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE" -> The row count growth is at least the threshold (percentage change).
3144
+ - "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The row count growth is within the specified range (percentage change).
3145
+ If not provided, the existing definition from the backend will be preserved (for update operations). Required when creating a new assertion (when urn is None).
3146
+ criteria_parameters (Optional[VolumeAssertionDefinitionParameters]): Optional threshold parameters to be used for the assertion. This can be a single threshold value or a tuple range.
3147
+ - If the condition is range-based (ROW_COUNT_IS_WITHIN_A_RANGE, ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE, ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
3148
+ - For other conditions, the value is a single numeric threshold value.
3149
+ If not provided, existing value is preserved for updates. Required when creating a new assertion.
2863
3150
 
2864
3151
  Returns:
2865
3152
  VolumeAssertion: The created or updated assertion.
@@ -2874,44 +3161,26 @@ class AssertionsClient:
2874
3161
  updated_by = DEFAULT_CREATED_BY
2875
3162
 
2876
3163
  # 1. Validate criteria parameters if any are provided
2877
- if (
2878
- criteria_type is not None
2879
- or criteria_operator is not None
2880
- or criteria_parameters is not None
2881
- ) and (
2882
- criteria_type is None
2883
- or criteria_operator is None
2884
- or criteria_parameters is None
2885
- or (
2886
- criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
2887
- and criteria_change_type is None
2888
- )
3164
+ if (criteria_condition is not None or criteria_parameters is not None) and (
3165
+ criteria_condition is None or criteria_parameters is None
2889
3166
  ):
2890
3167
  raise SDKUsageError(
2891
- "When providing volume assertion criteria, all required parameters must be provided "
2892
- "(criteria_type, criteria_operator, criteria_parameters must be provided, "
2893
- "and criteria_change_type is required when criteria_type is 'row_count_change')"
3168
+ "When providing volume assertion criteria, both criteria_condition and criteria_parameters must be provided"
2894
3169
  )
2895
3170
 
2896
- # Assert the invariant: if criteria_type is provided, all required parameters are provided
2897
- assert criteria_type is None or (
2898
- criteria_operator is not None
2899
- and criteria_parameters is not None
2900
- and (
2901
- criteria_type != VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
2902
- or criteria_change_type is not None
2903
- )
2904
- ), "criteria fields already validated"
3171
+ # Assert the invariant: if criteria_condition is provided, criteria_parameters is also provided
3172
+ assert criteria_condition is None or criteria_parameters is not None, (
3173
+ "criteria fields already validated"
3174
+ )
2905
3175
 
2906
3176
  # 2. If urn is not set, create a new assertion
2907
3177
  if urn is None:
2908
- if criteria_type is None:
3178
+ if criteria_condition is None:
2909
3179
  raise SDKUsageError(
2910
3180
  "Volume assertion criteria are required when creating a new assertion"
2911
3181
  )
2912
3182
  logger.info("URN is not set, creating a new assertion")
2913
3183
  # Type narrowing: we know these are not None because of validation above
2914
- assert criteria_operator is not None
2915
3184
  assert criteria_parameters is not None
2916
3185
  return self._create_volume_assertion(
2917
3186
  dataset_urn=dataset_urn,
@@ -2922,9 +3191,7 @@ class AssertionsClient:
2922
3191
  tags=tags,
2923
3192
  created_by=updated_by,
2924
3193
  schedule=schedule,
2925
- criteria_type=criteria_type,
2926
- criteria_change_type=criteria_change_type,
2927
- criteria_operator=criteria_operator,
3194
+ criteria_condition=criteria_condition,
2928
3195
  criteria_parameters=criteria_parameters,
2929
3196
  )
2930
3197
 
@@ -2936,24 +3203,19 @@ class AssertionsClient:
2936
3203
  # this is a creation case and the user missed the definition parameter, which is required.
2937
3204
  # Likely this pattern never happened before because there is no a publicly documented default definition
2938
3205
  # that we can use as fallback.
2939
- if criteria_type is not None:
2940
- # Create definition from individual criteria parameters
2941
- temp_definition: dict[str, Any] = {
2942
- "type": criteria_type,
2943
- "operator": criteria_operator,
3206
+ if criteria_condition is not None:
3207
+ # Create criteria from criteria_condition and parameters
3208
+ temp_criteria: dict[str, Any] = {
3209
+ "condition": criteria_condition,
2944
3210
  "parameters": criteria_parameters,
2945
3211
  }
2946
3212
 
2947
- if criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE:
2948
- temp_definition["kind"] = criteria_change_type
2949
-
2950
- use_backend_definition = False
3213
+ use_backend_criteria = False
2951
3214
  else:
2952
- # No criteria provided, use backend definition
2953
- use_backend_definition = True
2954
- temp_definition = {
2955
- "type": VolumeAssertionDefinitionType.ROW_COUNT_TOTAL,
2956
- "operator": VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO,
3215
+ # No criteria provided, use backend criteria
3216
+ use_backend_criteria = True
3217
+ temp_criteria = {
3218
+ "condition": VolumeAssertionCondition.ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO,
2957
3219
  "parameters": 0, # Temporary placeholder
2958
3220
  }
2959
3221
 
@@ -2970,7 +3232,7 @@ class AssertionsClient:
2970
3232
  updated_by=updated_by,
2971
3233
  updated_at=now_utc,
2972
3234
  schedule=schedule,
2973
- definition=temp_definition,
3235
+ criteria=temp_criteria,
2974
3236
  )
2975
3237
 
2976
3238
  # 4. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
@@ -2983,8 +3245,8 @@ class AssertionsClient:
2983
3245
  display_name=display_name,
2984
3246
  enabled=enabled,
2985
3247
  detection_mechanism=detection_mechanism,
2986
- definition=temp_definition,
2987
- use_backend_definition=use_backend_definition,
3248
+ criteria=temp_criteria,
3249
+ use_backend_criteria=use_backend_criteria,
2988
3250
  incident_behavior=incident_behavior,
2989
3251
  tags=tags,
2990
3252
  updated_by=updated_by,
@@ -3016,6 +3278,46 @@ class AssertionsClient:
3016
3278
  # raise e
3017
3279
  return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
3018
3280
 
3281
def _validate_sql_assertion_creation_params(
    self,
    statement: Optional[str],
    criteria_condition: Optional[Union[SqlAssertionCondition, str]],
    criteria_parameters: Optional[
        Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]
    ],
) -> None:
    """Check the fields that must be supplied to create a SQL assertion.

    ``statement``, ``criteria_condition`` and ``criteria_parameters`` are
    passed, in that order, to ``self._validate_required_field`` together with
    the field name and a shared context message.

    NOTE(review): the caller treats all three values as non-None once this
    returns, so ``_validate_required_field`` presumably raises for a missing
    value - confirm against its definition.
    """
    creation_context = "when creating a new assertion (urn is None)"
    required_fields = (
        (statement, "statement"),
        (criteria_condition, "criteria_condition"),
        (criteria_parameters, "criteria_parameters"),
    )
    for field_value, field_name in required_fields:
        self._validate_required_field(field_value, field_name, creation_context)
3303
+
3304
def _validate_required_sql_fields_for_update(
    self,
    statement: Optional[str],
    criteria_condition: Optional[Union[SqlAssertionCondition, str]],
    criteria_parameters: Optional[
        Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]
    ],
    assertion_urn: Union[str, AssertionUrn],
) -> None:
    """Check the SQL fields after back-filling them from the stored assertion.

    Each of ``statement``, ``criteria_condition`` and ``criteria_parameters``
    is passed, in that order, to ``self._validate_required_field``. The context
    message names ``assertion_urn`` so a failure points at the stored
    assertion the values were expected to come from.
    """
    missing_context = f"and not found in existing assertion {assertion_urn}. The existing assertion may be invalid or corrupted."
    for field_value, field_name in (
        (statement, "statement"),
        (criteria_condition, "criteria_condition"),
        (criteria_parameters, "criteria_parameters"),
    ):
        self._validate_required_field(field_value, field_name, missing_context)
3320
+
3019
3321
  def sync_sql_assertion(
3020
3322
  self,
3021
3323
  *,
@@ -3023,13 +3325,11 @@ class AssertionsClient:
3023
3325
  urn: Optional[Union[str, AssertionUrn]] = None,
3024
3326
  display_name: Optional[str] = None,
3025
3327
  enabled: Optional[bool] = None,
3026
- statement: str,
3027
- criteria_type: Union[SqlAssertionType, str],
3028
- criteria_change_type: Optional[Union[SqlAssertionChangeType, str]] = None,
3029
- criteria_operator: Union[SqlAssertionOperator, str],
3030
- criteria_parameters: Union[
3031
- Union[float, int], tuple[Union[float, int], Union[float, int]]
3032
- ],
3328
+ statement: Optional[str] = None,
3329
+ criteria_condition: Optional[Union[SqlAssertionCondition, str]] = None,
3330
+ criteria_parameters: Optional[
3331
+ Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]
3332
+ ] = None,
3033
3333
  incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
3034
3334
  tags: Optional[TagsInputType] = None,
3035
3335
  updated_by: Optional[Union[str, CorpUserUrn]] = None,
@@ -3055,11 +3355,22 @@ class AssertionsClient:
3055
3355
  urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
3056
3356
  display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
3057
3357
  enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved.
3058
- statement (str): The SQL statement to be used for the assertion.
3059
- criteria_type (Union[SqlAssertionType, str]): The type of sql assertion. Valid values are: "METRIC", "METRIC_CHANGE".
3060
- criteria_change_type (Optional[Union[SqlAssertionChangeType, str]]): The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are: "ABSOLUTE", "PERCENTAGE".
3061
- criteria_operator (Union[SqlAssertionOperator, str]): The operator to be used for the assertion. Valid values are: "GREATER_THAN", "LESS_THAN", "GREATER_THAN_OR_EQUAL_TO", "LESS_THAN_OR_EQUAL_TO", "EQUAL_TO", "NOT_EQUAL_TO", "BETWEEN".
3062
- criteria_parameters (Union[float, int, tuple[float, int]]): The parameters to be used for the assertion. This can be a single value or a tuple range. If the operator is "BETWEEN", the value is a tuple of two values, with format min, max. If the operator is not "BETWEEN", the value is a single value.
3358
+ statement (Optional[str]): The SQL statement to be used for the assertion. Required when creating a new assertion (urn=None), optional when updating an existing assertion.
3359
+ criteria_condition (Optional[Union[SqlAssertionCondition, str]]): The condition for the sql assertion. Required when creating a new assertion (urn=None), optional when updating an existing assertion. Valid values are:
3360
+ - "IS_EQUAL_TO" -> The metric value equals the threshold.
3361
+ - "IS_NOT_EQUAL_TO" -> The metric value does not equal the threshold.
3362
+ - "IS_GREATER_THAN" -> The metric value is greater than the threshold.
3363
+ - "IS_LESS_THAN" -> The metric value is less than the threshold.
3364
+ - "IS_WITHIN_A_RANGE" -> The metric value is within the specified range.
3365
+ - "GROWS_AT_MOST_ABSOLUTE" -> The metric growth is at most the threshold (absolute change).
3366
+ - "GROWS_AT_MOST_PERCENTAGE" -> The metric growth is at most the threshold (percentage change).
3367
+ - "GROWS_AT_LEAST_ABSOLUTE" -> The metric growth is at least the threshold (absolute change).
3368
+ - "GROWS_AT_LEAST_PERCENTAGE" -> The metric growth is at least the threshold (percentage change).
3369
+ - "GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The metric growth is within the specified range (absolute change).
3370
+ - "GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The metric growth is within the specified range (percentage change).
3371
+ criteria_parameters (Optional[Union[float, int, tuple[float, int]]]): The threshold parameters to be used for the assertion. Required when creating a new assertion (urn=None), optional when updating an existing assertion. This can be a single threshold value or a tuple range.
3372
+ - If the condition is range-based (IS_WITHIN_A_RANGE, GROWS_WITHIN_A_RANGE_ABSOLUTE, GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
3373
+ - For other conditions, the value is a single numeric threshold value.
3063
3374
  incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
3064
3375
  tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
3065
3376
  updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
@@ -3080,13 +3391,20 @@ class AssertionsClient:
3080
3391
  # 1. If urn is not set, create a new assertion
3081
3392
  if urn is None:
3082
3393
  logger.info("URN is not set, creating a new assertion")
3394
+
3395
+ # Validate required parameters for creation
3396
+ self._validate_sql_assertion_creation_params(
3397
+ statement, criteria_condition, criteria_parameters
3398
+ )
3399
+ # After validation, these cannot be None
3400
+ assert statement is not None
3401
+ assert criteria_condition is not None
3402
+ assert criteria_parameters is not None
3083
3403
  return self._create_sql_assertion(
3084
3404
  dataset_urn=dataset_urn,
3085
3405
  display_name=display_name,
3086
3406
  enabled=enabled if enabled is not None else True,
3087
- criteria_type=criteria_type,
3088
- criteria_change_type=criteria_change_type,
3089
- criteria_operator=criteria_operator,
3407
+ criteria_condition=criteria_condition,
3090
3408
  criteria_parameters=criteria_parameters,
3091
3409
  statement=statement,
3092
3410
  incident_behavior=incident_behavior,
@@ -3095,13 +3413,51 @@ class AssertionsClient:
3095
3413
  schedule=schedule,
3096
3414
  )
3097
3415
 
3098
- # 2. If urn is set, first validate the input:
3416
+ # 2.1 If urn is set, fetch missing required parameters from backend if needed:
3417
+ # NOTE: This is a tactical solution. The problem is we fetch twice (once for validation,
3418
+ # once for merge). Strategic solution would be to merge first, then validate after,
3419
+ # but that requires heavy refactor and is skipped for now.
3420
+ if urn is not None and (
3421
+ statement is None
3422
+ or criteria_condition is None
3423
+ or criteria_parameters is None
3424
+ ):
3425
+ # Fetch existing assertion to get missing required parameters
3426
+ maybe_assertion_entity, _, maybe_monitor_entity = (
3427
+ self._retrieve_assertion_and_monitor(
3428
+ {"dataset_urn": dataset_urn, "urn": urn}
3429
+ )
3430
+ )
3431
+
3432
+ if maybe_assertion_entity is not None and maybe_monitor_entity is not None:
3433
+ existing_assertion = SqlAssertion._from_entities(
3434
+ maybe_assertion_entity, maybe_monitor_entity
3435
+ )
3436
+ # Use existing values for missing required parameters
3437
+ if statement is None:
3438
+ statement = existing_assertion.statement
3439
+ if criteria_condition is None or criteria_parameters is None:
3440
+ criteria = existing_assertion._criteria
3441
+ if criteria_condition is None:
3442
+ criteria_condition = criteria.condition
3443
+ if criteria_parameters is None:
3444
+ criteria_parameters = criteria.parameters
3445
+
3446
+ self._validate_required_sql_fields_for_update(
3447
+ statement, criteria_condition, criteria_parameters, urn
3448
+ )
3449
+ assert (
3450
+ statement is not None
3451
+ and criteria_condition is not None
3452
+ and criteria_parameters is not None
3453
+ ), "Fields guaranteed non-None after validation"
3454
+
3455
+ # 2.2 Now validate the input with all required parameters:
3099
3456
  criteria = SqlAssertionCriteria(
3100
- type=criteria_type,
3101
- change_type=criteria_change_type,
3102
- operator=criteria_operator,
3457
+ condition=criteria_condition,
3103
3458
  parameters=criteria_parameters,
3104
3459
  )
3460
+
3105
3461
  assertion_input = _SqlAssertionInput(
3106
3462
  urn=urn,
3107
3463
  entity_client=self.client.entities,
@@ -3159,6 +3515,329 @@ class AssertionsClient:
3159
3515
 
3160
3516
  return SqlAssertion._from_entities(assertion_entity, monitor_entity)
3161
3517
 
3518
def _validate_required_column_fields_for_creation(
    self,
    column_name: Optional[str],
    metric_type: Optional[MetricInputType],
    operator: Optional[OperatorInputType],
) -> None:
    """Check the fields that must be supplied to create a column metric assertion.

    ``column_name``, ``metric_type`` and ``operator`` are passed, in that
    order, to ``self._validate_required_field`` with the field name and a
    shared context message.
    """
    creation_context = "when creating a new assertion (urn is None)"
    required_fields = (
        (column_name, "column_name"),
        (metric_type, "metric_type"),
        (operator, "operator"),
    )
    for field_value, field_name in required_fields:
        self._validate_required_field(field_value, field_name, creation_context)
3534
+
3535
def _validate_required_column_fields_for_update(
    self,
    column_name: Optional[str],
    metric_type: Optional[MetricInputType],
    operator: Optional[OperatorInputType],
    assertion_urn: Union[str, AssertionUrn],
) -> None:
    """Check the column metric fields after back-filling from the stored assertion.

    ``column_name``, ``metric_type`` and ``operator`` are passed, in that
    order, to ``self._validate_required_field``. The context message names
    ``assertion_urn`` so a failure points at the stored assertion.
    """
    missing_context = f"and not found in existing assertion {assertion_urn}. The existing assertion may be invalid or corrupted."
    for field_value, field_name in (
        (column_name, "column_name"),
        (metric_type, "metric_type"),
        (operator, "operator"),
    ):
        self._validate_required_field(field_value, field_name, missing_context)
3547
+
3548
def _create_column_metric_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    criteria_parameters: Optional[ColumnMetricAssertionParameters] = None,
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> ColumnMetricAssertion:
    """Build and persist a brand-new column metric assertion and its monitor.

    All arguments are keyword-only.

    Args:
        dataset_urn: Urn of the dataset to monitor.
        column_name: Name of the column to monitor.
        metric_type: Metric computed over the column.
        operator: Comparison operator applied to the metric.
        criteria_parameters: Threshold value(s) for the operator; may be
            omitted (defaults to None).
        display_name: Display name forwarded to the assertion input.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: Detection mechanism forwarded to the input.
        incident_behavior: Incident behavior forwarded to the input.
        tags: Tags forwarded to the input.
        created_by: Urn of the creating user. When None, a warning is logged
            and ``DEFAULT_CREATED_BY`` is used instead.
        schedule: Cron schedule (string or ``models.CronScheduleClass``).

    Returns:
        ColumnMetricAssertion: The assertion rebuilt from the two entities
        that were just sent to the entity client.
    """
    creation_time = datetime.now(timezone.utc)
    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    # The creator doubles as the updater, and one timestamp is used for both
    # created_at and updated_at, because this is the first write.
    new_input = _ColumnMetricAssertionInput(
        urn=None,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        criteria_parameters=criteria_parameters,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=created_by,
        created_at=creation_time,
        updated_by=created_by,
        updated_at=creation_time,
        schedule=schedule,
        gms_criteria_type_info=None,
    )
    assertion_entity, monitor_entity = new_input.to_assertion_and_monitor_entities()

    # Assertion first, monitor second: if the assertion write raises, the
    # monitor is never attempted.
    # TODO: Once the entity client has a delete method
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy),
    # wrap the monitor creation in a try-except and delete the assertion when
    # the monitor creation fails.
    for entity in (assertion_entity, monitor_entity):
        self.client.entities.create(entity)

    return ColumnMetricAssertion._from_entities(assertion_entity, monitor_entity)
3625
+
3626
def _retrieve_and_merge_column_metric_assertion_and_monitor(
    self,
    assertion_input: _ColumnMetricAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    criteria_parameters: Optional[ColumnMetricAssertionParameters],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[ColumnMetricAssertion, _ColumnMetricAssertionInput]:
    """Resolve a column metric upsert against what is already stored.

    The stored assertion and monitor are looked up through
    ``self._retrieve_assertion_and_monitor(assertion_input)``; then:

    * no stored assertion: a new assertion is created immediately and the
      resulting ``ColumnMetricAssertion`` is returned;
    * assertion but no monitor: a placeholder ``Monitor`` is built so the
      existing assertion object can still be constructed;
    * both present: the existing assertion object is built from them.

    In the latter two cases the caller's values are merged with the existing
    assertion and the merged ``_ColumnMetricAssertionInput`` is returned.

    Args:
        assertion_input: Validated input built from the caller's arguments.
        dataset_urn: Dataset urn supplied by the caller.
        column_name: Column to monitor.
        metric_type: Metric computed over the column.
        operator: Comparison operator applied to the metric.
        criteria_parameters: Threshold value(s) for the operator, if any.
        urn: Urn of the assertion being upserted.
        display_name: Caller-supplied display name, or None.
        enabled: Caller-supplied enabled flag, or None when not specified.
        detection_mechanism: Caller-supplied detection mechanism.
        incident_behavior: Caller-supplied incident behavior.
        tags: Caller-supplied tags.
        updated_by: User performing the upsert; used as creator when a new
            assertion has to be created.
        now_utc: Timestamp forwarded to the merge as the update time.
        schedule: Caller-supplied schedule.

    Returns:
        Either the freshly created ``ColumnMetricAssertion`` or the merged
        ``_ColumnMetricAssertionInput`` still to be persisted by the caller.

    Raises:
        SDKUsageError: If the stored assertion belongs to a different dataset
            than ``assertion_input``.
    """
    # 1. Retrieve any existing assertion and monitor entities.
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # Carry over the criteria type info recorded on the stored assertion, if any.
    gms_criteria_type_info = None
    if maybe_assertion_entity is not None:
        gms_criteria_type_info = (
            _HasColumnMetricFunctionality._get_criteria_parameters_with_type(
                maybe_assertion_entity
            )
        )

    # 2. Nothing stored under this urn: create a new assertion right away.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_column_metric_assertion(
            dataset_urn=dataset_urn,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            criteria_parameters=criteria_parameters,
            schedule=schedule,
            display_name=display_name,
            # Honour an explicit enabled=False. Previously the flag was not
            # forwarded, so the create helper's default (True) always won.
            enabled=enabled if enabled is not None else True,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # 3. Build the existing assertion object, using a placeholder monitor
    #    when only the assertion is stored.
    if maybe_monitor_entity:
        existing_assertion = ColumnMetricAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    else:
        # Only an explicit falsy, non-None flag yields INACTIVE; an
        # unspecified (None) flag is treated as ACTIVE.
        monitor_mode = (
            "INACTIVE" if (enabled is not None and not enabled) else "ACTIVE"
        )
        existing_assertion = ColumnMetricAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 4. Refuse to re-point an assertion at a different dataset.
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 5. Merge the existing assertion with the validated input.
    return self._merge_column_metric_input(
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        criteria_parameters=criteria_parameters,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        schedule=schedule,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
        gms_criteria_type_info=gms_criteria_type_info,
    )
3724
+
3725
def _merge_column_metric_input(  # TODO: Refactor
    self,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    criteria_parameters: Optional[ColumnMetricAssertionParameters],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    schedule: Optional[Union[str, models.CronScheduleClass]],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _ColumnMetricAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: ColumnMetricAssertion,
    gms_criteria_type_info: Optional[tuple] = None,
) -> _ColumnMetricAssertionInput:
    """Combine the caller's input with the stored assertion into one upsert input.

    Display name, enabled flag, schedule, detection mechanism, incident
    behavior and tags are each resolved through ``_merge_field``, which is
    given the caller's value, a field name, the validated input, the existing
    assertion and the value read off the stored entities (None when absent).
    Column, metric, operator and criteria parameters are taken from the
    arguments as-is. Creator and creation time come from the existing
    assertion when it has them, otherwise from ``assertion_input``.

    Returns:
        _ColumnMetricAssertionInput: The merged input, stamped with
        ``now_utc`` as its update time.
    """
    # Values currently stored on the entities; each stays None when the
    # entity, its info, or the attribute is missing.
    stored_display_name = None
    stored_tags = None
    stored_enabled = None
    stored_schedule = None
    stored_detection_mechanism = None
    stored_incident_behavior = None

    assertion_info = maybe_assertion_entity.info if maybe_assertion_entity else None
    if assertion_info:
        stored_display_name = getattr(assertion_info, "displayName", None)
        stored_tags = getattr(assertion_info, "tags", None)

    monitor_info = maybe_monitor_entity.info if maybe_monitor_entity else None
    if monitor_info:
        monitor_status = getattr(monitor_info, "status", None)
        if monitor_status:
            stored_enabled = monitor_status == "ACTIVE"
        monitor_config = getattr(monitor_info, "config", None)
        if monitor_config:
            stored_schedule = getattr(monitor_config, "schedule", None)
            stored_detection_mechanism = getattr(monitor_config, "executorId", None)
            stored_incident_behavior = getattr(monitor_config, "actions", None)

    # Resolve each mergeable field, in the same order as before. Note that
    # the enabled flag is merged under the field name "mode".
    final_display_name = _merge_field(
        display_name,
        "display_name",
        assertion_input,
        existing_assertion,
        stored_display_name,
    )
    final_enabled = _merge_field(
        enabled, "mode", assertion_input, existing_assertion, stored_enabled
    )
    final_schedule = _merge_field(
        schedule, "schedule", assertion_input, existing_assertion, stored_schedule
    )
    final_detection_mechanism = _merge_field(
        detection_mechanism,
        "detection_mechanism",
        assertion_input,
        existing_assertion,
        stored_detection_mechanism,
    )
    final_incident_behavior = _merge_field(
        incident_behavior,
        "incident_behavior",
        assertion_input,
        existing_assertion,
        stored_incident_behavior,
    )
    final_tags = _merge_field(
        tags, "tags", assertion_input, existing_assertion, stored_tags
    )

    # Keep the original authorship when the existing assertion records it.
    original_author = existing_assertion.created_by or assertion_input.created_by
    original_timestamp = existing_assertion.created_at or assertion_input.created_at

    return _ColumnMetricAssertionInput(
        urn=urn,
        entity_client=assertion_input.entity_client,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        criteria_parameters=criteria_parameters,
        display_name=final_display_name,
        enabled=final_enabled,
        detection_mechanism=final_detection_mechanism,
        incident_behavior=final_incident_behavior,
        tags=final_tags,
        created_by=original_author,
        created_at=original_timestamp,
        updated_by=assertion_input.updated_by,
        updated_at=now_utc,
        schedule=final_schedule,
        gms_criteria_type_info=gms_criteria_type_info,
    )
3840
+
3162
3841
 
3163
3842
  def _merge_field(
3164
3843
  input_field_value: Any,