acryl-datahub-cloud 0.3.12rc6__py3-none-any.whl → 0.3.12rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -10,6 +10,7 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
10
10
  SmartFreshnessAssertion,
11
11
  SmartVolumeAssertion,
12
12
  SqlAssertion,
13
+ VolumeAssertion,
13
14
  _AssertionPublic,
14
15
  )
15
16
  from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
@@ -38,9 +39,24 @@ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input
38
39
  _SmartColumnMetricAssertionInput,
39
40
  )
40
41
  from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
42
+ SqlAssertionChangeType,
41
43
  SqlAssertionCriteria,
44
+ SqlAssertionOperator,
45
+ SqlAssertionType,
42
46
  _SqlAssertionInput,
43
47
  )
48
+ from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
49
+ RowCountChange,
50
+ RowCountTotal,
51
+ VolumeAssertionDefinition,
52
+ VolumeAssertionDefinitionChangeKind,
53
+ VolumeAssertionDefinitionInputTypes,
54
+ VolumeAssertionDefinitionParameters,
55
+ VolumeAssertionDefinitionType,
56
+ VolumeAssertionOperator,
57
+ _VolumeAssertionDefinitionTypes,
58
+ _VolumeAssertionInput,
59
+ )
44
60
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
45
61
  from acryl_datahub_cloud.sdk.entities.monitor import Monitor
46
62
  from acryl_datahub_cloud.sdk.errors import SDKUsageError
@@ -494,6 +510,121 @@ class AssertionsClient:
494
510
 
495
511
  return merged_assertion_input
496
512
 
513
+ def _retrieve_and_merge_native_volume_assertion_and_monitor(
514
+ self,
515
+ assertion_input: _VolumeAssertionInput,
516
+ dataset_urn: Union[str, DatasetUrn],
517
+ urn: Union[str, AssertionUrn],
518
+ display_name: Optional[str],
519
+ enabled: Optional[bool],
520
+ detection_mechanism: DetectionMechanismInputTypes,
521
+ incident_behavior: Optional[
522
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
523
+ ],
524
+ tags: Optional[TagsInputType],
525
+ updated_by: Optional[Union[str, CorpUserUrn]],
526
+ now_utc: datetime,
527
+ schedule: Optional[Union[str, models.CronScheduleClass]],
528
+ definition: VolumeAssertionDefinitionInputTypes,
529
+ use_backend_definition: bool = False,
530
+ ) -> Union[VolumeAssertion, _VolumeAssertionInput]:
531
+ # 1. Retrieve any existing assertion and monitor entities:
532
+ maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
533
+ self._retrieve_assertion_and_monitor(assertion_input)
534
+ )
535
+
536
+ # 2.1 If the assertion and monitor entities exist, create an assertion object from them:
537
+ if maybe_assertion_entity and maybe_monitor_entity:
538
+ existing_assertion = VolumeAssertion._from_entities(
539
+ maybe_assertion_entity, maybe_monitor_entity
540
+ )
541
+ # 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
542
+ elif maybe_assertion_entity and not maybe_monitor_entity:
543
+ monitor_mode = (
544
+ "ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
545
+ )
546
+ existing_assertion = VolumeAssertion._from_entities(
547
+ maybe_assertion_entity,
548
+ Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
549
+ )
550
+ # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the created assertion:
551
+ elif not maybe_assertion_entity:
552
+ if use_backend_definition:
553
+ raise SDKUsageError(
554
+ f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
555
+ )
556
+ logger.info(
557
+ f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
558
+ )
559
+ # Extract criteria from definition to call the new signature
560
+ parsed_definition = VolumeAssertionDefinition.parse(definition)
561
+ assert isinstance(parsed_definition, (RowCountTotal, RowCountChange))
562
+ return self._create_volume_assertion(
563
+ dataset_urn=dataset_urn,
564
+ display_name=display_name,
565
+ detection_mechanism=detection_mechanism,
566
+ incident_behavior=incident_behavior,
567
+ tags=tags,
568
+ created_by=updated_by,
569
+ schedule=schedule,
570
+ criteria_type=parsed_definition.type,
571
+ criteria_change_type=parsed_definition.kind
572
+ if isinstance(parsed_definition, RowCountChange)
573
+ else None,
574
+ criteria_operator=parsed_definition.operator,
575
+ criteria_parameters=parsed_definition.parameters,
576
+ )
577
+
578
+ # 3. Check for any issues e.g. different dataset urns
579
+ if (
580
+ existing_assertion
581
+ and hasattr(existing_assertion, "dataset_urn")
582
+ and existing_assertion.dataset_urn != assertion_input.dataset_urn
583
+ ):
584
+ raise SDKUsageError(
585
+ f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
586
+ )
587
+
588
+ # 4. Handle definition: use backend definition if flag is set and backend has one
589
+ if use_backend_definition:
590
+ if maybe_assertion_entity is not None:
591
+ # Use definition from backend
592
+ backend_definition = VolumeAssertionDefinition.from_assertion(
593
+ maybe_assertion_entity
594
+ )
595
+ # Update the assertion_input with the real definition from backend
596
+ assertion_input.definition = backend_definition
597
+ effective_definition = backend_definition
598
+ logger.info("Using definition from backend assertion")
599
+ else:
600
+ # No backend assertion and no user-provided definition - this is an error
601
+ raise SDKUsageError(
602
+ f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
603
+ )
604
+ else:
605
+ # Use the already-parsed definition from assertion_input
606
+ effective_definition = assertion_input.definition
607
+
608
+ # 5. Merge the existing assertion with the validated input:
609
+ merged_assertion_input = self._merge_volume_input(
610
+ dataset_urn=dataset_urn,
611
+ urn=urn,
612
+ display_name=display_name,
613
+ enabled=enabled,
614
+ detection_mechanism=detection_mechanism,
615
+ incident_behavior=incident_behavior,
616
+ tags=tags,
617
+ now_utc=now_utc,
618
+ assertion_input=assertion_input,
619
+ maybe_assertion_entity=maybe_assertion_entity,
620
+ maybe_monitor_entity=maybe_monitor_entity,
621
+ existing_assertion=existing_assertion,
622
+ schedule=schedule,
623
+ definition=effective_definition,
624
+ )
625
+
626
+ return merged_assertion_input
627
+
497
628
  def _retrieve_and_merge_sql_assertion_and_monitor(
498
629
  self,
499
630
  assertion_input: _SqlAssertionInput,
@@ -538,7 +669,10 @@ class AssertionsClient:
538
669
  return self._create_sql_assertion(
539
670
  dataset_urn=dataset_urn,
540
671
  display_name=display_name,
541
- criteria=criteria,
672
+ criteria_type=criteria.type,
673
+ criteria_change_type=criteria.change_type,
674
+ criteria_operator=criteria.operator,
675
+ criteria_parameters=criteria.parameters,
542
676
  statement=statement,
543
677
  incident_behavior=incident_behavior,
544
678
  tags=tags,
@@ -562,8 +696,6 @@ class AssertionsClient:
562
696
  urn=urn,
563
697
  display_name=display_name,
564
698
  enabled=enabled,
565
- criteria=criteria,
566
- statement=statement,
567
699
  incident_behavior=incident_behavior,
568
700
  tags=tags,
569
701
  now_utc=now_utc,
@@ -571,6 +703,8 @@ class AssertionsClient:
571
703
  maybe_assertion_entity=maybe_assertion_entity,
572
704
  existing_assertion=existing_assertion,
573
705
  schedule=schedule,
706
+ criteria=criteria,
707
+ statement=statement,
574
708
  )
575
709
 
576
710
  return merged_assertion_input
@@ -867,6 +1001,116 @@ class AssertionsClient:
867
1001
  )
868
1002
  return merged_assertion_input
869
1003
 
1004
+ def _merge_volume_input(
1005
+ self,
1006
+ dataset_urn: Union[str, DatasetUrn],
1007
+ urn: Union[str, AssertionUrn],
1008
+ display_name: Optional[str],
1009
+ enabled: Optional[bool],
1010
+ detection_mechanism: DetectionMechanismInputTypes,
1011
+ incident_behavior: Optional[
1012
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1013
+ ],
1014
+ tags: Optional[TagsInputType],
1015
+ now_utc: datetime,
1016
+ assertion_input: _VolumeAssertionInput,
1017
+ maybe_assertion_entity: Optional[Assertion],
1018
+ maybe_monitor_entity: Optional[Monitor],
1019
+ existing_assertion: VolumeAssertion,
1020
+ schedule: Optional[Union[str, models.CronScheduleClass]],
1021
+ definition: Optional[_VolumeAssertionDefinitionTypes],
1022
+ ) -> _VolumeAssertionInput:
1023
+ """Merge the input with the existing assertion and monitor entities.
1024
+
1025
+ Args:
1026
+ dataset_urn: The urn of the dataset to be monitored.
1027
+ urn: The urn of the assertion.
1028
+ display_name: The display name of the assertion.
1029
+ enabled: Whether the assertion is enabled.
1030
+ detection_mechanism: The detection mechanism to be used for the assertion.
1031
+ incident_behavior: The incident behavior to be applied to the assertion.
1032
+ tags: The tags to be applied to the assertion.
1033
+ now_utc: The current UTC time from when the function is called.
1034
+ assertion_input: The validated input to the function.
1035
+ maybe_assertion_entity: The existing assertion entity from the DataHub instance.
1036
+ maybe_monitor_entity: The existing monitor entity from the DataHub instance.
1037
+ existing_assertion: The existing assertion from the DataHub instance.
1038
+ schedule: The schedule to be applied to the assertion.
1039
+ definition: The volume assertion definition to be applied to the assertion.
1040
+
1041
+ Returns:
1042
+ The merged assertion input.
1043
+ """
1044
+ merged_assertion_input = _VolumeAssertionInput(
1045
+ urn=urn,
1046
+ entity_client=self.client.entities,
1047
+ dataset_urn=dataset_urn,
1048
+ display_name=_merge_field(
1049
+ display_name,
1050
+ "display_name",
1051
+ assertion_input,
1052
+ existing_assertion,
1053
+ maybe_assertion_entity.description if maybe_assertion_entity else None,
1054
+ ),
1055
+ enabled=_merge_field(
1056
+ enabled,
1057
+ "enabled",
1058
+ assertion_input,
1059
+ existing_assertion,
1060
+ existing_assertion.mode == AssertionMode.ACTIVE
1061
+ if existing_assertion
1062
+ else None,
1063
+ ),
1064
+ schedule=_merge_field(
1065
+ schedule,
1066
+ "schedule",
1067
+ assertion_input,
1068
+ existing_assertion,
1069
+ existing_assertion.schedule if existing_assertion else None,
1070
+ ),
1071
+ detection_mechanism=_merge_field(
1072
+ detection_mechanism,
1073
+ "detection_mechanism",
1074
+ assertion_input,
1075
+ existing_assertion,
1076
+ VolumeAssertion._get_detection_mechanism(
1077
+ maybe_assertion_entity, maybe_monitor_entity, default=None
1078
+ )
1079
+ if maybe_assertion_entity and maybe_monitor_entity
1080
+ else None,
1081
+ ),
1082
+ incident_behavior=_merge_field(
1083
+ incident_behavior,
1084
+ "incident_behavior",
1085
+ assertion_input,
1086
+ existing_assertion,
1087
+ VolumeAssertion._get_incident_behavior(maybe_assertion_entity)
1088
+ if maybe_assertion_entity
1089
+ else None,
1090
+ ),
1091
+ tags=_merge_field(
1092
+ tags,
1093
+ "tags",
1094
+ assertion_input,
1095
+ existing_assertion,
1096
+ maybe_assertion_entity.tags if maybe_assertion_entity else None,
1097
+ ),
1098
+ definition=_merge_field(
1099
+ definition,
1100
+ "definition",
1101
+ assertion_input,
1102
+ existing_assertion,
1103
+ existing_assertion.definition if existing_assertion else None,
1104
+ ),
1105
+ created_by=existing_assertion.created_by
1106
+ or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
1107
+ created_at=existing_assertion.created_at
1108
+ or now_utc, # Override with the existing assertion's created_at or now if not set
1109
+ updated_by=assertion_input.updated_by, # Override with the input's updated_by
1110
+ updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
1111
+ )
1112
+ return merged_assertion_input
1113
+
870
1114
  def _merge_sql_input(
871
1115
  self,
872
1116
  dataset_urn: Union[str, DatasetUrn],
@@ -939,7 +1183,7 @@ class AssertionsClient:
939
1183
  "criteria",
940
1184
  assertion_input,
941
1185
  existing_assertion,
942
- existing_assertion.criteria if existing_assertion else None,
1186
+ existing_assertion._criteria if existing_assertion else None,
943
1187
  ),
944
1188
  statement=_merge_field(
945
1189
  statement,
@@ -1444,13 +1688,145 @@ class AssertionsClient:
1444
1688
  # raise e
1445
1689
  return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
1446
1690
 
1691
+ def _create_volume_assertion(
1692
+ self,
1693
+ *,
1694
+ dataset_urn: Union[str, DatasetUrn],
1695
+ display_name: Optional[str] = None,
1696
+ enabled: bool = True,
1697
+ detection_mechanism: DetectionMechanismInputTypes = None,
1698
+ incident_behavior: Optional[
1699
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1700
+ ] = None,
1701
+ tags: Optional[TagsInputType] = None,
1702
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
1703
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1704
+ criteria_type: Union[str, VolumeAssertionDefinitionType],
1705
+ criteria_change_type: Optional[
1706
+ Union[str, VolumeAssertionDefinitionChangeKind]
1707
+ ] = None,
1708
+ criteria_operator: Union[str, VolumeAssertionOperator],
1709
+ criteria_parameters: VolumeAssertionDefinitionParameters,
1710
+ ) -> VolumeAssertion:
1711
+ """Create a volume assertion.
1712
+
1713
+ Note: keyword arguments are required.
1714
+
1715
+ The created assertion will use the default daily schedule ("0 0 * * *") unless a schedule is provided.
1716
+
1717
+ Args:
1718
+ dataset_urn: The urn of the dataset to be monitored.
1719
+ display_name: The display name of the assertion. If not provided, a random display
1720
+ name will be generated.
1721
+ enabled: Whether the assertion is enabled. Defaults to True.
1722
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
1723
+ schema is recommended. Valid values are:
1724
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
1725
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
1726
+ - {
1727
+ "type": "last_modified_column",
1728
+ "column_name": "last_modified",
1729
+ "additional_filter": "last_modified > '2021-01-01'",
1730
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
1731
+ additional_filter='last_modified > 2021-01-01')
1732
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
1733
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1734
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1735
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
1736
+ tags: The tags to be applied to the assertion. Valid values are:
1737
+ - a list of strings (strings will be converted to TagUrn objects)
1738
+ - a list of TagUrn objects
1739
+ - a list of TagAssociationClass objects
1740
+ created_by: Optional urn of the user who created the assertion. The format is
1741
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1742
+ The default is the datahub system user.
1743
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
1744
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
1745
+ schedule will be used. The schedule determines when the assertion will be evaluated.
1746
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1747
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
1748
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1749
+ criteria_type: The type of volume assertion. Must be either VolumeAssertionDefinitionType.ROW_COUNT_TOTAL or VolumeAssertionDefinitionType.ROW_COUNT_CHANGE.
1750
+ Raw string values are also accepted: "ROW_COUNT_TOTAL" or "ROW_COUNT_CHANGE".
1751
+ criteria_change_type: Required when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. Must be either VolumeAssertionDefinitionChangeKind.ABSOLUTE
1752
+ or VolumeAssertionDefinitionChangeKind.PERCENT. Optional (ignored) when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_TOTAL.
1753
+ Raw string values are also accepted: "ABSOLUTE" or "PERCENTAGE".
1754
+ criteria_operator: The comparison operator for the assertion. Must be a VolumeAssertionOperator value:
1755
+ - VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO
1756
+ - VolumeAssertionOperator.LESS_THAN_OR_EQUAL_TO
1757
+ - VolumeAssertionOperator.BETWEEN
1758
+ Raw string values are also accepted: "GREATER_THAN_OR_EQUAL_TO", "LESS_THAN_OR_EQUAL_TO", "BETWEEN".
1759
+ criteria_parameters: The parameters for the assertion. For single-value operators
1760
+ (GREATER_THAN_OR_EQUAL_TO, LESS_THAN_OR_EQUAL_TO), provide a single number.
1761
+ For BETWEEN operator, provide a tuple of two numbers (min_value, max_value).
1762
+
1763
+ Examples:
1764
+ - For single value: 100 or 50.5
1765
+ - For BETWEEN: (10, 100) or (5.0, 15.5)
1766
+
1767
+ Returns:
1768
+ VolumeAssertion: The created assertion.
1769
+ """
1770
+ _print_experimental_warning()
1771
+ now_utc = datetime.now(timezone.utc)
1772
+ if created_by is None:
1773
+ logger.warning(
1774
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1775
+ )
1776
+ created_by = DEFAULT_CREATED_BY
1777
+
1778
+ # Create definition from individual criteria parameters
1779
+ # The dictionary object will be fully validated down in the _VolumeAssertionInput class
1780
+ definition: dict[str, Any] = {
1781
+ "type": criteria_type,
1782
+ "operator": criteria_operator,
1783
+ "parameters": criteria_parameters,
1784
+ }
1785
+ if criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE:
1786
+ definition["kind"] = criteria_change_type
1787
+
1788
+ assertion_input = _VolumeAssertionInput(
1789
+ urn=None,
1790
+ entity_client=self.client.entities,
1791
+ dataset_urn=dataset_urn,
1792
+ display_name=display_name,
1793
+ enabled=enabled,
1794
+ detection_mechanism=detection_mechanism,
1795
+ incident_behavior=incident_behavior,
1796
+ tags=tags,
1797
+ created_by=created_by,
1798
+ created_at=now_utc,
1799
+ updated_by=created_by,
1800
+ updated_at=now_utc,
1801
+ schedule=schedule,
1802
+ definition=definition,
1803
+ )
1804
+ assertion_entity, monitor_entity = (
1805
+ assertion_input.to_assertion_and_monitor_entities()
1806
+ )
1807
+ # If assertion creation fails, we won't try to create the monitor
1808
+ self.client.entities.create(assertion_entity)
1809
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
1810
+ # try:
1811
+ self.client.entities.create(monitor_entity)
1812
+ # except Exception as e:
1813
+ # logger.error(f"Error creating monitor: {e}")
1814
+ # self.client.entities.delete(assertion_entity)
1815
+ # raise e
1816
+ return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
1817
+
1447
1818
  def _create_sql_assertion(
1448
1819
  self,
1449
1820
  *,
1450
1821
  dataset_urn: Union[str, DatasetUrn],
1451
1822
  display_name: Optional[str] = None,
1452
1823
  enabled: bool = True,
1453
- criteria: SqlAssertionCriteria,
1824
+ criteria_type: Union[SqlAssertionType, str],
1825
+ criteria_change_type: Optional[Union[SqlAssertionChangeType, str]] = None,
1826
+ criteria_operator: Union[SqlAssertionOperator, str],
1827
+ criteria_parameters: Union[
1828
+ Union[float, int], tuple[Union[float, int], Union[float, int]]
1829
+ ],
1454
1830
  statement: str,
1455
1831
  incident_behavior: Optional[
1456
1832
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
@@ -1466,25 +1842,23 @@ class AssertionsClient:
1466
1842
  display_name: The display name of the assertion. If not provided, a random display
1467
1843
  name will be generated.
1468
1844
  enabled: Whether the assertion is enabled. Defaults to True.
1469
- criteria: The criteria to be used for the assertion. This is of type SqlAssertionCriteria. It has the following fields:
1470
- - type: The type of sql assertion. Valid values are:
1471
- - "METRIC" -> Looks at the current value of the metric.
1472
- - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
1473
- - change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
1474
- - "ABSOLUTE" -> Looks at the absolute change in the metric.
1475
- - "PERCENTAGE" -> Looks at the percentage change in the metric.
1476
- - operator: The operator to be used for the assertion. Valid values are:
1477
- - "GREATER_THAN" -> The metric value is greater than the threshold.
1478
- - "LESS_THAN" -> The metric value is less than the threshold.
1479
- - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
1480
- - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
1481
- - "EQUAL_TO" -> The metric value is equal to the threshold.
1482
- - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
1483
- - "BETWEEN" -> The metric value is between the two thresholds.
1484
- - parameters: The parameters to be used for the assertion. This is of type SqlAssertionParameters. It has the following fields:
1485
- - value: The value of the metric. This can be a single value or a tuple range.
1486
- - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
1487
- - If the operator is not "BETWEEN", the value is a single value.
1845
+ criteria_type: The type of sql assertion. Valid values are:
1846
+ - "METRIC" -> Looks at the current value of the metric.
1847
+ - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
1848
+ criteria_change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
1849
+ - "ABSOLUTE" -> Looks at the absolute change in the metric.
1850
+ - "PERCENTAGE" -> Looks at the percentage change in the metric.
1851
+ criteria_operator: The operator to be used for the assertion. Valid values are:
1852
+ - "GREATER_THAN" -> The metric value is greater than the threshold.
1853
+ - "LESS_THAN" -> The metric value is less than the threshold.
1854
+ - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
1855
+ - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
1856
+ - "EQUAL_TO" -> The metric value is equal to the threshold.
1857
+ - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
1858
+ - "BETWEEN" -> The metric value is between the two thresholds.
1859
+ criteria_parameters: The parameters to be used for the assertion. This can be a single value or a tuple range.
1860
+ - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
1861
+ - If the operator is not "BETWEEN", the value is a single value.
1488
1862
  statement: The statement to be used for the assertion.
1489
1863
  incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1490
1864
  - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
@@ -1511,6 +1885,12 @@ class AssertionsClient:
1511
1885
  f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1512
1886
  )
1513
1887
  created_by = DEFAULT_CREATED_BY
1888
+ criteria = SqlAssertionCriteria(
1889
+ type=criteria_type,
1890
+ change_type=criteria_change_type,
1891
+ operator=criteria_operator,
1892
+ parameters=criteria_parameters,
1893
+ )
1514
1894
  assertion_input = _SqlAssertionInput(
1515
1895
  urn=None,
1516
1896
  entity_client=self.client.entities,
@@ -2606,6 +2986,254 @@ class AssertionsClient:
2606
2986
 
2607
2987
  return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
2608
2988
 
2989
+ def sync_volume_assertion(
2990
+ self,
2991
+ *,
2992
+ dataset_urn: Union[str, DatasetUrn],
2993
+ urn: Optional[Union[str, AssertionUrn]] = None,
2994
+ display_name: Optional[str] = None,
2995
+ enabled: Optional[bool] = None,
2996
+ detection_mechanism: DetectionMechanismInputTypes = None,
2997
+ incident_behavior: Optional[
2998
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
2999
+ ] = None,
3000
+ tags: Optional[TagsInputType] = None,
3001
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
3002
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
3003
+ criteria_type: Optional[Union[str, VolumeAssertionDefinitionType]] = None,
3004
+ criteria_change_type: Optional[
3005
+ Union[str, VolumeAssertionDefinitionChangeKind]
3006
+ ] = None,
3007
+ criteria_operator: Optional[Union[str, VolumeAssertionOperator]] = None,
3008
+ criteria_parameters: Optional[VolumeAssertionDefinitionParameters] = None,
3009
+ ) -> VolumeAssertion:
3010
+ """Upsert and merge a volume assertion.
3011
+
3012
+ Note: keyword arguments are required.
3013
+
3014
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
3015
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
3016
+ be updated if the input value is not None. If the input value is None, the existing value
3017
+ will be preserved. The input value can be un-set, e.g. by passing an empty list or
3018
+ empty string.
3019
+
3020
+ Schedule behavior:
3021
+ - Create case: Uses default daily schedule ("0 0 * * *") or provided schedule
3022
+ - Update case: Uses existing schedule or provided schedule.
3023
+
3024
+ Args:
3025
+ dataset_urn: The urn of the dataset to be monitored.
3026
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
3027
+ will be _created_ in the DataHub instance.
3028
+ display_name: The display name of the assertion. If not provided, a random display name
3029
+ will be generated.
3030
+ enabled: Whether the assertion is enabled. If not provided, the existing value
3031
+ will be preserved.
3032
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
3033
+ schema is recommended. Valid values are:
3034
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
3035
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
3036
+ - {
3037
+ "type": "last_modified_column",
3038
+ "column_name": "last_modified",
3039
+ "additional_filter": "last_modified > '2021-01-01'",
3040
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
3041
+ additional_filter='last_modified > 2021-01-01')
3042
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
3043
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
3044
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
3045
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
3046
+ tags: The tags to be applied to the assertion. Valid values are:
3047
+ - a list of strings (strings will be converted to TagUrn objects)
3048
+ - a list of TagUrn objects
3049
+ - a list of TagAssociationClass objects
3050
+ updated_by: Optional urn of the user who updated the assertion. The format is
3051
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
3052
+ The default is the datahub system user.
3053
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
3054
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
3055
+ schedule will be used. The schedule determines when the assertion will be evaluated.
3056
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
3057
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
3058
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
3059
+ criteria_type: Optional type of volume assertion. Must be either VolumeAssertionDefinitionType.ROW_COUNT_TOTAL or VolumeAssertionDefinitionType.ROW_COUNT_CHANGE.
3060
+ Raw string values are also accepted: "ROW_COUNT_TOTAL" or "ROW_COUNT_CHANGE".
3061
+ If not provided, the existing definition from the backend will be preserved (for update operations).
3062
+ Required when creating a new assertion (when urn is None).
3063
+ criteria_change_type: Optional change type for row count change assertions. Must be either VolumeAssertionDefinitionChangeKind.ABSOLUTE
3064
+ or VolumeAssertionDefinitionChangeKind.PERCENT. Required when criteria_type is VolumeAssertionDefinitionType.ROW_COUNT_CHANGE. Ignored when criteria_type
3065
+ is VolumeAssertionDefinitionType.ROW_COUNT_TOTAL. If not provided, existing value is preserved for updates.
3066
+ Raw string values are also accepted: "ABSOLUTE" or "PERCENTAGE".
3067
+ criteria_operator: Optional comparison operator for the assertion. Must be a VolumeAssertionOperator value:
3068
+ - VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO
3069
+ - VolumeAssertionOperator.LESS_THAN_OR_EQUAL_TO
3070
+ - VolumeAssertionOperator.BETWEEN
3071
+ Raw string values are also accepted: "GREATER_THAN_OR_EQUAL_TO", "LESS_THAN_OR_EQUAL_TO", "BETWEEN".
3072
+ If not provided, existing value is preserved for updates. Required when creating a new assertion.
3073
+ criteria_parameters: Optional parameters for the assertion. For single-value operators
3074
+ (GREATER_THAN_OR_EQUAL_TO, LESS_THAN_OR_EQUAL_TO), provide a single number.
3075
+ For BETWEEN operator, provide a tuple of two numbers (min_value, max_value).
3076
+ If not provided, existing value is preserved for updates. Required when creating a new assertion.
3077
+
3078
+ Examples:
3079
+ - For single value: 100 or 50.5
3080
+ - For BETWEEN: (10, 100) or (5.0, 15.5)
3081
+
3082
+ Returns:
3083
+ VolumeAssertion: The created or updated assertion.
3084
+ """
3085
+ _print_experimental_warning()
3086
+ now_utc = datetime.now(timezone.utc)
3087
+
3088
+ if updated_by is None:
3089
+ logger.warning(
3090
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
3091
+ )
3092
+ updated_by = DEFAULT_CREATED_BY
3093
+
3094
+ # 1. Validate criteria parameters if any are provided
3095
+ if (
3096
+ criteria_type is not None
3097
+ or criteria_operator is not None
3098
+ or criteria_parameters is not None
3099
+ ) and (
3100
+ criteria_type is None
3101
+ or criteria_operator is None
3102
+ or criteria_parameters is None
3103
+ or (
3104
+ criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
3105
+ and criteria_change_type is None
3106
+ )
3107
+ ):
3108
+ raise SDKUsageError(
3109
+ "When providing volume assertion criteria, all required parameters must be provided "
3110
+ "(criteria_type, criteria_operator, criteria_parameters must be provided, "
3111
+ "and criteria_change_type is required when criteria_type is 'row_count_change')"
3112
+ )
3113
+
3114
+ # Assert the invariant: if criteria_type is provided, all required parameters are provided
3115
+ assert criteria_type is None or (
3116
+ criteria_operator is not None
3117
+ and criteria_parameters is not None
3118
+ and (
3119
+ criteria_type != VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
3120
+ or criteria_change_type is not None
3121
+ )
3122
+ ), "criteria fields already validated"
3123
+
3124
+ # 2. If urn is not set, create a new assertion
3125
+ if urn is None:
3126
+ if criteria_type is None:
3127
+ raise SDKUsageError(
3128
+ "Volume assertion criteria are required when creating a new assertion"
3129
+ )
3130
+ logger.info("URN is not set, creating a new assertion")
3131
+ # Type narrowing: we know these are not None because of validation above
3132
+ assert criteria_operator is not None
3133
+ assert criteria_parameters is not None
3134
+ return self._create_volume_assertion(
3135
+ dataset_urn=dataset_urn,
3136
+ display_name=display_name,
3137
+ enabled=enabled if enabled is not None else True,
3138
+ detection_mechanism=detection_mechanism,
3139
+ incident_behavior=incident_behavior,
3140
+ tags=tags,
3141
+ created_by=updated_by,
3142
+ schedule=schedule,
3143
+ criteria_type=criteria_type,
3144
+ criteria_change_type=criteria_change_type,
3145
+ criteria_operator=criteria_operator,
3146
+ criteria_parameters=criteria_parameters,
3147
+ )
3148
+
3149
+ # 2. If urn is set, prepare definition for validation
3150
+ # If criteria parameters are provided, create definition from them
3151
+ # Otherwise, we use a temporary default definition if None is provided, just to pass the _VolumeAssertionInput validation.
3152
+ # However, we keep track of this in the use_backend_definition flag, so we can later
3153
+ # fail if there is no definition in backend (basically, there is no assertion). That would mean that
3154
+ # this is a creation case and the user missed the definition parameter, which is required.
3155
+ # Likely this pattern never happened before because there is no publicly documented default definition
3156
+ # that we can use as fallback.
3157
+ if criteria_type is not None:
3158
+ # Create definition from individual criteria parameters
3159
+ temp_definition: dict[str, Any] = {
3160
+ "type": criteria_type,
3161
+ "operator": criteria_operator,
3162
+ "parameters": criteria_parameters,
3163
+ }
3164
+
3165
+ if criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE:
3166
+ temp_definition["kind"] = criteria_change_type
3167
+
3168
+ use_backend_definition = False
3169
+ else:
3170
+ # No criteria provided, use backend definition
3171
+ use_backend_definition = True
3172
+ temp_definition = {
3173
+ "type": VolumeAssertionDefinitionType.ROW_COUNT_TOTAL,
3174
+ "operator": VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO,
3175
+ "parameters": 0, # Temporary placeholder
3176
+ }
3177
+
3178
+ # 3. Create assertion input with effective definition
3179
+ assertion_input = _VolumeAssertionInput(
3180
+ urn=urn,
3181
+ dataset_urn=dataset_urn,
3182
+ entity_client=self.client.entities,
3183
+ detection_mechanism=detection_mechanism,
3184
+ incident_behavior=incident_behavior,
3185
+ tags=tags,
3186
+ created_by=updated_by, # This will be overridden by the actual created_by
3187
+ created_at=now_utc, # This will be overridden by the actual created_at
3188
+ updated_by=updated_by,
3189
+ updated_at=now_utc,
3190
+ schedule=schedule,
3191
+ definition=temp_definition,
3192
+ )
3193
+
3194
+ # 4. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
3195
+ # if the assertion does not exist:
3196
+ merged_assertion_input_or_created_assertion = (
3197
+ self._retrieve_and_merge_native_volume_assertion_and_monitor(
3198
+ assertion_input=assertion_input,
3199
+ dataset_urn=dataset_urn,
3200
+ urn=urn,
3201
+ display_name=display_name,
3202
+ enabled=enabled,
3203
+ detection_mechanism=detection_mechanism,
3204
+ definition=temp_definition,
3205
+ use_backend_definition=use_backend_definition,
3206
+ incident_behavior=incident_behavior,
3207
+ tags=tags,
3208
+ updated_by=updated_by,
3209
+ now_utc=now_utc,
3210
+ schedule=schedule,
3211
+ )
3212
+ )
3213
+
3214
+ # Return early if we created a new assertion in the merge:
3215
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
3216
+ # We know this is the correct type because we passed the assertion_class parameter
3217
+ assert isinstance(
3218
+ merged_assertion_input_or_created_assertion, VolumeAssertion
3219
+ )
3220
+ return merged_assertion_input_or_created_assertion
3221
+
3222
+ # 5. Upsert the assertion and monitor entities:
3223
+ assertion_entity, monitor_entity = (
3224
+ merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
3225
+ )
3226
+ # If assertion upsert fails, we won't try to upsert the monitor
3227
+ self.client.entities.upsert(assertion_entity)
3228
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
3229
+ # try:
3230
+ self.client.entities.upsert(monitor_entity)
3231
+ # except Exception as e:
3232
+ # logger.error(f"Error upserting monitor: {e}")
3233
+ # self.client.entities.delete(assertion_entity)
3234
+ # raise e
3235
+ return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
3236
+
2609
3237
  def sync_sql_assertion(
2610
3238
  self,
2611
3239
  *,
@@ -2614,7 +3242,12 @@ class AssertionsClient:
2614
3242
  display_name: Optional[str] = None,
2615
3243
  enabled: Optional[bool] = None,
2616
3244
  statement: str,
2617
- criteria: SqlAssertionCriteria,
3245
+ criteria_type: Union[SqlAssertionType, str],
3246
+ criteria_change_type: Optional[Union[SqlAssertionChangeType, str]] = None,
3247
+ criteria_operator: Union[SqlAssertionOperator, str],
3248
+ criteria_parameters: Union[
3249
+ Union[float, int], tuple[Union[float, int], Union[float, int]]
3250
+ ],
2618
3251
  incident_behavior: Optional[
2619
3252
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
2620
3253
  ] = None,
@@ -2644,25 +3277,23 @@ class AssertionsClient:
2644
3277
  will be generated.
2645
3278
  enabled: Whether the assertion is enabled. If not provided, the existing value
2646
3279
  will be preserved.
2647
- criteria: The criteria to be used for the assertion. This is of type SqlAssertionCriteria. It has the following fields:
2648
- - type: The type of sql assertion. Valid values are:
2649
- - "METRIC" -> Looks at the current value of the metric.
2650
- - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
2651
- - change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
2652
- - "ABSOLUTE" -> Looks at the absolute change in the metric.
2653
- - "PERCENTAGE" -> Looks at the percentage change in the metric.
2654
- - operator: The operator to be used for the assertion. Valid values are:
2655
- - "GREATER_THAN" -> The metric value is greater than the threshold.
2656
- - "LESS_THAN" -> The metric value is less than the threshold.
2657
- - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
2658
- - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
2659
- - "EQUAL_TO" -> The metric value is equal to the threshold.
2660
- - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
2661
- - "BETWEEN" -> The metric value is between the two thresholds.
2662
- - parameters: The parameters to be used for the assertion. This is of type SqlAssertionParameters. It has the following fields:
2663
- - value: The value of the metric. This can be a single value or a tuple range.
2664
- - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
2665
- - If the operator is not "BETWEEN", the value is a single value.
3280
+ criteria_type: The type of sql assertion. Valid values are:
3281
+ - "METRIC" -> Looks at the current value of the metric.
3282
+ - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
3283
+ criteria_change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
3284
+ - "ABSOLUTE" -> Looks at the absolute change in the metric.
3285
+ - "PERCENTAGE" -> Looks at the percentage change in the metric.
3286
+ criteria_operator: The operator to be used for the assertion. Valid values are:
3287
+ - "GREATER_THAN" -> The metric value is greater than the threshold.
3288
+ - "LESS_THAN" -> The metric value is less than the threshold.
3289
+ - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
3290
+ - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
3291
+ - "EQUAL_TO" -> The metric value is equal to the threshold.
3292
+ - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
3293
+ - "BETWEEN" -> The metric value is between the two thresholds.
3294
+ criteria_parameters: The parameters to be used for the assertion. This can be a single value or a tuple range.
3295
+ - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
3296
+ - If the operator is not "BETWEEN", the value is a single value.
2666
3297
  statement: The SQL statement to be used for the assertion.
2667
3298
  - "SELECT COUNT(*) FROM table WHERE column > 100"
2668
3299
  incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
@@ -2701,7 +3332,10 @@ class AssertionsClient:
2701
3332
  dataset_urn=dataset_urn,
2702
3333
  display_name=display_name,
2703
3334
  enabled=enabled if enabled is not None else True,
2704
- criteria=criteria,
3335
+ criteria_type=criteria_type,
3336
+ criteria_change_type=criteria_change_type,
3337
+ criteria_operator=criteria_operator,
3338
+ criteria_parameters=criteria_parameters,
2705
3339
  statement=statement,
2706
3340
  incident_behavior=incident_behavior,
2707
3341
  tags=tags,
@@ -2710,6 +3344,12 @@ class AssertionsClient:
2710
3344
  )
2711
3345
 
2712
3346
  # 2. If urn is set, first validate the input:
3347
+ criteria = SqlAssertionCriteria(
3348
+ type=criteria_type,
3349
+ change_type=criteria_change_type,
3350
+ operator=criteria_operator,
3351
+ parameters=criteria_parameters,
3352
+ )
2713
3353
  assertion_input = _SqlAssertionInput(
2714
3354
  urn=urn,
2715
3355
  entity_client=self.client.entities,