acryl-datahub-cloud 0.3.12rc4__py3-none-any.whl → 0.3.12rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -4,13 +4,17 @@ import logging
4
4
  from datetime import datetime, timezone
5
5
  from typing import TYPE_CHECKING, Any, Optional, Union
6
6
 
7
- from acryl_datahub_cloud.sdk.assertion import (
7
+ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
8
8
  AssertionMode,
9
9
  FreshnessAssertion,
10
10
  SmartFreshnessAssertion,
11
11
  SmartVolumeAssertion,
12
+ SqlAssertion,
12
13
  _AssertionPublic,
13
14
  )
15
+ from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
16
+ SmartColumnMetricAssertion,
17
+ )
14
18
  from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
15
19
  AssertionIncidentBehavior,
16
20
  DetectionMechanismInputTypes,
@@ -24,6 +28,19 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
24
28
  from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
25
29
  _FreshnessAssertionInput,
26
30
  )
31
+ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
32
+ MetricInputType,
33
+ OperatorInputType,
34
+ RangeInputType,
35
+ RangeTypeInputType,
36
+ ValueInputType,
37
+ ValueTypeInputType,
38
+ _SmartColumnMetricAssertionInput,
39
+ )
40
+ from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
41
+ SqlAssertionCriteria,
42
+ _SqlAssertionInput,
43
+ )
27
44
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
28
45
  from acryl_datahub_cloud.sdk.entities.monitor import Monitor
29
46
  from acryl_datahub_cloud.sdk.errors import SDKUsageError
@@ -477,6 +494,87 @@ class AssertionsClient:
477
494
 
478
495
  return merged_assertion_input
479
496
 
497
+ def _retrieve_and_merge_sql_assertion_and_monitor(
498
+ self,
499
+ assertion_input: _SqlAssertionInput,
500
+ dataset_urn: Union[str, DatasetUrn],
501
+ urn: Union[str, AssertionUrn],
502
+ display_name: Optional[str],
503
+ enabled: Optional[bool],
504
+ criteria: SqlAssertionCriteria,
505
+ statement: str,
506
+ incident_behavior: Optional[
507
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
508
+ ],
509
+ tags: Optional[TagsInputType],
510
+ updated_by: Optional[Union[str, CorpUserUrn]],
511
+ now_utc: datetime,
512
+ schedule: Optional[Union[str, models.CronScheduleClass]],
513
+ ) -> Union[SqlAssertion, _SqlAssertionInput]:
514
+ # 1. Retrieve any existing assertion and monitor entities:
515
+ maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
516
+ self._retrieve_assertion_and_monitor(assertion_input)
517
+ )
518
+
519
+ # 2.1 If the assertion and monitor entities exist, create an assertion object from them:
520
+ if maybe_assertion_entity and maybe_monitor_entity:
521
+ existing_assertion = SqlAssertion._from_entities(
522
+ maybe_assertion_entity, maybe_monitor_entity
523
+ )
524
+ # 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
525
+ elif maybe_assertion_entity and not maybe_monitor_entity:
526
+ monitor_mode = (
527
+ "ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
528
+ )
529
+ existing_assertion = SqlAssertion._from_entities(
530
+ maybe_assertion_entity,
531
+ Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
532
+ )
533
+ # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
534
+ elif not maybe_assertion_entity:
535
+ logger.info(
536
+ f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
537
+ )
538
+ return self._create_sql_assertion(
539
+ dataset_urn=dataset_urn,
540
+ display_name=display_name,
541
+ criteria=criteria,
542
+ statement=statement,
543
+ incident_behavior=incident_behavior,
544
+ tags=tags,
545
+ created_by=updated_by,
546
+ schedule=schedule,
547
+ )
548
+
549
+ # 3. Check for any issues e.g. different dataset urns
550
+ if (
551
+ existing_assertion
552
+ and hasattr(existing_assertion, "dataset_urn")
553
+ and existing_assertion.dataset_urn != assertion_input.dataset_urn
554
+ ):
555
+ raise SDKUsageError(
556
+ f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
557
+ )
558
+
559
+ # 4. Merge the existing assertion with the validated input:
560
+ merged_assertion_input = self._merge_sql_input(
561
+ dataset_urn=dataset_urn,
562
+ urn=urn,
563
+ display_name=display_name,
564
+ enabled=enabled,
565
+ criteria=criteria,
566
+ statement=statement,
567
+ incident_behavior=incident_behavior,
568
+ tags=tags,
569
+ now_utc=now_utc,
570
+ assertion_input=assertion_input,
571
+ maybe_assertion_entity=maybe_assertion_entity,
572
+ existing_assertion=existing_assertion,
573
+ schedule=schedule,
574
+ )
575
+
576
+ return merged_assertion_input
577
+
480
578
  def _retrieve_assertion_and_monitor(
481
579
  self,
482
580
  assertion_input: _AssertionInput,
@@ -769,6 +867,112 @@ class AssertionsClient:
769
867
  )
770
868
  return merged_assertion_input
771
869
 
870
+ def _merge_sql_input(
871
+ self,
872
+ dataset_urn: Union[str, DatasetUrn],
873
+ urn: Union[str, AssertionUrn],
874
+ display_name: Optional[str],
875
+ enabled: Optional[bool],
876
+ criteria: SqlAssertionCriteria,
877
+ statement: str,
878
+ incident_behavior: Optional[
879
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
880
+ ],
881
+ tags: Optional[TagsInputType],
882
+ now_utc: datetime,
883
+ assertion_input: _SqlAssertionInput,
884
+ maybe_assertion_entity: Optional[Assertion],
885
+ # not used: maybe_monitor_entity: Optional[Monitor], as schedule is already set in existing_assertion
886
+ existing_assertion: SqlAssertion,
887
+ schedule: Optional[Union[str, models.CronScheduleClass]],
888
+ ) -> _SqlAssertionInput:
889
+ """Merge the input with the existing assertion and monitor entities.
890
+
891
+ Args:
892
+ dataset_urn: The urn of the dataset to be monitored.
893
+ urn: The urn of the assertion.
894
+ display_name: The display name of the assertion.
895
+ enabled: Whether the assertion is enabled.
896
+ criteria: The criteria of the assertion.
897
+ statement: The statement of the assertion.
898
+ incident_behavior: The incident behavior to be applied to the assertion.
899
+ tags: The tags to be applied to the assertion.
900
+ now_utc: The current UTC time from when the function is called.
901
+ assertion_input: The validated input to the function.
902
+ maybe_assertion_entity: The existing assertion entity from the DataHub instance.
903
+ existing_assertion: The existing assertion from the DataHub instance.
904
+ schedule: The schedule to be applied to the assertion.
905
+
906
+ Returns:
907
+ The merged assertion input.
908
+ """
909
+ merged_assertion_input = _SqlAssertionInput(
910
+ urn=urn,
911
+ entity_client=self.client.entities,
912
+ dataset_urn=dataset_urn,
913
+ display_name=_merge_field(
914
+ display_name,
915
+ "display_name",
916
+ assertion_input,
917
+ existing_assertion,
918
+ maybe_assertion_entity.description if maybe_assertion_entity else None,
919
+ ),
920
+ enabled=_merge_field(
921
+ enabled,
922
+ "enabled",
923
+ assertion_input,
924
+ existing_assertion,
925
+ existing_assertion.mode == AssertionMode.ACTIVE
926
+ if existing_assertion
927
+ else None,
928
+ ),
929
+ schedule=_merge_field(
930
+ schedule,
931
+ "schedule",
932
+ assertion_input,
933
+ existing_assertion,
934
+ # TODO should this use maybe_monitor_entity.schedule?
935
+ existing_assertion.schedule if existing_assertion else None,
936
+ ),
937
+ criteria=_merge_field(
938
+ criteria,
939
+ "criteria",
940
+ assertion_input,
941
+ existing_assertion,
942
+ existing_assertion.criteria if existing_assertion else None,
943
+ ),
944
+ statement=_merge_field(
945
+ statement,
946
+ "statement",
947
+ assertion_input,
948
+ existing_assertion,
949
+ existing_assertion.statement if existing_assertion else None,
950
+ ),
951
+ incident_behavior=_merge_field(
952
+ incident_behavior,
953
+ "incident_behavior",
954
+ assertion_input,
955
+ existing_assertion,
956
+ SqlAssertion._get_incident_behavior(maybe_assertion_entity)
957
+ if maybe_assertion_entity
958
+ else None,
959
+ ),
960
+ tags=_merge_field(
961
+ tags,
962
+ "tags",
963
+ assertion_input,
964
+ existing_assertion,
965
+ maybe_assertion_entity.tags if maybe_assertion_entity else None,
966
+ ),
967
+ created_by=existing_assertion.created_by
968
+ or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
969
+ created_at=existing_assertion.created_at
970
+ or now_utc, # Override with the existing assertion's created_at or now if not set
971
+ updated_by=assertion_input.updated_by, # Override with the input's updated_by
972
+ updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
973
+ )
974
+ return merged_assertion_input
975
+
772
976
  def _merge_smart_volume_input(
773
977
  self,
774
978
  dataset_urn: Union[str, DatasetUrn],
@@ -1240,6 +1444,103 @@ class AssertionsClient:
1240
1444
  # raise e
1241
1445
  return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
1242
1446
 
1447
+ def _create_sql_assertion(
1448
+ self,
1449
+ *,
1450
+ dataset_urn: Union[str, DatasetUrn],
1451
+ display_name: Optional[str] = None,
1452
+ enabled: bool = True,
1453
+ criteria: SqlAssertionCriteria,
1454
+ statement: str,
1455
+ incident_behavior: Optional[
1456
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1457
+ ],
1458
+ tags: Optional[TagsInputType],
1459
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
1460
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1461
+ ) -> SqlAssertion:
1462
+ """Create a sql assertion.
1463
+
1464
+ Args:
1465
+ dataset_urn: The urn of the dataset to be monitored.
1466
+ display_name: The display name of the assertion. If not provided, a random display
1467
+ name will be generated.
1468
+ enabled: Whether the assertion is enabled. Defaults to True.
1469
+ criteria: The criteria to be used for the assertion. This is of type SqlAssertionCriteria. It has the following fields:
1470
+ - type: The type of sql assertion. Valid values are:
1471
+ - "METRIC" -> Looks at the current value of the metric.
1472
+ - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
1473
+ - change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
1474
+ - "ABSOLUTE" -> Looks at the absolute change in the metric.
1475
+ - "PERCENTAGE" -> Looks at the percentage change in the metric.
1476
+ - operator: The operator to be used for the assertion. Valid values are:
1477
+ - "GREATER_THAN" -> The metric value is greater than the threshold.
1478
+ - "LESS_THAN" -> The metric value is less than the threshold.
1479
+ - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
1480
+ - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
1481
+ - "EQUAL_TO" -> The metric value is equal to the threshold.
1482
+ - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
1483
+ - "BETWEEN" -> The metric value is between the two thresholds.
1484
+ - parameters: The parameters to be used for the assertion. This is of type SqlAssertionParameters. It has the following fields:
1485
+ - value: The value of the metric. This can be a single value or a tuple range.
1486
+ - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
1487
+ - If the operator is not "BETWEEN", the value is a single value.
1488
+ statement: The statement to be used for the assertion.
1489
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1490
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1491
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
1492
+ tags: The tags to be applied to the assertion. Valid values are:
1493
+ - a list of strings (strings will be converted to TagUrn objects)
1494
+ - a list of TagUrn objects
1495
+ - a list of TagAssociationClass objects
1496
+ created_by: Optional urn of the user who created the assertion. The format is
1497
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1498
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
1499
+ schedule will be used. The schedule determines when the assertion will be evaluated.
1500
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1501
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
1502
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1503
+
1504
+ Returns:
1505
+ SqlAssertion: The created assertion.
1506
+ """
1507
+ _print_experimental_warning()
1508
+ now_utc = datetime.now(timezone.utc)
1509
+ if created_by is None:
1510
+ logger.warning(
1511
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1512
+ )
1513
+ created_by = DEFAULT_CREATED_BY
1514
+ assertion_input = _SqlAssertionInput(
1515
+ urn=None,
1516
+ entity_client=self.client.entities,
1517
+ dataset_urn=dataset_urn,
1518
+ display_name=display_name,
1519
+ enabled=enabled,
1520
+ criteria=criteria,
1521
+ statement=statement,
1522
+ incident_behavior=incident_behavior,
1523
+ tags=tags,
1524
+ created_by=created_by,
1525
+ created_at=now_utc,
1526
+ updated_by=created_by,
1527
+ updated_at=now_utc,
1528
+ schedule=schedule,
1529
+ )
1530
+ assertion_entity, monitor_entity = (
1531
+ assertion_input.to_assertion_and_monitor_entities()
1532
+ )
1533
+ # If assertion creation fails, we won't try to create the monitor
1534
+ self.client.entities.create(assertion_entity)
1535
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
1536
+ # try:
1537
+ self.client.entities.create(monitor_entity)
1538
+ # except Exception as e:
1539
+ # logger.error(f"Error creating monitor: {e}")
1540
+ # self.client.entities.delete(assertion_entity)
1541
+ # raise e
1542
+ return SqlAssertion._from_entities(assertion_entity, monitor_entity)
1543
+
1243
1544
  def sync_smart_volume_assertion(
1244
1545
  self,
1245
1546
  *,
@@ -1427,60 +1728,773 @@ class AssertionsClient:
1427
1728
 
1428
1729
  return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
1429
1730
 
1430
- def sync_freshness_assertion(
1731
+ def sync_smart_column_metric_assertion(
1431
1732
  self,
1432
1733
  *,
1433
1734
  dataset_urn: Union[str, DatasetUrn],
1735
+ column_name: str,
1736
+ metric_type: MetricInputType,
1737
+ operator: OperatorInputType,
1738
+ value: Optional[ValueInputType] = None,
1739
+ value_type: Optional[ValueTypeInputType] = None,
1740
+ range: Optional[RangeInputType] = None,
1741
+ range_type: Optional[RangeTypeInputType] = None,
1434
1742
  urn: Optional[Union[str, AssertionUrn]] = None,
1435
1743
  display_name: Optional[str] = None,
1436
1744
  enabled: Optional[bool] = None,
1437
1745
  detection_mechanism: DetectionMechanismInputTypes = None,
1746
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
1747
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
1748
+ training_data_lookback_days: Optional[int] = None,
1438
1749
  incident_behavior: Optional[
1439
1750
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1440
1751
  ] = None,
1441
1752
  tags: Optional[TagsInputType] = None,
1442
1753
  updated_by: Optional[Union[str, CorpUserUrn]] = None,
1443
- freshness_schedule_check_type: Optional[
1444
- Union[str, models.FreshnessAssertionScheduleTypeClass]
1445
- ] = None,
1446
1754
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1447
- lookback_window: Optional[TimeWindowSizeInputTypes] = None,
1448
- ) -> FreshnessAssertion:
1449
- """Upsert and merge a freshness assertion.
1755
+ ) -> SmartColumnMetricAssertion:
1756
+ """Upsert and merge a smart column metric assertion.
1450
1757
 
1451
1758
  Note: keyword arguments are required.
1452
1759
 
1453
1760
  Upsert and merge is a combination of create and update. If the assertion does not exist,
1454
- it will be created. If it does exist, it will be updated. Existing assertion fields will
1455
- be updated if the input value is not None. If the input value is None, the existing value
1761
+ it will be created. If it does exist, it will be updated.
1762
+
1763
+ Existing assertion fields will be updated if the input value is not None. If the input value is None, the existing value
1456
1764
  will be preserved. The input value can be un-set, e.g. by passing an empty list or
1457
1765
  empty string.
1458
1766
 
1459
- Schedule behavior:
1460
- - Create case: Uses default daily schedule (\"0 0 * * *\") or provided schedule
1461
- - Update case: Uses existing schedule or provided schedule.
1462
-
1463
1767
  Args:
1464
- dataset_urn: The urn of the dataset to be monitored.
1768
+ dataset_urn: The urn of the dataset to be monitored. (Required)
1769
+ column_name: The name of the column to be monitored. (Required)
1770
+ metric_type: The type of the metric to be monitored. (Required)
1771
+ operator: The operator to be used for the assertion. (Required)
1772
+ value: The value to be used for the assertion. (Required if operator requires a value)
1773
+ value_type: The type of the value to be used for the assertion. (Required if operator requires a value)
1774
+ range: The range to be used for the assertion. (Required if operator requires a range)
1775
+ range_type: The type of the range to be used for the assertion. (Required if operator requires a range)
1465
1776
  urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
1466
1777
  will be _created_ in the DataHub instance.
1467
1778
  display_name: The display name of the assertion. If not provided, a random display name
1468
1779
  will be generated.
1469
1780
  enabled: Whether the assertion is enabled. If not provided, the existing value
1470
1781
  will be preserved.
1471
- detection_mechanism: The detection mechanism to be used for the assertion. Information
1472
- schema is recommended. Valid values are:
1473
- - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
1474
- - "audit_log" or DetectionMechanism.AUDIT_LOG
1475
- - {
1476
- "type": "last_modified_column",
1782
+ detection_mechanism: The detection mechanism to be used for the assertion. Valid values are:
1783
+ - All rows query datahub dataset profile:
1784
+ - "all_rows_query_datahub_dataset_profile" or DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
1785
+
1786
+ - All rows query:
1787
+ - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY
1788
+ - with optional additional filter: DetectionMechanism.ALL_ROWS_QUERY(additional_filter='last_modified > 2021-01-01')
1789
+ - Or as a dict: {
1790
+ "type": "all_rows_query",
1791
+ "additional_filter": "last_modified > '2021-01-01'", # optional
1792
+ }
1793
+
1794
+ - Changed rows query:
1795
+ - For changed rows query, you need to pass a supported column type (Number, Date or Time)
1796
+ - DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified')
1797
+ - With optional additional filter: DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
1798
+ - Or as a dict: {
1799
+ "type": "changed_rows_query",
1477
1800
  "column_name": "last_modified",
1478
- "additional_filter": "last_modified > '2021-01-01'",
1479
- } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
1480
- additional_filter='last_modified > 2021-01-01')
1481
- - {
1482
- "type": "high_watermark_column",
1483
- "column_name": "id",
1801
+ "additional_filter": "last_modified > '2021-01-01'", # optional
1802
+ }
1803
+
1804
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
1805
+ - "low" or InferenceSensitivity.LOW
1806
+ - "medium" or InferenceSensitivity.MEDIUM
1807
+ - "high" or InferenceSensitivity.HIGH
1808
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
1809
+ fixed range exclusion windows are supported. Valid values are:
1810
+ - from datetime.datetime objects: {
1811
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
1812
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
1813
+ }
1814
+ - from string datetimes: {
1815
+ "start": "2025-01-01T00:00:00",
1816
+ "end": "2025-01-02T00:00:00",
1817
+ }
1818
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
1819
+ start=datetime(2025, 1, 1, 0, 0, 0),
1820
+ end=datetime(2025, 1, 2, 0, 0, 0)
1821
+ )
1822
+ training_data_lookback_days: The training data lookback days to be applied to the
1823
+ assertion as an integer.
1824
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1825
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1826
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
1827
+ tags: The tags to be applied to the assertion. Valid values are:
1828
+ - a list of strings (strings will be converted to TagUrn objects)
1829
+ - a list of TagUrn objects
1830
+ - a list of TagAssociationClass objects
1831
+ updated_by: Optional urn of the user who updated the assertion. The format is
1832
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1833
+ The default is the datahub system user.
1834
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
1835
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
1836
+ schedule of every 6 hours will be used. The schedule determines when the assertion will be evaluated.
1837
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1838
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
1839
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1840
+
1841
+ Returns:
1842
+ SmartColumnMetricAssertion: The created or updated assertion.
1843
+ """
1844
+ _print_experimental_warning()
1845
+ now_utc = datetime.now(timezone.utc)
1846
+
1847
+ if updated_by is None:
1848
+ logger.warning(
1849
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1850
+ )
1851
+ updated_by = DEFAULT_CREATED_BY
1852
+
1853
+ # 1. If urn is not set, create a new assertion
1854
+ if urn is None:
1855
+ logger.info("URN is not set, creating a new assertion")
1856
+ return self._create_smart_column_metric_assertion(
1857
+ dataset_urn=dataset_urn,
1858
+ column_name=column_name,
1859
+ metric_type=metric_type,
1860
+ operator=operator,
1861
+ value=value,
1862
+ value_type=value_type,
1863
+ range=range,
1864
+ range_type=range_type,
1865
+ display_name=display_name,
1866
+ enabled=enabled if enabled is not None else True,
1867
+ detection_mechanism=detection_mechanism,
1868
+ sensitivity=sensitivity,
1869
+ exclusion_windows=exclusion_windows,
1870
+ training_data_lookback_days=training_data_lookback_days,
1871
+ incident_behavior=incident_behavior,
1872
+ tags=tags,
1873
+ created_by=updated_by,
1874
+ schedule=schedule,
1875
+ )
1876
+
1877
+ # 2. If urn is set, first validate the input:
1878
+ assertion_input = _SmartColumnMetricAssertionInput(
1879
+ urn=urn,
1880
+ entity_client=self.client.entities,
1881
+ dataset_urn=dataset_urn,
1882
+ column_name=column_name,
1883
+ metric_type=metric_type,
1884
+ operator=operator,
1885
+ value=value,
1886
+ value_type=value_type,
1887
+ range=range,
1888
+ range_type=range_type,
1889
+ display_name=display_name,
1890
+ detection_mechanism=detection_mechanism,
1891
+ sensitivity=sensitivity,
1892
+ exclusion_windows=exclusion_windows,
1893
+ training_data_lookback_days=training_data_lookback_days,
1894
+ incident_behavior=incident_behavior,
1895
+ tags=tags,
1896
+ created_by=updated_by, # This will be overridden by the actual created_by
1897
+ created_at=now_utc, # This will be overridden by the actual created_at
1898
+ updated_by=updated_by,
1899
+ updated_at=now_utc,
1900
+ schedule=schedule,
1901
+ )
1902
+
1903
+ # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
1904
+ # if the assertion does not exist:
1905
+ merged_assertion_input_or_created_assertion = (
1906
+ self._retrieve_and_merge_smart_column_metric_assertion_and_monitor(
1907
+ assertion_input=assertion_input,
1908
+ dataset_urn=dataset_urn,
1909
+ column_name=column_name,
1910
+ metric_type=metric_type,
1911
+ operator=operator,
1912
+ value=value,
1913
+ value_type=value_type,
1914
+ range=range,
1915
+ range_type=range_type,
1916
+ urn=urn,
1917
+ display_name=display_name,
1918
+ enabled=enabled,
1919
+ detection_mechanism=detection_mechanism,
1920
+ sensitivity=sensitivity,
1921
+ exclusion_windows=exclusion_windows,
1922
+ training_data_lookback_days=training_data_lookback_days,
1923
+ incident_behavior=incident_behavior,
1924
+ tags=tags,
1925
+ updated_by=updated_by,
1926
+ now_utc=now_utc,
1927
+ schedule=schedule,
1928
+ )
1929
+ )
1930
+
1931
+ # Return early if we created a new assertion in the merge:
1932
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
1933
+ # Narrow the type for the type checker: an assertion created during the merge step is expected to be a SmartColumnMetricAssertion
1934
+ assert isinstance(
1935
+ merged_assertion_input_or_created_assertion, SmartColumnMetricAssertion
1936
+ )
1937
+ return merged_assertion_input_or_created_assertion
1938
+
1939
+ # 4. Upsert the assertion and monitor entities:
1940
+ assertion_entity, monitor_entity = (
1941
+ merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
1942
+ )
1943
+ # If assertion upsert fails, we won't try to upsert the monitor
1944
+ self.client.entities.upsert(assertion_entity)
1945
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
1946
+ # try:
1947
+ self.client.entities.upsert(monitor_entity)
1948
+ # except Exception as e:
1949
+ # logger.error(f"Error upserting monitor: {e}")
1950
+ # self.client.entities.delete(assertion_entity)
1951
+ # raise e
1952
+
1953
+ return SmartColumnMetricAssertion._from_entities(
1954
+ assertion_entity, monitor_entity
1955
+ )
1956
+
1957
+ def _create_smart_column_metric_assertion(
1958
+ self,
1959
+ *,
1960
+ dataset_urn: Union[str, DatasetUrn],
1961
+ column_name: str,
1962
+ metric_type: MetricInputType,
1963
+ operator: OperatorInputType,
1964
+ value: Optional[ValueInputType] = None,
1965
+ value_type: Optional[ValueTypeInputType] = None,
1966
+ range: Optional[RangeInputType] = None,
1967
+ range_type: Optional[RangeTypeInputType] = None,
1968
+ display_name: Optional[str] = None,
1969
+ enabled: bool = True,
1970
+ detection_mechanism: DetectionMechanismInputTypes = None,
1971
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
1972
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
1973
+ training_data_lookback_days: Optional[int] = None,
1974
+ incident_behavior: Optional[
1975
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1976
+ ] = None,
1977
+ tags: Optional[TagsInputType] = None,
1978
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
1979
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1980
+ ) -> SmartColumnMetricAssertion:
1981
+ """Create a smart column metric assertion.
1982
+
1983
+ Note: keyword arguments are required.
1984
+
1985
+ Args:
1986
+ dataset_urn: The urn of the dataset to be monitored. (Required)
1987
+ column_name: The name of the column to be monitored. (Required)
1988
+ metric_type: The type of the metric to be monitored. (Required)
1989
+ operator: The operator to be used for the assertion. (Required)
1990
+ value: The value to be used for the assertion. (Required if operator requires a value)
1991
+ value_type: The type of the value to be used for the assertion. (Required if operator requires a value)
1992
+ range: The range to be used for the assertion. (Required if operator requires a range)
1993
+ range_type: The type of the range to be used for the assertion. (Required if operator requires a range)
1994
+ display_name: The display name of the assertion. If not provided, a random display
1995
+ name will be generated.
1996
+ enabled: Whether the assertion is enabled. Defaults to True.
1997
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
1998
+ schema is recommended. Valid values are:
1999
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
2000
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
2001
+ - {
2002
+ "type": "last_modified_column",
2003
+ "column_name": "last_modified",
2004
+ "additional_filter": "last_modified > '2021-01-01'",
2005
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
2006
+ additional_filter='last_modified > 2021-01-01')
2007
+ - {
2008
+ "type": "high_watermark_column",
2009
+ "column_name": "id",
2010
+ "additional_filter": "id > 1000",
2011
+ } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
2012
+ additional_filter='id > 1000')
2013
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
2014
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
2015
+ - "low" or InferenceSensitivity.LOW
2016
+ - "medium" or InferenceSensitivity.MEDIUM
2017
+ - "high" or InferenceSensitivity.HIGH
2018
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
2019
+ fixed range exclusion windows are supported. Valid values are:
2020
+ - from datetime.datetime objects: {
2021
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
2022
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
2023
+ }
2024
+ - from string datetimes: {
2025
+ "start": "2025-01-01T00:00:00",
2026
+ "end": "2025-01-02T00:00:00",
2027
+ }
2028
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
2029
+ start=datetime(2025, 1, 1, 0, 0, 0),
2030
+ end=datetime(2025, 1, 2, 0, 0, 0)
2031
+ )
2032
+ training_data_lookback_days: The training data lookback days to be applied to the
2033
+ assertion as an integer.
2034
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
2035
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
2036
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
2037
+ tags: The tags to be applied to the assertion. Valid values are:
2038
+ - a list of strings (strings will be converted to TagUrn objects)
2039
+ - a list of TagUrn objects
2040
+ - a list of TagAssociationClass objects
2041
+ created_by: Optional urn of the user who created the assertion. The format is
2042
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
2043
+ The default is the datahub system user.
2044
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
2045
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
2046
+ schedule will be used. The schedule determines when the assertion will be evaluated.
2047
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
2048
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
2049
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
2050
+
2051
+ Returns:
2052
+ SmartColumnMetricAssertion: The created assertion.
2053
+ """
2054
+ _print_experimental_warning()
2055
+ now_utc = datetime.now(timezone.utc)
2056
+ if created_by is None:
2057
+ logger.warning(
2058
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
2059
+ )
2060
+ created_by = DEFAULT_CREATED_BY
2061
+ assertion_input = _SmartColumnMetricAssertionInput(
2062
+ urn=None,
2063
+ entity_client=self.client.entities,
2064
+ dataset_urn=dataset_urn,
2065
+ column_name=column_name,
2066
+ metric_type=metric_type,
2067
+ operator=operator,
2068
+ value=value,
2069
+ value_type=value_type,
2070
+ range=range,
2071
+ range_type=range_type,
2072
+ display_name=display_name,
2073
+ enabled=enabled,
2074
+ detection_mechanism=detection_mechanism,
2075
+ sensitivity=sensitivity,
2076
+ exclusion_windows=exclusion_windows,
2077
+ training_data_lookback_days=training_data_lookback_days,
2078
+ incident_behavior=incident_behavior,
2079
+ tags=tags,
2080
+ created_by=created_by,
2081
+ created_at=now_utc,
2082
+ updated_by=created_by,
2083
+ updated_at=now_utc,
2084
+ schedule=schedule,
2085
+ )
2086
+ assertion_entity, monitor_entity = (
2087
+ assertion_input.to_assertion_and_monitor_entities()
2088
+ )
2089
+ # If assertion creation fails, we won't try to create the monitor
2090
+ self.client.entities.create(assertion_entity)
2091
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
2092
+ # try:
2093
+ self.client.entities.create(monitor_entity)
2094
+ # except Exception as e:
2095
+ # logger.error(f"Error creating monitor: {e}")
2096
+ # self.client.entities.delete(assertion_entity)
2097
+ # raise e
2098
+ return SmartColumnMetricAssertion._from_entities(
2099
+ assertion_entity, monitor_entity
2100
+ )
2101
+
2102
def _retrieve_and_merge_smart_column_metric_assertion_and_monitor(
    self,
    assertion_input: _SmartColumnMetricAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType],
    value_type: Optional[ValueTypeInputType],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeInputType],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SmartColumnMetricAssertion, _SmartColumnMetricAssertionInput]:
    """Look up the stored assertion/monitor and merge the new input into them.

    The entities are looked up via ``_retrieve_assertion_and_monitor``. Three
    situations are handled:

    * No assertion entity is found: a new assertion is created through
      ``_create_smart_column_metric_assertion`` and returned directly.
    * The assertion entity is found but its monitor is not: a placeholder
      ``Monitor`` is built so that an existing assertion object can still be
      constructed for the merge.
    * Both are found: they are used as-is.

    Args:
        assertion_input: The already validated input for this sync call.
        dataset_urn: The urn of the dataset to be monitored.
        column_name: The name of the column to be monitored.
        metric_type: The type of the metric to be monitored.
        operator: The operator to be used for the assertion.
        value: The value to be used for the assertion.
        value_type: The type of the value to be used for the assertion.
        range: The range to be used for the assertion.
        range_type: The type of the range to be used for the assertion.
        urn: The urn of the assertion being synced.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled; ``None`` means "not specified".
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        updated_by: The user performing the sync; used as ``created_by`` when a
            new assertion has to be created.
        now_utc: The current UTC time from when the sync was started.
        schedule: The cron schedule (string or ``CronScheduleClass``).

    Returns:
        The newly created ``SmartColumnMetricAssertion`` when no assertion
        entity existed, otherwise the merged ``_SmartColumnMetricAssertionInput``
        that the caller is expected to upsert.

    Raises:
        SDKUsageError: If the existing assertion belongs to a different dataset
            than the one given in the validated input.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 Nothing stored under this urn yet: create a brand-new assertion and
    # hand it straight back to the caller.
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_smart_column_metric_assertion(
            dataset_urn=dataset_urn,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            value=value,
            value_type=value_type,
            range=range,
            range_type=range_type,
            schedule=schedule,
            display_name=display_name,
            # Forward the caller's choice so that ``enabled=False`` is not
            # silently dropped; unset falls back to True, mirroring the
            # create path of sync_sql_assertion.
            enabled=enabled if enabled is not None else True,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # 2.2 The assertion exists; use the stored monitor when there is one ...
    if maybe_monitor_entity:
        existing_assertion = SmartColumnMetricAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    # 2.3 ... otherwise build a placeholder monitor so the assertion object
    # can still be constructed. Only an explicit falsy ``enabled`` yields
    # INACTIVE; an unset value keeps the monitor ACTIVE.
    else:
        monitor_mode = (
            "INACTIVE" if enabled is not None and not enabled else "ACTIVE"
        )
        existing_assertion = SmartColumnMetricAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Merge the existing assertion with the validated input:
    return self._merge_smart_column_metric_input(
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        schedule=schedule,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )
2210
+
2211
def _merge_smart_column_metric_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType],
    value_type: Optional[ValueTypeInputType],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeInputType],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    schedule: Optional[Union[str, models.CronScheduleClass]],
    now_utc: datetime,
    assertion_input: _SmartColumnMetricAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartColumnMetricAssertion,
) -> _SmartColumnMetricAssertionInput:
    """Build a new assertion input by merging caller input with stored state.

    Every mergeable field is resolved through ``_merge_field``, which is given
    the raw caller value, the validated input, the existing assertion and the
    value currently stored on the assertion/monitor entity (or ``None`` when
    the corresponding entity is missing). ``urn`` and ``dataset_urn`` are
    passed through unmerged.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        column_name: The name of the column to be monitored.
        metric_type: The type of the metric to be monitored.
        operator: The operator to be used for the assertion.
        value: The value to be used for the assertion.
        value_type: The type of the value to be used for the assertion.
        range: The range to be used for the assertion.
        range_type: The type of the range to be used for the assertion.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be applied
            to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        schedule: The schedule to be applied to the assertion.
        now_utc: The current UTC time from when the function is called.
        assertion_input: The validated input to the function.
        maybe_assertion_entity: The existing assertion entity from the DataHub
            instance, if any.
        maybe_monitor_entity: The existing monitor entity from the DataHub
            instance, if any.
        existing_assertion: The existing assertion from the DataHub instance.

    Returns:
        The merged assertion input.
    """

    def _resolve(field_name: str, input_value: Any, stored_value: Any) -> Any:
        # Thin wrapper: only the three per-field arguments vary between calls.
        return _merge_field(
            input_field_value=input_value,
            input_field_name=field_name,
            validated_assertion_input=assertion_input,
            validated_existing_assertion=existing_assertion,
            existing_entity_value=stored_value,
        )

    # NOTE: the stored values are deliberately computed inline, field by field,
    # so that getters and merges run in the same order as the fields are listed.
    return _SmartColumnMetricAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        # --- fields stored on the assertion entity ---
        column_name=_resolve(
            "column_name",
            column_name,
            SmartColumnMetricAssertion._get_column_name(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        metric_type=_resolve(
            "metric_type",
            metric_type,
            SmartColumnMetricAssertion._get_metric_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        operator=_resolve(
            "operator",
            operator,
            SmartColumnMetricAssertion._get_operator(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        value=_resolve(
            "value",
            value,
            SmartColumnMetricAssertion._get_value(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        value_type=_resolve(
            "value_type",
            value_type,
            SmartColumnMetricAssertion._get_value_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        range=_resolve(
            "range",
            range,
            SmartColumnMetricAssertion._get_range(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        range_type=_resolve(
            "range_type",
            range_type,
            SmartColumnMetricAssertion._get_range_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        display_name=_resolve(
            "display_name",
            display_name,
            maybe_assertion_entity.description if maybe_assertion_entity else None,
        ),
        # --- fields read from the existing assertion object ---
        enabled=_resolve(
            "enabled",
            enabled,
            existing_assertion.mode == AssertionMode.ACTIVE
            if existing_assertion
            else None,
        ),
        schedule=_resolve(
            "schedule",
            schedule,
            existing_assertion.schedule if existing_assertion else None,
        ),
        # --- fields that need the monitor entity ---
        detection_mechanism=_resolve(
            "detection_mechanism",
            detection_mechanism,
            SmartColumnMetricAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None,
        ),
        sensitivity=_resolve(
            "sensitivity",
            sensitivity,
            maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
        ),
        exclusion_windows=_resolve(
            "exclusion_windows",
            exclusion_windows,
            maybe_monitor_entity.exclusion_windows if maybe_monitor_entity else None,
        ),
        training_data_lookback_days=_resolve(
            "training_data_lookback_days",
            training_data_lookback_days,
            maybe_monitor_entity.training_data_lookback_days
            if maybe_monitor_entity
            else None,
        ),
        incident_behavior=_resolve(
            "incident_behavior",
            incident_behavior,
            SmartColumnMetricAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None,
        ),
        tags=_resolve(
            "tags",
            tags,
            maybe_assertion_entity.tags if maybe_assertion_entity else None,
        ),
        # Creation audit info is taken from the existing assertion, falling
        # back to the default creator / the current time when it is not set.
        created_by=existing_assertion.created_by or DEFAULT_CREATED_BY,
        created_at=existing_assertion.created_at or now_utc,
        # Update audit info always comes from the validated input.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
2443
+
2444
+ def sync_freshness_assertion(
2445
+ self,
2446
+ *,
2447
+ dataset_urn: Union[str, DatasetUrn],
2448
+ urn: Optional[Union[str, AssertionUrn]] = None,
2449
+ display_name: Optional[str] = None,
2450
+ enabled: Optional[bool] = None,
2451
+ detection_mechanism: DetectionMechanismInputTypes = None,
2452
+ incident_behavior: Optional[
2453
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
2454
+ ] = None,
2455
+ tags: Optional[TagsInputType] = None,
2456
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
2457
+ freshness_schedule_check_type: Optional[
2458
+ Union[str, models.FreshnessAssertionScheduleTypeClass]
2459
+ ] = None,
2460
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
2461
+ lookback_window: Optional[TimeWindowSizeInputTypes] = None,
2462
+ ) -> FreshnessAssertion:
2463
+ """Upsert and merge a freshness assertion.
2464
+
2465
+ Note: keyword arguments are required.
2466
+
2467
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
2468
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
2469
+ be updated if the input value is not None. If the input value is None, the existing value
2470
+ will be preserved. The input value can be un-set e.g. by passing an empty list or
2471
+ empty string.
2472
+
2473
+ Schedule behavior:
2474
+ - Create case: Uses default daily schedule (\"0 0 * * *\") or provided schedule
2475
+ - Update case: Uses existing schedule or provided schedule.
2476
+
2477
+ Args:
2478
+ dataset_urn: The urn of the dataset to be monitored.
2479
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
2480
+ will be _created_ in the DataHub instance.
2481
+ display_name: The display name of the assertion. If not provided, a random display name
2482
+ will be generated.
2483
+ enabled: Whether the assertion is enabled. If not provided, the existing value
2484
+ will be preserved.
2485
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
2486
+ schema is recommended. Valid values are:
2487
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
2488
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
2489
+ - {
2490
+ "type": "last_modified_column",
2491
+ "column_name": "last_modified",
2492
+ "additional_filter": "last_modified > '2021-01-01'",
2493
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
2494
+ additional_filter='last_modified > 2021-01-01')
2495
+ - {
2496
+ "type": "high_watermark_column",
2497
+ "column_name": "id",
1484
2498
  "additional_filter": "id > 1000",
1485
2499
  } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
1486
2500
  additional_filter='id > 1000')
@@ -1592,6 +2606,167 @@ class AssertionsClient:
1592
2606
 
1593
2607
  return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
1594
2608
 
2609
def sync_sql_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    statement: str,
    criteria: SqlAssertionCriteria,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SqlAssertion:
    """Upsert and merge a sql assertion.

    Note: keyword arguments are required.

    "Upsert and merge" combines create and update. When the assertion does not
    exist yet it is created; when it exists it is updated. On update, a field
    is overwritten only if its input value is not None; passing None keeps the
    stored value. A field can be un-set by passing an empty value, e.g. an
    empty list or an empty string.

    Schedule behavior:
        - Create case: uses the default daily schedule ("0 0 * * *") or the
          provided schedule.
        - Update case: uses the existing schedule or the provided schedule.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. If not provided, a urn will be generated
            and the assertion will be _created_ in the DataHub instance.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. If not provided, the existing
            value will be preserved (a newly created assertion is enabled).
        criteria: The criteria to be used for the assertion, of type
            SqlAssertionCriteria. It has the following fields:
            - type: The type of sql assertion. Valid values are:
                - "METRIC" -> Looks at the current value of the metric.
                - "METRIC_CHANGE" -> Looks at the change in the metric between
                  the current and previous run.
            - change_type: The change type of the assertion, if the type is
              "METRIC_CHANGE". Valid values are:
                - "ABSOLUTE" -> Looks at the absolute change in the metric.
                - "PERCENTAGE" -> Looks at the percentage change in the metric.
            - operator: The operator to be used for the assertion. Valid values are:
                - "GREATER_THAN" -> The metric value is greater than the threshold.
                - "LESS_THAN" -> The metric value is less than the threshold.
                - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than
                  or equal to the threshold.
                - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or
                  equal to the threshold.
                - "EQUAL_TO" -> The metric value is equal to the threshold.
                - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
                - "BETWEEN" -> The metric value is between the two thresholds.
            - parameters: The parameters to be used for the assertion, of type
              SqlAssertionParameters. It has the following fields:
                - value: The value of the metric, either a single value or a
                  tuple range.
                    - If the operator is "BETWEEN", the value is a tuple of two
                      values, with format min, max.
                    - If the operator is not "BETWEEN", the value is a single value.
        statement: The SQL statement to be used for the assertion, e.g.
            "SELECT COUNT(*) FROM table WHERE column > 100".
        incident_behavior: The incident behavior to be applied to the assertion.
            Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron
            expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided
            with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        SqlAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with, so just create.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        enabled_on_create = True if enabled is None else enabled
        return self._create_sql_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=enabled_on_create,
            criteria=criteria,
            statement=statement,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    # Step 2: a urn was given, so validate the raw input first. The created_*
    # values are placeholders that the merge replaces with the stored ones.
    validated_input = _SqlAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
        schedule=schedule,
    )

    # Step 3: merge with whatever is stored under the urn; if nothing is stored
    # the helper creates the assertion itself and returns it.
    merge_outcome = self._retrieve_and_merge_sql_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=now_utc,
        schedule=schedule,
    )

    # An already-created assertion came back: nothing left to upsert.
    if isinstance(merge_outcome, _AssertionPublic):
        # Narrow the type for the checker; the sql merge helper is expected to
        # only ever hand back a SqlAssertion here.
        assert isinstance(merge_outcome, SqlAssertion)
        return merge_outcome

    # Step 4: upsert the assertion first, then its monitor. If the assertion
    # upsert raises, the monitor upsert is never attempted.
    entities = merge_outcome.to_assertion_and_monitor_entities()
    assertion_entity, monitor_entity = entities
    self.client.entities.upsert(assertion_entity)
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    self.client.entities.upsert(monitor_entity)

    return SqlAssertion._from_entities(assertion_entity, monitor_entity)
2769
+
1595
2770
 
1596
2771
  def _merge_field(
1597
2772
  input_field_value: Any,