acryl-datahub-cloud 0.3.12rc5__py3-none-any.whl → 0.3.12rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -9,6 +9,8 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
9
9
  FreshnessAssertion,
10
10
  SmartFreshnessAssertion,
11
11
  SmartVolumeAssertion,
12
+ SqlAssertion,
13
+ VolumeAssertion,
12
14
  _AssertionPublic,
13
15
  )
14
16
  from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
@@ -36,6 +38,18 @@ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input
36
38
  ValueTypeInputType,
37
39
  _SmartColumnMetricAssertionInput,
38
40
  )
41
+ from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
42
+ SqlAssertionCriteria,
43
+ _SqlAssertionInput,
44
+ )
45
+ from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
46
+ RowCountTotal,
47
+ VolumeAssertionDefinition,
48
+ VolumeAssertionDefinitionInputTypes,
49
+ VolumeAssertionOperator,
50
+ _VolumeAssertionDefinitionTypes,
51
+ _VolumeAssertionInput,
52
+ )
39
53
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
40
54
  from acryl_datahub_cloud.sdk.entities.monitor import Monitor
41
55
  from acryl_datahub_cloud.sdk.errors import SDKUsageError
@@ -489,6 +503,194 @@ class AssertionsClient:
489
503
 
490
504
  return merged_assertion_input
491
505
 
506
def _retrieve_and_merge_native_volume_assertion_and_monitor(
    self,
    assertion_input: _VolumeAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
    definition: Optional[VolumeAssertionDefinitionInputTypes],
    use_backend_definition: bool = False,
) -> Union[VolumeAssertion, _VolumeAssertionInput]:
    """Fetch the existing volume assertion (if any) and merge it with the input.

    Args:
        assertion_input: The validated input; its ``definition`` attribute is
            overwritten in place when ``use_backend_definition`` is set.
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion being synced.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled. Only used here to pick the
            mode of the placeholder monitor; also forwarded to the merge.
        detection_mechanism: The detection mechanism to be used.
        incident_behavior: The incident behavior to be applied.
        tags: The tags to be applied.
        updated_by: The acting user; passed as ``created_by`` when a new
            assertion has to be created.
        now_utc: The current UTC time from when the caller was invoked.
        schedule: The schedule to be applied.
        definition: The raw definition; only forwarded to
            ``_create_volume_assertion``. On the merge path the definition is
            read from ``assertion_input.definition`` or from the backend.
        use_backend_definition: When True, the definition stored on the
            existing assertion entity replaces the one in ``assertion_input``.

    Returns:
        The result of ``_create_volume_assertion`` when no assertion entity
        exists, otherwise the merged ``_VolumeAssertionInput``.

    Raises:
        SDKUsageError: If ``use_backend_definition`` is set but no assertion
            entity exists, or if the existing assertion's dataset urn differs
            from the input's dataset urn.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 If the assertion does not exist, there is nothing to merge with:
    # create a new assertion with a generated urn and return it directly.
    if not maybe_assertion_entity:
        if use_backend_definition:
            raise SDKUsageError(
                f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
            )
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        # NOTE(review): `enabled` is not forwarded, so the callee's own default
        # applies even if the caller passed enabled=False - confirm intended.
        return self._create_volume_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
            definition=definition,
        )

    # 2.2 The assertion exists. Use the real monitor when there is one,
    # otherwise a placeholder monitor so the assertion object can be built.
    if maybe_monitor_entity:
        existing_assertion = VolumeAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    else:
        # Only an explicit falsy, non-None `enabled` yields an inactive monitor.
        monitor_mode = "ACTIVE" if (enabled or enabled is None) else "INACTIVE"
        existing_assertion = VolumeAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Pick the definition. The assertion entity is guaranteed to exist
    # here (step 2.1 returned or raised otherwise), so no fallback is needed.
    if use_backend_definition:
        effective_definition = VolumeAssertionDefinition.from_assertion(
            maybe_assertion_entity
        )
        # Update the assertion_input with the real definition from backend
        assertion_input.definition = effective_definition
        logger.info("Using definition from backend assertion")
    else:
        # Use the already-parsed definition from assertion_input
        effective_definition = assertion_input.definition

    # 5. Merge the existing assertion with the validated input:
    merged_assertion_input = self._merge_volume_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
        schedule=schedule,
        definition=effective_definition,
    )

    return merged_assertion_input
612
+
613
def _retrieve_and_merge_sql_assertion_and_monitor(
    self,
    assertion_input: _SqlAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    criteria: SqlAssertionCriteria,
    statement: str,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SqlAssertion, _SqlAssertionInput]:
    """Look up the existing SQL assertion and merge it with the given input.

    When no assertion entity is found, a new assertion is created through
    ``_create_sql_assertion`` (with ``updated_by`` passed as ``created_by``)
    and that result is returned. Otherwise the existing assertion is combined
    with the input via ``_merge_sql_input`` and the merged input is returned.

    Raises:
        SDKUsageError: If the existing assertion's dataset urn differs from
            the dataset urn of ``assertion_input``.
    """
    # Step 1: fetch whatever already exists for this assertion.
    assertion_entity, monitor_urn, monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # Step 2a: nothing to merge with - create a fresh assertion and stop here.
    if not assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_sql_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            criteria=criteria,
            statement=statement,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
        )

    # Step 2b: the assertion exists; fall back to a placeholder monitor when
    # the real one is missing so the assertion object can still be built.
    if monitor_entity:
        monitor_for_assertion = monitor_entity
    else:
        if enabled or enabled is None:
            placeholder_mode = "ACTIVE"
        else:
            placeholder_mode = "INACTIVE"
        monitor_for_assertion = Monitor(
            id=monitor_urn, info=("ASSERTION", placeholder_mode)
        )
    existing_assertion = SqlAssertion._from_entities(
        assertion_entity, monitor_for_assertion
    )

    # Step 3: refuse to merge assertions that point at different datasets.
    dataset_differs = (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    )
    if dataset_differs:
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # Step 4: merge the existing assertion with the validated input.
    return self._merge_sql_input(
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=assertion_entity,
        existing_assertion=existing_assertion,
        schedule=schedule,
    )
693
+
492
694
  def _retrieve_assertion_and_monitor(
493
695
  self,
494
696
  assertion_input: _AssertionInput,
@@ -781,27 +983,25 @@ class AssertionsClient:
781
983
  )
782
984
  return merged_assertion_input
783
985
 
784
- def _merge_smart_volume_input(
986
+ def _merge_volume_input(
785
987
  self,
786
988
  dataset_urn: Union[str, DatasetUrn],
787
989
  urn: Union[str, AssertionUrn],
788
990
  display_name: Optional[str],
789
991
  enabled: Optional[bool],
790
992
  detection_mechanism: DetectionMechanismInputTypes,
791
- sensitivity: Optional[Union[str, InferenceSensitivity]],
792
- exclusion_windows: Optional[ExclusionWindowInputTypes],
793
- training_data_lookback_days: Optional[int],
794
993
  incident_behavior: Optional[
795
994
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
796
995
  ],
797
996
  tags: Optional[TagsInputType],
798
- schedule: Optional[Union[str, models.CronScheduleClass]],
799
997
  now_utc: datetime,
800
- assertion_input: _SmartVolumeAssertionInput,
998
+ assertion_input: _VolumeAssertionInput,
801
999
  maybe_assertion_entity: Optional[Assertion],
802
1000
  maybe_monitor_entity: Optional[Monitor],
803
- existing_assertion: SmartVolumeAssertion,
804
- ) -> _SmartVolumeAssertionInput:
1001
+ existing_assertion: VolumeAssertion,
1002
+ schedule: Optional[Union[str, models.CronScheduleClass]],
1003
+ definition: Optional[_VolumeAssertionDefinitionTypes],
1004
+ ) -> _VolumeAssertionInput:
805
1005
  """Merge the input with the existing assertion and monitor entities.
806
1006
 
807
1007
  Args:
@@ -810,9 +1010,6 @@ class AssertionsClient:
810
1010
  display_name: The display name of the assertion.
811
1011
  enabled: Whether the assertion is enabled.
812
1012
  detection_mechanism: The detection mechanism to be used for the assertion.
813
- sensitivity: The sensitivity to be applied to the assertion.
814
- exclusion_windows: The exclusion windows to be applied to the assertion.
815
- training_data_lookback_days: The training data lookback days to be applied to the assertion.
816
1013
  incident_behavior: The incident behavior to be applied to the assertion.
817
1014
  tags: The tags to be applied to the assertion.
818
1015
  now_utc: The current UTC time from when the function is called.
@@ -820,11 +1017,13 @@ class AssertionsClient:
820
1017
  maybe_assertion_entity: The existing assertion entity from the DataHub instance.
821
1018
  maybe_monitor_entity: The existing monitor entity from the DataHub instance.
822
1019
  existing_assertion: The existing assertion from the DataHub instance.
1020
+ schedule: The schedule to be applied to the assertion.
1021
+ definition: The volume assertion definition to be applied to the assertion.
823
1022
 
824
1023
  Returns:
825
1024
  The merged assertion input.
826
1025
  """
827
- merged_assertion_input = _SmartVolumeAssertionInput(
1026
+ merged_assertion_input = _VolumeAssertionInput(
828
1027
  urn=urn,
829
1028
  entity_client=self.client.entities,
830
1029
  dataset_urn=dataset_urn,
@@ -856,43 +1055,262 @@ class AssertionsClient:
856
1055
  "detection_mechanism",
857
1056
  assertion_input,
858
1057
  existing_assertion,
859
- SmartVolumeAssertion._get_detection_mechanism(
1058
+ VolumeAssertion._get_detection_mechanism(
860
1059
  maybe_assertion_entity, maybe_monitor_entity, default=None
861
1060
  )
862
1061
  if maybe_assertion_entity and maybe_monitor_entity
863
1062
  else None,
864
1063
  ),
865
- sensitivity=_merge_field(
866
- sensitivity,
867
- "sensitivity",
1064
+ incident_behavior=_merge_field(
1065
+ incident_behavior,
1066
+ "incident_behavior",
868
1067
  assertion_input,
869
1068
  existing_assertion,
870
- maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
1069
+ VolumeAssertion._get_incident_behavior(maybe_assertion_entity)
1070
+ if maybe_assertion_entity
1071
+ else None,
871
1072
  ),
872
- exclusion_windows=_merge_field(
873
- exclusion_windows,
874
- "exclusion_windows",
1073
+ tags=_merge_field(
1074
+ tags,
1075
+ "tags",
875
1076
  assertion_input,
876
1077
  existing_assertion,
877
- maybe_monitor_entity.exclusion_windows
878
- if maybe_monitor_entity
879
- else None,
1078
+ maybe_assertion_entity.tags if maybe_assertion_entity else None,
880
1079
  ),
881
- training_data_lookback_days=_merge_field(
882
- training_data_lookback_days,
883
- "training_data_lookback_days",
1080
+ definition=_merge_field(
1081
+ definition,
1082
+ "definition",
884
1083
  assertion_input,
885
1084
  existing_assertion,
886
- maybe_monitor_entity.training_data_lookback_days
887
- if maybe_monitor_entity
1085
+ existing_assertion.definition if existing_assertion else None,
1086
+ ),
1087
+ created_by=existing_assertion.created_by
1088
+ or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
1089
+ created_at=existing_assertion.created_at
1090
+ or now_utc, # Override with the existing assertion's created_at or now if not set
1091
+ updated_by=assertion_input.updated_by, # Override with the input's updated_by
1092
+ updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
1093
+ )
1094
+ return merged_assertion_input
1095
+
1096
def _merge_sql_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    criteria: SqlAssertionCriteria,
    statement: str,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _SqlAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    # not used: maybe_monitor_entity: Optional[Monitor], as schedule is already set in existing_assertion
    existing_assertion: SqlAssertion,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> _SqlAssertionInput:
    """Build a new SQL assertion input from the request and the stored state.

    Each user-settable field is resolved through ``_merge_field``, which is
    given the requested value, the field name, the validated input, the
    existing assertion and the currently stored value.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        criteria: The criteria of the assertion.
        statement: The statement of the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        now_utc: The current UTC time from when the function is called.
        assertion_input: The validated input to the function.
        maybe_assertion_entity: The existing assertion entity from the DataHub instance.
        existing_assertion: The existing assertion from the DataHub instance.
        schedule: The schedule to be applied to the assertion.

    Returns:
        The merged assertion input.
    """
    entity_client = self.client.entities

    merged_display_name = _merge_field(
        display_name,
        "display_name",
        assertion_input,
        existing_assertion,
        maybe_assertion_entity.description if maybe_assertion_entity else None,
    )

    if existing_assertion:
        stored_enabled = existing_assertion.mode == AssertionMode.ACTIVE
    else:
        stored_enabled = None
    merged_enabled = _merge_field(
        enabled,
        "enabled",
        assertion_input,
        existing_assertion,
        stored_enabled,
    )

    merged_schedule = _merge_field(
        schedule,
        "schedule",
        assertion_input,
        existing_assertion,
        # TODO should this use maybe_monitor_entity.schedule?
        existing_assertion.schedule if existing_assertion else None,
    )

    merged_criteria = _merge_field(
        criteria,
        "criteria",
        assertion_input,
        existing_assertion,
        existing_assertion.criteria if existing_assertion else None,
    )

    merged_statement = _merge_field(
        statement,
        "statement",
        assertion_input,
        existing_assertion,
        existing_assertion.statement if existing_assertion else None,
    )

    if maybe_assertion_entity:
        stored_incident_behavior = SqlAssertion._get_incident_behavior(
            maybe_assertion_entity
        )
    else:
        stored_incident_behavior = None
    merged_incident_behavior = _merge_field(
        incident_behavior,
        "incident_behavior",
        assertion_input,
        existing_assertion,
        stored_incident_behavior,
    )

    merged_tags = _merge_field(
        tags,
        "tags",
        assertion_input,
        existing_assertion,
        maybe_assertion_entity.tags if maybe_assertion_entity else None,
    )

    # Creation metadata is carried over from the existing assertion, with
    # the default creator / current time as fallbacks when it is not set.
    original_creator = existing_assertion.created_by or DEFAULT_CREATED_BY
    original_created_at = existing_assertion.created_at or now_utc

    return _SqlAssertionInput(
        urn=urn,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        display_name=merged_display_name,
        enabled=merged_enabled,
        schedule=merged_schedule,
        criteria=merged_criteria,
        statement=merged_statement,
        incident_behavior=merged_incident_behavior,
        tags=merged_tags,
        created_by=original_creator,
        created_at=original_created_at,
        # Update metadata always comes from the new input.
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
1201
+
1202
+ def _merge_smart_volume_input(
1203
+ self,
1204
+ dataset_urn: Union[str, DatasetUrn],
1205
+ urn: Union[str, AssertionUrn],
1206
+ display_name: Optional[str],
1207
+ enabled: Optional[bool],
1208
+ detection_mechanism: DetectionMechanismInputTypes,
1209
+ sensitivity: Optional[Union[str, InferenceSensitivity]],
1210
+ exclusion_windows: Optional[ExclusionWindowInputTypes],
1211
+ training_data_lookback_days: Optional[int],
1212
+ incident_behavior: Optional[
1213
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1214
+ ],
1215
+ tags: Optional[TagsInputType],
1216
+ schedule: Optional[Union[str, models.CronScheduleClass]],
1217
+ now_utc: datetime,
1218
+ assertion_input: _SmartVolumeAssertionInput,
1219
+ maybe_assertion_entity: Optional[Assertion],
1220
+ maybe_monitor_entity: Optional[Monitor],
1221
+ existing_assertion: SmartVolumeAssertion,
1222
+ ) -> _SmartVolumeAssertionInput:
1223
+ """Merge the input with the existing assertion and monitor entities.
1224
+
1225
+ Args:
1226
+ dataset_urn: The urn of the dataset to be monitored.
1227
+ urn: The urn of the assertion.
1228
+ display_name: The display name of the assertion.
1229
+ enabled: Whether the assertion is enabled.
1230
+ detection_mechanism: The detection mechanism to be used for the assertion.
1231
+ sensitivity: The sensitivity to be applied to the assertion.
1232
+ exclusion_windows: The exclusion windows to be applied to the assertion.
1233
+ training_data_lookback_days: The training data lookback days to be applied to the assertion.
1234
+ incident_behavior: The incident behavior to be applied to the assertion.
1235
+ tags: The tags to be applied to the assertion.
1236
+ now_utc: The current UTC time from when the function is called.
1237
+ assertion_input: The validated input to the function.
1238
+ maybe_assertion_entity: The existing assertion entity from the DataHub instance.
1239
+ maybe_monitor_entity: The existing monitor entity from the DataHub instance.
1240
+ existing_assertion: The existing assertion from the DataHub instance.
1241
+
1242
+ Returns:
1243
+ The merged assertion input.
1244
+ """
1245
+ merged_assertion_input = _SmartVolumeAssertionInput(
1246
+ urn=urn,
1247
+ entity_client=self.client.entities,
1248
+ dataset_urn=dataset_urn,
1249
+ display_name=_merge_field(
1250
+ display_name,
1251
+ "display_name",
1252
+ assertion_input,
1253
+ existing_assertion,
1254
+ maybe_assertion_entity.description if maybe_assertion_entity else None,
1255
+ ),
1256
+ enabled=_merge_field(
1257
+ enabled,
1258
+ "enabled",
1259
+ assertion_input,
1260
+ existing_assertion,
1261
+ existing_assertion.mode == AssertionMode.ACTIVE
1262
+ if existing_assertion
1263
+ else None,
1264
+ ),
1265
+ schedule=_merge_field(
1266
+ schedule,
1267
+ "schedule",
1268
+ assertion_input,
1269
+ existing_assertion,
1270
+ existing_assertion.schedule if existing_assertion else None,
1271
+ ),
1272
+ detection_mechanism=_merge_field(
1273
+ detection_mechanism,
1274
+ "detection_mechanism",
1275
+ assertion_input,
1276
+ existing_assertion,
1277
+ SmartVolumeAssertion._get_detection_mechanism(
1278
+ maybe_assertion_entity, maybe_monitor_entity, default=None
1279
+ )
1280
+ if maybe_assertion_entity and maybe_monitor_entity
1281
+ else None,
1282
+ ),
1283
+ sensitivity=_merge_field(
1284
+ sensitivity,
1285
+ "sensitivity",
1286
+ assertion_input,
1287
+ existing_assertion,
1288
+ maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
1289
+ ),
1290
+ exclusion_windows=_merge_field(
1291
+ exclusion_windows,
1292
+ "exclusion_windows",
1293
+ assertion_input,
1294
+ existing_assertion,
1295
+ maybe_monitor_entity.exclusion_windows
1296
+ if maybe_monitor_entity
1297
+ else None,
1298
+ ),
1299
+ training_data_lookback_days=_merge_field(
1300
+ training_data_lookback_days,
1301
+ "training_data_lookback_days",
1302
+ assertion_input,
1303
+ existing_assertion,
1304
+ maybe_monitor_entity.training_data_lookback_days
1305
+ if maybe_monitor_entity
1306
+ else None,
1307
+ ),
1308
+ incident_behavior=_merge_field(
1309
+ incident_behavior,
1310
+ "incident_behavior",
1311
+ assertion_input,
1312
+ existing_assertion,
1313
+ SmartVolumeAssertion._get_incident_behavior(maybe_assertion_entity)
896
1314
  if maybe_assertion_entity
897
1315
  else None,
898
1316
  ),
@@ -1252,46 +1670,32 @@ class AssertionsClient:
1252
1670
  # raise e
1253
1671
  return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
1254
1672
 
1255
- def sync_smart_volume_assertion(
1673
+ def _create_volume_assertion(
1256
1674
  self,
1257
1675
  *,
1258
1676
  dataset_urn: Union[str, DatasetUrn],
1259
- urn: Optional[Union[str, AssertionUrn]] = None,
1260
1677
  display_name: Optional[str] = None,
1261
- enabled: Optional[bool] = None,
1678
+ enabled: bool = True,
1262
1679
  detection_mechanism: DetectionMechanismInputTypes = None,
1263
- sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
1264
- exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
1265
- training_data_lookback_days: Optional[int] = None,
1266
1680
  incident_behavior: Optional[
1267
1681
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1268
1682
  ] = None,
1269
1683
  tags: Optional[TagsInputType] = None,
1270
- updated_by: Optional[Union[str, CorpUserUrn]] = None,
1684
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
1271
1685
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1272
- ) -> SmartVolumeAssertion:
1273
- """Upsert and merge a smart volume assertion.
1686
+ definition: Optional[VolumeAssertionDefinitionInputTypes] = None,
1687
+ ) -> VolumeAssertion:
1688
+ """Create a volume assertion.
1274
1689
 
1275
1690
  Note: keyword arguments are required.
1276
1691
 
1277
- Upsert and merge is a combination of create and update. If the assertion does not exist,
1278
- it will be created. If it does exist, it will be updated. Existing assertion fields will
1279
- be updated if the input value is not None. If the input value is None, the existing value
1280
- will be preserved. If the input value can be un-set e.g. by passing an empty list or
1281
- empty string.
1282
-
1283
- Schedule behavior:
1284
- - Create case: Uses default hourly schedule (\"0 * * * *\") or provided schedule
1285
- - Update case: Different than `sync_smart_freshness_assertion`, schedule is updated.
1692
+ The created assertion will use the default daily schedule ("0 0 * * *").
1286
1693
 
1287
1694
  Args:
1288
1695
  dataset_urn: The urn of the dataset to be monitored.
1289
- urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
1290
- will be _created_ in the DataHub instance.
1291
- display_name: The display name of the assertion. If not provided, a random display name
1292
- will be generated.
1293
- enabled: Whether the assertion is enabled. If not provided, the existing value
1294
- will be preserved.
1696
+ display_name: The display name of the assertion. If not provided, a random display
1697
+ name will be generated.
1698
+ enabled: Whether the assertion is enabled. Defaults to True.
1295
1699
  detection_mechanism: The detection mechanism to be used for the assertion. Information
1296
1700
  schema is recommended. Valid values are:
1297
1701
  - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
@@ -1302,33 +1706,7 @@ class AssertionsClient:
1302
1706
  "additional_filter": "last_modified > '2021-01-01'",
1303
1707
  } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
1304
1708
  additional_filter='last_modified > 2021-01-01')
1305
- - {
1306
- "type": "high_watermark_column",
1307
- "column_name": "id",
1308
- "additional_filter": "id > 1000",
1309
- } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
1310
- additional_filter='id > 1000')
1311
1709
  - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
1312
- sensitivity: The sensitivity to be applied to the assertion. Valid values are:
1313
- - "low" or InferenceSensitivity.LOW
1314
- - "medium" or InferenceSensitivity.MEDIUM
1315
- - "high" or InferenceSensitivity.HIGH
1316
- exclusion_windows: The exclusion windows to be applied to the assertion, currently only
1317
- fixed range exclusion windows are supported. Valid values are:
1318
- - from datetime.datetime objects: {
1319
- "start": "datetime(2025, 1, 1, 0, 0, 0)",
1320
- "end": "datetime(2025, 1, 2, 0, 0, 0)",
1321
- }
1322
- - from string datetimes: {
1323
- "start": "2025-01-01T00:00:00",
1324
- "end": "2025-01-02T00:00:00",
1325
- }
1326
- - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
1327
- start=datetime(2025, 1, 1, 0, 0, 0),
1328
- end=datetime(2025, 1, 2, 0, 0, 0)
1329
- )
1330
- training_data_lookback_days: The training data lookback days to be applied to the
1331
- assertion as an integer.
1332
1710
  incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1333
1711
  - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1334
1712
  - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
@@ -1336,7 +1714,7 @@ class AssertionsClient:
1336
1714
  - a list of strings (strings will be converted to TagUrn objects)
1337
1715
  - a list of TagUrn objects
1338
1716
  - a list of TagAssociationClass objects
1339
- updated_by: Optional urn of the user who updated the assertion. The format is
1717
+ created_by: Optional urn of the user who created the assertion. The format is
1340
1718
  "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1341
1719
  The default is the datahub system user.
1342
1720
  TODO: Retrieve the SDK user as the default instead of the datahub system user.
@@ -1345,80 +1723,330 @@ class AssertionsClient:
1345
1723
  The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1346
1724
  Alternatively, a models.CronScheduleClass object can be provided with string parameters
1347
1725
  cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1726
+ definition: The volume assertion definition. Must be provided and include type, operator,
1727
+ and parameters. Can be provided as:
1728
+ - A typed volume assertion object (RowCountTotal or RowCountChange)
1729
+ - A dictionary with keys: type, operator, parameters (and kind for row_count_change)
1730
+
1731
+ Example dictionary for row count total:
1732
+ {
1733
+ "type": "row_count_total",
1734
+ "operator": "GREATER_THAN_OR_EQUAL_TO",
1735
+ "parameters": 100
1736
+ }
1737
+
1738
+ Example dictionary for row count change:
1739
+ {
1740
+ "type": "row_count_change",
1741
+ "kind": "percent",
1742
+ "operator": "BETWEEN",
1743
+ "parameters": (10, 50)
1744
+ }
1348
1745
 
1349
1746
  Returns:
1350
- SmartVolumeAssertion: The created or updated assertion.
1747
+ VolumeAssertion: The created assertion.
1351
1748
  """
1352
1749
  _print_experimental_warning()
1353
1750
  now_utc = datetime.now(timezone.utc)
1354
-
1355
- if updated_by is None:
1751
+ if created_by is None:
1356
1752
  logger.warning(
1357
- f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1358
- )
1359
- updated_by = DEFAULT_CREATED_BY
1360
-
1361
- # 1. If urn is not set, create a new assertion
1362
- if urn is None:
1363
- logger.info("URN is not set, creating a new assertion")
1364
- return self._create_smart_volume_assertion(
1365
- dataset_urn=dataset_urn,
1366
- display_name=display_name,
1367
- enabled=enabled if enabled is not None else True,
1368
- detection_mechanism=detection_mechanism,
1369
- sensitivity=sensitivity,
1370
- exclusion_windows=exclusion_windows,
1371
- training_data_lookback_days=training_data_lookback_days,
1372
- incident_behavior=incident_behavior,
1373
- tags=tags,
1374
- created_by=updated_by,
1375
- schedule=schedule,
1753
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1376
1754
  )
1377
-
1378
- # 2. If urn is set, first validate the input:
1379
- assertion_input = _SmartVolumeAssertionInput(
1380
- urn=urn,
1755
+ created_by = DEFAULT_CREATED_BY
1756
+ assertion_input = _VolumeAssertionInput(
1757
+ urn=None,
1381
1758
  entity_client=self.client.entities,
1382
1759
  dataset_urn=dataset_urn,
1383
1760
  display_name=display_name,
1761
+ enabled=enabled,
1384
1762
  detection_mechanism=detection_mechanism,
1385
- sensitivity=sensitivity,
1386
- exclusion_windows=exclusion_windows,
1387
- training_data_lookback_days=training_data_lookback_days,
1388
1763
  incident_behavior=incident_behavior,
1389
1764
  tags=tags,
1390
- created_by=updated_by, # This will be overridden by the actual created_by
1391
- created_at=now_utc, # This will be overridden by the actual created_at
1392
- updated_by=updated_by,
1765
+ created_by=created_by,
1766
+ created_at=now_utc,
1767
+ updated_by=created_by,
1393
1768
  updated_at=now_utc,
1394
1769
  schedule=schedule,
1770
+ definition=definition,
1395
1771
  )
1396
-
1397
- # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
1398
- # if the assertion does not exist:
1399
- merged_assertion_input_or_created_assertion = (
1400
- self._retrieve_and_merge_volume_assertion_and_monitor(
1401
- assertion_input=assertion_input,
1402
- dataset_urn=dataset_urn,
1403
- urn=urn,
1404
- display_name=display_name,
1405
- enabled=enabled,
1406
- detection_mechanism=detection_mechanism,
1407
- sensitivity=sensitivity,
1408
- exclusion_windows=exclusion_windows,
1409
- training_data_lookback_days=training_data_lookback_days,
1410
- incident_behavior=incident_behavior,
1411
- tags=tags,
1412
- updated_by=updated_by,
1413
- now_utc=now_utc,
1414
- schedule=schedule,
1415
- )
1772
+ assertion_entity, monitor_entity = (
1773
+ assertion_input.to_assertion_and_monitor_entities()
1416
1774
  )
1417
-
1418
- # Return early if we created a new assertion in the merge:
1419
- if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
1420
- # We know this is the correct type because we passed the assertion_class parameter
1421
- assert isinstance(
1775
+ # If assertion creation fails, we won't try to create the monitor
1776
+ self.client.entities.create(assertion_entity)
1777
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
1778
+ # try:
1779
+ self.client.entities.create(monitor_entity)
1780
+ # except Exception as e:
1781
+ # logger.error(f"Error creating monitor: {e}")
1782
+ # self.client.entities.delete(assertion_entity)
1783
+ # raise e
1784
+ return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
1785
+
1786
+ def _create_sql_assertion(
1787
+ self,
1788
+ *,
1789
+ dataset_urn: Union[str, DatasetUrn],
1790
+ display_name: Optional[str] = None,
1791
+ enabled: bool = True,
1792
+ criteria: SqlAssertionCriteria,
1793
+ statement: str,
1794
+ incident_behavior: Optional[
1795
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1796
+ ],
1797
+ tags: Optional[TagsInputType],
1798
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
1799
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1800
+ ) -> SqlAssertion:
1801
+ """Create a sql assertion.
1802
+
1803
+ Args:
1804
+ dataset_urn: The urn of the dataset to be monitored.
1805
+ display_name: The display name of the assertion. If not provided, a random display
1806
+ name will be generated.
1807
+ enabled: Whether the assertion is enabled. Defaults to True.
1808
+ criteria: The criteria to be used for the assertion. This is of type SqlAssertionCriteria. It has the following fields:
1809
+ - type: The type of sql assertion. Valid values are:
1810
+ - "METRIC" -> Looks at the current value of the metric.
1811
+ - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
1812
+ - change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
1813
+ - "ABSOLUTE" -> Looks at the absolute change in the metric.
1814
+ - "PERCENTAGE" -> Looks at the percentage change in the metric.
1815
+ - operator: The operator to be used for the assertion. Valid values are:
1816
+ - "GREATER_THAN" -> The metric value is greater than the threshold.
1817
+ - "LESS_THAN" -> The metric value is less than the threshold.
1818
+ - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
1819
+ - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
1820
+ - "EQUAL_TO" -> The metric value is equal to the threshold.
1821
+ - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
1822
+ - "BETWEEN" -> The metric value is between the two thresholds.
1823
+ - parameters: The parameters to be used for the assertion. This is of type SqlAssertionParameters. It has the following fields:
1824
+ - value: The value of the metric. This can be a single value or a tuple range.
1825
+ - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
1826
+ - If the operator is not "BETWEEN", the value is a single value.
1827
+ statement: The statement to be used for the assertion.
1828
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1829
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1830
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
1831
+ tags: The tags to be applied to the assertion. Valid values are:
1832
+ - a list of strings (strings will be converted to TagUrn objects)
1833
+ - a list of TagUrn objects
1834
+ - a list of TagAssociationClass objects
1835
+ created_by: Optional urn of the user who created the assertion. The format is
1836
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1837
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
1838
+ schedule will be used. The schedule determines when the assertion will be evaluated.
1839
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1840
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
1841
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1842
+
1843
+ Returns:
1844
+ SqlAssertion: The created assertion.
1845
+ """
1846
+ _print_experimental_warning()
1847
+ now_utc = datetime.now(timezone.utc)
1848
+ if created_by is None:
1849
+ logger.warning(
1850
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1851
+ )
1852
+ created_by = DEFAULT_CREATED_BY
1853
+ assertion_input = _SqlAssertionInput(
1854
+ urn=None,
1855
+ entity_client=self.client.entities,
1856
+ dataset_urn=dataset_urn,
1857
+ display_name=display_name,
1858
+ enabled=enabled,
1859
+ criteria=criteria,
1860
+ statement=statement,
1861
+ incident_behavior=incident_behavior,
1862
+ tags=tags,
1863
+ created_by=created_by,
1864
+ created_at=now_utc,
1865
+ updated_by=created_by,
1866
+ updated_at=now_utc,
1867
+ schedule=schedule,
1868
+ )
1869
+ assertion_entity, monitor_entity = (
1870
+ assertion_input.to_assertion_and_monitor_entities()
1871
+ )
1872
+ # If assertion creation fails, we won't try to create the monitor
1873
+ self.client.entities.create(assertion_entity)
1874
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
1875
+ # try:
1876
+ self.client.entities.create(monitor_entity)
1877
+ # except Exception as e:
1878
+ # logger.error(f"Error creating monitor: {e}")
1879
+ # self.client.entities.delete(assertion_entity)
1880
+ # raise e
1881
+ return SqlAssertion._from_entities(assertion_entity, monitor_entity)
1882
+
1883
+ def sync_smart_volume_assertion(
1884
+ self,
1885
+ *,
1886
+ dataset_urn: Union[str, DatasetUrn],
1887
+ urn: Optional[Union[str, AssertionUrn]] = None,
1888
+ display_name: Optional[str] = None,
1889
+ enabled: Optional[bool] = None,
1890
+ detection_mechanism: DetectionMechanismInputTypes = None,
1891
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
1892
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
1893
+ training_data_lookback_days: Optional[int] = None,
1894
+ incident_behavior: Optional[
1895
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1896
+ ] = None,
1897
+ tags: Optional[TagsInputType] = None,
1898
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
1899
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1900
+ ) -> SmartVolumeAssertion:
1901
+ """Upsert and merge a smart volume assertion.
1902
+
1903
+ Note: keyword arguments are required.
1904
+
1905
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
1906
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
1907
+ be updated if the input value is not None. If the input value is None, the existing value
1908
+ will be preserved. A value that can be un-set is cleared by passing an empty list or
1909
+ empty string.
1910
+
1911
+ Schedule behavior:
1912
+ - Create case: Uses default hourly schedule (\"0 * * * *\") or provided schedule
1913
+ - Update case: Unlike `sync_smart_freshness_assertion`, the schedule is updated.
1914
+
1915
+ Args:
1916
+ dataset_urn: The urn of the dataset to be monitored.
1917
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
1918
+ will be _created_ in the DataHub instance.
1919
+ display_name: The display name of the assertion. If not provided, a random display name
1920
+ will be generated.
1921
+ enabled: Whether the assertion is enabled. If not provided, the existing value
1922
+ will be preserved.
1923
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
1924
+ schema is recommended. Valid values are:
1925
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
1926
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
1927
+ - {
1928
+ "type": "last_modified_column",
1929
+ "column_name": "last_modified",
1930
+ "additional_filter": "last_modified > '2021-01-01'",
1931
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
1932
+ additional_filter="last_modified > '2021-01-01'")
1933
+ - {
1934
+ "type": "high_watermark_column",
1935
+ "column_name": "id",
1936
+ "additional_filter": "id > 1000",
1937
+ } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
1938
+ additional_filter='id > 1000')
1939
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
1940
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
1941
+ - "low" or InferenceSensitivity.LOW
1942
+ - "medium" or InferenceSensitivity.MEDIUM
1943
+ - "high" or InferenceSensitivity.HIGH
1944
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
1945
+ fixed range exclusion windows are supported. Valid values are:
1946
+ - from datetime.datetime objects: {
1947
+ "start": datetime(2025, 1, 1, 0, 0, 0),
1948
+ "end": datetime(2025, 1, 2, 0, 0, 0),
1949
+ }
1950
+ - from string datetimes: {
1951
+ "start": "2025-01-01T00:00:00",
1952
+ "end": "2025-01-02T00:00:00",
1953
+ }
1954
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
1955
+ start=datetime(2025, 1, 1, 0, 0, 0),
1956
+ end=datetime(2025, 1, 2, 0, 0, 0)
1957
+ )
1958
+ training_data_lookback_days: The training data lookback days to be applied to the
1959
+ assertion as an integer.
1960
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1961
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1962
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
1963
+ tags: The tags to be applied to the assertion. Valid values are:
1964
+ - a list of strings (strings will be converted to TagUrn objects)
1965
+ - a list of TagUrn objects
1966
+ - a list of TagAssociationClass objects
1967
+ updated_by: Optional urn of the user who updated the assertion. The format is
1968
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1969
+ The default is the datahub system user.
1970
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
1971
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
1972
+ schedule will be used. The schedule determines when the assertion will be evaluated.
1973
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1974
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
1975
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1976
+
1977
+ Returns:
1978
+ SmartVolumeAssertion: The created or updated assertion.
1979
+ """
1980
+ _print_experimental_warning()
1981
+ now_utc = datetime.now(timezone.utc)
1982
+
1983
+ if updated_by is None:
1984
+ logger.warning(
1985
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1986
+ )
1987
+ updated_by = DEFAULT_CREATED_BY
1988
+
1989
+ # 1. If urn is not set, create a new assertion
1990
+ if urn is None:
1991
+ logger.info("URN is not set, creating a new assertion")
1992
+ return self._create_smart_volume_assertion(
1993
+ dataset_urn=dataset_urn,
1994
+ display_name=display_name,
1995
+ enabled=enabled if enabled is not None else True,
1996
+ detection_mechanism=detection_mechanism,
1997
+ sensitivity=sensitivity,
1998
+ exclusion_windows=exclusion_windows,
1999
+ training_data_lookback_days=training_data_lookback_days,
2000
+ incident_behavior=incident_behavior,
2001
+ tags=tags,
2002
+ created_by=updated_by,
2003
+ schedule=schedule,
2004
+ )
2005
+
2006
+ # 2. If urn is set, first validate the input:
2007
+ assertion_input = _SmartVolumeAssertionInput(
2008
+ urn=urn,
2009
+ entity_client=self.client.entities,
2010
+ dataset_urn=dataset_urn,
2011
+ display_name=display_name,
2012
+ detection_mechanism=detection_mechanism,
2013
+ sensitivity=sensitivity,
2014
+ exclusion_windows=exclusion_windows,
2015
+ training_data_lookback_days=training_data_lookback_days,
2016
+ incident_behavior=incident_behavior,
2017
+ tags=tags,
2018
+ created_by=updated_by, # This will be overridden by the actual created_by
2019
+ created_at=now_utc, # This will be overridden by the actual created_at
2020
+ updated_by=updated_by,
2021
+ updated_at=now_utc,
2022
+ schedule=schedule,
2023
+ )
2024
+
2025
+ # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
2026
+ # if the assertion does not exist:
2027
+ merged_assertion_input_or_created_assertion = (
2028
+ self._retrieve_and_merge_volume_assertion_and_monitor(
2029
+ assertion_input=assertion_input,
2030
+ dataset_urn=dataset_urn,
2031
+ urn=urn,
2032
+ display_name=display_name,
2033
+ enabled=enabled,
2034
+ detection_mechanism=detection_mechanism,
2035
+ sensitivity=sensitivity,
2036
+ exclusion_windows=exclusion_windows,
2037
+ training_data_lookback_days=training_data_lookback_days,
2038
+ incident_behavior=incident_behavior,
2039
+ tags=tags,
2040
+ updated_by=updated_by,
2041
+ now_utc=now_utc,
2042
+ schedule=schedule,
2043
+ )
2044
+ )
2045
+
2046
+ # Return early if we created a new assertion in the merge:
2047
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
2048
+ # We know this is the correct type because we passed the assertion_class parameter
2049
+ assert isinstance(
1422
2050
  merged_assertion_input_or_created_assertion, SmartVolumeAssertion
1423
2051
  )
1424
2052
  return merged_assertion_input_or_created_assertion
@@ -2317,6 +2945,355 @@ class AssertionsClient:
2317
2945
 
2318
2946
  return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
2319
2947
 
2948
+ def sync_volume_assertion(
2949
+ self,
2950
+ *,
2951
+ dataset_urn: Union[str, DatasetUrn],
2952
+ urn: Optional[Union[str, AssertionUrn]] = None,
2953
+ display_name: Optional[str] = None,
2954
+ enabled: Optional[bool] = None,
2955
+ detection_mechanism: DetectionMechanismInputTypes = None,
2956
+ incident_behavior: Optional[
2957
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
2958
+ ] = None,
2959
+ tags: Optional[TagsInputType] = None,
2960
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
2961
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
2962
+ definition: Optional[VolumeAssertionDefinitionInputTypes] = None,
2963
+ ) -> VolumeAssertion:
2964
+ """Upsert and merge a volume assertion.
2965
+
2966
+ Note: keyword arguments are required.
2967
+
2968
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
2969
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
2970
+ be updated if the input value is not None. If the input value is None, the existing value
2971
+ will be preserved. A value that can be un-set is cleared by passing an empty list or
2972
+ empty string.
2973
+
2974
+ Schedule behavior:
2975
+ - Create case: Uses default daily schedule ("0 0 * * *") or provided schedule
2976
+ - Update case: Uses existing schedule or provided schedule.
2977
+
2978
+ Args:
2979
+ dataset_urn: The urn of the dataset to be monitored.
2980
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
2981
+ will be _created_ in the DataHub instance.
2982
+ display_name: The display name of the assertion. If not provided, a random display name
2983
+ will be generated.
2984
+ enabled: Whether the assertion is enabled. If not provided, the existing value
2985
+ will be preserved.
2986
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
2987
+ schema is recommended. Valid values are:
2988
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
2989
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
2990
+ - {
2991
+ "type": "last_modified_column",
2992
+ "column_name": "last_modified",
2993
+ "additional_filter": "last_modified > '2021-01-01'",
2994
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
2995
+ additional_filter="last_modified > '2021-01-01'")
2996
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
2997
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
2998
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
2999
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
3000
+ tags: The tags to be applied to the assertion. Valid values are:
3001
+ - a list of strings (strings will be converted to TagUrn objects)
3002
+ - a list of TagUrn objects
3003
+ - a list of TagAssociationClass objects
3004
+ updated_by: Optional urn of the user who updated the assertion. The format is
3005
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
3006
+ The default is the datahub system user.
3007
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
3008
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
3009
+ schedule will be used. The schedule determines when the assertion will be evaluated.
3010
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
3011
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
3012
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
3013
+ definition: The volume assertion definition. Can be provided as:
3014
+ - A typed volume assertion object (RowCountTotal or RowCountChange)
3015
+ - A dictionary with keys: type, operator, parameters (and kind for row_count_change)
3016
+ - None to preserve the existing definition from the backend (for update operations)
3017
+
3018
+ Example dictionary for row count total:
3019
+ {
3020
+ "type": "row_count_total",
3021
+ "operator": "GREATER_THAN_OR_EQUAL_TO",
3022
+ "parameters": 100
3023
+ }
3024
+
3025
+ Example dictionary for row count change:
3026
+ {
3027
+ "type": "row_count_change",
3028
+ "kind": "absolute",
3029
+ "operator": "LESS_THAN_OR_EQUAL_TO",
3030
+ "parameters": 50
3031
+ }
3032
+
3033
+ Returns:
3034
+ VolumeAssertion: The created or updated assertion.
3035
+ """
3036
+ _print_experimental_warning()
3037
+ now_utc = datetime.now(timezone.utc)
3038
+
3039
+ if updated_by is None:
3040
+ logger.warning(
3041
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
3042
+ )
3043
+ updated_by = DEFAULT_CREATED_BY
3044
+
3045
+ # 1. If urn is not set, create a new assertion
3046
+ if urn is None:
3047
+ logger.info("URN is not set, creating a new assertion")
3048
+ return self._create_volume_assertion(
3049
+ dataset_urn=dataset_urn,
3050
+ display_name=display_name,
3051
+ enabled=enabled if enabled is not None else True,
3052
+ detection_mechanism=detection_mechanism,
3053
+ incident_behavior=incident_behavior,
3054
+ tags=tags,
3055
+ created_by=updated_by,
3056
+ schedule=schedule,
3057
+ definition=definition,
3058
+ )
3059
+
3060
+ # 2. If urn is set, prepare definition for validation
3061
+ # We use a temporary default definition if None is provided, just to pass the _VolumeAssertionInput validation.
3062
+ # However, we record this in the use_backend_definition flag, so we can later
3063
+ # fail if there is no definition in the backend (basically, there is no assertion). That would mean that
3064
+ # this is a creation case and the user omitted the definition parameter, which is required.
3065
+ # This pattern likely has not come up before because there is no publicly documented default definition
3066
+ # that we can use as a fallback.
3067
+ use_backend_definition = definition is None
3068
+ temp_definition = (
3069
+ definition
3070
+ if definition is not None
3071
+ else RowCountTotal(
3072
+ operator=VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO,
3073
+ parameters=0, # Temporary placeholder
3074
+ )
3075
+ )
3076
+
3077
+ # 3. Create assertion input with effective definition
3078
+ assertion_input = _VolumeAssertionInput(
3079
+ urn=urn,
3080
+ dataset_urn=dataset_urn,
3081
+ entity_client=self.client.entities,
3082
+ detection_mechanism=detection_mechanism,
3083
+ incident_behavior=incident_behavior,
3084
+ tags=tags,
3085
+ created_by=updated_by, # This will be overridden by the actual created_by
3086
+ created_at=now_utc, # This will be overridden by the actual created_at
3087
+ updated_by=updated_by,
3088
+ updated_at=now_utc,
3089
+ schedule=schedule,
3090
+ definition=temp_definition,
3091
+ )
3092
+
3093
+ # 4. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
3094
+ # if the assertion does not exist:
3095
+ merged_assertion_input_or_created_assertion = (
3096
+ self._retrieve_and_merge_native_volume_assertion_and_monitor(
3097
+ assertion_input=assertion_input,
3098
+ dataset_urn=dataset_urn,
3099
+ urn=urn,
3100
+ display_name=display_name,
3101
+ enabled=enabled,
3102
+ detection_mechanism=detection_mechanism,
3103
+ definition=definition,
3104
+ use_backend_definition=use_backend_definition,
3105
+ incident_behavior=incident_behavior,
3106
+ tags=tags,
3107
+ updated_by=updated_by,
3108
+ now_utc=now_utc,
3109
+ schedule=schedule,
3110
+ )
3111
+ )
3112
+
3113
+ # Return early if we created a new assertion in the merge:
3114
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
3115
+ # We know this is the correct type because we passed the assertion_class parameter
3116
+ assert isinstance(
3117
+ merged_assertion_input_or_created_assertion, VolumeAssertion
3118
+ )
3119
+ return merged_assertion_input_or_created_assertion
3120
+
3121
+ # 5. Upsert the assertion and monitor entities:
3122
+ assertion_entity, monitor_entity = (
3123
+ merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
3124
+ )
3125
+ # If assertion upsert fails, we won't try to upsert the monitor
3126
+ self.client.entities.upsert(assertion_entity)
3127
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
3128
+ # try:
3129
+ self.client.entities.upsert(monitor_entity)
3130
+ # except Exception as e:
3131
+ # logger.error(f"Error upserting monitor: {e}")
3132
+ # self.client.entities.delete(assertion_entity)
3133
+ # raise e
3134
+ return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
3135
+
3136
+ def sync_sql_assertion(
3137
+ self,
3138
+ *,
3139
+ dataset_urn: Union[str, DatasetUrn],
3140
+ urn: Optional[Union[str, AssertionUrn]] = None,
3141
+ display_name: Optional[str] = None,
3142
+ enabled: Optional[bool] = None,
3143
+ statement: str,
3144
+ criteria: SqlAssertionCriteria,
3145
+ incident_behavior: Optional[
3146
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
3147
+ ] = None,
3148
+ tags: Optional[TagsInputType] = None,
3149
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
3150
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
3151
+ ) -> SqlAssertion:
3152
+ """Upsert and merge a sql assertion.
3153
+
3154
+ Note: keyword arguments are required.
3155
+
3156
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
3157
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
3158
+ be updated if the input value is not None. If the input value is None, the existing value
3159
+ will be preserved. A value that can be un-set is cleared by passing an empty list or
3160
+ empty string.
3161
+
3162
+ Schedule behavior:
3163
+ - Create case: Uses default daily schedule (\"0 0 * * *\") or provided schedule
3164
+ - Update case: Uses existing schedule or provided schedule.
3165
+
3166
+ Args:
3167
+ dataset_urn: The urn of the dataset to be monitored.
3168
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
3169
+ will be _created_ in the DataHub instance.
3170
+ display_name: The display name of the assertion. If not provided, a random display name
3171
+ will be generated.
3172
+ enabled: Whether the assertion is enabled. If not provided, the existing value
3173
+ will be preserved.
3174
+ criteria: The criteria to be used for the assertion. This is of type SqlAssertionCriteria. It has the following fields:
3175
+ - type: The type of sql assertion. Valid values are:
3176
+ - "METRIC" -> Looks at the current value of the metric.
3177
+ - "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
3178
+ - change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
3179
+ - "ABSOLUTE" -> Looks at the absolute change in the metric.
3180
+ - "PERCENTAGE" -> Looks at the percentage change in the metric.
3181
+ - operator: The operator to be used for the assertion. Valid values are:
3182
+ - "GREATER_THAN" -> The metric value is greater than the threshold.
3183
+ - "LESS_THAN" -> The metric value is less than the threshold.
3184
+ - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
3185
+ - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
3186
+ - "EQUAL_TO" -> The metric value is equal to the threshold.
3187
+ - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
3188
+ - "BETWEEN" -> The metric value is between the two thresholds.
3189
+ - parameters: The parameters to be used for the assertion. This is of type SqlAssertionParameters. It has the following fields:
3190
+ - value: The value of the metric. This can be a single value or a tuple range.
3191
+ - If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
3192
+ - If the operator is not "BETWEEN", the value is a single value.
3193
+ statement: The SQL statement to be used for the assertion.
3194
+ - "SELECT COUNT(*) FROM table WHERE column > 100"
3195
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
3196
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
3197
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
3198
+ tags: The tags to be applied to the assertion. Valid values are:
3199
+ - a list of strings (strings will be converted to TagUrn objects)
3200
+ - a list of TagUrn objects
3201
+ - a list of TagAssociationClass objects
3202
+ updated_by: Optional urn of the user who updated the assertion. The format is
3203
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
3204
+ The default is the datahub system user.
3205
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
3206
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
3207
+ schedule will be used. The schedule determines when the assertion will be evaluated.
3208
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
3209
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
3210
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
3211
+
3212
+ Returns:
3213
+ SqlAssertion: The created or updated assertion.
3214
+ """
3215
+ _print_experimental_warning()
3216
+ now_utc = datetime.now(timezone.utc)
3217
+
3218
+ if updated_by is None:
3219
+ logger.warning(
3220
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
3221
+ )
3222
+ updated_by = DEFAULT_CREATED_BY
3223
+
3224
+ # 1. If urn is not set, create a new assertion
3225
+ if urn is None:
3226
+ logger.info("URN is not set, creating a new assertion")
3227
+ return self._create_sql_assertion(
3228
+ dataset_urn=dataset_urn,
3229
+ display_name=display_name,
3230
+ enabled=enabled if enabled is not None else True,
3231
+ criteria=criteria,
3232
+ statement=statement,
3233
+ incident_behavior=incident_behavior,
3234
+ tags=tags,
3235
+ created_by=updated_by,
3236
+ schedule=schedule,
3237
+ )
3238
+
3239
+ # 2. If urn is set, first validate the input:
3240
+ assertion_input = _SqlAssertionInput(
3241
+ urn=urn,
3242
+ entity_client=self.client.entities,
3243
+ dataset_urn=dataset_urn,
3244
+ display_name=display_name,
3245
+ criteria=criteria,
3246
+ statement=statement,
3247
+ incident_behavior=incident_behavior,
3248
+ tags=tags,
3249
+ created_by=updated_by, # This will be overridden by the actual created_by
3250
+ created_at=now_utc, # This will be overridden by the actual created_at
3251
+ updated_by=updated_by,
3252
+ updated_at=now_utc,
3253
+ schedule=schedule,
3254
+ )
3255
+
3256
+ # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
3257
+ # if the assertion does not exist:
3258
+ merged_assertion_input_or_created_assertion = (
3259
+ self._retrieve_and_merge_sql_assertion_and_monitor(
3260
+ assertion_input=assertion_input,
3261
+ dataset_urn=dataset_urn,
3262
+ urn=urn,
3263
+ display_name=display_name,
3264
+ enabled=enabled,
3265
+ criteria=criteria,
3266
+ statement=statement,
3267
+ incident_behavior=incident_behavior,
3268
+ tags=tags,
3269
+ updated_by=updated_by,
3270
+ now_utc=now_utc,
3271
+ schedule=schedule,
3272
+ )
3273
+ )
3274
+
3275
+ # Return early if we created a new assertion in the merge:
3276
+ if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
3277
+ # Narrow the type: the SQL-specific merge helper is expected to return a SqlAssertion when it creates one
3278
+ assert isinstance(merged_assertion_input_or_created_assertion, SqlAssertion)
3279
+ return merged_assertion_input_or_created_assertion
3280
+
3281
+ # 4. Upsert the assertion and monitor entities:
3282
+ assertion_entity, monitor_entity = (
3283
+ merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
3284
+ )
3285
+ # If assertion upsert fails, we won't try to upsert the monitor
3286
+ self.client.entities.upsert(assertion_entity)
3287
+ # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
3288
+ # try:
3289
+ self.client.entities.upsert(monitor_entity)
3290
+ # except Exception as e:
3291
+ # logger.error(f"Error upserting monitor: {e}")
3292
+ # self.client.entities.delete(assertion_entity)
3293
+ # raise e
3294
+
3295
+ return SqlAssertion._from_entities(assertion_entity, monitor_entity)
3296
+
2320
3297
 
2321
3298
  def _merge_field(
2322
3299
  input_field_value: Any,