acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (20):
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
  3. acryl_datahub_cloud/metadata/schema.avsc +9 -0
  4. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
  5. acryl_datahub_cloud/sdk/__init__.py +10 -2
  6. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  7. acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
  8. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  9. acryl_datahub_cloud/sdk/assertion/types.py +18 -0
  10. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  11. acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
  12. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
  13. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
  14. acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
  15. acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
  16. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
  17. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
  18. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
  19. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
  20. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
@@ -4,21 +4,38 @@ import logging
4
4
  from datetime import datetime, timezone
5
5
  from typing import TYPE_CHECKING, Any, Optional, Union
6
6
 
7
- from acryl_datahub_cloud.sdk.assertion import (
7
+ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
8
8
  AssertionMode,
9
+ FreshnessAssertion,
9
10
  SmartFreshnessAssertion,
10
11
  SmartVolumeAssertion,
11
12
  _AssertionPublic,
12
13
  )
13
- from acryl_datahub_cloud.sdk.assertion_input import (
14
+ from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
15
+ SmartColumnMetricAssertion,
16
+ )
17
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
14
18
  AssertionIncidentBehavior,
15
19
  DetectionMechanismInputTypes,
16
20
  ExclusionWindowInputTypes,
17
21
  InferenceSensitivity,
22
+ TimeWindowSizeInputTypes,
18
23
  _AssertionInput,
19
24
  _SmartFreshnessAssertionInput,
20
25
  _SmartVolumeAssertionInput,
21
26
  )
27
+ from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
28
+ _FreshnessAssertionInput,
29
+ )
30
+ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
31
+ MetricInputType,
32
+ OperatorInputType,
33
+ RangeInputType,
34
+ RangeTypeInputType,
35
+ ValueInputType,
36
+ ValueTypeInputType,
37
+ _SmartColumnMetricAssertionInput,
38
+ )
22
39
  from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
23
40
  from acryl_datahub_cloud.sdk.entities.monitor import Monitor
24
41
  from acryl_datahub_cloud.sdk.errors import SDKUsageError
@@ -171,7 +188,7 @@ class AssertionsClient:
171
188
  # 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
172
189
  # if the assertion does not exist:
173
190
  merged_assertion_input_or_created_assertion = (
174
- self._retrieve_and_merge_freshness_assertion_and_monitor(
191
+ self._retrieve_and_merge_smart_freshness_assertion_and_monitor(
175
192
  assertion_input=assertion_input,
176
193
  dataset_urn=dataset_urn,
177
194
  urn=urn,
@@ -212,7 +229,7 @@ class AssertionsClient:
212
229
 
213
230
  return SmartFreshnessAssertion._from_entities(assertion_entity, monitor_entity)
214
231
 
215
- def _retrieve_and_merge_freshness_assertion_and_monitor(
232
+ def _retrieve_and_merge_smart_freshness_assertion_and_monitor(
216
233
  self,
217
234
  assertion_input: _SmartFreshnessAssertionInput,
218
235
  dataset_urn: Union[str, DatasetUrn],
@@ -277,7 +294,7 @@ class AssertionsClient:
277
294
  )
278
295
 
279
296
  # 4. Merge the existing assertion with the validated input:
280
- merged_assertion_input = self._merge_freshness_input(
297
+ merged_assertion_input = self._merge_smart_freshness_input(
281
298
  dataset_urn=dataset_urn,
282
299
  urn=urn,
283
300
  display_name=display_name,
@@ -350,6 +367,7 @@ class AssertionsClient:
350
367
  incident_behavior=incident_behavior,
351
368
  tags=tags,
352
369
  created_by=updated_by,
370
+ schedule=schedule,
353
371
  )
354
372
 
355
373
  # 3. Check for any issues e.g. different dataset urns
@@ -363,7 +381,7 @@ class AssertionsClient:
363
381
  )
364
382
 
365
383
  # 4. Merge the existing assertion with the validated input:
366
- merged_assertion_input = self._merge_volume_input(
384
+ merged_assertion_input = self._merge_smart_volume_input(
367
385
  dataset_urn=dataset_urn,
368
386
  urn=urn,
369
387
  display_name=display_name,
@@ -384,6 +402,93 @@ class AssertionsClient:
384
402
 
385
403
  return merged_assertion_input
386
404
 
405
+ def _retrieve_and_merge_freshness_assertion_and_monitor(
406
+ self,
407
+ assertion_input: _FreshnessAssertionInput,
408
+ dataset_urn: Union[str, DatasetUrn],
409
+ urn: Union[str, AssertionUrn],
410
+ display_name: Optional[str],
411
+ enabled: Optional[bool],
412
+ detection_mechanism: DetectionMechanismInputTypes,
413
+ incident_behavior: Optional[
414
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
415
+ ],
416
+ tags: Optional[TagsInputType],
417
+ updated_by: Optional[Union[str, CorpUserUrn]],
418
+ now_utc: datetime,
419
+ schedule: Optional[Union[str, models.CronScheduleClass]],
420
+ freshness_schedule_check_type: Optional[
421
+ Union[str, models.FreshnessAssertionScheduleTypeClass]
422
+ ] = None,
423
+ lookback_window: Optional[TimeWindowSizeInputTypes] = None,
424
+ ) -> Union[FreshnessAssertion, _FreshnessAssertionInput]:
425
+ # 1. Retrieve any existing assertion and monitor entities:
426
+ maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
427
+ self._retrieve_assertion_and_monitor(assertion_input)
428
+ )
429
+
430
+ # 2.1 If the assertion and monitor entities exist, create an assertion object from them:
431
+ if maybe_assertion_entity and maybe_monitor_entity:
432
+ existing_assertion = FreshnessAssertion._from_entities(
433
+ maybe_assertion_entity, maybe_monitor_entity
434
+ )
435
+ # 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
436
+ elif maybe_assertion_entity and not maybe_monitor_entity:
437
+ monitor_mode = (
438
+ "ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
439
+ )
440
+ existing_assertion = FreshnessAssertion._from_entities(
441
+ maybe_assertion_entity,
442
+ Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
443
+ )
444
+ # 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
445
+ elif not maybe_assertion_entity:
446
+ logger.info(
447
+ f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
448
+ )
449
+ return self._create_freshness_assertion(
450
+ dataset_urn=dataset_urn,
451
+ display_name=display_name,
452
+ detection_mechanism=detection_mechanism,
453
+ incident_behavior=incident_behavior,
454
+ tags=tags,
455
+ created_by=updated_by,
456
+ schedule=schedule,
457
+ freshness_schedule_check_type=freshness_schedule_check_type,
458
+ lookback_window=lookback_window,
459
+ )
460
+
461
+ # 3. Check for any issues e.g. different dataset urns
462
+ if (
463
+ existing_assertion
464
+ and hasattr(existing_assertion, "dataset_urn")
465
+ and existing_assertion.dataset_urn != assertion_input.dataset_urn
466
+ ):
467
+ raise SDKUsageError(
468
+ f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
469
+ )
470
+
471
+ # 4. Merge the existing assertion with the validated input:
472
+ merged_assertion_input = self._merge_freshness_input(
473
+ dataset_urn=dataset_urn,
474
+ urn=urn,
475
+ display_name=display_name,
476
+ enabled=enabled,
477
+ detection_mechanism=detection_mechanism,
478
+ incident_behavior=incident_behavior,
479
+ tags=tags,
480
+ now_utc=now_utc,
481
+ assertion_input=assertion_input,
482
+ maybe_assertion_entity=maybe_assertion_entity,
483
+ maybe_monitor_entity=maybe_monitor_entity,
484
+ existing_assertion=existing_assertion,
485
+ schedule=schedule,
486
+ freshness_schedule_check_type=freshness_schedule_check_type,
487
+ lookback_window=lookback_window,
488
+ )
489
+
490
+ return merged_assertion_input
491
+
387
492
  def _retrieve_assertion_and_monitor(
388
493
  self,
389
494
  assertion_input: _AssertionInput,
@@ -423,7 +528,7 @@ class AssertionsClient:
423
528
 
424
529
  return maybe_assertion_entity, monitor_urn, maybe_monitor_entity
425
530
 
426
- def _merge_freshness_input(
531
+ def _merge_smart_freshness_input(
427
532
  self,
428
533
  dataset_urn: Union[str, DatasetUrn],
429
534
  urn: Union[str, AssertionUrn],
@@ -554,7 +659,129 @@ class AssertionsClient:
554
659
 
555
660
  return merged_assertion_input
556
661
 
557
- def _merge_volume_input(
662
+ def _merge_freshness_input(
663
+ self,
664
+ dataset_urn: Union[str, DatasetUrn],
665
+ urn: Union[str, AssertionUrn],
666
+ display_name: Optional[str],
667
+ enabled: Optional[bool],
668
+ detection_mechanism: DetectionMechanismInputTypes,
669
+ incident_behavior: Optional[
670
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
671
+ ],
672
+ tags: Optional[TagsInputType],
673
+ now_utc: datetime,
674
+ assertion_input: _FreshnessAssertionInput,
675
+ maybe_assertion_entity: Optional[Assertion],
676
+ maybe_monitor_entity: Optional[Monitor],
677
+ existing_assertion: FreshnessAssertion,
678
+ schedule: Optional[Union[str, models.CronScheduleClass]],
679
+ freshness_schedule_check_type: Optional[
680
+ Union[str, models.FreshnessAssertionScheduleTypeClass]
681
+ ] = None,
682
+ lookback_window: Optional[TimeWindowSizeInputTypes] = None,
683
+ ) -> _FreshnessAssertionInput:
684
+ """Merge the input with the existing assertion and monitor entities.
685
+
686
+ Args:
687
+ dataset_urn: The urn of the dataset to be monitored.
688
+ urn: The urn of the assertion.
689
+ display_name: The display name of the assertion.
690
+ enabled: Whether the assertion is enabled.
691
+ incident_behavior: The incident behavior to be applied to the assertion.
692
+ tags: The tags to be applied to the assertion.
693
+ now_utc: The current UTC time from when the function is called.
694
+ assertion_input: The validated input to the function.
695
+ maybe_assertion_entity: The existing assertion entity from the DataHub instance.
696
+ maybe_monitor_entity: The existing monitor entity from the DataHub instance.
697
+ existing_assertion: The existing assertion from the DataHub instance.
698
+ schedule: The schedule to be applied to the assertion.
699
+ freshness_schedule_check_type: The freshness schedule check type to be applied to the assertion.
700
+ lookback_window: The lookback window to be applied to the assertion.
701
+
702
+ Returns:
703
+ The merged assertion input.
704
+ """
705
+ merged_assertion_input = _FreshnessAssertionInput(
706
+ urn=urn,
707
+ entity_client=self.client.entities,
708
+ dataset_urn=dataset_urn,
709
+ display_name=_merge_field(
710
+ display_name,
711
+ "display_name",
712
+ assertion_input,
713
+ existing_assertion,
714
+ maybe_assertion_entity.description if maybe_assertion_entity else None,
715
+ ),
716
+ enabled=_merge_field(
717
+ enabled,
718
+ "enabled",
719
+ assertion_input,
720
+ existing_assertion,
721
+ existing_assertion.mode == AssertionMode.ACTIVE
722
+ if existing_assertion
723
+ else None,
724
+ ),
725
+ schedule=_merge_field(
726
+ schedule,
727
+ "schedule",
728
+ assertion_input,
729
+ existing_assertion,
730
+ existing_assertion.schedule if existing_assertion else None,
731
+ ),
732
+ freshness_schedule_check_type=_merge_field(
733
+ freshness_schedule_check_type,
734
+ "freshness_schedule_check_type",
735
+ assertion_input,
736
+ existing_assertion,
737
+ existing_assertion._freshness_schedule_check_type
738
+ if existing_assertion
739
+ else None,
740
+ ),
741
+ lookback_window=_merge_field(
742
+ lookback_window,
743
+ "lookback_window",
744
+ assertion_input,
745
+ existing_assertion,
746
+ existing_assertion.lookback_window if existing_assertion else None,
747
+ ),
748
+ detection_mechanism=_merge_field(
749
+ detection_mechanism,
750
+ "detection_mechanism",
751
+ assertion_input,
752
+ existing_assertion,
753
+ FreshnessAssertion._get_detection_mechanism(
754
+ maybe_assertion_entity, maybe_monitor_entity, default=None
755
+ )
756
+ if maybe_assertion_entity and maybe_monitor_entity
757
+ else None,
758
+ ),
759
+ incident_behavior=_merge_field(
760
+ incident_behavior,
761
+ "incident_behavior",
762
+ assertion_input,
763
+ existing_assertion,
764
+ FreshnessAssertion._get_incident_behavior(maybe_assertion_entity)
765
+ if maybe_assertion_entity
766
+ else None,
767
+ ),
768
+ tags=_merge_field(
769
+ tags,
770
+ "tags",
771
+ assertion_input,
772
+ existing_assertion,
773
+ maybe_assertion_entity.tags if maybe_assertion_entity else None,
774
+ ),
775
+ created_by=existing_assertion.created_by
776
+ or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
777
+ created_at=existing_assertion.created_at
778
+ or now_utc, # Override with the existing assertion's created_at or now if not set
779
+ updated_by=assertion_input.updated_by, # Override with the input's updated_by
780
+ updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
781
+ )
782
+ return merged_assertion_input
783
+
784
+ def _merge_smart_volume_input(
558
785
  self,
559
786
  dataset_urn: Union[str, DatasetUrn],
560
787
  urn: Union[str, AssertionUrn],
@@ -919,46 +1146,35 @@ class AssertionsClient:
919
1146
  # raise e
920
1147
  return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
921
1148
 
922
- def sync_smart_volume_assertion(
1149
+ def _create_freshness_assertion(
923
1150
  self,
924
1151
  *,
925
1152
  dataset_urn: Union[str, DatasetUrn],
926
- urn: Optional[Union[str, AssertionUrn]] = None,
927
1153
  display_name: Optional[str] = None,
928
- enabled: Optional[bool] = None,
1154
+ enabled: bool = True,
1155
+ freshness_schedule_check_type: Optional[
1156
+ Union[str, models.FreshnessAssertionScheduleTypeClass]
1157
+ ] = None,
1158
+ lookback_window: Optional[TimeWindowSizeInputTypes] = None,
929
1159
  detection_mechanism: DetectionMechanismInputTypes = None,
930
- sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
931
- exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
932
- training_data_lookback_days: Optional[int] = None,
933
1160
  incident_behavior: Optional[
934
1161
  Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
935
1162
  ] = None,
936
1163
  tags: Optional[TagsInputType] = None,
937
- updated_by: Optional[Union[str, CorpUserUrn]] = None,
1164
+ created_by: Optional[Union[str, CorpUserUrn]] = None,
938
1165
  schedule: Optional[Union[str, models.CronScheduleClass]] = None,
939
- ) -> SmartVolumeAssertion:
940
- """Upsert and merge a smart volume assertion.
1166
+ ) -> FreshnessAssertion:
1167
+ """Create a freshness assertion.
941
1168
 
942
1169
  Note: keyword arguments are required.
943
1170
 
944
- Upsert and merge is a combination of create and update. If the assertion does not exist,
945
- it will be created. If it does exist, it will be updated. Existing assertion fields will
946
- be updated if the input value is not None. If the input value is None, the existing value
947
- will be preserved. If the input value can be un-set e.g. by passing an empty list or
948
- empty string.
949
-
950
- Schedule behavior:
951
- - Create case: Uses default hourly schedule (\"0 * * * *\") or provided schedule
952
- - Update case: Different than `sync_smart_freshness_assertion`, schedule is updated.
1171
+ The created assertion will use the default daily schedule ("0 0 * * *").
953
1172
 
954
1173
  Args:
955
1174
  dataset_urn: The urn of the dataset to be monitored.
956
- urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
957
- will be _created_ in the DataHub instance.
958
- display_name: The display name of the assertion. If not provided, a random display name
959
- will be generated.
960
- enabled: Whether the assertion is enabled. If not provided, the existing value
961
- will be preserved.
1175
+ display_name: The display name of the assertion. If not provided, a random display
1176
+ name will be generated.
1177
+ enabled: Whether the assertion is enabled. Defaults to True.
962
1178
  detection_mechanism: The detection mechanism to be used for the assertion. Information
963
1179
  schema is recommended. Valid values are:
964
1180
  - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
@@ -969,33 +1185,15 @@ class AssertionsClient:
969
1185
  "additional_filter": "last_modified > '2021-01-01'",
970
1186
  } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
971
1187
  additional_filter='last_modified > 2021-01-01')
972
- - {
973
- "type": "high_watermark_column",
974
- "column_name": "id",
975
- "additional_filter": "id > 1000",
976
- } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
977
- additional_filter='id > 1000')
978
1188
  - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
979
- sensitivity: The sensitivity to be applied to the assertion. Valid values are:
980
- - "low" or InferenceSensitivity.LOW
981
- - "medium" or InferenceSensitivity.MEDIUM
982
- - "high" or InferenceSensitivity.HIGH
983
- exclusion_windows: The exclusion windows to be applied to the assertion, currently only
984
- fixed range exclusion windows are supported. Valid values are:
985
- - from datetime.datetime objects: {
986
- "start": "datetime(2025, 1, 1, 0, 0, 0)",
987
- "end": "datetime(2025, 1, 2, 0, 0, 0)",
988
- }
989
- - from string datetimes: {
990
- "start": "2025-01-01T00:00:00",
991
- "end": "2025-01-02T00:00:00",
992
- }
993
- - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
994
- start=datetime(2025, 1, 1, 0, 0, 0),
995
- end=datetime(2025, 1, 2, 0, 0, 0)
996
- )
997
- training_data_lookback_days: The training data lookback days to be applied to the
998
- assertion as an integer.
1189
+ freshness_schedule_check_type: The freshness schedule check type to be applied to the assertion. Valid values are:
1190
+ - "since_the_last_check" or models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK
1191
+ - "cron" or models.FreshnessAssertionScheduleTypeClass.CRON
1192
+ lookback_window: The lookback window to be applied to the assertion. Valid values are:
1193
+ - from models.TimeWindowSize objects: models.TimeWindowSizeClass(
1194
+ unit=models.CalendarIntervalClass.DAY,
1195
+ multiple=1)
1196
+ - from TimeWindowSize objects: TimeWindowSize(unit='DAY', multiple=1)
999
1197
  incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1000
1198
  - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1001
1199
  - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
@@ -1003,7 +1201,7 @@ class AssertionsClient:
1003
1201
  - a list of strings (strings will be converted to TagUrn objects)
1004
1202
  - a list of TagUrn objects
1005
1203
  - a list of TagAssociationClass objects
1006
- updated_by: Optional urn of the user who updated the assertion. The format is
1204
+ created_by: Optional urn of the user who created the assertion. The format is
1007
1205
  "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1008
1206
  The default is the datahub system user.
1009
1207
  TODO: Retrieve the SDK user as the default instead of the datahub system user.
@@ -1014,22 +1212,157 @@ class AssertionsClient:
1014
1212
  cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1015
1213
 
1016
1214
  Returns:
1017
- SmartVolumeAssertion: The created or updated assertion.
1215
+ FreshnessAssertion: The created assertion.
1018
1216
  """
1019
1217
  _print_experimental_warning()
1020
1218
  now_utc = datetime.now(timezone.utc)
1021
-
1022
- if updated_by is None:
1219
+ if created_by is None:
1023
1220
  logger.warning(
1024
- f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1221
+ f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1025
1222
  )
1026
- updated_by = DEFAULT_CREATED_BY
1027
-
1028
- # 1. If urn is not set, create a new assertion
1029
- if urn is None:
1030
- logger.info("URN is not set, creating a new assertion")
1031
- return self._create_smart_volume_assertion(
1032
- dataset_urn=dataset_urn,
1223
+ created_by = DEFAULT_CREATED_BY
1224
+ assertion_input = _FreshnessAssertionInput(
1225
+ urn=None,
1226
+ entity_client=self.client.entities,
1227
+ dataset_urn=dataset_urn,
1228
+ display_name=display_name,
1229
+ enabled=enabled,
1230
+ detection_mechanism=detection_mechanism,
1231
+ freshness_schedule_check_type=freshness_schedule_check_type,
1232
+ lookback_window=lookback_window,
1233
+ incident_behavior=incident_behavior,
1234
+ tags=tags,
1235
+ created_by=created_by,
1236
+ created_at=now_utc,
1237
+ updated_by=created_by,
1238
+ updated_at=now_utc,
1239
+ schedule=schedule,
1240
+ )
1241
+ assertion_entity, monitor_entity = (
1242
+ assertion_input.to_assertion_and_monitor_entities()
1243
+ )
1244
+ # If assertion creation fails, we won't try to create the monitor
1245
+ self.client.entities.create(assertion_entity)
1246
+ # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
1247
+ # try:
1248
+ self.client.entities.create(monitor_entity)
1249
+ # except Exception as e:
1250
+ # logger.error(f"Error creating monitor: {e}")
1251
+ # self.client.entities.delete(assertion_entity)
1252
+ # raise e
1253
+ return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
1254
+
1255
+ def sync_smart_volume_assertion(
1256
+ self,
1257
+ *,
1258
+ dataset_urn: Union[str, DatasetUrn],
1259
+ urn: Optional[Union[str, AssertionUrn]] = None,
1260
+ display_name: Optional[str] = None,
1261
+ enabled: Optional[bool] = None,
1262
+ detection_mechanism: DetectionMechanismInputTypes = None,
1263
+ sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
1264
+ exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
1265
+ training_data_lookback_days: Optional[int] = None,
1266
+ incident_behavior: Optional[
1267
+ Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
1268
+ ] = None,
1269
+ tags: Optional[TagsInputType] = None,
1270
+ updated_by: Optional[Union[str, CorpUserUrn]] = None,
1271
+ schedule: Optional[Union[str, models.CronScheduleClass]] = None,
1272
+ ) -> SmartVolumeAssertion:
1273
+ """Upsert and merge a smart volume assertion.
1274
+
1275
+ Note: keyword arguments are required.
1276
+
1277
+ Upsert and merge is a combination of create and update. If the assertion does not exist,
1278
+ it will be created. If it does exist, it will be updated. Existing assertion fields will
1279
+ be updated if the input value is not None. If the input value is None, the existing value
1280
+ will be preserved. If the input value can be un-set e.g. by passing an empty list or
1281
+ empty string.
1282
+
1283
+ Schedule behavior:
1284
+ - Create case: Uses default hourly schedule (\"0 * * * *\") or provided schedule
1285
+ - Update case: Different than `sync_smart_freshness_assertion`, schedule is updated.
1286
+
1287
+ Args:
1288
+ dataset_urn: The urn of the dataset to be monitored.
1289
+ urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
1290
+ will be _created_ in the DataHub instance.
1291
+ display_name: The display name of the assertion. If not provided, a random display name
1292
+ will be generated.
1293
+ enabled: Whether the assertion is enabled. If not provided, the existing value
1294
+ will be preserved.
1295
+ detection_mechanism: The detection mechanism to be used for the assertion. Information
1296
+ schema is recommended. Valid values are:
1297
+ - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
1298
+ - "audit_log" or DetectionMechanism.AUDIT_LOG
1299
+ - {
1300
+ "type": "last_modified_column",
1301
+ "column_name": "last_modified",
1302
+ "additional_filter": "last_modified > '2021-01-01'",
1303
+ } or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
1304
+ additional_filter='last_modified > 2021-01-01')
1305
+ - {
1306
+ "type": "high_watermark_column",
1307
+ "column_name": "id",
1308
+ "additional_filter": "id > 1000",
1309
+ } or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
1310
+ additional_filter='id > 1000')
1311
+ - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
1312
+ sensitivity: The sensitivity to be applied to the assertion. Valid values are:
1313
+ - "low" or InferenceSensitivity.LOW
1314
+ - "medium" or InferenceSensitivity.MEDIUM
1315
+ - "high" or InferenceSensitivity.HIGH
1316
+ exclusion_windows: The exclusion windows to be applied to the assertion, currently only
1317
+ fixed range exclusion windows are supported. Valid values are:
1318
+ - from datetime.datetime objects: {
1319
+ "start": "datetime(2025, 1, 1, 0, 0, 0)",
1320
+ "end": "datetime(2025, 1, 2, 0, 0, 0)",
1321
+ }
1322
+ - from string datetimes: {
1323
+ "start": "2025-01-01T00:00:00",
1324
+ "end": "2025-01-02T00:00:00",
1325
+ }
1326
+ - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
1327
+ start=datetime(2025, 1, 1, 0, 0, 0),
1328
+ end=datetime(2025, 1, 2, 0, 0, 0)
1329
+ )
1330
+ training_data_lookback_days: The training data lookback days to be applied to the
1331
+ assertion as an integer.
1332
+ incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
1333
+ - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
1334
+ - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
1335
+ tags: The tags to be applied to the assertion. Valid values are:
1336
+ - a list of strings (strings will be converted to TagUrn objects)
1337
+ - a list of TagUrn objects
1338
+ - a list of TagAssociationClass objects
1339
+ updated_by: Optional urn of the user who updated the assertion. The format is
1340
+ "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
1341
+ The default is the datahub system user.
1342
+ TODO: Retrieve the SDK user as the default instead of the datahub system user.
1343
+ schedule: Optional cron formatted schedule for the assertion. If not provided, a default
1344
+ schedule will be used. The schedule determines when the assertion will be evaluated.
1345
+ The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
1346
+ Alternatively, a models.CronScheduleClass object can be provided with string parameters
1347
+ cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
1348
+
1349
+ Returns:
1350
+ SmartVolumeAssertion: The created or updated assertion.
1351
+ """
1352
+ _print_experimental_warning()
1353
+ now_utc = datetime.now(timezone.utc)
1354
+
1355
+ if updated_by is None:
1356
+ logger.warning(
1357
+ f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
1358
+ )
1359
+ updated_by = DEFAULT_CREATED_BY
1360
+
1361
+ # 1. If urn is not set, create a new assertion
1362
+ if urn is None:
1363
+ logger.info("URN is not set, creating a new assertion")
1364
+ return self._create_smart_volume_assertion(
1365
+ dataset_urn=dataset_urn,
1033
1366
  display_name=display_name,
1034
1367
  enabled=enabled if enabled is not None else True,
1035
1368
  detection_mechanism=detection_mechanism,
@@ -1106,6 +1439,884 @@ class AssertionsClient:
1106
1439
 
1107
1440
  return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
1108
1441
 
1442
def sync_smart_column_metric_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType] = None,
    value_type: Optional[ValueTypeInputType] = None,
    range: Optional[RangeInputType] = None,
    range_type: Optional[RangeTypeInputType] = None,
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartColumnMetricAssertion:
    """Upsert and merge a smart column metric assertion.

    Note: all arguments must be passed by keyword.

    "Upsert and merge" combines create and update:

    - If ``urn`` is None, a new assertion is created.
    - If ``urn`` is given but no assertion with that urn exists, a new assertion
      is created with a generated urn.
    - Otherwise the existing assertion is updated. A field is overwritten only
      when the corresponding input value is not None; passing None preserves the
      existing value. Fields that can be un-set are cleared by passing an empty
      value, e.g. an empty list or an empty string.

    Args:
        dataset_urn: The urn of the dataset to be monitored. (Required)
        column_name: The name of the column to be monitored. (Required)
        metric_type: The type of the metric to be monitored. (Required)
        operator: The operator to be used for the assertion. (Required)
        value: The value to be used for the assertion. (Required if operator
            requires a value)
        value_type: The type of the value to be used for the assertion.
            (Required if operator requires a value)
        range: The range to be used for the assertion. (Required if operator
            requires a range)
        range_type: The type of the range to be used for the assertion.
            (Required if operator requires a range)
        urn: The urn of the assertion. If not provided, a urn will be generated
            and the assertion will be _created_ in the DataHub instance.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. If not provided, the existing
            value will be preserved (a newly created assertion is enabled).
        detection_mechanism: The detection mechanism to be used for the
            assertion. Valid values are:
            - All rows query datahub dataset profile:
                - "all_rows_query_datahub_dataset_profile" or
                  DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE

            - All rows query:
                - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY
                - with optional additional filter:
                  DetectionMechanism.ALL_ROWS_QUERY(additional_filter='last_modified > 2021-01-01')
                - Or as a dict: {
                    "type": "all_rows_query",
                    "additional_filter": "last_modified > '2021-01-01'", # optional
                }

            - Changed rows query:
                - For changed rows query, you need to pass a supported column
                  type (Number, Date or Time)
                - DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified')
                - With optional additional filter:
                  DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
                - Or as a dict: {
                    "type": "changed_rows_query",
                    "column_name": "last_modified",
                    "additional_filter": "last_modified > '2021-01-01'", # optional
                }

        sensitivity: The sensitivity to be applied to the assertion. Valid
            values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the assertion,
            currently only fixed range exclusion windows are supported. Valid
            values are:
            - from datetime.datetime objects: {
                "start": "datetime(2025, 1, 1, 0, 0, 0)",
                "end": "datetime(2025, 1, 2, 0, 0, 0)",
            }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
            }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
            )
        training_data_lookback_days: The training data lookback days to be
            applied to the assertion as an integer.
        incident_behavior: The incident behavior to be applied to the assertion.
            Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule of every 6 hours will be used. The
            schedule determines when the assertion will be evaluated. The format
            is a cron expression, e.g. "0 * * * *" for every hour using UTC
            timezone. Alternatively, a models.CronScheduleClass object can be
            provided with string parameters cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        SmartColumnMetricAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with, so just create.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_smart_column_metric_assertion(
            # What is being monitored and how it is evaluated:
            dataset_urn=dataset_urn,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            value=value,
            value_type=value_type,
            range=range,
            range_type=range_type,
            # Assertion / monitor settings:
            display_name=display_name,
            enabled=True if enabled is None else enabled,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            schedule=schedule,
            # The updater of a brand-new assertion is also its creator.
            created_by=updated_by,
        )

    # Step 2: a urn was given, so validate the raw input first. created_by and
    # created_at are placeholders here; the merge step replaces them with the
    # values stored on the existing assertion.
    validated_input = _SmartColumnMetricAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        display_name=display_name,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        schedule=schedule,
        created_by=updated_by,
        created_at=now_utc,
        updated_by=updated_by,
        updated_at=now_utc,
    )

    # Step 3: merge the validated input with whatever is stored for this urn.
    # If nothing is stored, the helper creates the assertion itself and hands
    # back the finished public assertion object instead of a merged input.
    merge_result = self._retrieve_and_merge_smart_column_metric_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=now_utc,
        schedule=schedule,
    )

    if not isinstance(merge_result, _AssertionPublic):
        # Step 4: we got a merged input back, so upsert both entities.
        assertion_entity, monitor_entity = (
            merge_result.to_assertion_and_monitor_entities()
        )
        # The assertion goes first: if its upsert raises, the monitor upsert is
        # never attempted.
        self.client.entities.upsert(assertion_entity)
        # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
        self.client.entities.upsert(monitor_entity)

        return SmartColumnMetricAssertion._from_entities(
            assertion_entity, monitor_entity
        )

    # The merge step already created the assertion; return it as-is. The assert
    # narrows the static type to the concrete assertion class.
    assert isinstance(merge_result, SmartColumnMetricAssertion)
    return merge_result
1667
+
1668
def _create_smart_column_metric_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType] = None,
    value_type: Optional[ValueTypeInputType] = None,
    range: Optional[RangeInputType] = None,
    range_type: Optional[RangeTypeInputType] = None,
    display_name: Optional[str] = None,
    enabled: bool = True,
    detection_mechanism: DetectionMechanismInputTypes = None,
    sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
    exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
    training_data_lookback_days: Optional[int] = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    created_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SmartColumnMetricAssertion:
    """Create a smart column metric assertion.

    Note: keyword arguments are required.

    The assertion is always created with a generated urn (``urn=None`` is passed
    to the input object). The assertion entity is created first and the monitor
    entity second.

    Args:
        dataset_urn: The urn of the dataset to be monitored. (Required)
        column_name: The name of the column to be monitored. (Required)
        metric_type: The type of the metric to be monitored. (Required)
        operator: The operator to be used for the assertion. (Required)
        value: The value to be used for the assertion. (Required if operator requires a value)
        value_type: The type of the value to be used for the assertion. (Required if operator requires a value)
        range: The range to be used for the assertion. (Required if operator requires a range)
        range_type: The type of the range to be used for the assertion. (Required if operator requires a range)
        display_name: The display name of the assertion. If not provided, a random display
            name will be generated.
        enabled: Whether the assertion is enabled. Defaults to True.
        detection_mechanism: The detection mechanism to be used for the assertion. Valid values are:
            - All rows query datahub dataset profile:
                - "all_rows_query_datahub_dataset_profile" or DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE

            - All rows query:
                - "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY
                - with optional additional filter: DetectionMechanism.ALL_ROWS_QUERY(additional_filter='last_modified > 2021-01-01')
                - Or as a dict: {
                    "type": "all_rows_query",
                    "additional_filter": "last_modified > '2021-01-01'", # optional
                }

            - Changed rows query:
                - For changed rows query, you need to pass a supported column type (Number, Date or Time)
                - DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified')
                - With optional additional filter: DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
                - Or as a dict: {
                    "type": "changed_rows_query",
                    "column_name": "last_modified",
                    "additional_filter": "last_modified > '2021-01-01'", # optional
                }

        sensitivity: The sensitivity to be applied to the assertion. Valid values are:
            - "low" or InferenceSensitivity.LOW
            - "medium" or InferenceSensitivity.MEDIUM
            - "high" or InferenceSensitivity.HIGH
        exclusion_windows: The exclusion windows to be applied to the assertion, currently only
            fixed range exclusion windows are supported. Valid values are:
            - from datetime.datetime objects: {
                "start": "datetime(2025, 1, 1, 0, 0, 0)",
                "end": "datetime(2025, 1, 2, 0, 0, 0)",
            }
            - from string datetimes: {
                "start": "2025-01-01T00:00:00",
                "end": "2025-01-02T00:00:00",
            }
            - from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
                start=datetime(2025, 1, 1, 0, 0, 0),
                end=datetime(2025, 1, 2, 0, 0, 0)
            )
        training_data_lookback_days: The training data lookback days to be applied to the
            assertion as an integer.
        incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        created_by: Optional urn of the user who created the assertion. The format is
            "urn:li:corpuser:<username>", which you can find on the Users & Groups page.
            The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub system user.
        schedule: Optional cron formatted schedule for the assertion. If not provided, a default
            schedule will be used. The schedule determines when the assertion will be evaluated.
            The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
            Alternatively, a models.CronScheduleClass object can be provided with string parameters
            cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.

    Returns:
        SmartColumnMetricAssertion: The created assertion.
    """
    _print_experimental_warning()
    now_utc = datetime.now(timezone.utc)
    if created_by is None:
        logger.warning(
            f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        created_by = DEFAULT_CREATED_BY

    # Building the input object validates the arguments. On creation the
    # creator and the last updater are the same user at the same instant.
    assertion_input = _SmartColumnMetricAssertionInput(
        urn=None,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=created_by,
        created_at=now_utc,
        updated_by=created_by,
        updated_at=now_utc,
        schedule=schedule,
    )
    assertion_entity, monitor_entity = (
        assertion_input.to_assertion_and_monitor_entities()
    )
    # If assertion creation fails, we won't try to create the monitor
    self.client.entities.create(assertion_entity)
    # TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    self.client.entities.create(monitor_entity)
    return SmartColumnMetricAssertion._from_entities(
        assertion_entity, monitor_entity
    )
1812
+
1813
def _retrieve_and_merge_smart_column_metric_assertion_and_monitor(
    self,
    assertion_input: _SmartColumnMetricAssertionInput,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType],
    value_type: Optional[ValueTypeInputType],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeInputType],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    updated_by: Optional[Union[str, CorpUserUrn]],
    now_utc: datetime,
    schedule: Optional[Union[str, models.CronScheduleClass]],
) -> Union[SmartColumnMetricAssertion, _SmartColumnMetricAssertionInput]:
    """Fetch the stored assertion/monitor for ``urn`` and merge the input into them.

    Three cases are handled:

    - No assertion entity is found: a new assertion is created (with a generated
      urn) and the created ``SmartColumnMetricAssertion`` is returned.
    - Both assertion and monitor entities are found: they are merged with the
      input and the merged ``_SmartColumnMetricAssertionInput`` is returned.
    - Only the assertion entity is found: a placeholder monitor is built so the
      existing assertion can be reconstructed, then the merge proceeds as above.

    Args:
        assertion_input: The already validated input for this sync call.
        dataset_urn: The urn of the dataset to be monitored.
        column_name: The name of the column to be monitored.
        metric_type: The type of the metric to be monitored.
        operator: The operator to be used for the assertion.
        value: The value to be used for the assertion.
        value_type: The type of the value to be used for the assertion.
        range: The range to be used for the assertion.
        range_type: The type of the range to be used for the assertion.
        urn: The urn of the assertion to look up.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled. None means "not specified".
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be applied to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        updated_by: The urn of the user performing the update; used as the
            creator when a new assertion has to be created.
        now_utc: The current UTC time from when the sync call started.
        schedule: The cron schedule for the assertion.

    Returns:
        The created assertion if none existed, otherwise the merged assertion input.

    Raises:
        SDKUsageError: If the existing assertion belongs to a different dataset
            than the one given in the input.
    """
    # 1. Retrieve any existing assertion and monitor entities:
    maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
        self._retrieve_assertion_and_monitor(assertion_input)
    )

    # 2.1 If the assertion does not exist, create a new assertion with a generated urn and return it:
    if not maybe_assertion_entity:
        logger.info(
            f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
        )
        return self._create_smart_column_metric_assertion(
            dataset_urn=dataset_urn,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            value=value,
            value_type=value_type,
            range=range,
            range_type=range_type,
            schedule=schedule,
            display_name=display_name,
            # Forward the caller's choice so that enabled=False is honored on
            # this create path as well; an unspecified value means enabled.
            enabled=enabled if enabled is not None else True,
            detection_mechanism=detection_mechanism,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
        )

    # 2.2 If the assertion and monitor entities exist, create an assertion object from them:
    if maybe_monitor_entity:
        existing_assertion = SmartColumnMetricAssertion._from_entities(
            maybe_assertion_entity, maybe_monitor_entity
        )
    # 2.3 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
    else:
        # Only an explicit falsy `enabled` yields an inactive placeholder;
        # None (unspecified) is treated as active.
        monitor_mode = "ACTIVE" if enabled or enabled is None else "INACTIVE"
        existing_assertion = SmartColumnMetricAssertion._from_entities(
            maybe_assertion_entity,
            Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
        )

    # 3. Check for any issues e.g. different dataset urns
    if (
        existing_assertion
        and hasattr(existing_assertion, "dataset_urn")
        and existing_assertion.dataset_urn != assertion_input.dataset_urn
    ):
        raise SDKUsageError(
            f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
        )

    # 4. Merge the existing assertion with the validated input:
    merged_assertion_input = self._merge_smart_column_metric_input(
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        value=value,
        value_type=value_type,
        range=range,
        range_type=range_type,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        schedule=schedule,
        detection_mechanism=detection_mechanism,
        sensitivity=sensitivity,
        exclusion_windows=exclusion_windows,
        training_data_lookback_days=training_data_lookback_days,
        incident_behavior=incident_behavior,
        tags=tags,
        now_utc=now_utc,
        assertion_input=assertion_input,
        maybe_assertion_entity=maybe_assertion_entity,
        maybe_monitor_entity=maybe_monitor_entity,
        existing_assertion=existing_assertion,
    )

    return merged_assertion_input
1921
+
1922
def _merge_smart_column_metric_input(
    self,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    value: Optional[ValueInputType],
    value_type: Optional[ValueTypeInputType],
    range: Optional[RangeInputType],
    range_type: Optional[RangeTypeInputType],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    detection_mechanism: DetectionMechanismInputTypes,
    sensitivity: Optional[Union[str, InferenceSensitivity]],
    exclusion_windows: Optional[ExclusionWindowInputTypes],
    training_data_lookback_days: Optional[int],
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ],
    tags: Optional[TagsInputType],
    schedule: Optional[Union[str, models.CronScheduleClass]],
    now_utc: datetime,
    assertion_input: _SmartColumnMetricAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: SmartColumnMetricAssertion,
) -> _SmartColumnMetricAssertionInput:
    """Build a new assertion input by merging the caller's input with stored state.

    Every mergeable field is resolved through ``_merge_field``, which is given
    the raw input value, the validated input, the existing assertion, and the
    value currently stored on the assertion or monitor entity (None when that
    entity is missing). ``urn`` and ``dataset_urn`` are passed through
    unchanged. ``created_by``/``created_at`` come from the existing assertion,
    while ``updated_by``/``updated_at`` come from the validated input.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        column_name: The name of the column to be monitored.
        metric_type: The type of the metric to be monitored.
        operator: The operator to be used for the assertion.
        value: The value to be used for the assertion.
        value_type: The type of the value to be used for the assertion.
        range: The range to be used for the assertion.
        range_type: The type of the range to be used for the assertion.
        urn: The urn of the assertion.
        display_name: The display name of the assertion.
        enabled: Whether the assertion is enabled.
        detection_mechanism: The detection mechanism to be used for the assertion.
        sensitivity: The sensitivity to be applied to the assertion.
        exclusion_windows: The exclusion windows to be applied to the assertion.
        training_data_lookback_days: The training data lookback days to be applied to the assertion.
        incident_behavior: The incident behavior to be applied to the assertion.
        tags: The tags to be applied to the assertion.
        schedule: The cron schedule for the assertion.
        now_utc: The current UTC time from when the function is called.
        assertion_input: The validated input to the function.
        maybe_assertion_entity: The existing assertion entity from the DataHub instance.
        maybe_monitor_entity: The existing monitor entity from the DataHub instance.
        existing_assertion: The existing assertion from the DataHub instance.

    Returns:
        The merged assertion input.
    """
    entity_client = self.client.entities

    # --- Fields whose stored value lives on the assertion entity -------------
    merged_column_name = _merge_field(
        input_field_value=column_name,
        input_field_name="column_name",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_column_name(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_metric_type = _merge_field(
        input_field_value=metric_type,
        input_field_name="metric_type",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_metric_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_operator = _merge_field(
        input_field_value=operator,
        input_field_name="operator",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_operator(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_value = _merge_field(
        input_field_value=value,
        input_field_name="value",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_value(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_value_type = _merge_field(
        input_field_value=value_type,
        input_field_name="value_type",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_value_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_range = _merge_field(
        input_field_value=range,
        input_field_name="range",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_range(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_range_type = _merge_field(
        input_field_value=range_type,
        input_field_name="range_type",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_range_type(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_display_name = _merge_field(
        input_field_value=display_name,
        input_field_name="display_name",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        # The display name is stored as the assertion entity's description.
        existing_entity_value=(
            maybe_assertion_entity.description if maybe_assertion_entity else None
        ),
    )

    # --- Fields whose stored value is read off the existing assertion --------
    merged_enabled = _merge_field(
        input_field_value=enabled,
        input_field_name="enabled",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            existing_assertion.mode == AssertionMode.ACTIVE
            if existing_assertion
            else None
        ),
    )
    merged_schedule = _merge_field(
        input_field_value=schedule,
        input_field_name="schedule",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            existing_assertion.schedule if existing_assertion else None
        ),
    )

    # --- Fields whose stored value involves the monitor entity ---------------
    merged_detection_mechanism = _merge_field(
        input_field_value=detection_mechanism,
        input_field_name="detection_mechanism",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        # Needs both entities; otherwise there is no stored value to offer.
        existing_entity_value=(
            SmartColumnMetricAssertion._get_detection_mechanism(
                maybe_assertion_entity, maybe_monitor_entity, default=None
            )
            if maybe_assertion_entity and maybe_monitor_entity
            else None
        ),
    )
    merged_sensitivity = _merge_field(
        input_field_value=sensitivity,
        input_field_name="sensitivity",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_monitor_entity.sensitivity if maybe_monitor_entity else None
        ),
    )
    merged_exclusion_windows = _merge_field(
        input_field_value=exclusion_windows,
        input_field_name="exclusion_windows",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_monitor_entity.exclusion_windows if maybe_monitor_entity else None
        ),
    )
    merged_training_data_lookback_days = _merge_field(
        input_field_value=training_data_lookback_days,
        input_field_name="training_data_lookback_days",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_monitor_entity.training_data_lookback_days
            if maybe_monitor_entity
            else None
        ),
    )

    # --- Remaining assertion-entity fields ------------------------------------
    merged_incident_behavior = _merge_field(
        input_field_value=incident_behavior,
        input_field_name="incident_behavior",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            SmartColumnMetricAssertion._get_incident_behavior(maybe_assertion_entity)
            if maybe_assertion_entity
            else None
        ),
    )
    merged_tags = _merge_field(
        input_field_value=tags,
        input_field_name="tags",
        validated_assertion_input=assertion_input,
        validated_existing_assertion=existing_assertion,
        existing_entity_value=(
            maybe_assertion_entity.tags if maybe_assertion_entity else None
        ),
    )

    # Audit fields: creation info is kept from the existing assertion (falling
    # back to the default creator / current time when unset); update info always
    # comes from this call's validated input.
    original_creator = existing_assertion.created_by or DEFAULT_CREATED_BY
    original_created_at = existing_assertion.created_at or now_utc

    return _SmartColumnMetricAssertionInput(
        urn=urn,
        entity_client=entity_client,
        dataset_urn=dataset_urn,
        column_name=merged_column_name,
        metric_type=merged_metric_type,
        operator=merged_operator,
        value=merged_value,
        value_type=merged_value_type,
        range=merged_range,
        range_type=merged_range_type,
        display_name=merged_display_name,
        enabled=merged_enabled,
        schedule=merged_schedule,
        detection_mechanism=merged_detection_mechanism,
        sensitivity=merged_sensitivity,
        exclusion_windows=merged_exclusion_windows,
        training_data_lookback_days=merged_training_data_lookback_days,
        incident_behavior=merged_incident_behavior,
        tags=merged_tags,
        created_by=original_creator,
        created_at=original_created_at,
        updated_by=assertion_input.updated_by,
        updated_at=assertion_input.updated_at,
    )
2154
+
2155
def sync_freshness_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    detection_mechanism: DetectionMechanismInputTypes = None,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    freshness_schedule_check_type: Optional[
        Union[str, models.FreshnessAssertionScheduleTypeClass]
    ] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
    lookback_window: Optional[TimeWindowSizeInputTypes] = None,
) -> FreshnessAssertion:
    """Create a freshness assertion, or merge changes into an existing one.

    Note: every argument must be passed by keyword.

    This is an "upsert and merge" operation. When no ``urn`` is given, a new
    assertion is created. When a ``urn`` is given, the input is validated and
    merged with the stored assertion and monitor; if the merge step finds
    nothing to merge with, it creates the assertion instead and that result is
    returned directly. During a merge, an argument left as None keeps the
    existing value, while a non-None argument replaces it. Some values can be
    un-set explicitly, e.g. by passing an empty list or an empty string.

    Schedule behavior:
        - Create case: the provided schedule, or the default daily schedule
          ("0 0 * * *") when none is provided.
        - Update case: the provided schedule, or the existing schedule when
          none is provided.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. If not provided, a urn will be
            generated and the assertion will be _created_ in the DataHub
            instance.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. On creation (no ``urn``)
            None is treated as True; on update None preserves the existing
            value.
        detection_mechanism: The detection mechanism to be used for the
            assertion. Information schema is recommended. Valid values are:
            - "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
            - "audit_log" or DetectionMechanism.AUDIT_LOG
            - {
                "type": "last_modified_column",
                "column_name": "last_modified",
                "additional_filter": "last_modified > '2021-01-01'",
              } or DetectionMechanism.LAST_MODIFIED_COLUMN(
                  column_name="last_modified",
                  additional_filter="last_modified > '2021-01-01'")
            - {
                "type": "high_watermark_column",
                "column_name": "id",
                "additional_filter": "id > 1000",
              } or DetectionMechanism.HIGH_WATERMARK_COLUMN(
                  column_name="id", additional_filter="id > 1000")
            - "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
        incident_behavior: The incident behavior to be applied to the
            assertion, as a single value or a list. Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The
            format is "urn:li:corpuser:<username>", which you can find on the
            Users & Groups page. When omitted, a warning is logged and
            DEFAULT_CREATED_BY is used as a placeholder. On creation this
            value is also used as the creator.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        freshness_schedule_check_type: Optional freshness schedule check
            type, given as a string or a
            models.FreshnessAssertionScheduleTypeClass value. It is forwarded
            unchanged to the create, validation and merge steps.
        schedule: Optional cron formatted schedule for the assertion. The
            schedule determines when the assertion will be evaluated. The
            format is a cron expression, e.g. "0 * * * *" for every hour
            using UTC timezone. Alternatively, a models.CronScheduleClass
            object can be provided with string parameters cron and timezone.
            Use `from datahub.metadata import schema_classes as models` to
            import the class.
        lookback_window: Optional time window size input. It is forwarded
            unchanged to the create, validation and merge steps.
            NOTE(review): presumably only relevant for some
            freshness_schedule_check_type values - confirm against
            _FreshnessAssertionInput.

    Returns:
        FreshnessAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    # A single timestamp is shared by validation and merge so that both steps
    # agree on "now".
    sync_time = datetime.now(timezone.utc)

    if updated_by is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        updated_by = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with, so create.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        # New assertions are enabled unless the caller says otherwise.
        enabled_on_create = True if enabled is None else enabled
        return self._create_freshness_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=enabled_on_create,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=updated_by,
            schedule=schedule,
            freshness_schedule_check_type=freshness_schedule_check_type,
            lookback_window=lookback_window,
        )

    # Step 2: a urn was supplied, so validate the raw input first. The
    # created_* values are placeholders; the merge step replaces them with
    # the stored ones.
    validated_input = _FreshnessAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        created_by=updated_by,
        created_at=sync_time,
        updated_by=updated_by,
        updated_at=sync_time,
        schedule=schedule,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
    )

    # Step 3: merge with the stored assertion and monitor. The helper returns
    # either a merged input, or an already created assertion when no
    # assertion existed for the urn.
    merge_outcome = self._retrieve_and_merge_freshness_assertion_and_monitor(
        assertion_input=validated_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        detection_mechanism=detection_mechanism,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=updated_by,
        now_utc=sync_time,
        schedule=schedule,
        freshness_schedule_check_type=freshness_schedule_check_type,
        lookback_window=lookback_window,
    )

    if isinstance(merge_outcome, _AssertionPublic):
        # The merge step created the assertion itself; narrow the type for
        # the declared return value and stop here.
        assert isinstance(merge_outcome, FreshnessAssertion)
        return merge_outcome

    # Step 4: persist. The assertion goes first; if that upsert raises, the
    # monitor is never written.
    entity_pair = merge_outcome.to_assertion_and_monitor_entities()
    assertion_entity, monitor_entity = entity_pair
    self.client.entities.upsert(assertion_entity)
    # TODO: Wrap the monitor upsert in a try-except that logs the error,
    # deletes the assertion and re-raises, once delete is implemented
    # (https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy).
    self.client.entities.upsert(monitor_entity)

    return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
2319
+
1109
2320
 
1110
2321
  def _merge_field(
1111
2322
  input_field_value: Any,