acryl-datahub-cloud 0.3.12rc5__py3-none-any.whl → 0.3.12rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub-cloud might be problematic. More details are linked from the original diff page.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/get_feature_flag.gql +7 -0
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1934 -1934
- acryl_datahub_cloud/metadata/schema.avsc +23968 -23968
- acryl_datahub_cloud/metadata/schema_classes.py +658 -658
- acryl_datahub_cloud/sdk/__init__.py +6 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +327 -27
- acryl_datahub_cloud/sdk/assertion/types.py +2 -0
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +36 -11
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +1 -2
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +4 -31
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +274 -0
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +630 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1114 -137
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/METADATA +43 -43
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/RECORD +19 -16
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc5.dist-info → acryl_datahub_cloud-0.3.12rc7.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,8 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
|
9
9
|
FreshnessAssertion,
|
|
10
10
|
SmartFreshnessAssertion,
|
|
11
11
|
SmartVolumeAssertion,
|
|
12
|
+
SqlAssertion,
|
|
13
|
+
VolumeAssertion,
|
|
12
14
|
_AssertionPublic,
|
|
13
15
|
)
|
|
14
16
|
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
@@ -36,6 +38,18 @@ from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input
|
|
|
36
38
|
ValueTypeInputType,
|
|
37
39
|
_SmartColumnMetricAssertionInput,
|
|
38
40
|
)
|
|
41
|
+
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
42
|
+
SqlAssertionCriteria,
|
|
43
|
+
_SqlAssertionInput,
|
|
44
|
+
)
|
|
45
|
+
from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
|
|
46
|
+
RowCountTotal,
|
|
47
|
+
VolumeAssertionDefinition,
|
|
48
|
+
VolumeAssertionDefinitionInputTypes,
|
|
49
|
+
VolumeAssertionOperator,
|
|
50
|
+
_VolumeAssertionDefinitionTypes,
|
|
51
|
+
_VolumeAssertionInput,
|
|
52
|
+
)
|
|
39
53
|
from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
|
|
40
54
|
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
41
55
|
from acryl_datahub_cloud.sdk.errors import SDKUsageError
|
|
@@ -489,6 +503,194 @@ class AssertionsClient:
|
|
|
489
503
|
|
|
490
504
|
return merged_assertion_input
|
|
491
505
|
|
|
506
|
+
def _retrieve_and_merge_native_volume_assertion_and_monitor(
|
|
507
|
+
self,
|
|
508
|
+
assertion_input: _VolumeAssertionInput,
|
|
509
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
510
|
+
urn: Union[str, AssertionUrn],
|
|
511
|
+
display_name: Optional[str],
|
|
512
|
+
enabled: Optional[bool],
|
|
513
|
+
detection_mechanism: DetectionMechanismInputTypes,
|
|
514
|
+
incident_behavior: Optional[
|
|
515
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
516
|
+
],
|
|
517
|
+
tags: Optional[TagsInputType],
|
|
518
|
+
updated_by: Optional[Union[str, CorpUserUrn]],
|
|
519
|
+
now_utc: datetime,
|
|
520
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
521
|
+
definition: Optional[VolumeAssertionDefinitionInputTypes],
|
|
522
|
+
use_backend_definition: bool = False,
|
|
523
|
+
) -> Union[VolumeAssertion, _VolumeAssertionInput]:
|
|
524
|
+
# 1. Retrieve any existing assertion and monitor entities:
|
|
525
|
+
maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
|
|
526
|
+
self._retrieve_assertion_and_monitor(assertion_input)
|
|
527
|
+
)
|
|
528
|
+
|
|
529
|
+
# 2.1 If the assertion and monitor entities exist, create an assertion object from them:
|
|
530
|
+
if maybe_assertion_entity and maybe_monitor_entity:
|
|
531
|
+
existing_assertion = VolumeAssertion._from_entities(
|
|
532
|
+
maybe_assertion_entity, maybe_monitor_entity
|
|
533
|
+
)
|
|
534
|
+
# 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
|
|
535
|
+
elif maybe_assertion_entity and not maybe_monitor_entity:
|
|
536
|
+
monitor_mode = (
|
|
537
|
+
"ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
|
|
538
|
+
)
|
|
539
|
+
existing_assertion = VolumeAssertion._from_entities(
|
|
540
|
+
maybe_assertion_entity,
|
|
541
|
+
Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
|
|
542
|
+
)
|
|
543
|
+
# 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
|
|
544
|
+
elif not maybe_assertion_entity:
|
|
545
|
+
if use_backend_definition:
|
|
546
|
+
raise SDKUsageError(
|
|
547
|
+
f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
|
|
548
|
+
)
|
|
549
|
+
logger.info(
|
|
550
|
+
f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
|
|
551
|
+
)
|
|
552
|
+
return self._create_volume_assertion(
|
|
553
|
+
dataset_urn=dataset_urn,
|
|
554
|
+
display_name=display_name,
|
|
555
|
+
detection_mechanism=detection_mechanism,
|
|
556
|
+
incident_behavior=incident_behavior,
|
|
557
|
+
tags=tags,
|
|
558
|
+
created_by=updated_by,
|
|
559
|
+
schedule=schedule,
|
|
560
|
+
definition=definition,
|
|
561
|
+
)
|
|
562
|
+
|
|
563
|
+
# 3. Check for any issues e.g. different dataset urns
|
|
564
|
+
if (
|
|
565
|
+
existing_assertion
|
|
566
|
+
and hasattr(existing_assertion, "dataset_urn")
|
|
567
|
+
and existing_assertion.dataset_urn != assertion_input.dataset_urn
|
|
568
|
+
):
|
|
569
|
+
raise SDKUsageError(
|
|
570
|
+
f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
|
|
571
|
+
)
|
|
572
|
+
|
|
573
|
+
# 4. Handle definition: use backend definition if flag is set and backend has one
|
|
574
|
+
if use_backend_definition:
|
|
575
|
+
if maybe_assertion_entity is not None:
|
|
576
|
+
# Use definition from backend
|
|
577
|
+
backend_definition = VolumeAssertionDefinition.from_assertion(
|
|
578
|
+
maybe_assertion_entity
|
|
579
|
+
)
|
|
580
|
+
# Update the assertion_input with the real definition from backend
|
|
581
|
+
assertion_input.definition = backend_definition
|
|
582
|
+
effective_definition = backend_definition
|
|
583
|
+
logger.info("Using definition from backend assertion")
|
|
584
|
+
else:
|
|
585
|
+
# No backend assertion and no user-provided definition - this is an error
|
|
586
|
+
raise SDKUsageError(
|
|
587
|
+
f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
|
|
588
|
+
)
|
|
589
|
+
else:
|
|
590
|
+
# Use the already-parsed definition from assertion_input
|
|
591
|
+
effective_definition = assertion_input.definition
|
|
592
|
+
|
|
593
|
+
# 5. Merge the existing assertion with the validated input:
|
|
594
|
+
merged_assertion_input = self._merge_volume_input(
|
|
595
|
+
dataset_urn=dataset_urn,
|
|
596
|
+
urn=urn,
|
|
597
|
+
display_name=display_name,
|
|
598
|
+
enabled=enabled,
|
|
599
|
+
detection_mechanism=detection_mechanism,
|
|
600
|
+
incident_behavior=incident_behavior,
|
|
601
|
+
tags=tags,
|
|
602
|
+
now_utc=now_utc,
|
|
603
|
+
assertion_input=assertion_input,
|
|
604
|
+
maybe_assertion_entity=maybe_assertion_entity,
|
|
605
|
+
maybe_monitor_entity=maybe_monitor_entity,
|
|
606
|
+
existing_assertion=existing_assertion,
|
|
607
|
+
schedule=schedule,
|
|
608
|
+
definition=effective_definition,
|
|
609
|
+
)
|
|
610
|
+
|
|
611
|
+
return merged_assertion_input
|
|
612
|
+
|
|
613
|
+
def _retrieve_and_merge_sql_assertion_and_monitor(
|
|
614
|
+
self,
|
|
615
|
+
assertion_input: _SqlAssertionInput,
|
|
616
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
617
|
+
urn: Union[str, AssertionUrn],
|
|
618
|
+
display_name: Optional[str],
|
|
619
|
+
enabled: Optional[bool],
|
|
620
|
+
criteria: SqlAssertionCriteria,
|
|
621
|
+
statement: str,
|
|
622
|
+
incident_behavior: Optional[
|
|
623
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
624
|
+
],
|
|
625
|
+
tags: Optional[TagsInputType],
|
|
626
|
+
updated_by: Optional[Union[str, CorpUserUrn]],
|
|
627
|
+
now_utc: datetime,
|
|
628
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
629
|
+
) -> Union[SqlAssertion, _SqlAssertionInput]:
|
|
630
|
+
# 1. Retrieve any existing assertion and monitor entities:
|
|
631
|
+
maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
|
|
632
|
+
self._retrieve_assertion_and_monitor(assertion_input)
|
|
633
|
+
)
|
|
634
|
+
|
|
635
|
+
# 2.1 If the assertion and monitor entities exist, create an assertion object from them:
|
|
636
|
+
if maybe_assertion_entity and maybe_monitor_entity:
|
|
637
|
+
existing_assertion = SqlAssertion._from_entities(
|
|
638
|
+
maybe_assertion_entity, maybe_monitor_entity
|
|
639
|
+
)
|
|
640
|
+
# 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
|
|
641
|
+
elif maybe_assertion_entity and not maybe_monitor_entity:
|
|
642
|
+
monitor_mode = (
|
|
643
|
+
"ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
|
|
644
|
+
)
|
|
645
|
+
existing_assertion = SqlAssertion._from_entities(
|
|
646
|
+
maybe_assertion_entity,
|
|
647
|
+
Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
|
|
648
|
+
)
|
|
649
|
+
# 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
|
|
650
|
+
elif not maybe_assertion_entity:
|
|
651
|
+
logger.info(
|
|
652
|
+
f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
|
|
653
|
+
)
|
|
654
|
+
return self._create_sql_assertion(
|
|
655
|
+
dataset_urn=dataset_urn,
|
|
656
|
+
display_name=display_name,
|
|
657
|
+
criteria=criteria,
|
|
658
|
+
statement=statement,
|
|
659
|
+
incident_behavior=incident_behavior,
|
|
660
|
+
tags=tags,
|
|
661
|
+
created_by=updated_by,
|
|
662
|
+
schedule=schedule,
|
|
663
|
+
)
|
|
664
|
+
|
|
665
|
+
# 3. Check for any issues e.g. different dataset urns
|
|
666
|
+
if (
|
|
667
|
+
existing_assertion
|
|
668
|
+
and hasattr(existing_assertion, "dataset_urn")
|
|
669
|
+
and existing_assertion.dataset_urn != assertion_input.dataset_urn
|
|
670
|
+
):
|
|
671
|
+
raise SDKUsageError(
|
|
672
|
+
f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
|
|
673
|
+
)
|
|
674
|
+
|
|
675
|
+
# 4. Merge the existing assertion with the validated input:
|
|
676
|
+
merged_assertion_input = self._merge_sql_input(
|
|
677
|
+
dataset_urn=dataset_urn,
|
|
678
|
+
urn=urn,
|
|
679
|
+
display_name=display_name,
|
|
680
|
+
enabled=enabled,
|
|
681
|
+
incident_behavior=incident_behavior,
|
|
682
|
+
tags=tags,
|
|
683
|
+
now_utc=now_utc,
|
|
684
|
+
assertion_input=assertion_input,
|
|
685
|
+
maybe_assertion_entity=maybe_assertion_entity,
|
|
686
|
+
existing_assertion=existing_assertion,
|
|
687
|
+
schedule=schedule,
|
|
688
|
+
criteria=criteria,
|
|
689
|
+
statement=statement,
|
|
690
|
+
)
|
|
691
|
+
|
|
692
|
+
return merged_assertion_input
|
|
693
|
+
|
|
492
694
|
def _retrieve_assertion_and_monitor(
|
|
493
695
|
self,
|
|
494
696
|
assertion_input: _AssertionInput,
|
|
@@ -781,27 +983,25 @@ class AssertionsClient:
|
|
|
781
983
|
)
|
|
782
984
|
return merged_assertion_input
|
|
783
985
|
|
|
784
|
-
def
|
|
986
|
+
def _merge_volume_input(
|
|
785
987
|
self,
|
|
786
988
|
dataset_urn: Union[str, DatasetUrn],
|
|
787
989
|
urn: Union[str, AssertionUrn],
|
|
788
990
|
display_name: Optional[str],
|
|
789
991
|
enabled: Optional[bool],
|
|
790
992
|
detection_mechanism: DetectionMechanismInputTypes,
|
|
791
|
-
sensitivity: Optional[Union[str, InferenceSensitivity]],
|
|
792
|
-
exclusion_windows: Optional[ExclusionWindowInputTypes],
|
|
793
|
-
training_data_lookback_days: Optional[int],
|
|
794
993
|
incident_behavior: Optional[
|
|
795
994
|
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
796
995
|
],
|
|
797
996
|
tags: Optional[TagsInputType],
|
|
798
|
-
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
799
997
|
now_utc: datetime,
|
|
800
|
-
assertion_input:
|
|
998
|
+
assertion_input: _VolumeAssertionInput,
|
|
801
999
|
maybe_assertion_entity: Optional[Assertion],
|
|
802
1000
|
maybe_monitor_entity: Optional[Monitor],
|
|
803
|
-
existing_assertion:
|
|
804
|
-
|
|
1001
|
+
existing_assertion: VolumeAssertion,
|
|
1002
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
1003
|
+
definition: Optional[_VolumeAssertionDefinitionTypes],
|
|
1004
|
+
) -> _VolumeAssertionInput:
|
|
805
1005
|
"""Merge the input with the existing assertion and monitor entities.
|
|
806
1006
|
|
|
807
1007
|
Args:
|
|
@@ -810,9 +1010,6 @@ class AssertionsClient:
|
|
|
810
1010
|
display_name: The display name of the assertion.
|
|
811
1011
|
enabled: Whether the assertion is enabled.
|
|
812
1012
|
detection_mechanism: The detection mechanism to be used for the assertion.
|
|
813
|
-
sensitivity: The sensitivity to be applied to the assertion.
|
|
814
|
-
exclusion_windows: The exclusion windows to be applied to the assertion.
|
|
815
|
-
training_data_lookback_days: The training data lookback days to be applied to the assertion.
|
|
816
1013
|
incident_behavior: The incident behavior to be applied to the assertion.
|
|
817
1014
|
tags: The tags to be applied to the assertion.
|
|
818
1015
|
now_utc: The current UTC time from when the function is called.
|
|
@@ -820,11 +1017,13 @@ class AssertionsClient:
|
|
|
820
1017
|
maybe_assertion_entity: The existing assertion entity from the DataHub instance.
|
|
821
1018
|
maybe_monitor_entity: The existing monitor entity from the DataHub instance.
|
|
822
1019
|
existing_assertion: The existing assertion from the DataHub instance.
|
|
1020
|
+
schedule: The schedule to be applied to the assertion.
|
|
1021
|
+
definition: The volume assertion definition to be applied to the assertion.
|
|
823
1022
|
|
|
824
1023
|
Returns:
|
|
825
1024
|
The merged assertion input.
|
|
826
1025
|
"""
|
|
827
|
-
merged_assertion_input =
|
|
1026
|
+
merged_assertion_input = _VolumeAssertionInput(
|
|
828
1027
|
urn=urn,
|
|
829
1028
|
entity_client=self.client.entities,
|
|
830
1029
|
dataset_urn=dataset_urn,
|
|
@@ -856,43 +1055,262 @@ class AssertionsClient:
|
|
|
856
1055
|
"detection_mechanism",
|
|
857
1056
|
assertion_input,
|
|
858
1057
|
existing_assertion,
|
|
859
|
-
|
|
1058
|
+
VolumeAssertion._get_detection_mechanism(
|
|
860
1059
|
maybe_assertion_entity, maybe_monitor_entity, default=None
|
|
861
1060
|
)
|
|
862
1061
|
if maybe_assertion_entity and maybe_monitor_entity
|
|
863
1062
|
else None,
|
|
864
1063
|
),
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
"
|
|
1064
|
+
incident_behavior=_merge_field(
|
|
1065
|
+
incident_behavior,
|
|
1066
|
+
"incident_behavior",
|
|
868
1067
|
assertion_input,
|
|
869
1068
|
existing_assertion,
|
|
870
|
-
|
|
1069
|
+
VolumeAssertion._get_incident_behavior(maybe_assertion_entity)
|
|
1070
|
+
if maybe_assertion_entity
|
|
1071
|
+
else None,
|
|
871
1072
|
),
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
"
|
|
1073
|
+
tags=_merge_field(
|
|
1074
|
+
tags,
|
|
1075
|
+
"tags",
|
|
875
1076
|
assertion_input,
|
|
876
1077
|
existing_assertion,
|
|
877
|
-
|
|
878
|
-
if maybe_monitor_entity
|
|
879
|
-
else None,
|
|
1078
|
+
maybe_assertion_entity.tags if maybe_assertion_entity else None,
|
|
880
1079
|
),
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
"
|
|
1080
|
+
definition=_merge_field(
|
|
1081
|
+
definition,
|
|
1082
|
+
"definition",
|
|
884
1083
|
assertion_input,
|
|
885
1084
|
existing_assertion,
|
|
886
|
-
|
|
887
|
-
|
|
1085
|
+
existing_assertion.definition if existing_assertion else None,
|
|
1086
|
+
),
|
|
1087
|
+
created_by=existing_assertion.created_by
|
|
1088
|
+
or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
|
|
1089
|
+
created_at=existing_assertion.created_at
|
|
1090
|
+
or now_utc, # Override with the existing assertion's created_at or now if not set
|
|
1091
|
+
updated_by=assertion_input.updated_by, # Override with the input's updated_by
|
|
1092
|
+
updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
|
|
1093
|
+
)
|
|
1094
|
+
return merged_assertion_input
|
|
1095
|
+
|
|
1096
|
+
def _merge_sql_input(
|
|
1097
|
+
self,
|
|
1098
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
1099
|
+
urn: Union[str, AssertionUrn],
|
|
1100
|
+
display_name: Optional[str],
|
|
1101
|
+
enabled: Optional[bool],
|
|
1102
|
+
criteria: SqlAssertionCriteria,
|
|
1103
|
+
statement: str,
|
|
1104
|
+
incident_behavior: Optional[
|
|
1105
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
1106
|
+
],
|
|
1107
|
+
tags: Optional[TagsInputType],
|
|
1108
|
+
now_utc: datetime,
|
|
1109
|
+
assertion_input: _SqlAssertionInput,
|
|
1110
|
+
maybe_assertion_entity: Optional[Assertion],
|
|
1111
|
+
# not used: maybe_monitor_entity: Optional[Monitor], as schedule is already set in existing_assertion
|
|
1112
|
+
existing_assertion: SqlAssertion,
|
|
1113
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
1114
|
+
) -> _SqlAssertionInput:
|
|
1115
|
+
"""Merge the input with the existing assertion and monitor entities.
|
|
1116
|
+
|
|
1117
|
+
Args:
|
|
1118
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
1119
|
+
urn: The urn of the assertion.
|
|
1120
|
+
display_name: The display name of the assertion.
|
|
1121
|
+
enabled: Whether the assertion is enabled.
|
|
1122
|
+
criteria: The criteria of the assertion.
|
|
1123
|
+
statement: The statement of the assertion.
|
|
1124
|
+
incident_behavior: The incident behavior to be applied to the assertion.
|
|
1125
|
+
tags: The tags to be applied to the assertion.
|
|
1126
|
+
now_utc: The current UTC time from when the function is called.
|
|
1127
|
+
assertion_input: The validated input to the function.
|
|
1128
|
+
maybe_assertion_entity: The existing assertion entity from the DataHub instance.
|
|
1129
|
+
existing_assertion: The existing assertion from the DataHub instance.
|
|
1130
|
+
schedule: The schedule to be applied to the assertion.
|
|
1131
|
+
|
|
1132
|
+
Returns:
|
|
1133
|
+
The merged assertion input.
|
|
1134
|
+
"""
|
|
1135
|
+
merged_assertion_input = _SqlAssertionInput(
|
|
1136
|
+
urn=urn,
|
|
1137
|
+
entity_client=self.client.entities,
|
|
1138
|
+
dataset_urn=dataset_urn,
|
|
1139
|
+
display_name=_merge_field(
|
|
1140
|
+
display_name,
|
|
1141
|
+
"display_name",
|
|
1142
|
+
assertion_input,
|
|
1143
|
+
existing_assertion,
|
|
1144
|
+
maybe_assertion_entity.description if maybe_assertion_entity else None,
|
|
1145
|
+
),
|
|
1146
|
+
enabled=_merge_field(
|
|
1147
|
+
enabled,
|
|
1148
|
+
"enabled",
|
|
1149
|
+
assertion_input,
|
|
1150
|
+
existing_assertion,
|
|
1151
|
+
existing_assertion.mode == AssertionMode.ACTIVE
|
|
1152
|
+
if existing_assertion
|
|
888
1153
|
else None,
|
|
889
1154
|
),
|
|
1155
|
+
schedule=_merge_field(
|
|
1156
|
+
schedule,
|
|
1157
|
+
"schedule",
|
|
1158
|
+
assertion_input,
|
|
1159
|
+
existing_assertion,
|
|
1160
|
+
# TODO should this use maybe_monitor_entity.schedule?
|
|
1161
|
+
existing_assertion.schedule if existing_assertion else None,
|
|
1162
|
+
),
|
|
1163
|
+
criteria=_merge_field(
|
|
1164
|
+
criteria,
|
|
1165
|
+
"criteria",
|
|
1166
|
+
assertion_input,
|
|
1167
|
+
existing_assertion,
|
|
1168
|
+
existing_assertion.criteria if existing_assertion else None,
|
|
1169
|
+
),
|
|
1170
|
+
statement=_merge_field(
|
|
1171
|
+
statement,
|
|
1172
|
+
"statement",
|
|
1173
|
+
assertion_input,
|
|
1174
|
+
existing_assertion,
|
|
1175
|
+
existing_assertion.statement if existing_assertion else None,
|
|
1176
|
+
),
|
|
890
1177
|
incident_behavior=_merge_field(
|
|
891
1178
|
incident_behavior,
|
|
892
1179
|
"incident_behavior",
|
|
893
1180
|
assertion_input,
|
|
894
1181
|
existing_assertion,
|
|
895
|
-
|
|
1182
|
+
SqlAssertion._get_incident_behavior(maybe_assertion_entity)
|
|
1183
|
+
if maybe_assertion_entity
|
|
1184
|
+
else None,
|
|
1185
|
+
),
|
|
1186
|
+
tags=_merge_field(
|
|
1187
|
+
tags,
|
|
1188
|
+
"tags",
|
|
1189
|
+
assertion_input,
|
|
1190
|
+
existing_assertion,
|
|
1191
|
+
maybe_assertion_entity.tags if maybe_assertion_entity else None,
|
|
1192
|
+
),
|
|
1193
|
+
created_by=existing_assertion.created_by
|
|
1194
|
+
or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
|
|
1195
|
+
created_at=existing_assertion.created_at
|
|
1196
|
+
or now_utc, # Override with the existing assertion's created_at or now if not set
|
|
1197
|
+
updated_by=assertion_input.updated_by, # Override with the input's updated_by
|
|
1198
|
+
updated_at=assertion_input.updated_at, # Override with the input's updated_at (now)
|
|
1199
|
+
)
|
|
1200
|
+
return merged_assertion_input
|
|
1201
|
+
|
|
1202
|
+
def _merge_smart_volume_input(
|
|
1203
|
+
self,
|
|
1204
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
1205
|
+
urn: Union[str, AssertionUrn],
|
|
1206
|
+
display_name: Optional[str],
|
|
1207
|
+
enabled: Optional[bool],
|
|
1208
|
+
detection_mechanism: DetectionMechanismInputTypes,
|
|
1209
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]],
|
|
1210
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes],
|
|
1211
|
+
training_data_lookback_days: Optional[int],
|
|
1212
|
+
incident_behavior: Optional[
|
|
1213
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
1214
|
+
],
|
|
1215
|
+
tags: Optional[TagsInputType],
|
|
1216
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
1217
|
+
now_utc: datetime,
|
|
1218
|
+
assertion_input: _SmartVolumeAssertionInput,
|
|
1219
|
+
maybe_assertion_entity: Optional[Assertion],
|
|
1220
|
+
maybe_monitor_entity: Optional[Monitor],
|
|
1221
|
+
existing_assertion: SmartVolumeAssertion,
|
|
1222
|
+
) -> _SmartVolumeAssertionInput:
|
|
1223
|
+
"""Merge the input with the existing assertion and monitor entities.
|
|
1224
|
+
|
|
1225
|
+
Args:
|
|
1226
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
1227
|
+
urn: The urn of the assertion.
|
|
1228
|
+
display_name: The display name of the assertion.
|
|
1229
|
+
enabled: Whether the assertion is enabled.
|
|
1230
|
+
detection_mechanism: The detection mechanism to be used for the assertion.
|
|
1231
|
+
sensitivity: The sensitivity to be applied to the assertion.
|
|
1232
|
+
exclusion_windows: The exclusion windows to be applied to the assertion.
|
|
1233
|
+
training_data_lookback_days: The training data lookback days to be applied to the assertion.
|
|
1234
|
+
incident_behavior: The incident behavior to be applied to the assertion.
|
|
1235
|
+
tags: The tags to be applied to the assertion.
|
|
1236
|
+
now_utc: The current UTC time from when the function is called.
|
|
1237
|
+
assertion_input: The validated input to the function.
|
|
1238
|
+
maybe_assertion_entity: The existing assertion entity from the DataHub instance.
|
|
1239
|
+
maybe_monitor_entity: The existing monitor entity from the DataHub instance.
|
|
1240
|
+
existing_assertion: The existing assertion from the DataHub instance.
|
|
1241
|
+
|
|
1242
|
+
Returns:
|
|
1243
|
+
The merged assertion input.
|
|
1244
|
+
"""
|
|
1245
|
+
merged_assertion_input = _SmartVolumeAssertionInput(
|
|
1246
|
+
urn=urn,
|
|
1247
|
+
entity_client=self.client.entities,
|
|
1248
|
+
dataset_urn=dataset_urn,
|
|
1249
|
+
display_name=_merge_field(
|
|
1250
|
+
display_name,
|
|
1251
|
+
"display_name",
|
|
1252
|
+
assertion_input,
|
|
1253
|
+
existing_assertion,
|
|
1254
|
+
maybe_assertion_entity.description if maybe_assertion_entity else None,
|
|
1255
|
+
),
|
|
1256
|
+
enabled=_merge_field(
|
|
1257
|
+
enabled,
|
|
1258
|
+
"enabled",
|
|
1259
|
+
assertion_input,
|
|
1260
|
+
existing_assertion,
|
|
1261
|
+
existing_assertion.mode == AssertionMode.ACTIVE
|
|
1262
|
+
if existing_assertion
|
|
1263
|
+
else None,
|
|
1264
|
+
),
|
|
1265
|
+
schedule=_merge_field(
|
|
1266
|
+
schedule,
|
|
1267
|
+
"schedule",
|
|
1268
|
+
assertion_input,
|
|
1269
|
+
existing_assertion,
|
|
1270
|
+
existing_assertion.schedule if existing_assertion else None,
|
|
1271
|
+
),
|
|
1272
|
+
detection_mechanism=_merge_field(
|
|
1273
|
+
detection_mechanism,
|
|
1274
|
+
"detection_mechanism",
|
|
1275
|
+
assertion_input,
|
|
1276
|
+
existing_assertion,
|
|
1277
|
+
SmartVolumeAssertion._get_detection_mechanism(
|
|
1278
|
+
maybe_assertion_entity, maybe_monitor_entity, default=None
|
|
1279
|
+
)
|
|
1280
|
+
if maybe_assertion_entity and maybe_monitor_entity
|
|
1281
|
+
else None,
|
|
1282
|
+
),
|
|
1283
|
+
sensitivity=_merge_field(
|
|
1284
|
+
sensitivity,
|
|
1285
|
+
"sensitivity",
|
|
1286
|
+
assertion_input,
|
|
1287
|
+
existing_assertion,
|
|
1288
|
+
maybe_monitor_entity.sensitivity if maybe_monitor_entity else None,
|
|
1289
|
+
),
|
|
1290
|
+
exclusion_windows=_merge_field(
|
|
1291
|
+
exclusion_windows,
|
|
1292
|
+
"exclusion_windows",
|
|
1293
|
+
assertion_input,
|
|
1294
|
+
existing_assertion,
|
|
1295
|
+
maybe_monitor_entity.exclusion_windows
|
|
1296
|
+
if maybe_monitor_entity
|
|
1297
|
+
else None,
|
|
1298
|
+
),
|
|
1299
|
+
training_data_lookback_days=_merge_field(
|
|
1300
|
+
training_data_lookback_days,
|
|
1301
|
+
"training_data_lookback_days",
|
|
1302
|
+
assertion_input,
|
|
1303
|
+
existing_assertion,
|
|
1304
|
+
maybe_monitor_entity.training_data_lookback_days
|
|
1305
|
+
if maybe_monitor_entity
|
|
1306
|
+
else None,
|
|
1307
|
+
),
|
|
1308
|
+
incident_behavior=_merge_field(
|
|
1309
|
+
incident_behavior,
|
|
1310
|
+
"incident_behavior",
|
|
1311
|
+
assertion_input,
|
|
1312
|
+
existing_assertion,
|
|
1313
|
+
SmartVolumeAssertion._get_incident_behavior(maybe_assertion_entity)
|
|
896
1314
|
if maybe_assertion_entity
|
|
897
1315
|
else None,
|
|
898
1316
|
),
|
|
@@ -1252,46 +1670,32 @@ class AssertionsClient:
|
|
|
1252
1670
|
# raise e
|
|
1253
1671
|
return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
|
|
1254
1672
|
|
|
1255
|
-
def
|
|
1673
|
+
def _create_volume_assertion(
|
|
1256
1674
|
self,
|
|
1257
1675
|
*,
|
|
1258
1676
|
dataset_urn: Union[str, DatasetUrn],
|
|
1259
|
-
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
1260
1677
|
display_name: Optional[str] = None,
|
|
1261
|
-
enabled:
|
|
1678
|
+
enabled: bool = True,
|
|
1262
1679
|
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
1263
|
-
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
1264
|
-
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
1265
|
-
training_data_lookback_days: Optional[int] = None,
|
|
1266
1680
|
incident_behavior: Optional[
|
|
1267
1681
|
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
1268
1682
|
] = None,
|
|
1269
1683
|
tags: Optional[TagsInputType] = None,
|
|
1270
|
-
|
|
1684
|
+
created_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
1271
1685
|
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
1272
|
-
|
|
1273
|
-
|
|
1686
|
+
definition: Optional[VolumeAssertionDefinitionInputTypes] = None,
|
|
1687
|
+
) -> VolumeAssertion:
|
|
1688
|
+
"""Create a volume assertion.
|
|
1274
1689
|
|
|
1275
1690
|
Note: keyword arguments are required.
|
|
1276
1691
|
|
|
1277
|
-
|
|
1278
|
-
it will be created. If it does exist, it will be updated. Existing assertion fields will
|
|
1279
|
-
be updated if the input value is not None. If the input value is None, the existing value
|
|
1280
|
-
will be preserved. If the input value can be un-set e.g. by passing an empty list or
|
|
1281
|
-
empty string.
|
|
1282
|
-
|
|
1283
|
-
Schedule behavior:
|
|
1284
|
-
- Create case: Uses default hourly schedule (\"0 * * * *\") or provided schedule
|
|
1285
|
-
- Update case: Different than `sync_smart_freshness_assertion`, schedule is updated.
|
|
1692
|
+
The created assertion will use the default daily schedule ("0 0 * * *").
|
|
1286
1693
|
|
|
1287
1694
|
Args:
|
|
1288
1695
|
dataset_urn: The urn of the dataset to be monitored.
|
|
1289
|
-
|
|
1290
|
-
will be
|
|
1291
|
-
|
|
1292
|
-
will be generated.
|
|
1293
|
-
enabled: Whether the assertion is enabled. If not provided, the existing value
|
|
1294
|
-
will be preserved.
|
|
1696
|
+
display_name: The display name of the assertion. If not provided, a random display
|
|
1697
|
+
name will be generated.
|
|
1698
|
+
enabled: Whether the assertion is enabled. Defaults to True.
|
|
1295
1699
|
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
1296
1700
|
schema is recommended. Valid values are:
|
|
1297
1701
|
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
@@ -1302,33 +1706,7 @@ class AssertionsClient:
|
|
|
1302
1706
|
"additional_filter": "last_modified > '2021-01-01'",
|
|
1303
1707
|
} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
|
|
1304
1708
|
additional_filter='last_modified > 2021-01-01')
|
|
1305
|
-
- {
|
|
1306
|
-
"type": "high_watermark_column",
|
|
1307
|
-
"column_name": "id",
|
|
1308
|
-
"additional_filter": "id > 1000",
|
|
1309
|
-
} or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
|
|
1310
|
-
additional_filter='id > 1000')
|
|
1311
1709
|
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
1312
|
-
sensitivity: The sensitivity to be applied to the assertion. Valid values are:
|
|
1313
|
-
- "low" or InferenceSensitivity.LOW
|
|
1314
|
-
- "medium" or InferenceSensitivity.MEDIUM
|
|
1315
|
-
- "high" or InferenceSensitivity.HIGH
|
|
1316
|
-
exclusion_windows: The exclusion windows to be applied to the assertion, currently only
|
|
1317
|
-
fixed range exclusion windows are supported. Valid values are:
|
|
1318
|
-
- from datetime.datetime objects: {
|
|
1319
|
-
"start": "datetime(2025, 1, 1, 0, 0, 0)",
|
|
1320
|
-
"end": "datetime(2025, 1, 2, 0, 0, 0)",
|
|
1321
|
-
}
|
|
1322
|
-
- from string datetimes: {
|
|
1323
|
-
"start": "2025-01-01T00:00:00",
|
|
1324
|
-
"end": "2025-01-02T00:00:00",
|
|
1325
|
-
}
|
|
1326
|
-
- from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
|
|
1327
|
-
start=datetime(2025, 1, 1, 0, 0, 0),
|
|
1328
|
-
end=datetime(2025, 1, 2, 0, 0, 0)
|
|
1329
|
-
)
|
|
1330
|
-
training_data_lookback_days: The training data lookback days to be applied to the
|
|
1331
|
-
assertion as an integer.
|
|
1332
1710
|
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
1333
1711
|
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
1334
1712
|
- "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
|
|
@@ -1336,7 +1714,7 @@ class AssertionsClient:
|
|
|
1336
1714
|
- a list of strings (strings will be converted to TagUrn objects)
|
|
1337
1715
|
- a list of TagUrn objects
|
|
1338
1716
|
- a list of TagAssociationClass objects
|
|
1339
|
-
|
|
1717
|
+
created_by: Optional urn of the user who created the assertion. The format is
|
|
1340
1718
|
"urn:li:corpuser:<username>", which you can find on the Users & Groups page.
|
|
1341
1719
|
The default is the datahub system user.
|
|
1342
1720
|
TODO: Retrieve the SDK user as the default instead of the datahub system user.
|
|
@@ -1345,80 +1723,330 @@ class AssertionsClient:
|
|
|
1345
1723
|
The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
|
|
1346
1724
|
Alternatively, a models.CronScheduleClass object can be provided with string parameters
|
|
1347
1725
|
cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
|
|
1726
|
+
definition: The volume assertion definition. Must be provided and include type, operator,
|
|
1727
|
+
and parameters. Can be provided as:
|
|
1728
|
+
- A typed volume assertion object (RowCountTotal or RowCountChange)
|
|
1729
|
+
- A dictionary with keys: type, operator, parameters (and kind for row_count_change)
|
|
1730
|
+
|
|
1731
|
+
Example dictionary for row count total:
|
|
1732
|
+
{
|
|
1733
|
+
"type": "row_count_total",
|
|
1734
|
+
"operator": "GREATER_THAN_OR_EQUAL_TO",
|
|
1735
|
+
"parameters": 100
|
|
1736
|
+
}
|
|
1737
|
+
|
|
1738
|
+
Example dictionary for row count change:
|
|
1739
|
+
{
|
|
1740
|
+
"type": "row_count_change",
|
|
1741
|
+
"kind": "percent",
|
|
1742
|
+
"operator": "BETWEEN",
|
|
1743
|
+
"parameters": (10, 50)
|
|
1744
|
+
}
|
|
1348
1745
|
|
|
1349
1746
|
Returns:
|
|
1350
|
-
|
|
1747
|
+
VolumeAssertion: The created assertion.
|
|
1351
1748
|
"""
|
|
1352
1749
|
_print_experimental_warning()
|
|
1353
1750
|
now_utc = datetime.now(timezone.utc)
|
|
1354
|
-
|
|
1355
|
-
if updated_by is None:
|
|
1751
|
+
if created_by is None:
|
|
1356
1752
|
logger.warning(
|
|
1357
|
-
f"
|
|
1358
|
-
)
|
|
1359
|
-
updated_by = DEFAULT_CREATED_BY
|
|
1360
|
-
|
|
1361
|
-
# 1. If urn is not set, create a new assertion
|
|
1362
|
-
if urn is None:
|
|
1363
|
-
logger.info("URN is not set, creating a new assertion")
|
|
1364
|
-
return self._create_smart_volume_assertion(
|
|
1365
|
-
dataset_urn=dataset_urn,
|
|
1366
|
-
display_name=display_name,
|
|
1367
|
-
enabled=enabled if enabled is not None else True,
|
|
1368
|
-
detection_mechanism=detection_mechanism,
|
|
1369
|
-
sensitivity=sensitivity,
|
|
1370
|
-
exclusion_windows=exclusion_windows,
|
|
1371
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
1372
|
-
incident_behavior=incident_behavior,
|
|
1373
|
-
tags=tags,
|
|
1374
|
-
created_by=updated_by,
|
|
1375
|
-
schedule=schedule,
|
|
1753
|
+
f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
1376
1754
|
)
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
urn=urn,
|
|
1755
|
+
created_by = DEFAULT_CREATED_BY
|
|
1756
|
+
assertion_input = _VolumeAssertionInput(
|
|
1757
|
+
urn=None,
|
|
1381
1758
|
entity_client=self.client.entities,
|
|
1382
1759
|
dataset_urn=dataset_urn,
|
|
1383
1760
|
display_name=display_name,
|
|
1761
|
+
enabled=enabled,
|
|
1384
1762
|
detection_mechanism=detection_mechanism,
|
|
1385
|
-
sensitivity=sensitivity,
|
|
1386
|
-
exclusion_windows=exclusion_windows,
|
|
1387
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
1388
1763
|
incident_behavior=incident_behavior,
|
|
1389
1764
|
tags=tags,
|
|
1390
|
-
created_by=
|
|
1391
|
-
created_at=now_utc,
|
|
1392
|
-
updated_by=
|
|
1765
|
+
created_by=created_by,
|
|
1766
|
+
created_at=now_utc,
|
|
1767
|
+
updated_by=created_by,
|
|
1393
1768
|
updated_at=now_utc,
|
|
1394
1769
|
schedule=schedule,
|
|
1770
|
+
definition=definition,
|
|
1395
1771
|
)
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
# if the assertion does not exist:
|
|
1399
|
-
merged_assertion_input_or_created_assertion = (
|
|
1400
|
-
self._retrieve_and_merge_volume_assertion_and_monitor(
|
|
1401
|
-
assertion_input=assertion_input,
|
|
1402
|
-
dataset_urn=dataset_urn,
|
|
1403
|
-
urn=urn,
|
|
1404
|
-
display_name=display_name,
|
|
1405
|
-
enabled=enabled,
|
|
1406
|
-
detection_mechanism=detection_mechanism,
|
|
1407
|
-
sensitivity=sensitivity,
|
|
1408
|
-
exclusion_windows=exclusion_windows,
|
|
1409
|
-
training_data_lookback_days=training_data_lookback_days,
|
|
1410
|
-
incident_behavior=incident_behavior,
|
|
1411
|
-
tags=tags,
|
|
1412
|
-
updated_by=updated_by,
|
|
1413
|
-
now_utc=now_utc,
|
|
1414
|
-
schedule=schedule,
|
|
1415
|
-
)
|
|
1772
|
+
assertion_entity, monitor_entity = (
|
|
1773
|
+
assertion_input.to_assertion_and_monitor_entities()
|
|
1416
1774
|
)
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
if
|
|
1420
|
-
|
|
1421
|
-
|
|
1775
|
+
# If assertion creation fails, we won't try to create the monitor
|
|
1776
|
+
self.client.entities.create(assertion_entity)
|
|
1777
|
+
# TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
1778
|
+
# try:
|
|
1779
|
+
self.client.entities.create(monitor_entity)
|
|
1780
|
+
# except Exception as e:
|
|
1781
|
+
# logger.error(f"Error creating monitor: {e}")
|
|
1782
|
+
# self.client.entities.delete(assertion_entity)
|
|
1783
|
+
# raise e
|
|
1784
|
+
return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
|
|
1785
|
+
|
|
1786
|
+
def _create_sql_assertion(
|
|
1787
|
+
self,
|
|
1788
|
+
*,
|
|
1789
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
1790
|
+
display_name: Optional[str] = None,
|
|
1791
|
+
enabled: bool = True,
|
|
1792
|
+
criteria: SqlAssertionCriteria,
|
|
1793
|
+
statement: str,
|
|
1794
|
+
incident_behavior: Optional[
|
|
1795
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
1796
|
+
],
|
|
1797
|
+
tags: Optional[TagsInputType],
|
|
1798
|
+
created_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
1799
|
+
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
1800
|
+
) -> SqlAssertion:
|
|
1801
|
+
"""Create a sql assertion.
|
|
1802
|
+
|
|
1803
|
+
Args:
|
|
1804
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
1805
|
+
display_name: The display name of the assertion. If not provided, a random display
|
|
1806
|
+
name will be generated.
|
|
1807
|
+
enabled: Whether the assertion is enabled. Defaults to True.
|
|
1808
|
+
criteria: The criteria to be used for the assertion. This is of type SqlAssertionCriteria. It has the following fields:
|
|
1809
|
+
- type: The type of sql assertion. Valid values are:
|
|
1810
|
+
- "METRIC" -> Looks at the current value of the metric.
|
|
1811
|
+
- "METRIC_CHANGE" -> Looks at the change in the metric between the current and previous run.
|
|
1812
|
+
- change_type: The change type of the assertion, if the type is "METRIC_CHANGE". Valid values are:
|
|
1813
|
+
- "ABSOLUTE" -> Looks at the absolute change in the metric.
|
|
1814
|
+
- "PERCENTAGE" -> Looks at the percentage change in the metric.
|
|
1815
|
+
- operator: The operator to be used for the assertion. Valid values are:
|
|
1816
|
+
- "GREATER_THAN" -> The metric value is greater than the threshold.
|
|
1817
|
+
- "LESS_THAN" -> The metric value is less than the threshold.
|
|
1818
|
+
- "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than or equal to the threshold.
|
|
1819
|
+
- "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or equal to the threshold.
|
|
1820
|
+
- "EQUAL_TO" -> The metric value is equal to the threshold.
|
|
1821
|
+
- "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
|
|
1822
|
+
- "BETWEEN" -> The metric value is between the two thresholds.
|
|
1823
|
+
- parameters: The parameters to be used for the assertion. This is of type SqlAssertionParameters. It has the following fields:
|
|
1824
|
+
- value: The value of the metric. This can be a single value or a tuple range.
|
|
1825
|
+
- If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
|
|
1826
|
+
- If the operator is not "BETWEEN", the value is a single value.
|
|
1827
|
+
statement: The statement to be used for the assertion.
|
|
1828
|
+
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
1829
|
+
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
1830
|
+
- "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
|
|
1831
|
+
tags: The tags to be applied to the assertion. Valid values are:
|
|
1832
|
+
- a list of strings (strings will be converted to TagUrn objects)
|
|
1833
|
+
- a list of TagUrn objects
|
|
1834
|
+
- a list of TagAssociationClass objects
|
|
1835
|
+
created_by: Optional urn of the user who created the assertion. The format is
|
|
1836
|
+
"urn:li:corpuser:<username>", which you can find on the Users & Groups page.
|
|
1837
|
+
schedule: Optional cron formatted schedule for the assertion. If not provided, a default
|
|
1838
|
+
schedule will be used. The schedule determines when the assertion will be evaluated.
|
|
1839
|
+
The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
|
|
1840
|
+
Alternatively, a models.CronScheduleClass object can be provided with string parameters
|
|
1841
|
+
cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
|
|
1842
|
+
|
|
1843
|
+
Returns:
|
|
1844
|
+
SqlAssertion: The created assertion.
|
|
1845
|
+
"""
|
|
1846
|
+
_print_experimental_warning()
|
|
1847
|
+
now_utc = datetime.now(timezone.utc)
|
|
1848
|
+
if created_by is None:
|
|
1849
|
+
logger.warning(
|
|
1850
|
+
f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
1851
|
+
)
|
|
1852
|
+
created_by = DEFAULT_CREATED_BY
|
|
1853
|
+
assertion_input = _SqlAssertionInput(
|
|
1854
|
+
urn=None,
|
|
1855
|
+
entity_client=self.client.entities,
|
|
1856
|
+
dataset_urn=dataset_urn,
|
|
1857
|
+
display_name=display_name,
|
|
1858
|
+
enabled=enabled,
|
|
1859
|
+
criteria=criteria,
|
|
1860
|
+
statement=statement,
|
|
1861
|
+
incident_behavior=incident_behavior,
|
|
1862
|
+
tags=tags,
|
|
1863
|
+
created_by=created_by,
|
|
1864
|
+
created_at=now_utc,
|
|
1865
|
+
updated_by=created_by,
|
|
1866
|
+
updated_at=now_utc,
|
|
1867
|
+
schedule=schedule,
|
|
1868
|
+
)
|
|
1869
|
+
assertion_entity, monitor_entity = (
|
|
1870
|
+
assertion_input.to_assertion_and_monitor_entities()
|
|
1871
|
+
)
|
|
1872
|
+
# If assertion creation fails, we won't try to create the monitor
|
|
1873
|
+
self.client.entities.create(assertion_entity)
|
|
1874
|
+
# TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
1875
|
+
# try:
|
|
1876
|
+
self.client.entities.create(monitor_entity)
|
|
1877
|
+
# except Exception as e:
|
|
1878
|
+
# logger.error(f"Error creating monitor: {e}")
|
|
1879
|
+
# self.client.entities.delete(assertion_entity)
|
|
1880
|
+
# raise e
|
|
1881
|
+
return SqlAssertion._from_entities(assertion_entity, monitor_entity)
|
|
1882
|
+
|
|
1883
|
+
def sync_smart_volume_assertion(
|
|
1884
|
+
self,
|
|
1885
|
+
*,
|
|
1886
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
1887
|
+
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
1888
|
+
display_name: Optional[str] = None,
|
|
1889
|
+
enabled: Optional[bool] = None,
|
|
1890
|
+
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
1891
|
+
sensitivity: Optional[Union[str, InferenceSensitivity]] = None,
|
|
1892
|
+
exclusion_windows: Optional[ExclusionWindowInputTypes] = None,
|
|
1893
|
+
training_data_lookback_days: Optional[int] = None,
|
|
1894
|
+
incident_behavior: Optional[
|
|
1895
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
1896
|
+
] = None,
|
|
1897
|
+
tags: Optional[TagsInputType] = None,
|
|
1898
|
+
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
1899
|
+
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
1900
|
+
) -> SmartVolumeAssertion:
|
|
1901
|
+
"""Upsert and merge a smart volume assertion.
|
|
1902
|
+
|
|
1903
|
+
Note: keyword arguments are required.
|
|
1904
|
+
|
|
1905
|
+
Upsert and merge is a combination of create and update. If the assertion does not exist,
|
|
1906
|
+
it will be created. If it does exist, it will be updated. Existing assertion fields will
|
|
1907
|
+
be updated if the input value is not None. If the input value is None, the existing value
|
|
1908
|
+
will be preserved. The input value can be un-set, e.g. by passing an empty list or
|
|
1909
|
+
empty string.
|
|
1910
|
+
|
|
1911
|
+
Schedule behavior:
|
|
1912
|
+
- Create case: Uses default hourly schedule ("0 * * * *") or provided schedule
|
|
1913
|
+
- Update case: Unlike `sync_smart_freshness_assertion`, the schedule is updated.
|
|
1914
|
+
|
|
1915
|
+
Args:
|
|
1916
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
1917
|
+
urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
|
|
1918
|
+
will be _created_ in the DataHub instance.
|
|
1919
|
+
display_name: The display name of the assertion. If not provided, a random display name
|
|
1920
|
+
will be generated.
|
|
1921
|
+
enabled: Whether the assertion is enabled. If not provided, the existing value
|
|
1922
|
+
will be preserved.
|
|
1923
|
+
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
1924
|
+
schema is recommended. Valid values are:
|
|
1925
|
+
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
1926
|
+
- "audit_log" or DetectionMechanism.AUDIT_LOG
|
|
1927
|
+
- {
|
|
1928
|
+
"type": "last_modified_column",
|
|
1929
|
+
"column_name": "last_modified",
|
|
1930
|
+
"additional_filter": "last_modified > '2021-01-01'",
|
|
1931
|
+
} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
|
|
1932
|
+
additional_filter="last_modified > '2021-01-01'")
|
|
1933
|
+
- {
|
|
1934
|
+
"type": "high_watermark_column",
|
|
1935
|
+
"column_name": "id",
|
|
1936
|
+
"additional_filter": "id > 1000",
|
|
1937
|
+
} or DetectionMechanism.HIGH_WATERMARK_COLUMN(column_name='id',
|
|
1938
|
+
additional_filter='id > 1000')
|
|
1939
|
+
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
1940
|
+
sensitivity: The sensitivity to be applied to the assertion. Valid values are:
|
|
1941
|
+
- "low" or InferenceSensitivity.LOW
|
|
1942
|
+
- "medium" or InferenceSensitivity.MEDIUM
|
|
1943
|
+
- "high" or InferenceSensitivity.HIGH
|
|
1944
|
+
exclusion_windows: The exclusion windows to be applied to the assertion, currently only
|
|
1945
|
+
fixed range exclusion windows are supported. Valid values are:
|
|
1946
|
+
- from datetime.datetime objects: {
|
|
1947
|
+
"start": datetime(2025, 1, 1, 0, 0, 0),
|
|
1948
|
+
"end": datetime(2025, 1, 2, 0, 0, 0),
|
|
1949
|
+
}
|
|
1950
|
+
- from string datetimes: {
|
|
1951
|
+
"start": "2025-01-01T00:00:00",
|
|
1952
|
+
"end": "2025-01-02T00:00:00",
|
|
1953
|
+
}
|
|
1954
|
+
- from FixedRangeExclusionWindow objects: FixedRangeExclusionWindow(
|
|
1955
|
+
start=datetime(2025, 1, 1, 0, 0, 0),
|
|
1956
|
+
end=datetime(2025, 1, 2, 0, 0, 0)
|
|
1957
|
+
)
|
|
1958
|
+
training_data_lookback_days: The training data lookback days to be applied to the
|
|
1959
|
+
assertion as an integer.
|
|
1960
|
+
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
1961
|
+
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
1962
|
+
- "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
|
|
1963
|
+
tags: The tags to be applied to the assertion. Valid values are:
|
|
1964
|
+
- a list of strings (strings will be converted to TagUrn objects)
|
|
1965
|
+
- a list of TagUrn objects
|
|
1966
|
+
- a list of TagAssociationClass objects
|
|
1967
|
+
updated_by: Optional urn of the user who updated the assertion. The format is
|
|
1968
|
+
"urn:li:corpuser:<username>", which you can find on the Users & Groups page.
|
|
1969
|
+
The default is the datahub system user.
|
|
1970
|
+
TODO: Retrieve the SDK user as the default instead of the datahub system user.
|
|
1971
|
+
schedule: Optional cron formatted schedule for the assertion. If not provided, a default
|
|
1972
|
+
schedule will be used. The schedule determines when the assertion will be evaluated.
|
|
1973
|
+
The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
|
|
1974
|
+
Alternatively, a models.CronScheduleClass object can be provided with string parameters
|
|
1975
|
+
cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
|
|
1976
|
+
|
|
1977
|
+
Returns:
|
|
1978
|
+
SmartVolumeAssertion: The created or updated assertion.
|
|
1979
|
+
"""
|
|
1980
|
+
_print_experimental_warning()
|
|
1981
|
+
now_utc = datetime.now(timezone.utc)
|
|
1982
|
+
|
|
1983
|
+
if updated_by is None:
|
|
1984
|
+
logger.warning(
|
|
1985
|
+
f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
1986
|
+
)
|
|
1987
|
+
updated_by = DEFAULT_CREATED_BY
|
|
1988
|
+
|
|
1989
|
+
# 1. If urn is not set, create a new assertion
|
|
1990
|
+
if urn is None:
|
|
1991
|
+
logger.info("URN is not set, creating a new assertion")
|
|
1992
|
+
return self._create_smart_volume_assertion(
|
|
1993
|
+
dataset_urn=dataset_urn,
|
|
1994
|
+
display_name=display_name,
|
|
1995
|
+
enabled=enabled if enabled is not None else True,
|
|
1996
|
+
detection_mechanism=detection_mechanism,
|
|
1997
|
+
sensitivity=sensitivity,
|
|
1998
|
+
exclusion_windows=exclusion_windows,
|
|
1999
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
2000
|
+
incident_behavior=incident_behavior,
|
|
2001
|
+
tags=tags,
|
|
2002
|
+
created_by=updated_by,
|
|
2003
|
+
schedule=schedule,
|
|
2004
|
+
)
|
|
2005
|
+
|
|
2006
|
+
# 2. If urn is set, first validate the input:
|
|
2007
|
+
assertion_input = _SmartVolumeAssertionInput(
|
|
2008
|
+
urn=urn,
|
|
2009
|
+
entity_client=self.client.entities,
|
|
2010
|
+
dataset_urn=dataset_urn,
|
|
2011
|
+
display_name=display_name,
|
|
2012
|
+
detection_mechanism=detection_mechanism,
|
|
2013
|
+
sensitivity=sensitivity,
|
|
2014
|
+
exclusion_windows=exclusion_windows,
|
|
2015
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
2016
|
+
incident_behavior=incident_behavior,
|
|
2017
|
+
tags=tags,
|
|
2018
|
+
created_by=updated_by, # This will be overridden by the actual created_by
|
|
2019
|
+
created_at=now_utc, # This will be overridden by the actual created_at
|
|
2020
|
+
updated_by=updated_by,
|
|
2021
|
+
updated_at=now_utc,
|
|
2022
|
+
schedule=schedule,
|
|
2023
|
+
)
|
|
2024
|
+
|
|
2025
|
+
# 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
|
|
2026
|
+
# if the assertion does not exist:
|
|
2027
|
+
merged_assertion_input_or_created_assertion = (
|
|
2028
|
+
self._retrieve_and_merge_volume_assertion_and_monitor(
|
|
2029
|
+
assertion_input=assertion_input,
|
|
2030
|
+
dataset_urn=dataset_urn,
|
|
2031
|
+
urn=urn,
|
|
2032
|
+
display_name=display_name,
|
|
2033
|
+
enabled=enabled,
|
|
2034
|
+
detection_mechanism=detection_mechanism,
|
|
2035
|
+
sensitivity=sensitivity,
|
|
2036
|
+
exclusion_windows=exclusion_windows,
|
|
2037
|
+
training_data_lookback_days=training_data_lookback_days,
|
|
2038
|
+
incident_behavior=incident_behavior,
|
|
2039
|
+
tags=tags,
|
|
2040
|
+
updated_by=updated_by,
|
|
2041
|
+
now_utc=now_utc,
|
|
2042
|
+
schedule=schedule,
|
|
2043
|
+
)
|
|
2044
|
+
)
|
|
2045
|
+
|
|
2046
|
+
# Return early if we created a new assertion in the merge:
|
|
2047
|
+
if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
|
|
2048
|
+
# We know this is the correct type because we passed the assertion_class parameter
|
|
2049
|
+
assert isinstance(
|
|
1422
2050
|
merged_assertion_input_or_created_assertion, SmartVolumeAssertion
|
|
1423
2051
|
)
|
|
1424
2052
|
return merged_assertion_input_or_created_assertion
|
|
@@ -2317,6 +2945,355 @@ class AssertionsClient:
|
|
|
2317
2945
|
|
|
2318
2946
|
return FreshnessAssertion._from_entities(assertion_entity, monitor_entity)
|
|
2319
2947
|
|
|
2948
|
+
def sync_volume_assertion(
|
|
2949
|
+
self,
|
|
2950
|
+
*,
|
|
2951
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
2952
|
+
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
2953
|
+
display_name: Optional[str] = None,
|
|
2954
|
+
enabled: Optional[bool] = None,
|
|
2955
|
+
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
2956
|
+
incident_behavior: Optional[
|
|
2957
|
+
Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
|
|
2958
|
+
] = None,
|
|
2959
|
+
tags: Optional[TagsInputType] = None,
|
|
2960
|
+
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
2961
|
+
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
2962
|
+
definition: Optional[VolumeAssertionDefinitionInputTypes] = None,
|
|
2963
|
+
) -> VolumeAssertion:
|
|
2964
|
+
"""Upsert and merge a volume assertion.
|
|
2965
|
+
|
|
2966
|
+
Note: keyword arguments are required.
|
|
2967
|
+
|
|
2968
|
+
Upsert and merge is a combination of create and update. If the assertion does not exist,
|
|
2969
|
+
it will be created. If it does exist, it will be updated. Existing assertion fields will
|
|
2970
|
+
be updated if the input value is not None. If the input value is None, the existing value
|
|
2971
|
+
will be preserved. The input value can be un-set, e.g. by passing an empty list or
|
|
2972
|
+
empty string.
|
|
2973
|
+
|
|
2974
|
+
Schedule behavior:
|
|
2975
|
+
- Create case: Uses default daily schedule ("0 0 * * *") or provided schedule
|
|
2976
|
+
- Update case: Uses existing schedule or provided schedule.
|
|
2977
|
+
|
|
2978
|
+
Args:
|
|
2979
|
+
dataset_urn: The urn of the dataset to be monitored.
|
|
2980
|
+
urn: The urn of the assertion. If not provided, a urn will be generated and the assertion
|
|
2981
|
+
will be _created_ in the DataHub instance.
|
|
2982
|
+
display_name: The display name of the assertion. If not provided, a random display name
|
|
2983
|
+
will be generated.
|
|
2984
|
+
enabled: Whether the assertion is enabled. If not provided, the existing value
|
|
2985
|
+
will be preserved.
|
|
2986
|
+
detection_mechanism: The detection mechanism to be used for the assertion. Information
|
|
2987
|
+
schema is recommended. Valid values are:
|
|
2988
|
+
- "information_schema" or DetectionMechanism.INFORMATION_SCHEMA
|
|
2989
|
+
- "audit_log" or DetectionMechanism.AUDIT_LOG
|
|
2990
|
+
- {
|
|
2991
|
+
"type": "last_modified_column",
|
|
2992
|
+
"column_name": "last_modified",
|
|
2993
|
+
"additional_filter": "last_modified > '2021-01-01'",
|
|
2994
|
+
} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified',
|
|
2995
|
+
additional_filter="last_modified > '2021-01-01'")
|
|
2996
|
+
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
2997
|
+
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
2998
|
+
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
2999
|
+
- "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
|
|
3000
|
+
tags: The tags to be applied to the assertion. Valid values are:
|
|
3001
|
+
- a list of strings (strings will be converted to TagUrn objects)
|
|
3002
|
+
- a list of TagUrn objects
|
|
3003
|
+
- a list of TagAssociationClass objects
|
|
3004
|
+
updated_by: Optional urn of the user who updated the assertion. The format is
|
|
3005
|
+
"urn:li:corpuser:<username>", which you can find on the Users & Groups page.
|
|
3006
|
+
The default is the datahub system user.
|
|
3007
|
+
TODO: Retrieve the SDK user as the default instead of the datahub system user.
|
|
3008
|
+
schedule: Optional cron formatted schedule for the assertion. If not provided, a default
|
|
3009
|
+
schedule will be used. The schedule determines when the assertion will be evaluated.
|
|
3010
|
+
The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
|
|
3011
|
+
Alternatively, a models.CronScheduleClass object can be provided with string parameters
|
|
3012
|
+
cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
|
|
3013
|
+
definition: The volume assertion definition. Can be provided as:
|
|
3014
|
+
- A typed volume assertion object (RowCountTotal or RowCountChange)
|
|
3015
|
+
- A dictionary with keys: type, operator, parameters (and kind for row_count_change)
|
|
3016
|
+
- None to preserve the existing definition from the backend (for update operations)
|
|
3017
|
+
|
|
3018
|
+
Example dictionary for row count total:
|
|
3019
|
+
{
|
|
3020
|
+
"type": "row_count_total",
|
|
3021
|
+
"operator": "GREATER_THAN_OR_EQUAL_TO",
|
|
3022
|
+
"parameters": 100
|
|
3023
|
+
}
|
|
3024
|
+
|
|
3025
|
+
Example dictionary for row count change:
|
|
3026
|
+
{
|
|
3027
|
+
"type": "row_count_change",
|
|
3028
|
+
"kind": "absolute",
|
|
3029
|
+
"operator": "LESS_THAN_OR_EQUAL_TO",
|
|
3030
|
+
"parameters": 50
|
|
3031
|
+
}
|
|
3032
|
+
|
|
3033
|
+
Returns:
|
|
3034
|
+
VolumeAssertion: The created or updated assertion.
|
|
3035
|
+
"""
|
|
3036
|
+
_print_experimental_warning()
|
|
3037
|
+
now_utc = datetime.now(timezone.utc)
|
|
3038
|
+
|
|
3039
|
+
if updated_by is None:
|
|
3040
|
+
logger.warning(
|
|
3041
|
+
f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
3042
|
+
)
|
|
3043
|
+
updated_by = DEFAULT_CREATED_BY
|
|
3044
|
+
|
|
3045
|
+
# 1. If urn is not set, create a new assertion
|
|
3046
|
+
if urn is None:
|
|
3047
|
+
logger.info("URN is not set, creating a new assertion")
|
|
3048
|
+
return self._create_volume_assertion(
|
|
3049
|
+
dataset_urn=dataset_urn,
|
|
3050
|
+
display_name=display_name,
|
|
3051
|
+
enabled=enabled if enabled is not None else True,
|
|
3052
|
+
detection_mechanism=detection_mechanism,
|
|
3053
|
+
incident_behavior=incident_behavior,
|
|
3054
|
+
tags=tags,
|
|
3055
|
+
created_by=updated_by,
|
|
3056
|
+
schedule=schedule,
|
|
3057
|
+
definition=definition,
|
|
3058
|
+
)
|
|
3059
|
+
|
|
3060
|
+
# 2. If urn is set, prepare definition for validation
|
|
3061
|
+
# We use a temporary default definition if None is provided, just to pass the _VolumeAssertionInput validation.
|
|
3062
|
+
# However, we record this in the use_backend_definition flag, so we can later
|
|
3063
|
+
# fail if there is no definition in backend (basically, there is no assertion). That would mean that
|
|
3064
|
+
# this is a creation case and the user missed the definition parameter, which is required.
|
|
3065
|
+
# Likely this pattern has not come up before because there is no publicly documented default definition
|
|
3066
|
+
# that we can use as fallback.
|
|
3067
|
+
use_backend_definition = definition is None
|
|
3068
|
+
temp_definition = (
|
|
3069
|
+
definition
|
|
3070
|
+
if definition is not None
|
|
3071
|
+
else RowCountTotal(
|
|
3072
|
+
operator=VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO,
|
|
3073
|
+
parameters=0, # Temporary placeholder
|
|
3074
|
+
)
|
|
3075
|
+
)
|
|
3076
|
+
|
|
3077
|
+
# 3. Create assertion input with effective definition
|
|
3078
|
+
assertion_input = _VolumeAssertionInput(
|
|
3079
|
+
urn=urn,
|
|
3080
|
+
dataset_urn=dataset_urn,
|
|
3081
|
+
entity_client=self.client.entities,
|
|
3082
|
+
detection_mechanism=detection_mechanism,
|
|
3083
|
+
incident_behavior=incident_behavior,
|
|
3084
|
+
tags=tags,
|
|
3085
|
+
created_by=updated_by, # This will be overridden by the actual created_by
|
|
3086
|
+
created_at=now_utc, # This will be overridden by the actual created_at
|
|
3087
|
+
updated_by=updated_by,
|
|
3088
|
+
updated_at=now_utc,
|
|
3089
|
+
schedule=schedule,
|
|
3090
|
+
definition=temp_definition,
|
|
3091
|
+
)
|
|
3092
|
+
|
|
3093
|
+
# 4. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
|
|
3094
|
+
# if the assertion does not exist:
|
|
3095
|
+
merged_assertion_input_or_created_assertion = (
|
|
3096
|
+
self._retrieve_and_merge_native_volume_assertion_and_monitor(
|
|
3097
|
+
assertion_input=assertion_input,
|
|
3098
|
+
dataset_urn=dataset_urn,
|
|
3099
|
+
urn=urn,
|
|
3100
|
+
display_name=display_name,
|
|
3101
|
+
enabled=enabled,
|
|
3102
|
+
detection_mechanism=detection_mechanism,
|
|
3103
|
+
definition=definition,
|
|
3104
|
+
use_backend_definition=use_backend_definition,
|
|
3105
|
+
incident_behavior=incident_behavior,
|
|
3106
|
+
tags=tags,
|
|
3107
|
+
updated_by=updated_by,
|
|
3108
|
+
now_utc=now_utc,
|
|
3109
|
+
schedule=schedule,
|
|
3110
|
+
)
|
|
3111
|
+
)
|
|
3112
|
+
|
|
3113
|
+
# Return early if we created a new assertion in the merge:
|
|
3114
|
+
if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
|
|
3115
|
+
# We know this is the correct type because we passed the assertion_class parameter
|
|
3116
|
+
assert isinstance(
|
|
3117
|
+
merged_assertion_input_or_created_assertion, VolumeAssertion
|
|
3118
|
+
)
|
|
3119
|
+
return merged_assertion_input_or_created_assertion
|
|
3120
|
+
|
|
3121
|
+
# 5. Upsert the assertion and monitor entities:
|
|
3122
|
+
assertion_entity, monitor_entity = (
|
|
3123
|
+
merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
|
|
3124
|
+
)
|
|
3125
|
+
# If assertion upsert fails, we won't try to upsert the monitor
|
|
3126
|
+
self.client.entities.upsert(assertion_entity)
|
|
3127
|
+
# TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
3128
|
+
# try:
|
|
3129
|
+
self.client.entities.upsert(monitor_entity)
|
|
3130
|
+
# except Exception as e:
|
|
3131
|
+
# logger.error(f"Error upserting monitor: {e}")
|
|
3132
|
+
# self.client.entities.delete(assertion_entity)
|
|
3133
|
+
# raise e
|
|
3134
|
+
return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
|
|
3135
|
+
|
|
3136
|
+
def sync_sql_assertion(
    self,
    *,
    dataset_urn: Union[str, DatasetUrn],
    urn: Optional[Union[str, AssertionUrn]] = None,
    display_name: Optional[str] = None,
    enabled: Optional[bool] = None,
    statement: str,
    criteria: SqlAssertionCriteria,
    incident_behavior: Optional[
        Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
    ] = None,
    tags: Optional[TagsInputType] = None,
    updated_by: Optional[Union[str, CorpUserUrn]] = None,
    schedule: Optional[Union[str, models.CronScheduleClass]] = None,
) -> SqlAssertion:
    """Upsert and merge a sql assertion.

    Note: keyword arguments are required.

    "Upsert and merge" combines create and update. When the assertion does not
    exist it is created; when it exists it is updated. For an existing assertion,
    a field is overwritten only if the corresponding input is not None; a None
    input preserves the stored value. A field can be un-set by passing an empty
    value, e.g. an empty list or an empty string.

    Schedule behavior:
        - Create case: uses the provided schedule, or the default daily
          schedule ("0 0 * * *") when none is given.
        - Update case: uses the provided schedule, or the existing schedule
          when none is given.

    Args:
        dataset_urn: The urn of the dataset to be monitored.
        urn: The urn of the assertion. If not provided, a urn will be generated
            and the assertion will be _created_ in the DataHub instance.
        display_name: The display name of the assertion. If not provided, a
            random display name will be generated.
        enabled: Whether the assertion is enabled. If not provided, the existing
            value will be preserved (a newly created assertion is enabled).
        statement: The SQL statement to be used for the assertion, e.g.
            "SELECT COUNT(*) FROM table WHERE column > 100".
        criteria: The criteria to be used for the assertion. This is of type
            SqlAssertionCriteria. It has the following fields:
            - type: The type of sql assertion. Valid values are:
                - "METRIC" -> Looks at the current value of the metric.
                - "METRIC_CHANGE" -> Looks at the change in the metric between
                  the current and previous run.
            - change_type: The change type of the assertion, if the type is
              "METRIC_CHANGE". Valid values are:
                - "ABSOLUTE" -> Looks at the absolute change in the metric.
                - "PERCENTAGE" -> Looks at the percentage change in the metric.
            - operator: The operator to be used for the assertion. Valid values are:
                - "GREATER_THAN" -> The metric value is greater than the threshold.
                - "LESS_THAN" -> The metric value is less than the threshold.
                - "GREATER_THAN_OR_EQUAL_TO" -> The metric value is greater than
                  or equal to the threshold.
                - "LESS_THAN_OR_EQUAL_TO" -> The metric value is less than or
                  equal to the threshold.
                - "EQUAL_TO" -> The metric value is equal to the threshold.
                - "NOT_EQUAL_TO" -> The metric value is not equal to the threshold.
                - "BETWEEN" -> The metric value is between the two thresholds.
            - parameters: The parameters to be used for the assertion. This is of
              type SqlAssertionParameters. It has the following fields:
                - value: The value of the metric. This can be a single value or a
                  tuple range.
                    - If the operator is "BETWEEN", the value is a tuple of two
                      values, with format min, max.
                    - If the operator is not "BETWEEN", the value is a single value.
        incident_behavior: The incident behavior to be applied to the assertion.
            Valid values are:
            - "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
            - "resolve_on_pass" or AssertionIncidentBehavior.RESOLVE_ON_PASS
        tags: The tags to be applied to the assertion. Valid values are:
            - a list of strings (strings will be converted to TagUrn objects)
            - a list of TagUrn objects
            - a list of TagAssociationClass objects
        updated_by: Optional urn of the user who updated the assertion. The format
            is "urn:li:corpuser:<username>", which you can find on the Users &
            Groups page. The default is the datahub system user.
            TODO: Retrieve the SDK user as the default instead of the datahub
            system user.
        schedule: Optional cron formatted schedule for the assertion. If not
            provided, a default schedule will be used. The schedule determines
            when the assertion will be evaluated. The format is a cron expression,
            e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a
            models.CronScheduleClass object can be provided with string parameters
            cron and timezone. Use
            `from datahub.metadata import schema_classes as models` to import
            the class.

    Returns:
        SqlAssertion: The created or updated assertion.
    """
    _print_experimental_warning()
    sync_time = datetime.now(timezone.utc)

    # Fall back to the placeholder actor when the caller did not name one.
    acting_user = updated_by
    if acting_user is None:
        logger.warning(
            f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
        )
        acting_user = DEFAULT_CREATED_BY

    # Step 1: without a urn there is nothing to merge with -> plain create.
    if urn is None:
        logger.info("URN is not set, creating a new assertion")
        return self._create_sql_assertion(
            dataset_urn=dataset_urn,
            display_name=display_name,
            enabled=True if enabled is None else enabled,
            criteria=criteria,
            statement=statement,
            incident_behavior=incident_behavior,
            tags=tags,
            created_by=acting_user,
            schedule=schedule,
        )

    # Step 2: a urn was given, so validate the requested state first.
    requested_input = _SqlAssertionInput(
        urn=urn,
        entity_client=self.client.entities,
        dataset_urn=dataset_urn,
        display_name=display_name,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        # created_by / created_at are placeholders here; they will be
        # overridden by the actual created_by / created_at.
        created_by=acting_user,
        created_at=sync_time,
        updated_by=acting_user,
        updated_at=sync_time,
        schedule=schedule,
    )

    # Step 3: merge the request with the existing assertion and monitor
    # entities, or create a new assertion if the assertion does not exist.
    merge_outcome = self._retrieve_and_merge_sql_assertion_and_monitor(
        assertion_input=requested_input,
        dataset_urn=dataset_urn,
        urn=urn,
        display_name=display_name,
        enabled=enabled,
        criteria=criteria,
        statement=statement,
        incident_behavior=incident_behavior,
        tags=tags,
        updated_by=acting_user,
        now_utc=sync_time,
        schedule=schedule,
    )

    # The merge step hands back a finished assertion when it had to create one;
    # in that case there is nothing left to upsert.
    if isinstance(merge_outcome, _AssertionPublic):
        # We know this is the correct type because we passed the assertion_class parameter
        assert isinstance(merge_outcome, SqlAssertion)
        return merge_outcome

    # Step 4: persist the merged state. The assertion goes first so that a
    # failure there prevents the monitor upsert from being attempted.
    assertion_entity, monitor_entity = (
        merge_outcome.to_assertion_and_monitor_entities()
    )
    self.client.entities.upsert(assertion_entity)
    # TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
    # Intended shape once delete exists: on any exception from the monitor
    # upsert, log "Error upserting monitor: {e}", delete assertion_entity via
    # self.client.entities.delete(...), and re-raise.
    self.client.entities.upsert(monitor_entity)

    return SqlAssertion._from_entities(assertion_entity, monitor_entity)
|
|
3296
|
+
|
|
2320
3297
|
|
|
2321
3298
|
def _merge_field(
|
|
2322
3299
|
input_field_value: Any,
|