acryl-datahub-cloud 0.3.12.1rc3__py3-none-any.whl → 0.3.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/sdk/__init__.py +20 -0
- acryl_datahub_cloud/sdk/assertion/assertion_base.py +146 -97
- acryl_datahub_cloud/sdk/assertion/column_metric_assertion.py +191 -0
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +10 -22
- acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +99 -19
- acryl_datahub_cloud/sdk/assertion_input/column_metric_assertion_input.py +965 -0
- acryl_datahub_cloud/sdk/assertion_input/column_metric_constants.py +191 -0
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +60 -11
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +438 -347
- acryl_datahub_cloud/sdk/assertion_input/sql_assertion_input.py +105 -61
- acryl_datahub_cloud/sdk/assertion_input/volume_assertion_input.py +381 -392
- acryl_datahub_cloud/sdk/assertions_client.py +993 -314
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/METADATA +47 -47
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/RECORD +18 -15
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12.1rc3.dist-info → acryl_datahub_cloud-0.3.12.2.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Optional, Union
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Optional, TypedDict, Union
|
|
6
6
|
|
|
7
7
|
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
8
8
|
AssertionMode,
|
|
@@ -12,6 +12,10 @@ from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
|
12
12
|
SqlAssertion,
|
|
13
13
|
VolumeAssertion,
|
|
14
14
|
_AssertionPublic,
|
|
15
|
+
_HasColumnMetricFunctionality,
|
|
16
|
+
)
|
|
17
|
+
from acryl_datahub_cloud.sdk.assertion.column_metric_assertion import (
|
|
18
|
+
ColumnMetricAssertion,
|
|
15
19
|
)
|
|
16
20
|
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
17
21
|
SmartColumnMetricAssertion,
|
|
@@ -26,35 +30,31 @@ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
|
26
30
|
_SmartFreshnessAssertionInput,
|
|
27
31
|
_SmartVolumeAssertionInput,
|
|
28
32
|
)
|
|
33
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_assertion_input import (
|
|
34
|
+
ColumnMetricAssertionParameters,
|
|
35
|
+
_ColumnMetricAssertionInput,
|
|
36
|
+
)
|
|
37
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import (
|
|
38
|
+
MetricInputType,
|
|
39
|
+
OperatorInputType,
|
|
40
|
+
)
|
|
29
41
|
from acryl_datahub_cloud.sdk.assertion_input.freshness_assertion_input import (
|
|
42
|
+
FreshnessAssertionScheduleCheckType,
|
|
30
43
|
_FreshnessAssertionInput,
|
|
31
44
|
)
|
|
32
45
|
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
33
|
-
MetricInputType,
|
|
34
|
-
OperatorInputType,
|
|
35
|
-
RangeInputType,
|
|
36
|
-
RangeTypeInputType,
|
|
37
|
-
ValueInputType,
|
|
38
|
-
ValueTypeInputType,
|
|
39
46
|
_SmartColumnMetricAssertionInput,
|
|
40
47
|
)
|
|
41
48
|
from acryl_datahub_cloud.sdk.assertion_input.sql_assertion_input import (
|
|
42
|
-
|
|
49
|
+
SqlAssertionCondition,
|
|
43
50
|
SqlAssertionCriteria,
|
|
44
|
-
SqlAssertionOperator,
|
|
45
|
-
SqlAssertionType,
|
|
46
51
|
_SqlAssertionInput,
|
|
47
52
|
)
|
|
48
53
|
from acryl_datahub_cloud.sdk.assertion_input.volume_assertion_input import (
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
VolumeAssertionDefinitionChangeKind,
|
|
53
|
-
VolumeAssertionDefinitionInputTypes,
|
|
54
|
+
VolumeAssertionCondition,
|
|
55
|
+
VolumeAssertionCriteria,
|
|
56
|
+
VolumeAssertionCriteriaInputTypes,
|
|
54
57
|
VolumeAssertionDefinitionParameters,
|
|
55
|
-
VolumeAssertionDefinitionType,
|
|
56
|
-
VolumeAssertionOperator,
|
|
57
|
-
_VolumeAssertionDefinitionTypes,
|
|
58
58
|
_VolumeAssertionInput,
|
|
59
59
|
)
|
|
60
60
|
from acryl_datahub_cloud.sdk.entities.assertion import Assertion, TagsInputType
|
|
@@ -73,11 +73,58 @@ logger = logging.getLogger(__name__)
|
|
|
73
73
|
DEFAULT_CREATED_BY = CorpUserUrn.from_string("urn:li:corpuser:__datahub_system")
|
|
74
74
|
|
|
75
75
|
|
|
76
|
+
class _AssertionLookupInfo(TypedDict):
|
|
77
|
+
"""Minimal info needed to look up an assertion and monitor."""
|
|
78
|
+
|
|
79
|
+
dataset_urn: Union[str, DatasetUrn]
|
|
80
|
+
urn: Union[str, AssertionUrn]
|
|
81
|
+
|
|
82
|
+
|
|
76
83
|
class AssertionsClient:
|
|
77
84
|
def __init__(self, client: "DataHubClient"):
|
|
78
85
|
self.client = client
|
|
79
86
|
_print_experimental_warning()
|
|
80
87
|
|
|
88
|
+
def _validate_required_field(
|
|
89
|
+
self, field_value: Optional[Any], field_name: str, context: str
|
|
90
|
+
) -> None:
|
|
91
|
+
"""Validate that a required field is not None and raise SDKUsageError if it is."""
|
|
92
|
+
if field_value is None:
|
|
93
|
+
raise SDKUsageError(f"{field_name} is required {context}")
|
|
94
|
+
|
|
95
|
+
def _validate_required_smart_column_fields_for_creation(
|
|
96
|
+
self,
|
|
97
|
+
column_name: Optional[str],
|
|
98
|
+
metric_type: Optional[MetricInputType],
|
|
99
|
+
) -> None:
|
|
100
|
+
"""Validate required fields for smart column metric assertion creation."""
|
|
101
|
+
self._validate_required_field(
|
|
102
|
+
column_name, "column_name", "when creating a new assertion (urn is None)"
|
|
103
|
+
)
|
|
104
|
+
self._validate_required_field(
|
|
105
|
+
metric_type, "metric_type", "when creating a new assertion (urn is None)"
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
def _validate_required_smart_column_fields_for_update(
|
|
109
|
+
self,
|
|
110
|
+
column_name: Optional[str],
|
|
111
|
+
metric_type: Optional[MetricInputType],
|
|
112
|
+
assertion_urn: Union[str, AssertionUrn],
|
|
113
|
+
) -> None:
|
|
114
|
+
"""Validate required fields after attempting to fetch from existing assertion."""
|
|
115
|
+
context = f"and not found in existing assertion {assertion_urn}. The existing assertion may be invalid or corrupted."
|
|
116
|
+
self._validate_required_field(column_name, "column_name", context)
|
|
117
|
+
self._validate_required_field(metric_type, "metric_type", context)
|
|
118
|
+
|
|
119
|
+
def _validate_criteria_parameters_for_creation(
|
|
120
|
+
self,
|
|
121
|
+
urn: Optional[Union[str, AssertionUrn]],
|
|
122
|
+
) -> None:
|
|
123
|
+
"""Validate criteria_parameters for creation scenario."""
|
|
124
|
+
# Smart assertions always use BETWEEN operator with (0, 0) criteria_parameters
|
|
125
|
+
# No validation needed since these values are fixed
|
|
126
|
+
pass
|
|
127
|
+
|
|
81
128
|
def sync_smart_freshness_assertion(
|
|
82
129
|
self,
|
|
83
130
|
*,
|
|
@@ -119,7 +166,11 @@ class AssertionsClient:
|
|
|
119
166
|
- {"type": "last_modified_column", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.LAST_MODIFIED_COLUMN(column_name='last_modified', additional_filter="last_modified > '2021-01-01'")
|
|
120
167
|
- "datahub_operation" or DetectionMechanism.DATAHUB_OPERATION
|
|
121
168
|
sensitivity (Optional[Union[str, InferenceSensitivity]]): The sensitivity to be applied to the assertion. Valid values are: "low", "medium", "high".
|
|
122
|
-
exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported.
|
|
169
|
+
exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported. Valid values are:
|
|
170
|
+
- {"start": "2025-01-01T00:00:00", "end": "2025-01-02T00:00:00"} (using ISO strings)
|
|
171
|
+
- {"start": datetime(2025, 1, 1, 0, 0, 0), "end": datetime(2025, 1, 2, 0, 0, 0)} (using datetime objects)
|
|
172
|
+
- FixedRangeExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0), end=datetime(2025, 1, 2, 0, 0, 0)) (using typed object)
|
|
173
|
+
- A list of any of the above formats
|
|
123
174
|
training_data_lookback_days (Optional[int]): The training data lookback days to be applied to the assertion as an integer.
|
|
124
175
|
incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
|
|
125
176
|
tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
|
|
@@ -482,8 +533,8 @@ class AssertionsClient:
|
|
|
482
533
|
updated_by: Optional[Union[str, CorpUserUrn]],
|
|
483
534
|
now_utc: datetime,
|
|
484
535
|
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
485
|
-
|
|
486
|
-
|
|
536
|
+
criteria: VolumeAssertionCriteriaInputTypes,
|
|
537
|
+
use_backend_criteria: bool = False,
|
|
487
538
|
) -> Union[VolumeAssertion, _VolumeAssertionInput]:
|
|
488
539
|
# 1. Retrieve any existing assertion and monitor entities:
|
|
489
540
|
maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
|
|
@@ -506,7 +557,7 @@ class AssertionsClient:
|
|
|
506
557
|
)
|
|
507
558
|
# 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
|
|
508
559
|
elif not maybe_assertion_entity:
|
|
509
|
-
if
|
|
560
|
+
if use_backend_criteria:
|
|
510
561
|
raise SDKUsageError(
|
|
511
562
|
f"Cannot sync assertion {urn}: no existing definition found in backend and no definition provided in request"
|
|
512
563
|
)
|
|
@@ -514,8 +565,7 @@ class AssertionsClient:
|
|
|
514
565
|
f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
|
|
515
566
|
)
|
|
516
567
|
# Parse the provided criteria to call the new signature
|
|
517
|
-
|
|
518
|
-
assert isinstance(parsed_definition, (RowCountTotal, RowCountChange))
|
|
568
|
+
parsed_criteria = VolumeAssertionCriteria.parse(criteria)
|
|
519
569
|
return self._create_volume_assertion(
|
|
520
570
|
dataset_urn=dataset_urn,
|
|
521
571
|
display_name=display_name,
|
|
@@ -524,12 +574,8 @@ class AssertionsClient:
|
|
|
524
574
|
tags=tags,
|
|
525
575
|
created_by=updated_by,
|
|
526
576
|
schedule=schedule,
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
if isinstance(parsed_definition, RowCountChange)
|
|
530
|
-
else None,
|
|
531
|
-
criteria_operator=parsed_definition.operator,
|
|
532
|
-
criteria_parameters=parsed_definition.parameters,
|
|
577
|
+
criteria_condition=parsed_criteria.condition,
|
|
578
|
+
criteria_parameters=parsed_criteria.parameters,
|
|
533
579
|
)
|
|
534
580
|
|
|
535
581
|
# 3. Check for any issues e.g. different dataset urns
|
|
@@ -542,25 +588,25 @@ class AssertionsClient:
|
|
|
542
588
|
f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
|
|
543
589
|
)
|
|
544
590
|
|
|
545
|
-
# 4. Handle
|
|
546
|
-
if
|
|
591
|
+
# 4. Handle criteria: use backend criteria if flag is set and backend has one
|
|
592
|
+
if use_backend_criteria:
|
|
547
593
|
if maybe_assertion_entity is not None:
|
|
548
|
-
# Use
|
|
549
|
-
|
|
594
|
+
# Use criteria from backend
|
|
595
|
+
backend_criteria = VolumeAssertionCriteria.from_assertion(
|
|
550
596
|
maybe_assertion_entity
|
|
551
597
|
)
|
|
552
|
-
# Update the assertion_input with the real
|
|
553
|
-
assertion_input.
|
|
554
|
-
|
|
555
|
-
logger.info("Using
|
|
598
|
+
# Update the assertion_input with the real criteria from backend
|
|
599
|
+
assertion_input.criteria = backend_criteria
|
|
600
|
+
effective_criteria = backend_criteria
|
|
601
|
+
logger.info("Using criteria from backend assertion")
|
|
556
602
|
else:
|
|
557
|
-
# No backend assertion and no user-provided
|
|
603
|
+
# No backend assertion and no user-provided criteria - this is an error
|
|
558
604
|
raise SDKUsageError(
|
|
559
|
-
f"Cannot sync assertion {urn}: no existing
|
|
605
|
+
f"Cannot sync assertion {urn}: no existing criteria found in backend and no criteria provided in request"
|
|
560
606
|
)
|
|
561
607
|
else:
|
|
562
|
-
# Use the already-parsed
|
|
563
|
-
|
|
608
|
+
# Use the already-parsed criteria from assertion_input
|
|
609
|
+
effective_criteria = assertion_input.criteria
|
|
564
610
|
|
|
565
611
|
# 5. Merge the existing assertion with the validated input:
|
|
566
612
|
merged_assertion_input = self._merge_volume_input(
|
|
@@ -577,7 +623,7 @@ class AssertionsClient:
|
|
|
577
623
|
maybe_monitor_entity=maybe_monitor_entity,
|
|
578
624
|
existing_assertion=existing_assertion,
|
|
579
625
|
schedule=schedule,
|
|
580
|
-
|
|
626
|
+
criteria=effective_criteria,
|
|
581
627
|
)
|
|
582
628
|
|
|
583
629
|
return merged_assertion_input
|
|
@@ -624,9 +670,7 @@ class AssertionsClient:
|
|
|
624
670
|
return self._create_sql_assertion(
|
|
625
671
|
dataset_urn=dataset_urn,
|
|
626
672
|
display_name=display_name,
|
|
627
|
-
|
|
628
|
-
criteria_change_type=criteria.change_type,
|
|
629
|
-
criteria_operator=criteria.operator,
|
|
673
|
+
criteria_condition=criteria.condition,
|
|
630
674
|
criteria_parameters=criteria.parameters,
|
|
631
675
|
statement=statement,
|
|
632
676
|
incident_behavior=incident_behavior,
|
|
@@ -666,22 +710,40 @@ class AssertionsClient:
|
|
|
666
710
|
|
|
667
711
|
def _retrieve_assertion_and_monitor(
|
|
668
712
|
self,
|
|
669
|
-
assertion_input: _AssertionInput,
|
|
713
|
+
assertion_input: Union[_AssertionInput, _AssertionLookupInfo],
|
|
670
714
|
) -> tuple[Optional[Assertion], MonitorUrn, Optional[Monitor]]:
|
|
671
715
|
"""Retrieve the assertion and monitor entities from the DataHub instance.
|
|
672
716
|
|
|
673
717
|
Args:
|
|
674
|
-
assertion_input: The validated input to the function.
|
|
718
|
+
assertion_input: The validated input to the function or minimal lookup info.
|
|
675
719
|
|
|
676
720
|
Returns:
|
|
677
721
|
The assertion and monitor entities.
|
|
678
722
|
"""
|
|
679
|
-
|
|
723
|
+
# Extract URN and dataset URN from input
|
|
724
|
+
_urn: Union[str, AssertionUrn]
|
|
725
|
+
_dataset_urn: Union[str, DatasetUrn]
|
|
726
|
+
if isinstance(assertion_input, dict):
|
|
727
|
+
_urn = assertion_input["urn"]
|
|
728
|
+
_dataset_urn = assertion_input["dataset_urn"]
|
|
729
|
+
else:
|
|
730
|
+
assert assertion_input.urn is not None, "URN is required"
|
|
731
|
+
_urn = assertion_input.urn
|
|
732
|
+
_dataset_urn = assertion_input.dataset_urn
|
|
733
|
+
|
|
734
|
+
urn: AssertionUrn = (
|
|
735
|
+
_urn if isinstance(_urn, AssertionUrn) else AssertionUrn.from_string(_urn)
|
|
736
|
+
)
|
|
737
|
+
dataset_urn: DatasetUrn = (
|
|
738
|
+
_dataset_urn
|
|
739
|
+
if isinstance(_dataset_urn, DatasetUrn)
|
|
740
|
+
else DatasetUrn.from_string(_dataset_urn)
|
|
741
|
+
)
|
|
680
742
|
|
|
681
743
|
# Get assertion entity
|
|
682
744
|
maybe_assertion_entity: Optional[Assertion] = None
|
|
683
745
|
try:
|
|
684
|
-
entity = self.client.entities.get(
|
|
746
|
+
entity = self.client.entities.get(urn)
|
|
685
747
|
if entity is not None:
|
|
686
748
|
assert isinstance(entity, Assertion)
|
|
687
749
|
maybe_assertion_entity = entity
|
|
@@ -689,9 +751,7 @@ class AssertionsClient:
|
|
|
689
751
|
pass
|
|
690
752
|
|
|
691
753
|
# Get monitor entity
|
|
692
|
-
monitor_urn = Monitor._ensure_id(
|
|
693
|
-
id=(assertion_input.dataset_urn, assertion_input.urn)
|
|
694
|
-
)
|
|
754
|
+
monitor_urn = Monitor._ensure_id(id=(dataset_urn, urn))
|
|
695
755
|
maybe_monitor_entity: Optional[Monitor] = None
|
|
696
756
|
try:
|
|
697
757
|
entity = self.client.entities.get(monitor_urn)
|
|
@@ -967,7 +1027,7 @@ class AssertionsClient:
|
|
|
967
1027
|
maybe_monitor_entity: Optional[Monitor],
|
|
968
1028
|
existing_assertion: VolumeAssertion,
|
|
969
1029
|
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
970
|
-
|
|
1030
|
+
criteria: Optional[VolumeAssertionCriteria],
|
|
971
1031
|
) -> _VolumeAssertionInput:
|
|
972
1032
|
"""Merge the input with the existing assertion and monitor entities.
|
|
973
1033
|
|
|
@@ -1044,12 +1104,12 @@ class AssertionsClient:
|
|
|
1044
1104
|
existing_assertion,
|
|
1045
1105
|
maybe_assertion_entity.tags if maybe_assertion_entity else None,
|
|
1046
1106
|
),
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
"
|
|
1107
|
+
criteria=_merge_field(
|
|
1108
|
+
criteria,
|
|
1109
|
+
"criteria",
|
|
1050
1110
|
assertion_input,
|
|
1051
1111
|
existing_assertion,
|
|
1052
|
-
existing_assertion.
|
|
1112
|
+
existing_assertion.criteria if existing_assertion else None,
|
|
1053
1113
|
),
|
|
1054
1114
|
created_by=existing_assertion.created_by
|
|
1055
1115
|
or DEFAULT_CREATED_BY, # Override with the existing assertion's created_by or the default created_by if not set
|
|
@@ -1644,11 +1704,7 @@ class AssertionsClient:
|
|
|
1644
1704
|
tags: Optional[TagsInputType] = None,
|
|
1645
1705
|
created_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
1646
1706
|
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
1647
|
-
|
|
1648
|
-
criteria_change_type: Optional[
|
|
1649
|
-
Union[str, VolumeAssertionDefinitionChangeKind]
|
|
1650
|
-
] = None,
|
|
1651
|
-
criteria_operator: Union[str, VolumeAssertionOperator],
|
|
1707
|
+
criteria_condition: Union[str, VolumeAssertionCondition],
|
|
1652
1708
|
criteria_parameters: VolumeAssertionDefinitionParameters,
|
|
1653
1709
|
) -> VolumeAssertion:
|
|
1654
1710
|
"""Create a volume assertion.
|
|
@@ -1691,23 +1747,19 @@ class AssertionsClient:
|
|
|
1691
1747
|
The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone.
|
|
1692
1748
|
Alternatively, a models.CronScheduleClass object can be provided with string parameters
|
|
1693
1749
|
cron and timezone. Use `from datahub.metadata import schema_classes as models` to import the class.
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
-
|
|
1701
|
-
-
|
|
1702
|
-
-
|
|
1703
|
-
|
|
1704
|
-
criteria_parameters: The parameters for the assertion.
|
|
1705
|
-
(
|
|
1706
|
-
For
|
|
1707
|
-
|
|
1708
|
-
Examples:
|
|
1709
|
-
- For single value: 100 or 50.5
|
|
1710
|
-
- For BETWEEN: (10, 100) or (5.0, 15.5)
|
|
1750
|
+
criteria_condition: The condition for the volume assertion. Valid values are:
|
|
1751
|
+
- "ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO" -> The row count is less than or equal to the threshold.
|
|
1752
|
+
- "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO" -> The row count is greater than or equal to the threshold.
|
|
1753
|
+
- "ROW_COUNT_IS_WITHIN_A_RANGE" -> The row count is within the specified range.
|
|
1754
|
+
- "ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE" -> The row count growth is at most the threshold (absolute change).
|
|
1755
|
+
- "ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE" -> The row count growth is at least the threshold (absolute change).
|
|
1756
|
+
- "ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The row count growth is within the specified range (absolute change).
|
|
1757
|
+
- "ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE" -> The row count growth is at most the threshold (percentage change).
|
|
1758
|
+
- "ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE" -> The row count growth is at least the threshold (percentage change).
|
|
1759
|
+
- "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The row count growth is within the specified range (percentage change).
|
|
1760
|
+
criteria_parameters: The threshold parameters to be used for the assertion. This can be a single threshold value or a tuple range.
|
|
1761
|
+
- If the condition is range-based (ROW_COUNT_IS_WITHIN_A_RANGE, ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE, ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
|
|
1762
|
+
- For other conditions, the value is a single numeric threshold value.
|
|
1711
1763
|
|
|
1712
1764
|
Returns:
|
|
1713
1765
|
VolumeAssertion: The created assertion.
|
|
@@ -1720,15 +1772,12 @@ class AssertionsClient:
|
|
|
1720
1772
|
)
|
|
1721
1773
|
created_by = DEFAULT_CREATED_BY
|
|
1722
1774
|
|
|
1723
|
-
# Create
|
|
1775
|
+
# Create criteria from criteria_condition and parameters
|
|
1724
1776
|
# The dictionary object will be fully validated down in the _VolumeAssertionInput class
|
|
1725
|
-
|
|
1726
|
-
"
|
|
1727
|
-
"operator": criteria_operator,
|
|
1777
|
+
criteria: dict[str, Any] = {
|
|
1778
|
+
"condition": criteria_condition,
|
|
1728
1779
|
"parameters": criteria_parameters,
|
|
1729
1780
|
}
|
|
1730
|
-
if criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE:
|
|
1731
|
-
definition["kind"] = criteria_change_type
|
|
1732
1781
|
|
|
1733
1782
|
assertion_input = _VolumeAssertionInput(
|
|
1734
1783
|
urn=None,
|
|
@@ -1744,7 +1793,7 @@ class AssertionsClient:
|
|
|
1744
1793
|
updated_by=created_by,
|
|
1745
1794
|
updated_at=now_utc,
|
|
1746
1795
|
schedule=schedule,
|
|
1747
|
-
|
|
1796
|
+
criteria=criteria,
|
|
1748
1797
|
)
|
|
1749
1798
|
assertion_entity, monitor_entity = (
|
|
1750
1799
|
assertion_input.to_assertion_and_monitor_entities()
|
|
@@ -1766,9 +1815,7 @@ class AssertionsClient:
|
|
|
1766
1815
|
dataset_urn: Union[str, DatasetUrn],
|
|
1767
1816
|
display_name: Optional[str] = None,
|
|
1768
1817
|
enabled: bool = True,
|
|
1769
|
-
|
|
1770
|
-
criteria_change_type: Optional[Union[SqlAssertionChangeType, str]] = None,
|
|
1771
|
-
criteria_operator: Union[SqlAssertionOperator, str],
|
|
1818
|
+
criteria_condition: Union[SqlAssertionCondition, str],
|
|
1772
1819
|
criteria_parameters: Union[
|
|
1773
1820
|
Union[float, int], tuple[Union[float, int], Union[float, int]]
|
|
1774
1821
|
],
|
|
@@ -1785,23 +1832,21 @@ class AssertionsClient:
|
|
|
1785
1832
|
display_name: The display name of the assertion. If not provided, a random display
|
|
1786
1833
|
name will be generated.
|
|
1787
1834
|
enabled: Whether the assertion is enabled. Defaults to True.
|
|
1788
|
-
|
|
1789
|
-
- "
|
|
1790
|
-
- "
|
|
1791
|
-
|
|
1792
|
-
- "
|
|
1793
|
-
- "
|
|
1794
|
-
|
|
1795
|
-
- "
|
|
1796
|
-
- "
|
|
1797
|
-
- "
|
|
1798
|
-
- "
|
|
1799
|
-
- "
|
|
1800
|
-
|
|
1801
|
-
-
|
|
1802
|
-
|
|
1803
|
-
- If the operator is "BETWEEN", the value is a tuple of two values, with format min, max.
|
|
1804
|
-
- If the operator is not "BETWEEN", the value is a single value.
|
|
1835
|
+
criteria_condition: The condition for the sql assertion. Valid values are:
|
|
1836
|
+
- "IS_EQUAL_TO" -> The metric value equals the threshold.
|
|
1837
|
+
- "IS_NOT_EQUAL_TO" -> The metric value does not equal the threshold.
|
|
1838
|
+
- "IS_GREATER_THAN" -> The metric value is greater than the threshold.
|
|
1839
|
+
- "IS_LESS_THAN" -> The metric value is less than the threshold.
|
|
1840
|
+
- "IS_WITHIN_A_RANGE" -> The metric value is within the specified range.
|
|
1841
|
+
- "GROWS_AT_MOST_ABSOLUTE" -> The metric growth is at most the threshold (absolute change).
|
|
1842
|
+
- "GROWS_AT_MOST_PERCENTAGE" -> The metric growth is at most the threshold (percentage change).
|
|
1843
|
+
- "GROWS_AT_LEAST_ABSOLUTE" -> The metric growth is at least the threshold (absolute change).
|
|
1844
|
+
- "GROWS_AT_LEAST_PERCENTAGE" -> The metric growth is at least the threshold (percentage change).
|
|
1845
|
+
- "GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The metric growth is within the specified range (absolute change).
|
|
1846
|
+
- "GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The metric growth is within the specified range (percentage change).
|
|
1847
|
+
criteria_parameters: The threshold parameters to be used for the assertion. This can be a single threshold value or a tuple range.
|
|
1848
|
+
- If the condition is range-based (IS_WITHIN_A_RANGE, GROWS_WITHIN_A_RANGE_ABSOLUTE, GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
|
|
1849
|
+
- For other conditions, the value is a single numeric threshold value.
|
|
1805
1850
|
statement: The statement to be used for the assertion.
|
|
1806
1851
|
incident_behavior: The incident behavior to be applied to the assertion. Valid values are:
|
|
1807
1852
|
- "raise_on_fail" or AssertionIncidentBehavior.RAISE_ON_FAIL
|
|
@@ -1831,9 +1876,7 @@ class AssertionsClient:
|
|
|
1831
1876
|
)
|
|
1832
1877
|
created_by = DEFAULT_CREATED_BY
|
|
1833
1878
|
criteria = SqlAssertionCriteria(
|
|
1834
|
-
|
|
1835
|
-
change_type=criteria_change_type,
|
|
1836
|
-
operator=criteria_operator,
|
|
1879
|
+
condition=criteria_condition,
|
|
1837
1880
|
parameters=criteria_parameters,
|
|
1838
1881
|
)
|
|
1839
1882
|
assertion_input = _SqlAssertionInput(
|
|
@@ -1907,7 +1950,11 @@ class AssertionsClient:
|
|
|
1907
1950
|
- {"type": "query", "additional_filter": "value > 1000"} or DetectionMechanism.QUERY(additional_filter='value > 1000')
|
|
1908
1951
|
- "dataset_profile" or DetectionMechanism.DATASET_PROFILE
|
|
1909
1952
|
sensitivity (Optional[Union[str, InferenceSensitivity]]): The sensitivity to be applied to the assertion. Valid values are: "low", "medium", "high".
|
|
1910
|
-
exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported.
|
|
1953
|
+
exclusion_windows (Optional[ExclusionWindowInputTypes]): The exclusion windows to be applied to the assertion. Only fixed range exclusion windows are supported. Valid values are:
|
|
1954
|
+
- {"start": "2025-01-01T00:00:00", "end": "2025-01-02T00:00:00"} (using ISO strings)
|
|
1955
|
+
- {"start": datetime(2025, 1, 1, 0, 0, 0), "end": datetime(2025, 1, 2, 0, 0, 0)} (using datetime objects)
|
|
1956
|
+
- FixedRangeExclusionWindow(start=datetime(2025, 1, 1, 0, 0, 0), end=datetime(2025, 1, 2, 0, 0, 0)) (using typed object)
|
|
1957
|
+
- A list of any of the above formats
|
|
1911
1958
|
training_data_lookback_days (Optional[int]): The training data lookback days to be applied to the assertion as an integer.
|
|
1912
1959
|
incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
|
|
1913
1960
|
tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
|
|
@@ -2007,17 +2054,281 @@ class AssertionsClient:
|
|
|
2007
2054
|
|
|
2008
2055
|
return SmartVolumeAssertion._from_entities(assertion_entity, monitor_entity)
|
|
2009
2056
|
|
|
2057
|
+
def sync_column_metric_assertion( # noqa: C901 # TODO: Refactor
|
|
2058
|
+
self,
|
|
2059
|
+
*,
|
|
2060
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
2061
|
+
column_name: Optional[str] = None,
|
|
2062
|
+
metric_type: Optional[MetricInputType] = None,
|
|
2063
|
+
operator: Optional[OperatorInputType] = None,
|
|
2064
|
+
criteria_parameters: Optional[ColumnMetricAssertionParameters] = None,
|
|
2065
|
+
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
2066
|
+
display_name: Optional[str] = None,
|
|
2067
|
+
enabled: Optional[bool] = None,
|
|
2068
|
+
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
2069
|
+
incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
|
|
2070
|
+
tags: Optional[TagsInputType] = None,
|
|
2071
|
+
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
2072
|
+
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
2073
|
+
) -> ColumnMetricAssertion:
|
|
2074
|
+
"""Upsert and merge a column metric assertion.
|
|
2075
|
+
|
|
2076
|
+
Note:
|
|
2077
|
+
Keyword arguments are required.
|
|
2078
|
+
|
|
2079
|
+
Upsert and merge is a combination of create and update. If the assertion does not exist,
|
|
2080
|
+
it will be created. If it does exist, it will be updated.
|
|
2081
|
+
|
|
2082
|
+
Existing assertion fields will be updated if the input value is not None. If the input value is None, the existing value
|
|
2083
|
+
will be preserved. If the input value can be un-set (e.g. by passing an empty list or
|
|
2084
|
+
empty string), it will be unset.
|
|
2085
|
+
|
|
2086
|
+
Schedule behavior:
|
|
2087
|
+
- Create case: Uses default schedule of every 6 hours or provided schedule
|
|
2088
|
+
- Update case: Uses existing schedule or provided schedule.
|
|
2089
|
+
|
|
2090
|
+
Examples:
|
|
2091
|
+
# Using enum values (recommended for type safety)
|
|
2092
|
+
from acryl_datahub_cloud.sdk.assertion_input.column_metric_constants import MetricType, OperatorType
|
|
2093
|
+
client.sync_column_metric_assertion(
|
|
2094
|
+
dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
|
|
2095
|
+
column_name="user_id",
|
|
2096
|
+
metric_type=MetricType.NULL_COUNT,
|
|
2097
|
+
operator=OperatorType.GREATER_THAN,
|
|
2098
|
+
criteria_parameters=10
|
|
2099
|
+
)
|
|
2100
|
+
|
|
2101
|
+
# Using case-insensitive strings (more flexible)
|
|
2102
|
+
client.sync_column_metric_assertion(
|
|
2103
|
+
dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
|
|
2104
|
+
column_name="price",
|
|
2105
|
+
metric_type="mean",
|
|
2106
|
+
operator="between",
|
|
2107
|
+
criteria_parameters=(100.0, 500.0)
|
|
2108
|
+
)
|
|
2109
|
+
|
|
2110
|
+
Args:
|
|
2111
|
+
dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
|
|
2112
|
+
column_name (Optional[str]): The name of the column to be monitored. Required for creation, optional for updates.
|
|
2113
|
+
metric_type (Optional[MetricInputType]): The type of the metric to be monitored. Required for creation, optional for updates. Valid values are:
|
|
2114
|
+
- Using MetricType enum: MetricType.NULL_COUNT, MetricType.NULL_PERCENTAGE, MetricType.UNIQUE_COUNT,
|
|
2115
|
+
MetricType.UNIQUE_PERCENTAGE, MetricType.MAX_LENGTH, MetricType.MIN_LENGTH, MetricType.EMPTY_COUNT,
|
|
2116
|
+
MetricType.EMPTY_PERCENTAGE, MetricType.MIN, MetricType.MAX, MetricType.MEAN, MetricType.MEDIAN,
|
|
2117
|
+
MetricType.STDDEV, MetricType.NEGATIVE_COUNT, MetricType.NEGATIVE_PERCENTAGE, MetricType.ZERO_COUNT,
|
|
2118
|
+
MetricType.ZERO_PERCENTAGE
|
|
2119
|
+
- Using case-insensitive strings: "null_count", "MEAN", "Max_Length", etc.
|
|
2120
|
+
- Using models enum: models.FieldMetricTypeClass.NULL_COUNT, etc. (import with: from datahub.metadata import schema_classes as models)
|
|
2121
|
+
operator (Optional[OperatorInputType]): The operator to be used for the assertion. Required for creation, optional for updates. Valid values are:
|
|
2122
|
+
- Using OperatorType enum: OperatorType.EQUAL_TO, OperatorType.NOT_EQUAL_TO, OperatorType.GREATER_THAN,
|
|
2123
|
+
OperatorType.GREATER_THAN_OR_EQUAL_TO, OperatorType.LESS_THAN, OperatorType.LESS_THAN_OR_EQUAL_TO,
|
|
2124
|
+
OperatorType.BETWEEN, OperatorType.IN, OperatorType.NOT_IN, OperatorType.NULL, OperatorType.NOT_NULL,
|
|
2125
|
+
OperatorType.IS_TRUE, OperatorType.IS_FALSE, OperatorType.CONTAIN, OperatorType.END_WITH,
|
|
2126
|
+
OperatorType.START_WITH, OperatorType.REGEX_MATCH
|
|
2127
|
+
- Using case-insensitive strings: "equal_to", "not_equal_to", "greater_than", "greater_than_or_equal_to",
|
|
2128
|
+
"less_than", "less_than_or_equal_to", "between", "in", "not_in", "null", "not_null", "is_true",
|
|
2129
|
+
"is_false", "contain", "end_with", "start_with", "regex_match"
|
|
2130
|
+
- Using models enum: models.AssertionStdOperatorClass.EQUAL_TO, models.AssertionStdOperatorClass.GREATER_THAN, etc.
|
|
2131
|
+
criteria_parameters (Optional[ColumnMetricAssertionParameters]): The criteria parameters for the assertion. Required for creation (except for operators that don't need parameters), optional for updates.
|
|
2132
|
+
- Single value operators (EQUAL_TO, NOT_EQUAL_TO, GREATER_THAN, GREATER_THAN_OR_EQUAL_TO, LESS_THAN, LESS_THAN_OR_EQUAL_TO, CONTAIN, END_WITH, START_WITH, REGEX_MATCH): pass a single number or string
|
|
2133
|
+
- Range operators (BETWEEN): pass a tuple of two numbers (min_value, max_value)
|
|
2134
|
+
- List operators (IN, NOT_IN): pass a list of values
|
|
2135
|
+
- No parameter operators (NULL, NOT_NULL, IS_TRUE, IS_FALSE): pass None or omit this parameter
|
|
2136
|
+
urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
|
|
2137
|
+
display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
|
|
2138
|
+
enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved.
|
|
2139
|
+
detection_mechanism (DetectionMechanismInputTypes): The detection mechanism to be used for the assertion. Valid values are (additional_filter is optional):
|
|
2140
|
+
- "all_rows_query_datahub_dataset_profile" or DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE
|
|
2141
|
+
- "all_rows_query" or DetectionMechanism.ALL_ROWS_QUERY(), or with additional_filter: {"type": "all_rows_query", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.ALL_ROWS_QUERY(additional_filter='last_modified > 2021-01-01')
|
|
2142
|
+
- {"type": "changed_rows_query", "column_name": "last_modified", "additional_filter": "last_modified > '2021-01-01'"} or DetectionMechanism.CHANGED_ROWS_QUERY(column_name='last_modified', additional_filter='last_modified > 2021-01-01')
|
|
2143
|
+
incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
|
|
2144
|
+
tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
|
|
2145
|
+
updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
|
|
2146
|
+
schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule of every 6 hours will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
|
|
2147
|
+
|
|
2148
|
+
Returns:
|
|
2149
|
+
ColumnMetricAssertion: The created or updated assertion.
|
|
2150
|
+
"""
|
|
2151
|
+
now_utc = datetime.now(timezone.utc)
|
|
2152
|
+
gms_criteria_type_info = None
|
|
2153
|
+
|
|
2154
|
+
if updated_by is None:
|
|
2155
|
+
logger.warning(
|
|
2156
|
+
f"updated_by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
2157
|
+
)
|
|
2158
|
+
updated_by = DEFAULT_CREATED_BY
|
|
2159
|
+
|
|
2160
|
+
# 1. If urn is not set, create a new assertion
|
|
2161
|
+
if urn is None:
|
|
2162
|
+
self._validate_required_column_fields_for_creation(
|
|
2163
|
+
column_name, metric_type, operator
|
|
2164
|
+
)
|
|
2165
|
+
assert (
|
|
2166
|
+
column_name is not None
|
|
2167
|
+
and metric_type is not None
|
|
2168
|
+
and operator is not None
|
|
2169
|
+
), "Fields guaranteed non-None after validation"
|
|
2170
|
+
logger.info("URN is not set, creating a new assertion")
|
|
2171
|
+
return self._create_column_metric_assertion(
|
|
2172
|
+
dataset_urn=dataset_urn,
|
|
2173
|
+
column_name=column_name,
|
|
2174
|
+
metric_type=metric_type,
|
|
2175
|
+
operator=operator,
|
|
2176
|
+
criteria_parameters=criteria_parameters,
|
|
2177
|
+
display_name=display_name,
|
|
2178
|
+
enabled=enabled if enabled is not None else True,
|
|
2179
|
+
detection_mechanism=detection_mechanism,
|
|
2180
|
+
incident_behavior=incident_behavior,
|
|
2181
|
+
tags=tags,
|
|
2182
|
+
created_by=updated_by,
|
|
2183
|
+
schedule=schedule,
|
|
2184
|
+
)
|
|
2185
|
+
|
|
2186
|
+
# 2.1 If urn is set, fetch missing required parameters from backend if needed:
|
|
2187
|
+
# NOTE: This is a tactical solution. The problem is we fetch twice (once for validation,
|
|
2188
|
+
# once for merge). Strategic solution would be to merge first, then validate after,
|
|
2189
|
+
# but that requires heavy refactor and is skipped for now.
|
|
2190
|
+
if urn is not None and (
|
|
2191
|
+
column_name is None
|
|
2192
|
+
or metric_type is None
|
|
2193
|
+
or operator is None
|
|
2194
|
+
or criteria_parameters is None
|
|
2195
|
+
):
|
|
2196
|
+
# Fetch existing assertion to get missing required parameters
|
|
2197
|
+
maybe_assertion_entity, _, maybe_monitor_entity = (
|
|
2198
|
+
self._retrieve_assertion_and_monitor(
|
|
2199
|
+
{"dataset_urn": dataset_urn, "urn": urn}
|
|
2200
|
+
)
|
|
2201
|
+
)
|
|
2202
|
+
|
|
2203
|
+
if maybe_assertion_entity is not None:
|
|
2204
|
+
assertion_info = maybe_assertion_entity.info
|
|
2205
|
+
if (
|
|
2206
|
+
hasattr(assertion_info, "fieldMetricAssertion")
|
|
2207
|
+
and assertion_info.fieldMetricAssertion
|
|
2208
|
+
):
|
|
2209
|
+
field_metric_assertion = assertion_info.fieldMetricAssertion
|
|
2210
|
+
# Use existing values for missing required parameters
|
|
2211
|
+
if (
|
|
2212
|
+
column_name is None
|
|
2213
|
+
and hasattr(field_metric_assertion, "field")
|
|
2214
|
+
and hasattr(field_metric_assertion.field, "path")
|
|
2215
|
+
):
|
|
2216
|
+
column_name = field_metric_assertion.field.path
|
|
2217
|
+
if metric_type is None and hasattr(
|
|
2218
|
+
field_metric_assertion, "metric"
|
|
2219
|
+
):
|
|
2220
|
+
metric_type = field_metric_assertion.metric
|
|
2221
|
+
if operator is None and hasattr(field_metric_assertion, "operator"):
|
|
2222
|
+
operator = field_metric_assertion.operator
|
|
2223
|
+
if criteria_parameters is None and hasattr(
|
|
2224
|
+
field_metric_assertion, "parameters"
|
|
2225
|
+
):
|
|
2226
|
+
# Extract criteria_parameters from existing assertion
|
|
2227
|
+
# This logic should match the parameter extraction in the assertion input class
|
|
2228
|
+
params = field_metric_assertion.parameters
|
|
2229
|
+
if params and hasattr(params, "value") and params.value:
|
|
2230
|
+
criteria_parameters = params.value.value
|
|
2231
|
+
elif (
|
|
2232
|
+
params
|
|
2233
|
+
and hasattr(params, "minValue")
|
|
2234
|
+
and hasattr(params, "maxValue")
|
|
2235
|
+
and params.minValue
|
|
2236
|
+
and params.maxValue
|
|
2237
|
+
):
|
|
2238
|
+
criteria_parameters = (
|
|
2239
|
+
params.minValue.value,
|
|
2240
|
+
params.maxValue.value,
|
|
2241
|
+
)
|
|
2242
|
+
|
|
2243
|
+
# Extract gms_criteria_type_info to preserve original parameter types
|
|
2244
|
+
gms_criteria_type_info = (
|
|
2245
|
+
_HasColumnMetricFunctionality._get_criteria_parameters_with_type(
|
|
2246
|
+
maybe_assertion_entity
|
|
2247
|
+
)
|
|
2248
|
+
)
|
|
2249
|
+
|
|
2250
|
+
self._validate_required_column_fields_for_update(
|
|
2251
|
+
column_name, metric_type, operator, urn
|
|
2252
|
+
)
|
|
2253
|
+
assert (
|
|
2254
|
+
column_name is not None
|
|
2255
|
+
and metric_type is not None
|
|
2256
|
+
and operator is not None
|
|
2257
|
+
), "Fields guaranteed non-None after validation"
|
|
2258
|
+
|
|
2259
|
+
# 2.2 Now validate the input with all required parameters:
|
|
2260
|
+
assertion_input = _ColumnMetricAssertionInput(
|
|
2261
|
+
urn=urn,
|
|
2262
|
+
entity_client=self.client.entities,
|
|
2263
|
+
dataset_urn=dataset_urn,
|
|
2264
|
+
column_name=column_name,
|
|
2265
|
+
metric_type=metric_type,
|
|
2266
|
+
operator=operator,
|
|
2267
|
+
criteria_parameters=criteria_parameters,
|
|
2268
|
+
display_name=display_name,
|
|
2269
|
+
detection_mechanism=detection_mechanism,
|
|
2270
|
+
incident_behavior=incident_behavior,
|
|
2271
|
+
tags=tags,
|
|
2272
|
+
created_by=updated_by, # This will be overridden by the actual created_by
|
|
2273
|
+
created_at=now_utc, # This will be overridden by the actual created_at
|
|
2274
|
+
updated_by=updated_by,
|
|
2275
|
+
updated_at=now_utc,
|
|
2276
|
+
schedule=schedule,
|
|
2277
|
+
gms_criteria_type_info=gms_criteria_type_info,
|
|
2278
|
+
)
|
|
2279
|
+
|
|
2280
|
+
# 3. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
|
|
2281
|
+
# if the assertion does not exist:
|
|
2282
|
+
merged_assertion_input_or_created_assertion = (
|
|
2283
|
+
self._retrieve_and_merge_column_metric_assertion_and_monitor(
|
|
2284
|
+
assertion_input=assertion_input,
|
|
2285
|
+
dataset_urn=dataset_urn,
|
|
2286
|
+
column_name=column_name,
|
|
2287
|
+
metric_type=metric_type,
|
|
2288
|
+
operator=operator,
|
|
2289
|
+
criteria_parameters=criteria_parameters,
|
|
2290
|
+
urn=urn,
|
|
2291
|
+
display_name=display_name,
|
|
2292
|
+
enabled=enabled,
|
|
2293
|
+
detection_mechanism=detection_mechanism,
|
|
2294
|
+
incident_behavior=incident_behavior,
|
|
2295
|
+
tags=tags,
|
|
2296
|
+
updated_by=updated_by,
|
|
2297
|
+
now_utc=now_utc,
|
|
2298
|
+
schedule=schedule,
|
|
2299
|
+
)
|
|
2300
|
+
)
|
|
2301
|
+
|
|
2302
|
+
# Return early if we created a new assertion in the merge:
|
|
2303
|
+
if isinstance(merged_assertion_input_or_created_assertion, _AssertionPublic):
|
|
2304
|
+
# We know this is the correct type because we passed the assertion_class parameter
|
|
2305
|
+
assert isinstance(
|
|
2306
|
+
merged_assertion_input_or_created_assertion, ColumnMetricAssertion
|
|
2307
|
+
)
|
|
2308
|
+
return merged_assertion_input_or_created_assertion
|
|
2309
|
+
|
|
2310
|
+
# 4. Upsert the assertion and monitor entities:
|
|
2311
|
+
assertion_entity, monitor_entity = (
|
|
2312
|
+
merged_assertion_input_or_created_assertion.to_assertion_and_monitor_entities()
|
|
2313
|
+
)
|
|
2314
|
+
# If assertion upsert fails, we won't try to upsert the monitor
|
|
2315
|
+
self.client.entities.upsert(assertion_entity)
|
|
2316
|
+
# TODO: Wrap monitor upsert in a try-except and delete the assertion if monitor upsert fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
2317
|
+
# try:
|
|
2318
|
+
self.client.entities.upsert(monitor_entity)
|
|
2319
|
+
# except Exception as e:
|
|
2320
|
+
# logger.error(f"Error upserting monitor: {e}")
|
|
2321
|
+
# self.client.entities.delete(assertion_entity)
|
|
2322
|
+
# raise e
|
|
2323
|
+
|
|
2324
|
+
return ColumnMetricAssertion._from_entities(assertion_entity, monitor_entity)
|
|
2325
|
+
|
|
2010
2326
|
def sync_smart_column_metric_assertion(
|
|
2011
2327
|
self,
|
|
2012
2328
|
*,
|
|
2013
2329
|
dataset_urn: Union[str, DatasetUrn],
|
|
2014
|
-
column_name: str,
|
|
2015
|
-
metric_type: MetricInputType,
|
|
2016
|
-
operator: OperatorInputType,
|
|
2017
|
-
value: Optional[ValueInputType] = None,
|
|
2018
|
-
value_type: Optional[ValueTypeInputType] = None,
|
|
2019
|
-
range: Optional[RangeInputType] = None,
|
|
2020
|
-
range_type: Optional[RangeTypeInputType] = None,
|
|
2330
|
+
column_name: Optional[str] = None,
|
|
2331
|
+
metric_type: Optional[MetricInputType] = None,
|
|
2021
2332
|
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
2022
2333
|
display_name: Optional[str] = None,
|
|
2023
2334
|
enabled: Optional[bool] = None,
|
|
@@ -2046,15 +2357,32 @@ class AssertionsClient:
|
|
|
2046
2357
|
- Create case: Uses default schedule of every 6 hours or provided schedule
|
|
2047
2358
|
- Update case: Uses existing schedule or provided schedule.
|
|
2048
2359
|
|
|
2360
|
+
Examples:
|
|
2361
|
+
# Using enum values (recommended for type safety)
|
|
2362
|
+
client.sync_smart_column_metric_assertion(
|
|
2363
|
+
dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
|
|
2364
|
+
column_name="user_id",
|
|
2365
|
+
metric_type=MetricType.NULL_COUNT
|
|
2366
|
+
)
|
|
2367
|
+
|
|
2368
|
+
# Using case-insensitive strings (more flexible)
|
|
2369
|
+
client.sync_smart_column_metric_assertion(
|
|
2370
|
+
dataset_urn="urn:li:dataset:(urn:li:dataPlatform:snowflake,database.schema.table,PROD)",
|
|
2371
|
+
column_name="price",
|
|
2372
|
+
metric_type="mean"
|
|
2373
|
+
)
|
|
2374
|
+
|
|
2049
2375
|
Args:
|
|
2050
2376
|
dataset_urn (Union[str, DatasetUrn]): The urn of the dataset to be monitored.
|
|
2051
|
-
column_name (str): The name of the column to be monitored.
|
|
2052
|
-
metric_type (MetricInputType): The type of the metric to be monitored.
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2377
|
+
column_name (Optional[str]): The name of the column to be monitored. Required for creation, optional for updates.
|
|
2378
|
+
metric_type (Optional[MetricInputType]): The type of the metric to be monitored. Required for creation, optional for updates. Valid values are:
|
|
2379
|
+
- Using MetricType enum: MetricType.NULL_COUNT, MetricType.NULL_PERCENTAGE, MetricType.UNIQUE_COUNT,
|
|
2380
|
+
MetricType.UNIQUE_PERCENTAGE, MetricType.MAX_LENGTH, MetricType.MIN_LENGTH, MetricType.EMPTY_COUNT,
|
|
2381
|
+
MetricType.EMPTY_PERCENTAGE, MetricType.MIN, MetricType.MAX, MetricType.MEAN, MetricType.MEDIAN,
|
|
2382
|
+
MetricType.STDDEV, MetricType.NEGATIVE_COUNT, MetricType.NEGATIVE_PERCENTAGE, MetricType.ZERO_COUNT,
|
|
2383
|
+
MetricType.ZERO_PERCENTAGE
|
|
2384
|
+
- Using case-insensitive strings: "null_count", "MEAN", "Max_Length", etc.
|
|
2385
|
+
- Using models enum: models.FieldMetricTypeClass.NULL_COUNT, etc. (import with: from datahub.metadata import schema_classes as models)
|
|
2058
2386
|
urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
|
|
2059
2387
|
display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
|
|
2060
2388
|
enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved.
|
|
@@ -2084,16 +2412,17 @@ class AssertionsClient:
|
|
|
2084
2412
|
|
|
2085
2413
|
# 1. If urn is not set, create a new assertion
|
|
2086
2414
|
if urn is None:
|
|
2415
|
+
self._validate_required_smart_column_fields_for_creation(
|
|
2416
|
+
column_name, metric_type
|
|
2417
|
+
)
|
|
2418
|
+
assert column_name is not None and metric_type is not None, (
|
|
2419
|
+
"Fields guaranteed non-None after validation"
|
|
2420
|
+
)
|
|
2087
2421
|
logger.info("URN is not set, creating a new assertion")
|
|
2088
2422
|
return self._create_smart_column_metric_assertion(
|
|
2089
2423
|
dataset_urn=dataset_urn,
|
|
2090
2424
|
column_name=column_name,
|
|
2091
2425
|
metric_type=metric_type,
|
|
2092
|
-
operator=operator,
|
|
2093
|
-
value=value,
|
|
2094
|
-
value_type=value_type,
|
|
2095
|
-
range=range,
|
|
2096
|
-
range_type=range_type,
|
|
2097
2426
|
display_name=display_name,
|
|
2098
2427
|
enabled=enabled if enabled is not None else True,
|
|
2099
2428
|
detection_mechanism=detection_mechanism,
|
|
@@ -2106,18 +2435,55 @@ class AssertionsClient:
|
|
|
2106
2435
|
schedule=schedule,
|
|
2107
2436
|
)
|
|
2108
2437
|
|
|
2109
|
-
# 2. If urn is set,
|
|
2438
|
+
# 2.1 If urn is set, fetch missing required parameters from backend if needed:
|
|
2439
|
+
# NOTE: This is a tactical solution. The problem is we fetch twice (once for validation,
|
|
2440
|
+
# once for merge). Strategic solution would be to merge first, then validate after,
|
|
2441
|
+
# but that requires heavy refactor and is skipped for now.
|
|
2442
|
+
if urn is not None and (column_name is None or metric_type is None):
|
|
2443
|
+
# Fetch existing assertion to get missing required parameters
|
|
2444
|
+
maybe_assertion_entity, _, maybe_monitor_entity = (
|
|
2445
|
+
self._retrieve_assertion_and_monitor(
|
|
2446
|
+
{"dataset_urn": dataset_urn, "urn": urn}
|
|
2447
|
+
)
|
|
2448
|
+
)
|
|
2449
|
+
|
|
2450
|
+
if maybe_assertion_entity is not None:
|
|
2451
|
+
assertion_info = maybe_assertion_entity.info
|
|
2452
|
+
if (
|
|
2453
|
+
hasattr(assertion_info, "fieldMetricAssertion")
|
|
2454
|
+
and assertion_info.fieldMetricAssertion
|
|
2455
|
+
):
|
|
2456
|
+
field_metric_assertion = assertion_info.fieldMetricAssertion
|
|
2457
|
+
# Use existing values for missing required parameters
|
|
2458
|
+
if (
|
|
2459
|
+
column_name is None
|
|
2460
|
+
and hasattr(field_metric_assertion, "field")
|
|
2461
|
+
and hasattr(field_metric_assertion.field, "path")
|
|
2462
|
+
):
|
|
2463
|
+
column_name = field_metric_assertion.field.path
|
|
2464
|
+
if metric_type is None and hasattr(
|
|
2465
|
+
field_metric_assertion, "metric"
|
|
2466
|
+
):
|
|
2467
|
+
metric_type = field_metric_assertion.metric
|
|
2468
|
+
# Smart assertions always use BETWEEN operator - no need to fetch from existing assertion
|
|
2469
|
+
|
|
2470
|
+
self._validate_required_smart_column_fields_for_update(
|
|
2471
|
+
column_name, metric_type, urn
|
|
2472
|
+
)
|
|
2473
|
+
assert column_name is not None and metric_type is not None, (
|
|
2474
|
+
"Fields guaranteed non-None after validation"
|
|
2475
|
+
)
|
|
2476
|
+
|
|
2477
|
+
# 2.1.1 Validate criteria_parameters for creation scenario
|
|
2478
|
+
self._validate_criteria_parameters_for_creation(urn)
|
|
2479
|
+
|
|
2480
|
+
# 2.2 Now validate the input with all required parameters:
|
|
2110
2481
|
assertion_input = _SmartColumnMetricAssertionInput(
|
|
2111
2482
|
urn=urn,
|
|
2112
2483
|
entity_client=self.client.entities,
|
|
2113
2484
|
dataset_urn=dataset_urn,
|
|
2114
2485
|
column_name=column_name,
|
|
2115
2486
|
metric_type=metric_type,
|
|
2116
|
-
operator=operator,
|
|
2117
|
-
value=value,
|
|
2118
|
-
value_type=value_type,
|
|
2119
|
-
range=range,
|
|
2120
|
-
range_type=range_type,
|
|
2121
2487
|
display_name=display_name,
|
|
2122
2488
|
detection_mechanism=detection_mechanism,
|
|
2123
2489
|
sensitivity=sensitivity,
|
|
@@ -2140,11 +2506,6 @@ class AssertionsClient:
|
|
|
2140
2506
|
dataset_urn=dataset_urn,
|
|
2141
2507
|
column_name=column_name,
|
|
2142
2508
|
metric_type=metric_type,
|
|
2143
|
-
operator=operator,
|
|
2144
|
-
value=value,
|
|
2145
|
-
value_type=value_type,
|
|
2146
|
-
range=range,
|
|
2147
|
-
range_type=range_type,
|
|
2148
2509
|
urn=urn,
|
|
2149
2510
|
display_name=display_name,
|
|
2150
2511
|
enabled=enabled,
|
|
@@ -2192,11 +2553,6 @@ class AssertionsClient:
|
|
|
2192
2553
|
dataset_urn: Union[str, DatasetUrn],
|
|
2193
2554
|
column_name: str,
|
|
2194
2555
|
metric_type: MetricInputType,
|
|
2195
|
-
operator: OperatorInputType,
|
|
2196
|
-
value: Optional[ValueInputType] = None,
|
|
2197
|
-
value_type: Optional[ValueTypeInputType] = None,
|
|
2198
|
-
range: Optional[RangeInputType] = None,
|
|
2199
|
-
range_type: Optional[RangeTypeInputType] = None,
|
|
2200
2556
|
display_name: Optional[str] = None,
|
|
2201
2557
|
enabled: bool = True,
|
|
2202
2558
|
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
@@ -2216,11 +2572,6 @@ class AssertionsClient:
|
|
|
2216
2572
|
dataset_urn: The urn of the dataset to be monitored. (Required)
|
|
2217
2573
|
column_name: The name of the column to be monitored. (Required)
|
|
2218
2574
|
metric_type: The type of the metric to be monitored. (Required)
|
|
2219
|
-
operator: The operator to be used for the assertion. (Required)
|
|
2220
|
-
value: The value to be used for the assertion. (Required if operator requires a value)
|
|
2221
|
-
value_type: The type of the value to be used for the assertion. (Required if operator requires a value)
|
|
2222
|
-
range: The range to be used for the assertion. (Required if operator requires a range)
|
|
2223
|
-
range_type: The type of the range to be used for the assertion. (Required if operator requires a range)
|
|
2224
2575
|
display_name: The display name of the assertion. If not provided, a random display
|
|
2225
2576
|
name will be generated.
|
|
2226
2577
|
enabled: Whether the assertion is enabled. Defaults to True.
|
|
@@ -2296,11 +2647,6 @@ class AssertionsClient:
|
|
|
2296
2647
|
dataset_urn=dataset_urn,
|
|
2297
2648
|
column_name=column_name,
|
|
2298
2649
|
metric_type=metric_type,
|
|
2299
|
-
operator=operator,
|
|
2300
|
-
value=value,
|
|
2301
|
-
value_type=value_type,
|
|
2302
|
-
range=range,
|
|
2303
|
-
range_type=range_type,
|
|
2304
2650
|
display_name=display_name,
|
|
2305
2651
|
enabled=enabled,
|
|
2306
2652
|
detection_mechanism=detection_mechanism,
|
|
@@ -2337,11 +2683,6 @@ class AssertionsClient:
|
|
|
2337
2683
|
dataset_urn: Union[str, DatasetUrn],
|
|
2338
2684
|
column_name: str,
|
|
2339
2685
|
metric_type: MetricInputType,
|
|
2340
|
-
operator: OperatorInputType,
|
|
2341
|
-
value: Optional[ValueInputType],
|
|
2342
|
-
value_type: Optional[ValueTypeInputType],
|
|
2343
|
-
range: Optional[RangeInputType],
|
|
2344
|
-
range_type: Optional[RangeTypeInputType],
|
|
2345
2686
|
urn: Union[str, AssertionUrn],
|
|
2346
2687
|
display_name: Optional[str],
|
|
2347
2688
|
enabled: Optional[bool],
|
|
@@ -2383,11 +2724,6 @@ class AssertionsClient:
|
|
|
2383
2724
|
dataset_urn=dataset_urn,
|
|
2384
2725
|
column_name=column_name,
|
|
2385
2726
|
metric_type=metric_type,
|
|
2386
|
-
operator=operator,
|
|
2387
|
-
value=value,
|
|
2388
|
-
value_type=value_type,
|
|
2389
|
-
range=range,
|
|
2390
|
-
range_type=range_type,
|
|
2391
2727
|
schedule=schedule,
|
|
2392
2728
|
display_name=display_name,
|
|
2393
2729
|
detection_mechanism=detection_mechanism,
|
|
@@ -2409,16 +2745,14 @@ class AssertionsClient:
|
|
|
2409
2745
|
f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
|
|
2410
2746
|
)
|
|
2411
2747
|
|
|
2412
|
-
# 4.
|
|
2748
|
+
# 4. Smart assertions always use fixed criteria_parameters (0, 0) and BETWEEN operator
|
|
2749
|
+
# No GMS type info needed since values are fixed
|
|
2750
|
+
|
|
2751
|
+
# 5. Merge the existing assertion with the validated input:
|
|
2413
2752
|
merged_assertion_input = self._merge_smart_column_metric_input(
|
|
2414
2753
|
dataset_urn=dataset_urn,
|
|
2415
2754
|
column_name=column_name,
|
|
2416
2755
|
metric_type=metric_type,
|
|
2417
|
-
operator=operator,
|
|
2418
|
-
value=value,
|
|
2419
|
-
value_type=value_type,
|
|
2420
|
-
range=range,
|
|
2421
|
-
range_type=range_type,
|
|
2422
2756
|
urn=urn,
|
|
2423
2757
|
display_name=display_name,
|
|
2424
2758
|
enabled=enabled,
|
|
@@ -2443,11 +2777,6 @@ class AssertionsClient:
|
|
|
2443
2777
|
dataset_urn: Union[str, DatasetUrn],
|
|
2444
2778
|
column_name: str,
|
|
2445
2779
|
metric_type: MetricInputType,
|
|
2446
|
-
operator: OperatorInputType,
|
|
2447
|
-
value: Optional[ValueInputType],
|
|
2448
|
-
value_type: Optional[ValueTypeInputType],
|
|
2449
|
-
range: Optional[RangeInputType],
|
|
2450
|
-
range_type: Optional[RangeTypeInputType],
|
|
2451
2780
|
urn: Union[str, AssertionUrn],
|
|
2452
2781
|
display_name: Optional[str],
|
|
2453
2782
|
enabled: Optional[bool],
|
|
@@ -2470,11 +2799,6 @@ class AssertionsClient:
|
|
|
2470
2799
|
dataset_urn: The urn of the dataset to be monitored.
|
|
2471
2800
|
column_name: The name of the column to be monitored.
|
|
2472
2801
|
metric_type: The type of the metric to be monitored.
|
|
2473
|
-
operator: The operator to be used for the assertion.
|
|
2474
|
-
value: The value to be used for the assertion.
|
|
2475
|
-
value_type: The type of the value to be used for the assertion.
|
|
2476
|
-
range: The range to be used for the assertion.
|
|
2477
|
-
range_type: The type of the range to be used for the assertion.
|
|
2478
2802
|
urn: The urn of the assertion.
|
|
2479
2803
|
display_name: The display name of the assertion.
|
|
2480
2804
|
enabled: Whether the assertion is enabled.
|
|
@@ -2519,61 +2843,6 @@ class AssertionsClient:
|
|
|
2519
2843
|
if maybe_assertion_entity
|
|
2520
2844
|
else None,
|
|
2521
2845
|
),
|
|
2522
|
-
operator=_merge_field(
|
|
2523
|
-
input_field_value=operator,
|
|
2524
|
-
input_field_name="operator",
|
|
2525
|
-
validated_assertion_input=assertion_input,
|
|
2526
|
-
validated_existing_assertion=existing_assertion,
|
|
2527
|
-
existing_entity_value=SmartColumnMetricAssertion._get_operator(
|
|
2528
|
-
maybe_assertion_entity
|
|
2529
|
-
)
|
|
2530
|
-
if maybe_assertion_entity
|
|
2531
|
-
else None,
|
|
2532
|
-
),
|
|
2533
|
-
value=_merge_field(
|
|
2534
|
-
input_field_value=value,
|
|
2535
|
-
input_field_name="value",
|
|
2536
|
-
validated_assertion_input=assertion_input,
|
|
2537
|
-
validated_existing_assertion=existing_assertion,
|
|
2538
|
-
existing_entity_value=SmartColumnMetricAssertion._get_value(
|
|
2539
|
-
maybe_assertion_entity
|
|
2540
|
-
)
|
|
2541
|
-
if maybe_assertion_entity
|
|
2542
|
-
else None,
|
|
2543
|
-
),
|
|
2544
|
-
value_type=_merge_field(
|
|
2545
|
-
input_field_value=value_type,
|
|
2546
|
-
input_field_name="value_type",
|
|
2547
|
-
validated_assertion_input=assertion_input,
|
|
2548
|
-
validated_existing_assertion=existing_assertion,
|
|
2549
|
-
existing_entity_value=SmartColumnMetricAssertion._get_value_type(
|
|
2550
|
-
maybe_assertion_entity
|
|
2551
|
-
)
|
|
2552
|
-
if maybe_assertion_entity
|
|
2553
|
-
else None,
|
|
2554
|
-
),
|
|
2555
|
-
range=_merge_field(
|
|
2556
|
-
input_field_value=range,
|
|
2557
|
-
input_field_name="range",
|
|
2558
|
-
validated_assertion_input=assertion_input,
|
|
2559
|
-
validated_existing_assertion=existing_assertion,
|
|
2560
|
-
existing_entity_value=SmartColumnMetricAssertion._get_range(
|
|
2561
|
-
maybe_assertion_entity
|
|
2562
|
-
)
|
|
2563
|
-
if maybe_assertion_entity
|
|
2564
|
-
else None,
|
|
2565
|
-
),
|
|
2566
|
-
range_type=_merge_field(
|
|
2567
|
-
input_field_value=range_type,
|
|
2568
|
-
input_field_name="range_type",
|
|
2569
|
-
validated_assertion_input=assertion_input,
|
|
2570
|
-
validated_existing_assertion=existing_assertion,
|
|
2571
|
-
existing_entity_value=SmartColumnMetricAssertion._get_range_type(
|
|
2572
|
-
maybe_assertion_entity
|
|
2573
|
-
)
|
|
2574
|
-
if maybe_assertion_entity
|
|
2575
|
-
else None,
|
|
2576
|
-
),
|
|
2577
2846
|
display_name=_merge_field(
|
|
2578
2847
|
input_field_value=display_name,
|
|
2579
2848
|
input_field_name="display_name",
|
|
@@ -2681,7 +2950,11 @@ class AssertionsClient:
|
|
|
2681
2950
|
tags: Optional[TagsInputType] = None,
|
|
2682
2951
|
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
2683
2952
|
freshness_schedule_check_type: Optional[
|
|
2684
|
-
Union[
|
|
2953
|
+
Union[
|
|
2954
|
+
str,
|
|
2955
|
+
FreshnessAssertionScheduleCheckType,
|
|
2956
|
+
models.FreshnessAssertionScheduleTypeClass,
|
|
2957
|
+
]
|
|
2685
2958
|
] = None,
|
|
2686
2959
|
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
2687
2960
|
lookback_window: Optional[TimeWindowSizeInputTypes] = None,
|
|
@@ -2715,9 +2988,16 @@ class AssertionsClient:
|
|
|
2715
2988
|
incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
|
|
2716
2989
|
tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
|
|
2717
2990
|
updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
|
|
2718
|
-
freshness_schedule_check_type (Optional[Union[str, models.FreshnessAssertionScheduleTypeClass]]): The freshness schedule check type to be applied to the assertion. Valid values are: "since_the_last_check", "
|
|
2991
|
+
freshness_schedule_check_type (Optional[Union[str, FreshnessAssertionScheduleCheckType, models.FreshnessAssertionScheduleTypeClass]]): The freshness schedule check type to be applied to the assertion. Valid values are: "since_the_last_check", "fixed_interval".
|
|
2719
2992
|
schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
|
|
2720
|
-
lookback_window (Optional[TimeWindowSizeInputTypes]): The lookback window to be applied to the assertion.
|
|
2993
|
+
lookback_window (Optional[TimeWindowSizeInputTypes]): The lookback window to be applied to the assertion. Valid values are:
|
|
2994
|
+
- TimeWindowSize(unit=CalendarInterval.MINUTE, multiple=10) for 10 minutes
|
|
2995
|
+
- TimeWindowSize(unit=CalendarInterval.HOUR, multiple=2) for 2 hours
|
|
2996
|
+
- TimeWindowSize(unit=CalendarInterval.DAY, multiple=1) for 1 day
|
|
2997
|
+
- {"unit": "MINUTE", "multiple": 30} for 30 minutes (using dict)
|
|
2998
|
+
- {"unit": "HOUR", "multiple": 6} for 6 hours (using dict)
|
|
2999
|
+
- {"unit": "DAY", "multiple": 7} for 7 days (using dict)
|
|
3000
|
+
Valid values for CalendarInterval are: "MINUTE", "HOUR", "DAY" and for multiple, the integer number of units.
|
|
2721
3001
|
|
|
2722
3002
|
Returns:
|
|
2723
3003
|
FreshnessAssertion: The created or updated assertion.
|
|
@@ -2821,11 +3101,7 @@ class AssertionsClient:
|
|
|
2821
3101
|
tags: Optional[TagsInputType] = None,
|
|
2822
3102
|
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
2823
3103
|
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
2824
|
-
|
|
2825
|
-
criteria_change_type: Optional[
|
|
2826
|
-
Union[str, VolumeAssertionDefinitionChangeKind]
|
|
2827
|
-
] = None,
|
|
2828
|
-
criteria_operator: Optional[Union[str, VolumeAssertionOperator]] = None,
|
|
3104
|
+
criteria_condition: Optional[Union[str, VolumeAssertionCondition]] = None,
|
|
2829
3105
|
criteria_parameters: Optional[VolumeAssertionDefinitionParameters] = None,
|
|
2830
3106
|
) -> VolumeAssertion:
|
|
2831
3107
|
"""Upsert and merge a volume assertion.
|
|
@@ -2856,10 +3132,21 @@ class AssertionsClient:
|
|
|
2856
3132
|
tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
|
|
2857
3133
|
updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
|
|
2858
3134
|
schedule (Optional[Union[str, models.CronScheduleClass]]): Optional cron formatted schedule for the assertion. If not provided, a default schedule will be used. The format is a cron expression, e.g. "0 * * * *" for every hour using UTC timezone. Alternatively, a models.CronScheduleClass object can be provided.
|
|
2859
|
-
|
|
2860
|
-
|
|
2861
|
-
|
|
2862
|
-
|
|
3135
|
+
criteria_condition (Optional[Union[str, VolumeAssertionCondition]]): Optional condition for the volume assertion. Valid values are:
|
|
3136
|
+
- "ROW_COUNT_IS_LESS_THAN_OR_EQUAL_TO" -> The row count is less than or equal to the threshold.
|
|
3137
|
+
- "ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO" -> The row count is greater than or equal to the threshold.
|
|
3138
|
+
- "ROW_COUNT_IS_WITHIN_A_RANGE" -> The row count is within the specified range.
|
|
3139
|
+
- "ROW_COUNT_GROWS_BY_AT_MOST_ABSOLUTE" -> The row count growth is at most the threshold (absolute change).
|
|
3140
|
+
- "ROW_COUNT_GROWS_BY_AT_LEAST_ABSOLUTE" -> The row count growth is at least the threshold (absolute change).
|
|
3141
|
+
- "ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The row count growth is within the specified range (absolute change).
|
|
3142
|
+
- "ROW_COUNT_GROWS_BY_AT_MOST_PERCENTAGE" -> The row count growth is at most the threshold (percentage change).
|
|
3143
|
+
- "ROW_COUNT_GROWS_BY_AT_LEAST_PERCENTAGE" -> The row count growth is at least the threshold (percentage change).
|
|
3144
|
+
- "ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The row count growth is within the specified range (percentage change).
|
|
3145
|
+
If not provided, the existing definition from the backend will be preserved (for update operations). Required when creating a new assertion (when urn is None).
|
|
3146
|
+
criteria_parameters (Optional[VolumeAssertionDefinitionParameters]): Optional threshold parameters to be used for the assertion. This can be a single threshold value or a tuple range.
|
|
3147
|
+
- If the condition is range-based (ROW_COUNT_IS_WITHIN_A_RANGE, ROW_COUNT_GROWS_WITHIN_A_RANGE_ABSOLUTE, ROW_COUNT_GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
|
|
3148
|
+
- For other conditions, the value is a single numeric threshold value.
|
|
3149
|
+
If not provided, existing value is preserved for updates. Required when creating a new assertion.
|
|
2863
3150
|
|
|
2864
3151
|
Returns:
|
|
2865
3152
|
VolumeAssertion: The created or updated assertion.
|
|
@@ -2874,44 +3161,26 @@ class AssertionsClient:
|
|
|
2874
3161
|
updated_by = DEFAULT_CREATED_BY
|
|
2875
3162
|
|
|
2876
3163
|
# 1. Validate criteria parameters if any are provided
|
|
2877
|
-
if (
|
|
2878
|
-
|
|
2879
|
-
or criteria_operator is not None
|
|
2880
|
-
or criteria_parameters is not None
|
|
2881
|
-
) and (
|
|
2882
|
-
criteria_type is None
|
|
2883
|
-
or criteria_operator is None
|
|
2884
|
-
or criteria_parameters is None
|
|
2885
|
-
or (
|
|
2886
|
-
criteria_type == VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
|
|
2887
|
-
and criteria_change_type is None
|
|
2888
|
-
)
|
|
3164
|
+
if (criteria_condition is not None or criteria_parameters is not None) and (
|
|
3165
|
+
criteria_condition is None or criteria_parameters is None
|
|
2889
3166
|
):
|
|
2890
3167
|
raise SDKUsageError(
|
|
2891
|
-
"When providing volume assertion criteria,
|
|
2892
|
-
"(criteria_type, criteria_operator, criteria_parameters must be provided, "
|
|
2893
|
-
"and criteria_change_type is required when criteria_type is 'row_count_change')"
|
|
3168
|
+
"When providing volume assertion criteria, both criteria_condition and criteria_parameters must be provided"
|
|
2894
3169
|
)
|
|
2895
3170
|
|
|
2896
|
-
# Assert the invariant: if
|
|
2897
|
-
assert
|
|
2898
|
-
|
|
2899
|
-
|
|
2900
|
-
and (
|
|
2901
|
-
criteria_type != VolumeAssertionDefinitionType.ROW_COUNT_CHANGE
|
|
2902
|
-
or criteria_change_type is not None
|
|
2903
|
-
)
|
|
2904
|
-
), "criteria fields already validated"
|
|
3171
|
+
# Assert the invariant: if criteria_condition is provided, criteria_parameters is also provided
|
|
3172
|
+
assert criteria_condition is None or criteria_parameters is not None, (
|
|
3173
|
+
"criteria fields already validated"
|
|
3174
|
+
)
|
|
2905
3175
|
|
|
2906
3176
|
# 2. If urn is not set, create a new assertion
|
|
2907
3177
|
if urn is None:
|
|
2908
|
-
if
|
|
3178
|
+
if criteria_condition is None:
|
|
2909
3179
|
raise SDKUsageError(
|
|
2910
3180
|
"Volume assertion criteria are required when creating a new assertion"
|
|
2911
3181
|
)
|
|
2912
3182
|
logger.info("URN is not set, creating a new assertion")
|
|
2913
3183
|
# Type narrowing: we know these are not None because of validation above
|
|
2914
|
-
assert criteria_operator is not None
|
|
2915
3184
|
assert criteria_parameters is not None
|
|
2916
3185
|
return self._create_volume_assertion(
|
|
2917
3186
|
dataset_urn=dataset_urn,
|
|
@@ -2922,9 +3191,7 @@ class AssertionsClient:
|
|
|
2922
3191
|
tags=tags,
|
|
2923
3192
|
created_by=updated_by,
|
|
2924
3193
|
schedule=schedule,
|
|
2925
|
-
|
|
2926
|
-
criteria_change_type=criteria_change_type,
|
|
2927
|
-
criteria_operator=criteria_operator,
|
|
3194
|
+
criteria_condition=criteria_condition,
|
|
2928
3195
|
criteria_parameters=criteria_parameters,
|
|
2929
3196
|
)
|
|
2930
3197
|
|
|
@@ -2936,24 +3203,19 @@ class AssertionsClient:
|
|
|
2936
3203
|
# this is a creation case and the user missed the definition parameter, which is required.
|
|
2937
3204
|
# Likely this pattern never happened before because there is no a publicly documented default definition
|
|
2938
3205
|
# that we can use as fallback.
|
|
2939
|
-
if
|
|
2940
|
-
# Create
|
|
2941
|
-
|
|
2942
|
-
"
|
|
2943
|
-
"operator": criteria_operator,
|
|
3206
|
+
if criteria_condition is not None:
|
|
3207
|
+
# Create criteria from criteria_condition and parameters
|
|
3208
|
+
temp_criteria: dict[str, Any] = {
|
|
3209
|
+
"condition": criteria_condition,
|
|
2944
3210
|
"parameters": criteria_parameters,
|
|
2945
3211
|
}
|
|
2946
3212
|
|
|
2947
|
-
|
|
2948
|
-
temp_definition["kind"] = criteria_change_type
|
|
2949
|
-
|
|
2950
|
-
use_backend_definition = False
|
|
3213
|
+
use_backend_criteria = False
|
|
2951
3214
|
else:
|
|
2952
|
-
# No criteria provided, use backend
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
"
|
|
2956
|
-
"operator": VolumeAssertionOperator.GREATER_THAN_OR_EQUAL_TO,
|
|
3215
|
+
# No criteria provided, use backend criteria
|
|
3216
|
+
use_backend_criteria = True
|
|
3217
|
+
temp_criteria = {
|
|
3218
|
+
"condition": VolumeAssertionCondition.ROW_COUNT_IS_GREATER_THAN_OR_EQUAL_TO,
|
|
2957
3219
|
"parameters": 0, # Temporary placeholder
|
|
2958
3220
|
}
|
|
2959
3221
|
|
|
@@ -2970,7 +3232,7 @@ class AssertionsClient:
|
|
|
2970
3232
|
updated_by=updated_by,
|
|
2971
3233
|
updated_at=now_utc,
|
|
2972
3234
|
schedule=schedule,
|
|
2973
|
-
|
|
3235
|
+
criteria=temp_criteria,
|
|
2974
3236
|
)
|
|
2975
3237
|
|
|
2976
3238
|
# 4. Merge the assertion input with the existing assertion and monitor entities or create a new assertion
|
|
@@ -2983,8 +3245,8 @@ class AssertionsClient:
|
|
|
2983
3245
|
display_name=display_name,
|
|
2984
3246
|
enabled=enabled,
|
|
2985
3247
|
detection_mechanism=detection_mechanism,
|
|
2986
|
-
|
|
2987
|
-
|
|
3248
|
+
criteria=temp_criteria,
|
|
3249
|
+
use_backend_criteria=use_backend_criteria,
|
|
2988
3250
|
incident_behavior=incident_behavior,
|
|
2989
3251
|
tags=tags,
|
|
2990
3252
|
updated_by=updated_by,
|
|
@@ -3016,6 +3278,46 @@ class AssertionsClient:
|
|
|
3016
3278
|
# raise e
|
|
3017
3279
|
return VolumeAssertion._from_entities(assertion_entity, monitor_entity)
|
|
3018
3280
|
|
|
3281
|
+
def _validate_sql_assertion_creation_params(
|
|
3282
|
+
self,
|
|
3283
|
+
statement: Optional[str],
|
|
3284
|
+
criteria_condition: Optional[Union[SqlAssertionCondition, str]],
|
|
3285
|
+
criteria_parameters: Optional[
|
|
3286
|
+
Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]
|
|
3287
|
+
],
|
|
3288
|
+
) -> None:
|
|
3289
|
+
"""Validate required parameters for SQL assertion creation."""
|
|
3290
|
+
self._validate_required_field(
|
|
3291
|
+
statement, "statement", "when creating a new assertion (urn is None)"
|
|
3292
|
+
)
|
|
3293
|
+
self._validate_required_field(
|
|
3294
|
+
criteria_condition,
|
|
3295
|
+
"criteria_condition",
|
|
3296
|
+
"when creating a new assertion (urn is None)",
|
|
3297
|
+
)
|
|
3298
|
+
self._validate_required_field(
|
|
3299
|
+
criteria_parameters,
|
|
3300
|
+
"criteria_parameters",
|
|
3301
|
+
"when creating a new assertion (urn is None)",
|
|
3302
|
+
)
|
|
3303
|
+
|
|
3304
|
+
def _validate_required_sql_fields_for_update(
|
|
3305
|
+
self,
|
|
3306
|
+
statement: Optional[str],
|
|
3307
|
+
criteria_condition: Optional[Union[SqlAssertionCondition, str]],
|
|
3308
|
+
criteria_parameters: Optional[
|
|
3309
|
+
Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]
|
|
3310
|
+
],
|
|
3311
|
+
assertion_urn: Union[str, AssertionUrn],
|
|
3312
|
+
) -> None:
|
|
3313
|
+
"""Validate required fields after attempting to fetch from existing assertion."""
|
|
3314
|
+
context = f"and not found in existing assertion {assertion_urn}. The existing assertion may be invalid or corrupted."
|
|
3315
|
+
self._validate_required_field(statement, "statement", context)
|
|
3316
|
+
self._validate_required_field(criteria_condition, "criteria_condition", context)
|
|
3317
|
+
self._validate_required_field(
|
|
3318
|
+
criteria_parameters, "criteria_parameters", context
|
|
3319
|
+
)
|
|
3320
|
+
|
|
3019
3321
|
def sync_sql_assertion(
|
|
3020
3322
|
self,
|
|
3021
3323
|
*,
|
|
@@ -3023,13 +3325,11 @@ class AssertionsClient:
|
|
|
3023
3325
|
urn: Optional[Union[str, AssertionUrn]] = None,
|
|
3024
3326
|
display_name: Optional[str] = None,
|
|
3025
3327
|
enabled: Optional[bool] = None,
|
|
3026
|
-
statement: str,
|
|
3027
|
-
|
|
3028
|
-
|
|
3029
|
-
|
|
3030
|
-
|
|
3031
|
-
Union[float, int], tuple[Union[float, int], Union[float, int]]
|
|
3032
|
-
],
|
|
3328
|
+
statement: Optional[str] = None,
|
|
3329
|
+
criteria_condition: Optional[Union[SqlAssertionCondition, str]] = None,
|
|
3330
|
+
criteria_parameters: Optional[
|
|
3331
|
+
Union[Union[float, int], tuple[Union[float, int], Union[float, int]]]
|
|
3332
|
+
] = None,
|
|
3033
3333
|
incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
|
|
3034
3334
|
tags: Optional[TagsInputType] = None,
|
|
3035
3335
|
updated_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
@@ -3055,11 +3355,22 @@ class AssertionsClient:
|
|
|
3055
3355
|
urn (Optional[Union[str, AssertionUrn]]): The urn of the assertion. If not provided, a urn will be generated and the assertion will be created in the DataHub instance.
|
|
3056
3356
|
display_name (Optional[str]): The display name of the assertion. If not provided, a random display name will be generated.
|
|
3057
3357
|
enabled (Optional[bool]): Whether the assertion is enabled. If not provided, the existing value will be preserved.
|
|
3058
|
-
statement (str): The SQL statement to be used for the assertion.
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3358
|
+
statement (Optional[str]): The SQL statement to be used for the assertion. Required when creating a new assertion (urn=None), optional when updating an existing assertion.
|
|
3359
|
+
criteria_condition (Optional[Union[SqlAssertionCondition, str]]): The condition for the sql assertion. Required when creating a new assertion (urn=None), optional when updating an existing assertion. Valid values are:
|
|
3360
|
+
- "IS_EQUAL_TO" -> The metric value equals the threshold.
|
|
3361
|
+
- "IS_NOT_EQUAL_TO" -> The metric value does not equal the threshold.
|
|
3362
|
+
- "IS_GREATER_THAN" -> The metric value is greater than the threshold.
|
|
3363
|
+
- "IS_LESS_THAN" -> The metric value is less than the threshold.
|
|
3364
|
+
- "IS_WITHIN_A_RANGE" -> The metric value is within the specified range.
|
|
3365
|
+
- "GROWS_AT_MOST_ABSOLUTE" -> The metric growth is at most the threshold (absolute change).
|
|
3366
|
+
- "GROWS_AT_MOST_PERCENTAGE" -> The metric growth is at most the threshold (percentage change).
|
|
3367
|
+
- "GROWS_AT_LEAST_ABSOLUTE" -> The metric growth is at least the threshold (absolute change).
|
|
3368
|
+
- "GROWS_AT_LEAST_PERCENTAGE" -> The metric growth is at least the threshold (percentage change).
|
|
3369
|
+
- "GROWS_WITHIN_A_RANGE_ABSOLUTE" -> The metric growth is within the specified range (absolute change).
|
|
3370
|
+
- "GROWS_WITHIN_A_RANGE_PERCENTAGE" -> The metric growth is within the specified range (percentage change).
|
|
3371
|
+
criteria_parameters (Optional[Union[float, int, tuple[float, int]]]): The threshold parameters to be used for the assertion. Required when creating a new assertion (urn=None), optional when updating an existing assertion. This can be a single threshold value or a tuple range.
|
|
3372
|
+
- If the condition is range-based (IS_WITHIN_A_RANGE, GROWS_WITHIN_A_RANGE_ABSOLUTE, GROWS_WITHIN_A_RANGE_PERCENTAGE), the value is a tuple of two threshold values, with format (min, max).
|
|
3373
|
+
- For other conditions, the value is a single numeric threshold value.
|
|
3063
3374
|
incident_behavior (Optional[Union[str, list[str], AssertionIncidentBehavior, list[AssertionIncidentBehavior]]]): The incident behavior to be applied to the assertion. Valid values are: "raise_on_fail", "resolve_on_pass", or the typed ones (AssertionIncidentBehavior.RAISE_ON_FAIL and AssertionIncidentBehavior.RESOLVE_ON_PASS).
|
|
3064
3375
|
tags (Optional[TagsInputType]): The tags to be applied to the assertion. Valid values are: a list of strings, TagUrn objects, or TagAssociationClass objects.
|
|
3065
3376
|
updated_by (Optional[Union[str, CorpUserUrn]]): Optional urn of the user who updated the assertion. The format is "urn:li:corpuser:<username>". The default is the datahub system user.
|
|
@@ -3080,13 +3391,20 @@ class AssertionsClient:
|
|
|
3080
3391
|
# 1. If urn is not set, create a new assertion
|
|
3081
3392
|
if urn is None:
|
|
3082
3393
|
logger.info("URN is not set, creating a new assertion")
|
|
3394
|
+
|
|
3395
|
+
# Validate required parameters for creation
|
|
3396
|
+
self._validate_sql_assertion_creation_params(
|
|
3397
|
+
statement, criteria_condition, criteria_parameters
|
|
3398
|
+
)
|
|
3399
|
+
# After validation, these cannot be None
|
|
3400
|
+
assert statement is not None
|
|
3401
|
+
assert criteria_condition is not None
|
|
3402
|
+
assert criteria_parameters is not None
|
|
3083
3403
|
return self._create_sql_assertion(
|
|
3084
3404
|
dataset_urn=dataset_urn,
|
|
3085
3405
|
display_name=display_name,
|
|
3086
3406
|
enabled=enabled if enabled is not None else True,
|
|
3087
|
-
|
|
3088
|
-
criteria_change_type=criteria_change_type,
|
|
3089
|
-
criteria_operator=criteria_operator,
|
|
3407
|
+
criteria_condition=criteria_condition,
|
|
3090
3408
|
criteria_parameters=criteria_parameters,
|
|
3091
3409
|
statement=statement,
|
|
3092
3410
|
incident_behavior=incident_behavior,
|
|
@@ -3095,13 +3413,51 @@ class AssertionsClient:
|
|
|
3095
3413
|
schedule=schedule,
|
|
3096
3414
|
)
|
|
3097
3415
|
|
|
3098
|
-
# 2. If urn is set,
|
|
3416
|
+
# 2.1 If urn is set, fetch missing required parameters from backend if needed:
|
|
3417
|
+
# NOTE: This is a tactical solution. The problem is we fetch twice (once for validation,
|
|
3418
|
+
# once for merge). Strategic solution would be to merge first, then validate after,
|
|
3419
|
+
# but that requires heavy refactor and is skipped for now.
|
|
3420
|
+
if urn is not None and (
|
|
3421
|
+
statement is None
|
|
3422
|
+
or criteria_condition is None
|
|
3423
|
+
or criteria_parameters is None
|
|
3424
|
+
):
|
|
3425
|
+
# Fetch existing assertion to get missing required parameters
|
|
3426
|
+
maybe_assertion_entity, _, maybe_monitor_entity = (
|
|
3427
|
+
self._retrieve_assertion_and_monitor(
|
|
3428
|
+
{"dataset_urn": dataset_urn, "urn": urn}
|
|
3429
|
+
)
|
|
3430
|
+
)
|
|
3431
|
+
|
|
3432
|
+
if maybe_assertion_entity is not None and maybe_monitor_entity is not None:
|
|
3433
|
+
existing_assertion = SqlAssertion._from_entities(
|
|
3434
|
+
maybe_assertion_entity, maybe_monitor_entity
|
|
3435
|
+
)
|
|
3436
|
+
# Use existing values for missing required parameters
|
|
3437
|
+
if statement is None:
|
|
3438
|
+
statement = existing_assertion.statement
|
|
3439
|
+
if criteria_condition is None or criteria_parameters is None:
|
|
3440
|
+
criteria = existing_assertion._criteria
|
|
3441
|
+
if criteria_condition is None:
|
|
3442
|
+
criteria_condition = criteria.condition
|
|
3443
|
+
if criteria_parameters is None:
|
|
3444
|
+
criteria_parameters = criteria.parameters
|
|
3445
|
+
|
|
3446
|
+
self._validate_required_sql_fields_for_update(
|
|
3447
|
+
statement, criteria_condition, criteria_parameters, urn
|
|
3448
|
+
)
|
|
3449
|
+
assert (
|
|
3450
|
+
statement is not None
|
|
3451
|
+
and criteria_condition is not None
|
|
3452
|
+
and criteria_parameters is not None
|
|
3453
|
+
), "Fields guaranteed non-None after validation"
|
|
3454
|
+
|
|
3455
|
+
# 2.2 Now validate the input with all required parameters:
|
|
3099
3456
|
criteria = SqlAssertionCriteria(
|
|
3100
|
-
|
|
3101
|
-
change_type=criteria_change_type,
|
|
3102
|
-
operator=criteria_operator,
|
|
3457
|
+
condition=criteria_condition,
|
|
3103
3458
|
parameters=criteria_parameters,
|
|
3104
3459
|
)
|
|
3460
|
+
|
|
3105
3461
|
assertion_input = _SqlAssertionInput(
|
|
3106
3462
|
urn=urn,
|
|
3107
3463
|
entity_client=self.client.entities,
|
|
@@ -3159,6 +3515,329 @@ class AssertionsClient:
|
|
|
3159
3515
|
|
|
3160
3516
|
return SqlAssertion._from_entities(assertion_entity, monitor_entity)
|
|
3161
3517
|
|
|
3518
|
+
def _validate_required_column_fields_for_creation(
|
|
3519
|
+
self,
|
|
3520
|
+
column_name: Optional[str],
|
|
3521
|
+
metric_type: Optional[MetricInputType],
|
|
3522
|
+
operator: Optional[OperatorInputType],
|
|
3523
|
+
) -> None:
|
|
3524
|
+
"""Validate required fields for column metric assertion creation."""
|
|
3525
|
+
self._validate_required_field(
|
|
3526
|
+
column_name, "column_name", "when creating a new assertion (urn is None)"
|
|
3527
|
+
)
|
|
3528
|
+
self._validate_required_field(
|
|
3529
|
+
metric_type, "metric_type", "when creating a new assertion (urn is None)"
|
|
3530
|
+
)
|
|
3531
|
+
self._validate_required_field(
|
|
3532
|
+
operator, "operator", "when creating a new assertion (urn is None)"
|
|
3533
|
+
)
|
|
3534
|
+
|
|
3535
|
+
def _validate_required_column_fields_for_update(
|
|
3536
|
+
self,
|
|
3537
|
+
column_name: Optional[str],
|
|
3538
|
+
metric_type: Optional[MetricInputType],
|
|
3539
|
+
operator: Optional[OperatorInputType],
|
|
3540
|
+
assertion_urn: Union[str, AssertionUrn],
|
|
3541
|
+
) -> None:
|
|
3542
|
+
"""Validate required fields after attempting to fetch from existing assertion."""
|
|
3543
|
+
context = f"and not found in existing assertion {assertion_urn}. The existing assertion may be invalid or corrupted."
|
|
3544
|
+
self._validate_required_field(column_name, "column_name", context)
|
|
3545
|
+
self._validate_required_field(metric_type, "metric_type", context)
|
|
3546
|
+
self._validate_required_field(operator, "operator", context)
|
|
3547
|
+
|
|
3548
|
+
def _create_column_metric_assertion(
|
|
3549
|
+
self,
|
|
3550
|
+
*,
|
|
3551
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
3552
|
+
column_name: str,
|
|
3553
|
+
metric_type: MetricInputType,
|
|
3554
|
+
operator: OperatorInputType,
|
|
3555
|
+
criteria_parameters: Optional[ColumnMetricAssertionParameters] = None,
|
|
3556
|
+
display_name: Optional[str] = None,
|
|
3557
|
+
enabled: bool = True,
|
|
3558
|
+
detection_mechanism: DetectionMechanismInputTypes = None,
|
|
3559
|
+
incident_behavior: Optional[AssertionIncidentBehaviorInputTypes] = None,
|
|
3560
|
+
tags: Optional[TagsInputType] = None,
|
|
3561
|
+
created_by: Optional[Union[str, CorpUserUrn]] = None,
|
|
3562
|
+
schedule: Optional[Union[str, models.CronScheduleClass]] = None,
|
|
3563
|
+
) -> ColumnMetricAssertion:
|
|
3564
|
+
"""Create a column metric assertion.
|
|
3565
|
+
|
|
3566
|
+
Note: keyword arguments are required.
|
|
3567
|
+
|
|
3568
|
+
Args:
|
|
3569
|
+
dataset_urn: The urn of the dataset to be monitored. (Required)
|
|
3570
|
+
column_name: The name of the column to be monitored. (Required)
|
|
3571
|
+
metric_type: The type of the metric to be monitored. (Required)
|
|
3572
|
+
operator: The operator to be used for the assertion. (Required)
|
|
3573
|
+
criteria_parameters: The criteria parameters for the assertion. Required for most operators.
|
|
3574
|
+
display_name: The display name of the assertion. If not provided, a random display
|
|
3575
|
+
name will be generated.
|
|
3576
|
+
enabled: Whether the assertion is enabled. Defaults to True.
|
|
3577
|
+
detection_mechanism: The detection mechanism to be used for the assertion.
|
|
3578
|
+
incident_behavior: The incident behavior to be applied to the assertion.
|
|
3579
|
+
tags: The tags to be applied to the assertion.
|
|
3580
|
+
created_by: Optional urn of the user who created the assertion.
|
|
3581
|
+
schedule: Optional cron formatted schedule for the assertion.
|
|
3582
|
+
|
|
3583
|
+
Returns:
|
|
3584
|
+
ColumnMetricAssertion: The created assertion.
|
|
3585
|
+
"""
|
|
3586
|
+
now_utc = datetime.now(timezone.utc)
|
|
3587
|
+
if created_by is None:
|
|
3588
|
+
logger.warning(
|
|
3589
|
+
f"Created by is not set, using {DEFAULT_CREATED_BY} as a placeholder"
|
|
3590
|
+
)
|
|
3591
|
+
created_by = DEFAULT_CREATED_BY
|
|
3592
|
+
assertion_input = _ColumnMetricAssertionInput(
|
|
3593
|
+
urn=None,
|
|
3594
|
+
entity_client=self.client.entities,
|
|
3595
|
+
dataset_urn=dataset_urn,
|
|
3596
|
+
column_name=column_name,
|
|
3597
|
+
metric_type=metric_type,
|
|
3598
|
+
operator=operator,
|
|
3599
|
+
criteria_parameters=criteria_parameters,
|
|
3600
|
+
display_name=display_name,
|
|
3601
|
+
enabled=enabled,
|
|
3602
|
+
detection_mechanism=detection_mechanism,
|
|
3603
|
+
incident_behavior=incident_behavior,
|
|
3604
|
+
tags=tags,
|
|
3605
|
+
created_by=created_by,
|
|
3606
|
+
created_at=now_utc,
|
|
3607
|
+
updated_by=created_by,
|
|
3608
|
+
updated_at=now_utc,
|
|
3609
|
+
schedule=schedule,
|
|
3610
|
+
gms_criteria_type_info=None,
|
|
3611
|
+
)
|
|
3612
|
+
assertion_entity, monitor_entity = (
|
|
3613
|
+
assertion_input.to_assertion_and_monitor_entities()
|
|
3614
|
+
)
|
|
3615
|
+
# If assertion creation fails, we won't try to create the monitor
|
|
3616
|
+
self.client.entities.create(assertion_entity)
|
|
3617
|
+
# TODO: Wrap monitor creation in a try-except and delete the assertion if monitor creation fails (once delete is implemented https://linear.app/acryl-data/issue/OBS-1350/add-delete-method-to-entity-clientpy)
|
|
3618
|
+
# try:
|
|
3619
|
+
self.client.entities.create(monitor_entity)
|
|
3620
|
+
# except Exception as e:
|
|
3621
|
+
# logger.error(f"Error creating monitor: {e}")
|
|
3622
|
+
# self.client.entities.delete(assertion_entity)
|
|
3623
|
+
# raise e
|
|
3624
|
+
return ColumnMetricAssertion._from_entities(assertion_entity, monitor_entity)
|
|
3625
|
+
|
|
3626
|
+
def _retrieve_and_merge_column_metric_assertion_and_monitor(
|
|
3627
|
+
self,
|
|
3628
|
+
assertion_input: _ColumnMetricAssertionInput,
|
|
3629
|
+
dataset_urn: Union[str, DatasetUrn],
|
|
3630
|
+
column_name: str,
|
|
3631
|
+
metric_type: MetricInputType,
|
|
3632
|
+
operator: OperatorInputType,
|
|
3633
|
+
criteria_parameters: Optional[ColumnMetricAssertionParameters],
|
|
3634
|
+
urn: Union[str, AssertionUrn],
|
|
3635
|
+
display_name: Optional[str],
|
|
3636
|
+
enabled: Optional[bool],
|
|
3637
|
+
detection_mechanism: DetectionMechanismInputTypes,
|
|
3638
|
+
incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
|
|
3639
|
+
tags: Optional[TagsInputType],
|
|
3640
|
+
updated_by: Optional[Union[str, CorpUserUrn]],
|
|
3641
|
+
now_utc: datetime,
|
|
3642
|
+
schedule: Optional[Union[str, models.CronScheduleClass]],
|
|
3643
|
+
) -> Union[ColumnMetricAssertion, _ColumnMetricAssertionInput]:
|
|
3644
|
+
# 1. Retrieve any existing assertion and monitor entities:
|
|
3645
|
+
maybe_assertion_entity, monitor_urn, maybe_monitor_entity = (
|
|
3646
|
+
self._retrieve_assertion_and_monitor(assertion_input)
|
|
3647
|
+
)
|
|
3648
|
+
|
|
3649
|
+
# Extract gms_criteria_type_info from existing assertion if available
|
|
3650
|
+
gms_criteria_type_info = None
|
|
3651
|
+
if maybe_assertion_entity is not None:
|
|
3652
|
+
gms_criteria_type_info = (
|
|
3653
|
+
_HasColumnMetricFunctionality._get_criteria_parameters_with_type(
|
|
3654
|
+
maybe_assertion_entity
|
|
3655
|
+
)
|
|
3656
|
+
)
|
|
3657
|
+
|
|
3658
|
+
# 2.1 If the assertion and monitor entities exist, create an assertion object from them:
|
|
3659
|
+
if maybe_assertion_entity and maybe_monitor_entity:
|
|
3660
|
+
existing_assertion = ColumnMetricAssertion._from_entities(
|
|
3661
|
+
maybe_assertion_entity, maybe_monitor_entity
|
|
3662
|
+
)
|
|
3663
|
+
# 2.2 If the assertion exists but the monitor does not, create a placeholder monitor entity to be able to create the assertion:
|
|
3664
|
+
elif maybe_assertion_entity and not maybe_monitor_entity:
|
|
3665
|
+
monitor_mode = (
|
|
3666
|
+
"ACTIVE" if enabled else "INACTIVE" if enabled is not None else "ACTIVE"
|
|
3667
|
+
)
|
|
3668
|
+
existing_assertion = ColumnMetricAssertion._from_entities(
|
|
3669
|
+
maybe_assertion_entity,
|
|
3670
|
+
Monitor(id=monitor_urn, info=("ASSERTION", monitor_mode)),
|
|
3671
|
+
)
|
|
3672
|
+
# 2.3 If the assertion does not exist, create a new assertion with a generated urn and return the assertion input:
|
|
3673
|
+
elif not maybe_assertion_entity:
|
|
3674
|
+
logger.info(
|
|
3675
|
+
f"No existing assertion entity found for assertion urn {urn}, creating a new assertion with a generated urn"
|
|
3676
|
+
)
|
|
3677
|
+
return self._create_column_metric_assertion(
|
|
3678
|
+
dataset_urn=dataset_urn,
|
|
3679
|
+
column_name=column_name,
|
|
3680
|
+
metric_type=metric_type,
|
|
3681
|
+
operator=operator,
|
|
3682
|
+
criteria_parameters=criteria_parameters,
|
|
3683
|
+
schedule=schedule,
|
|
3684
|
+
display_name=display_name,
|
|
3685
|
+
detection_mechanism=detection_mechanism,
|
|
3686
|
+
incident_behavior=incident_behavior,
|
|
3687
|
+
tags=tags,
|
|
3688
|
+
created_by=updated_by,
|
|
3689
|
+
)
|
|
3690
|
+
|
|
3691
|
+
# 3. Check for any issues e.g. different dataset urns
|
|
3692
|
+
if (
|
|
3693
|
+
existing_assertion
|
|
3694
|
+
and hasattr(existing_assertion, "dataset_urn")
|
|
3695
|
+
and existing_assertion.dataset_urn != assertion_input.dataset_urn
|
|
3696
|
+
):
|
|
3697
|
+
raise SDKUsageError(
|
|
3698
|
+
f"Dataset URN mismatch, existing assertion: {existing_assertion.dataset_urn} != new assertion: {dataset_urn}"
|
|
3699
|
+
)
|
|
3700
|
+
|
|
3701
|
+
# 4. Merge the existing assertion with the validated input:
|
|
3702
|
+
merged_assertion_input = self._merge_column_metric_input(
|
|
3703
|
+
dataset_urn=dataset_urn,
|
|
3704
|
+
column_name=column_name,
|
|
3705
|
+
metric_type=metric_type,
|
|
3706
|
+
operator=operator,
|
|
3707
|
+
criteria_parameters=criteria_parameters,
|
|
3708
|
+
urn=urn,
|
|
3709
|
+
display_name=display_name,
|
|
3710
|
+
enabled=enabled,
|
|
3711
|
+
schedule=schedule,
|
|
3712
|
+
detection_mechanism=detection_mechanism,
|
|
3713
|
+
incident_behavior=incident_behavior,
|
|
3714
|
+
tags=tags,
|
|
3715
|
+
now_utc=now_utc,
|
|
3716
|
+
assertion_input=assertion_input,
|
|
3717
|
+
maybe_assertion_entity=maybe_assertion_entity,
|
|
3718
|
+
maybe_monitor_entity=maybe_monitor_entity,
|
|
3719
|
+
existing_assertion=existing_assertion,
|
|
3720
|
+
gms_criteria_type_info=gms_criteria_type_info,
|
|
3721
|
+
)
|
|
3722
|
+
|
|
3723
|
+
return merged_assertion_input
|
|
3724
|
+
|
|
3725
|
+
def _merge_column_metric_input(  # TODO: Refactor
    self,
    dataset_urn: Union[str, DatasetUrn],
    column_name: str,
    metric_type: MetricInputType,
    operator: OperatorInputType,
    criteria_parameters: Optional[ColumnMetricAssertionParameters],
    urn: Union[str, AssertionUrn],
    display_name: Optional[str],
    enabled: Optional[bool],
    schedule: Optional[Union[str, models.CronScheduleClass]],
    detection_mechanism: DetectionMechanismInputTypes,
    incident_behavior: Optional[AssertionIncidentBehaviorInputTypes],
    tags: Optional[TagsInputType],
    now_utc: datetime,
    assertion_input: _ColumnMetricAssertionInput,
    maybe_assertion_entity: Optional[Assertion],
    maybe_monitor_entity: Optional[Monitor],
    existing_assertion: ColumnMetricAssertion,
    gms_criteria_type_info: Optional[tuple] = None,
) -> _ColumnMetricAssertionInput:
    """Build the upsert input by combining caller input with the stored assertion.

    The optional fields (display name, enabled/mode, schedule, detection
    mechanism, incident behavior, tags) are each resolved through
    ``_merge_field``; the remaining arguments are forwarded unchanged to the
    new ``_ColumnMetricAssertionInput``.

    Args:
        dataset_urn: Dataset the assertion targets (passed through).
        column_name: Column the metric is computed on (passed through).
        metric_type: Metric to evaluate (passed through).
        operator: Comparison operator (passed through).
        criteria_parameters: Operator parameters (passed through).
        urn: URN of the assertion being upserted.
        display_name: Caller-supplied display name, if any.
        enabled: Caller-supplied enabled flag, if any.
        schedule: Caller-supplied schedule, if any.
        detection_mechanism: Caller-supplied detection mechanism.
        incident_behavior: Caller-supplied incident behavior, if any.
        tags: Caller-supplied tags, if any.
        now_utc: Timestamp recorded as ``updated_at`` on the result.
        assertion_input: Already-validated input built from the caller's
            arguments; supplies ``entity_client``, ``updated_by`` and the
            fallback ``created_by`` / ``created_at``.
        maybe_assertion_entity: Stored assertion entity, if one was found.
        maybe_monitor_entity: Stored monitor entity, if one was found.
        existing_assertion: The existing assertion object being updated.
        gms_criteria_type_info: Optional criteria type info, passed through.

    Returns:
        A new ``_ColumnMetricAssertionInput`` carrying the merged values.
    """

    def _merged(value, field_name, stored_value):
        # Thin wrapper so every field is merged against the same pair of
        # (validated input, existing assertion) objects.
        return _merge_field(
            value, field_name, assertion_input, existing_assertion, stored_value
        )

    # Values currently stored on the backend entities; each stays None when
    # the entity, its info aspect, or the attribute is unavailable.
    stored_display_name = None
    stored_enabled = None
    stored_schedule = None
    stored_detection_mechanism = None
    stored_incident_behavior = None
    stored_tags = None

    assertion_info = maybe_assertion_entity.info if maybe_assertion_entity else None
    if assertion_info:
        stored_display_name = getattr(assertion_info, "displayName", None)
        stored_tags = getattr(assertion_info, "tags", None)

    monitor_info = maybe_monitor_entity.info if maybe_monitor_entity else None
    if monitor_info:
        monitor_status = getattr(monitor_info, "status", None)
        if monitor_status:
            # The enabled flag is derived from the monitor status string.
            stored_enabled = monitor_status == "ACTIVE"

        monitor_config = getattr(monitor_info, "config", None)
        if monitor_config:
            stored_schedule = getattr(monitor_config, "schedule", None)
            stored_detection_mechanism = getattr(monitor_config, "executorId", None)
            stored_incident_behavior = getattr(monitor_config, "actions", None)

    # Resolve each optional field (same order as the fields are declared).
    resolved_display_name = _merged(display_name, "display_name", stored_display_name)
    # NOTE: the enabled flag is merged under the field name "mode".
    resolved_enabled = _merged(enabled, "mode", stored_enabled)
    resolved_schedule = _merged(schedule, "schedule", stored_schedule)
    resolved_detection_mechanism = _merged(
        detection_mechanism, "detection_mechanism", stored_detection_mechanism
    )
    resolved_incident_behavior = _merged(
        incident_behavior, "incident_behavior", stored_incident_behavior
    )
    resolved_tags = _merged(tags, "tags", stored_tags)

    # Creation metadata comes from the existing assertion when it has any,
    # otherwise from the freshly validated input.
    creator = existing_assertion.created_by or assertion_input.created_by
    creation_time = existing_assertion.created_at or assertion_input.created_at

    return _ColumnMetricAssertionInput(
        urn=urn,
        entity_client=assertion_input.entity_client,
        dataset_urn=dataset_urn,
        column_name=column_name,
        metric_type=metric_type,
        operator=operator,
        criteria_parameters=criteria_parameters,
        display_name=resolved_display_name,
        enabled=resolved_enabled,
        detection_mechanism=resolved_detection_mechanism,
        incident_behavior=resolved_incident_behavior,
        tags=resolved_tags,
        created_by=creator,
        created_at=creation_time,
        updated_by=assertion_input.updated_by,
        updated_at=now_utc,
        schedule=resolved_schedule,
        gms_criteria_type_info=gms_criteria_type_info,
    )
|
|
3840
|
+
|
|
3162
3841
|
|
|
3163
3842
|
def _merge_field(
|
|
3164
3843
|
input_field_value: Any,
|