acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
- acryl_datahub_cloud/metadata/schema.avsc +9 -0
- acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
- acryl_datahub_cloud/sdk/__init__.py +10 -2
- acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
- acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
- acryl_datahub_cloud/sdk/assertion/types.py +18 -0
- acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
- acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
- acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
- acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
- acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
- acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Optional, Union
|
|
4
|
+
|
|
5
|
+
from typing_extensions import Self
|
|
6
|
+
|
|
7
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
8
|
+
AssertionMode,
|
|
9
|
+
_AssertionPublic,
|
|
10
|
+
_HasColumnMetricFunctionality,
|
|
11
|
+
_HasSchedule,
|
|
12
|
+
_HasSmartFunctionality,
|
|
13
|
+
)
|
|
14
|
+
from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
|
|
15
|
+
ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
|
|
16
|
+
DEFAULT_DETECTION_MECHANISM,
|
|
17
|
+
DEFAULT_SCHEDULE,
|
|
18
|
+
DEFAULT_SENSITIVITY,
|
|
19
|
+
AssertionIncidentBehavior,
|
|
20
|
+
DetectionMechanism,
|
|
21
|
+
ExclusionWindowTypes,
|
|
22
|
+
InferenceSensitivity,
|
|
23
|
+
_DetectionMechanismTypes,
|
|
24
|
+
)
|
|
25
|
+
from acryl_datahub_cloud.sdk.assertion_input.smart_column_metric_assertion_input import (
|
|
26
|
+
MetricInputType,
|
|
27
|
+
OperatorInputType,
|
|
28
|
+
RangeInputType,
|
|
29
|
+
RangeTypeInputType,
|
|
30
|
+
ValueInputType,
|
|
31
|
+
ValueTypeInputType,
|
|
32
|
+
)
|
|
33
|
+
from acryl_datahub_cloud.sdk.entities.assertion import Assertion
|
|
34
|
+
from acryl_datahub_cloud.sdk.entities.monitor import Monitor
|
|
35
|
+
from datahub.metadata import schema_classes as models
|
|
36
|
+
from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn, TagUrn
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SmartColumnMetricAssertion(
    _HasColumnMetricFunctionality,
    _HasSmartFunctionality,
    _HasSchedule,
    _AssertionPublic,
):
    """
    A class that represents a smart column metric assertion.
    This assertion is used to validate the value of a common field / column metric (e.g. aggregation) such as null count + percentage,
    min, max, median, and more. It uses AI to infer the assertion parameters.
    """

    def __init__(
        self,
        *,
        urn: AssertionUrn,
        dataset_urn: DatasetUrn,
        column_name: str,
        metric_type: MetricInputType,
        operator: OperatorInputType,
        # Depending on the operator, value, range (and corresponding type) or no parameters are required:
        value: Optional[ValueInputType] = None,
        value_type: Optional[ValueTypeInputType] = None,
        range: Optional[RangeInputType] = None,
        range_type: Optional[RangeTypeInputType] = None,
        # TODO: Evaluate these params:
        display_name: str,
        mode: AssertionMode,
        schedule: models.CronScheduleClass = DEFAULT_SCHEDULE,
        sensitivity: InferenceSensitivity = DEFAULT_SENSITIVITY,
        exclusion_windows: list[ExclusionWindowTypes],
        training_data_lookback_days: int = ASSERTION_MONITOR_DEFAULT_TRAINING_LOOKBACK_WINDOW_DAYS,
        incident_behavior: list[AssertionIncidentBehavior],
        detection_mechanism: Optional[
            _DetectionMechanismTypes
        ] = DEFAULT_DETECTION_MECHANISM,
        tags: list[TagUrn],
        created_by: Optional[CorpUserUrn] = None,
        created_at: Optional[datetime] = None,
        updated_by: Optional[CorpUserUrn] = None,
        updated_at: Optional[datetime] = None,
    ):
        """
        Initialize a smart column metric assertion.

        All parameters are keyword-only.

        Args:
            urn: The URN of the assertion.
            dataset_urn: The URN of the dataset to monitor.
            column_name: The name of the column the metric is evaluated on.
            metric_type: The column metric to evaluate.
            operator: The operator applied to the metric. Depending on the
                operator, ``value``, ``range`` (and the corresponding type)
                or no parameters are required.
            value: The single value parameter, for operators that need one.
            value_type: The type of ``value``.
            range: The range parameter, for operators that need one.
            range_type: The type of ``range``.
            display_name: The display name of the assertion.
            mode: The mode of the assertion (active/inactive).
            schedule: The cron schedule of the assertion. Defaults to
                ``DEFAULT_SCHEDULE``.
            sensitivity: The sensitivity of the assertion (low/medium/high).
            exclusion_windows: The exclusion windows to apply to the assertion.
            training_data_lookback_days: The number of days of data to use for training.
            incident_behavior: The behavior when incidents occur.
            detection_mechanism: The mechanism used to detect changes.
            tags: The tags to apply to the assertion.
            created_by: The URN of the user who created the assertion.
            created_at: The timestamp when the assertion was created.
            updated_by: The URN of the user who last updated the assertion.
            updated_at: The timestamp when the assertion was last updated.
        """
        # Each base class is initialised explicitly (rather than through a
        # cooperative super() chain) because every one takes a different,
        # disjoint set of keyword arguments.
        _AssertionPublic.__init__(
            self,
            urn=urn,
            dataset_urn=dataset_urn,
            display_name=display_name,
            mode=mode,
            tags=tags,
            incident_behavior=incident_behavior,
            detection_mechanism=detection_mechanism,
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )
        _HasSmartFunctionality.__init__(
            self,
            sensitivity=sensitivity,
            exclusion_windows=exclusion_windows,
            training_data_lookback_days=training_data_lookback_days,
        )
        _HasSchedule.__init__(
            self,
            schedule=schedule,
        )
        _HasColumnMetricFunctionality.__init__(
            self,
            column_name=column_name,
            metric_type=metric_type,
            operator=operator,
            value=value,
            value_type=value_type,
            range=range,
            range_type=range_type,
        )

    @classmethod
    def _from_entities(cls, assertion: Assertion, monitor: Monitor) -> Self:
        """
        Create a SmartColumnMetricAssertion from an Assertion and Monitor entity.

        Column-metric fields, incident behavior, tags and audit fields are read
        from the assertion; mode, schedule, sensitivity, exclusion windows and
        the training lookback are read from the monitor. The detection
        mechanism is derived from both. A missing assertion description
        becomes an empty display name.

        Args:
            assertion: The Assertion entity.
            monitor: The Monitor entity.

        Returns:
            A SmartColumnMetricAssertion instance.
        """
        return cls(
            urn=assertion.urn,
            dataset_urn=assertion.dataset,
            column_name=cls._get_column_name(assertion),
            metric_type=cls._get_metric_type(assertion),
            operator=cls._get_operator(assertion),
            value=cls._get_value(assertion),
            value_type=cls._get_value_type(assertion),
            range=cls._get_range(assertion),
            range_type=cls._get_range_type(assertion),
            display_name=assertion.description or "",
            mode=cls._get_mode(monitor),
            schedule=cls._get_schedule(monitor),
            sensitivity=cls._get_sensitivity(monitor),
            exclusion_windows=cls._get_exclusion_windows(monitor),
            training_data_lookback_days=cls._get_training_data_lookback_days(monitor),
            incident_behavior=cls._get_incident_behavior(assertion),
            detection_mechanism=cls._get_detection_mechanism(assertion, monitor),
            tags=cls._get_tags(assertion),
            created_by=cls._get_created_by(assertion),
            created_at=cls._get_created_at(assertion),
            updated_by=cls._get_updated_by(assertion),
            updated_at=cls._get_updated_at(assertion),
        )

    @staticmethod
    def _get_detection_mechanism(
        assertion: Assertion,
        monitor: Monitor,
        default: Optional[_DetectionMechanismTypes] = DEFAULT_DETECTION_MECHANISM,
    ) -> Optional[_DetectionMechanismTypes]:
        """Get the detection mechanism for column metric assertions.

        The monitor's dataset-field parameters carry a source type, which is
        mapped onto the matching ``DetectionMechanism``:

        * ``ALL_ROWS_QUERY`` -> ``DetectionMechanism.ALL_ROWS_QUERY``
        * ``CHANGED_ROWS_QUERY`` -> ``DetectionMechanism.CHANGED_ROWS_QUERY``
          (requires ``changedRowsField``)
        * ``DATAHUB_DATASET_PROFILE`` ->
          ``DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE``

        Args:
            assertion: The Assertion entity; also the source of the additional
                filter passed to the query-based mechanisms.
            monitor: The Monitor entity holding the evaluation parameters.
            default: Value returned whenever the mechanism cannot be resolved.

        Returns:
            The resolved detection mechanism, or ``default`` when the
            validated context is unavailable, the dataset-field parameters or
            the changed-rows field are missing, or the source type is not
            supported. All fallbacks except the first log a warning here.
        """
        source_types = models.DatasetFieldAssertionSourceTypeClass

        parameters = _AssertionPublic._get_validated_detection_context(
            monitor,
            assertion,
            models.AssertionEvaluationParametersTypeClass.DATASET_FIELD,
            models.FieldAssertionInfoClass,
            default,
        )
        if parameters is None:
            return default

        field_parameters = parameters.datasetFieldParameters
        if field_parameters is None:
            logger.warning(
                f"Monitor does not have datasetFieldParameters, defaulting detection mechanism to {default}"
            )
            return default

        source_type = field_parameters.sourceType

        if source_type == source_types.ALL_ROWS_QUERY:
            return DetectionMechanism.ALL_ROWS_QUERY(
                additional_filter=_AssertionPublic._get_additional_filter(assertion)
            )

        if source_type == source_types.CHANGED_ROWS_QUERY:
            changed_rows_field = field_parameters.changedRowsField
            if changed_rows_field is None:
                logger.warning(
                    f"Monitor has CHANGED_ROWS_QUERY source type but no changedRowsField, defaulting detection mechanism to {default}"
                )
                return default
            column_name = changed_rows_field.path
            additional_filter = _AssertionPublic._get_additional_filter(assertion)
            return DetectionMechanism.CHANGED_ROWS_QUERY(
                column_name=column_name, additional_filter=additional_filter
            )

        if source_type == source_types.DATAHUB_DATASET_PROFILE:
            return DetectionMechanism.ALL_ROWS_QUERY_DATAHUB_DATASET_PROFILE

        logger.warning(
            f"Unsupported DatasetFieldAssertionSourceType {source_type}, defaulting detection mechanism to {default}"
        )
        return default
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from acryl_datahub_cloud.sdk.assertion.assertion_base import (
|
|
4
|
+
FreshnessAssertion,
|
|
5
|
+
SmartFreshnessAssertion,
|
|
6
|
+
SmartVolumeAssertion,
|
|
7
|
+
)
|
|
8
|
+
from acryl_datahub_cloud.sdk.assertion.smart_column_metric_assertion import (
|
|
9
|
+
SmartColumnMetricAssertion,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
# Type alias: any one of the assertion classes imported above.
# TODO: Add other assertion types here as we add them.
AssertionTypes = Union[
    SmartFreshnessAssertion,
    SmartVolumeAssertion,
    FreshnessAssertion,
    SmartColumnMetricAssertion,
]
|
|
File without changes
|