acryl-datahub-cloud 0.3.12rc3__py3-none-any.whl → 0.3.12rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

Files changed (20)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +37 -2
  3. acryl_datahub_cloud/metadata/schema.avsc +9 -0
  4. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +9 -0
  5. acryl_datahub_cloud/sdk/__init__.py +10 -2
  6. acryl_datahub_cloud/sdk/assertion/__init__.py +0 -0
  7. acryl_datahub_cloud/sdk/{assertion.py → assertion/assertion_base.py} +614 -231
  8. acryl_datahub_cloud/sdk/assertion/smart_column_metric_assertion.py +224 -0
  9. acryl_datahub_cloud/sdk/assertion/types.py +18 -0
  10. acryl_datahub_cloud/sdk/assertion_input/__init__.py +0 -0
  11. acryl_datahub_cloud/sdk/{assertion_input.py → assertion_input/assertion_input.py} +437 -147
  12. acryl_datahub_cloud/sdk/assertion_input/freshness_assertion_input.py +261 -0
  13. acryl_datahub_cloud/sdk/assertion_input/smart_column_metric_assertion_input.py +943 -0
  14. acryl_datahub_cloud/sdk/assertions_client.py +1281 -70
  15. acryl_datahub_cloud/sdk/entities/assertion.py +8 -1
  16. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/METADATA +41 -41
  17. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/RECORD +20 -14
  18. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/WHEEL +0 -0
  19. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/entry_points.txt +0 -0
  20. {acryl_datahub_cloud-0.3.12rc3.dist-info → acryl_datahub_cloud-0.3.12rc5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,261 @@
1
+ from datetime import datetime
2
+ from typing import Optional, Union
3
+
4
+ from acryl_datahub_cloud.sdk.assertion_input.assertion_input import (
5
+ DEFAULT_DAILY_SCHEDULE,
6
+ HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
7
+ LAST_MODIFIED_ALLOWED_FIELD_TYPES,
8
+ AssertionIncidentBehavior,
9
+ DetectionMechanismInputTypes,
10
+ FieldSpecType,
11
+ TimeWindowSizeInputTypes,
12
+ _AssertionInput,
13
+ _AuditLog,
14
+ _DataHubOperation,
15
+ _HasFreshnessFeatures,
16
+ _HighWatermarkColumn,
17
+ _InformationSchema,
18
+ _LastModifiedColumn,
19
+ _try_parse_and_validate_schema_classes_enum,
20
+ _try_parse_time_window_size,
21
+ )
22
+ from acryl_datahub_cloud.sdk.entities.assertion import (
23
+ AssertionInfoInputType,
24
+ TagsInputType,
25
+ )
26
+ from acryl_datahub_cloud.sdk.errors import (
27
+ SDKNotYetSupportedError,
28
+ SDKUsageError,
29
+ )
30
+ from datahub.metadata import schema_classes as models
31
+ from datahub.metadata.urns import AssertionUrn, CorpUserUrn, DatasetUrn
32
+ from datahub.sdk.entity_client import EntityClient
33
+
34
+
35
class _FreshnessAssertionInput(_AssertionInput, _HasFreshnessFeatures):
    """Input used to build a native freshness assertion and its monitor.

    On top of the common fields forwarded to ``_AssertionInput``, this class
    holds the freshness schedule check type and the optional lookback window,
    and converts them (together with the detection mechanism) into the
    ``models`` classes that describe the assertion and its evaluation.
    """

    def _assertion_type(self) -> str:
        """Get the assertion type."""
        return models.AssertionTypeClass.FRESHNESS

    def __init__(
        self,
        *,
        # Required fields
        dataset_urn: Union[str, DatasetUrn],
        entity_client: EntityClient,  # Needed to get the schema field spec for the detection mechanism if needed
        urn: Optional[Union[str, AssertionUrn]] = None,
        # Optional fields
        display_name: Optional[str] = None,
        enabled: bool = True,
        schedule: Optional[Union[str, models.CronScheduleClass]] = None,
        detection_mechanism: DetectionMechanismInputTypes = None,
        incident_behavior: Optional[
            Union[AssertionIncidentBehavior, list[AssertionIncidentBehavior]]
        ] = None,
        tags: Optional[TagsInputType] = None,
        created_by: Union[str, CorpUserUrn],
        created_at: datetime,
        updated_by: Union[str, CorpUserUrn],
        updated_at: datetime,
        freshness_schedule_check_type: Optional[
            Union[str, models.FreshnessAssertionScheduleTypeClass]
        ] = None,
        lookback_window: Optional[TimeWindowSizeInputTypes] = None,
    ):
        """Initialise the input and validate the freshness-specific fields.

        Args:
            dataset_urn: Dataset the assertion is attached to.
            entity_client: Client forwarded to the base class.
            urn: Optional URN of the assertion.
            display_name: Optional display name.
            enabled: Whether the assertion is enabled.
            schedule: Optional evaluation schedule.
            detection_mechanism: How freshness is detected.
            incident_behavior: Optional incident behavior(s).
            tags: Optional tags.
            created_by: Creator of the assertion.
            created_at: Creation timestamp.
            updated_by: Last updater of the assertion.
            updated_at: Last update timestamp.
            freshness_schedule_check_type: Schedule check type; falls back to
                ``SINCE_THE_LAST_CHECK`` when not provided.
            lookback_window: Window size; required for ``FIXED_INTERVAL`` and
                rejected for ``SINCE_THE_LAST_CHECK``.

        Raises:
            SDKUsageError: If ``FIXED_INTERVAL`` is requested without a
                ``lookback_window``, or ``SINCE_THE_LAST_CHECK`` is requested
                together with one.
        """
        _AssertionInput.__init__(
            self,
            dataset_urn=dataset_urn,
            entity_client=entity_client,
            urn=urn,
            display_name=display_name,
            enabled=enabled,
            schedule=schedule,
            detection_mechanism=detection_mechanism,
            incident_behavior=incident_behavior,
            tags=tags,
            source_type=models.AssertionSourceTypeClass.NATIVE,  # Native assertions are of type native, not inferred
            created_by=created_by,
            created_at=created_at,
            updated_by=updated_by,
            updated_at=updated_at,
        )

        self.freshness_schedule_check_type = (
            _try_parse_and_validate_schema_classes_enum(
                freshness_schedule_check_type
                or models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK,
                models.FreshnessAssertionScheduleTypeClass,
            )
        )
        # NOTE(review): parsing is gated on truthiness while the validation
        # below tests ``is None`` on the raw argument, so a falsy non-None
        # value would be stored as None yet still count as "provided".
        # Confirm whether such inputs can occur.
        self.lookback_window = (
            _try_parse_time_window_size(lookback_window) if lookback_window else None
        )
        # Compare by value, not identity: the check type may be a plain string,
        # and ``is`` on strings depends on object interning.
        if (
            self.freshness_schedule_check_type
            == models.FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL
            and lookback_window is None
        ):
            raise SDKUsageError(
                "Fixed interval freshness assertions must have a lookback_window provided."
            )
        if (
            self.freshness_schedule_check_type
            == models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK
            and lookback_window is not None
        ):
            raise SDKUsageError(
                "Since the last check freshness assertions cannot have a lookback_window provided."
            )

    def _create_monitor_info(
        self,
        assertion_urn: AssertionUrn,
        status: models.MonitorStatusClass,
        schedule: models.CronScheduleClass,
        source_type: Union[str, models.DatasetFreshnessSourceTypeClass],
        field: Optional[FieldSpecType],
    ) -> models.MonitorInfoClass:
        """
        Create a MonitorInfoClass with all the necessary components.

        Args:
            assertion_urn: URN of the assertion the monitor evaluates.
            status: Status to set on the monitor.
            schedule: Schedule on which the assertion is evaluated.
            source_type: Freshness source type; converted to ``str`` before use.
            field: Optional field spec used by the evaluation parameters.

        Returns:
            A MonitorInfoClass of type ASSERTION holding a single evaluation spec.
        """
        return models.MonitorInfoClass(
            type=models.MonitorTypeClass.ASSERTION,
            status=status,
            assertionMonitor=models.AssertionMonitorClass(
                assertions=[
                    models.AssertionEvaluationSpecClass(
                        assertion=str(assertion_urn),
                        schedule=schedule,
                        parameters=self._get_assertion_evaluation_parameters(
                            str(source_type), field
                        ),
                    )
                ]
            ),
        )

    def _create_assertion_info(
        self, filter: Optional[models.DatasetFilterClass]
    ) -> AssertionInfoInputType:
        """
        Create a FreshnessAssertionInfoClass for a freshness assertion.

        Args:
            filter: Optional filter to apply to the assertion. Only relevant for QUERY detection mechanism.

        Returns:
            A FreshnessAssertionInfoClass configured for freshness.
        """
        schedule = self._convert_schedule()
        return models.FreshnessAssertionInfoClass(
            type=models.FreshnessAssertionTypeClass.DATASET_CHANGE,  # Currently only dataset change is supported
            entity=str(self.dataset_urn),
            schedule=models.FreshnessAssertionScheduleClass(
                type=self.freshness_schedule_check_type
                or models.FreshnessAssertionScheduleTypeClass.SINCE_THE_LAST_CHECK,
                cron=models.FreshnessCronScheduleClass(
                    cron=schedule.cron,
                    timezone=schedule.timezone,
                ),
                # The fixed interval is only populated when a lookback window was parsed.
                fixedInterval=models.FixedIntervalScheduleClass(
                    multiple=self.lookback_window.multiple,
                    unit=self.lookback_window.unit,
                )
                if self.lookback_window
                else None,
            ),
            filter=filter,
        )

    def _convert_schedule(self) -> models.CronScheduleClass:
        """Create a schedule for a freshness assertion.

        Returns:
            DEFAULT_DAILY_SCHEDULE when no schedule is set; otherwise a new
            CronScheduleClass copying the cron and timezone of ``self.schedule``.
        """
        if self.schedule is None:
            return DEFAULT_DAILY_SCHEDULE

        return models.CronScheduleClass(
            cron=self.schedule.cron,
            timezone=self.schedule.timezone,
        )

    def _get_assertion_evaluation_parameters(
        self, source_type: str, field: Optional[FieldSpecType]
    ) -> models.AssertionEvaluationParametersClass:
        """Build the DATASET_FRESHNESS evaluation parameters.

        Args:
            source_type: Freshness source type, as a string.
            field: Optional field spec; must be a FreshnessFieldSpecClass when given.

        Returns:
            An AssertionEvaluationParametersClass wrapping the freshness parameters.

        Raises:
            SDKUsageError: If ``field`` is given but is not a FreshnessFieldSpecClass.
        """
        # Ensure field is either None or FreshnessFieldSpecClass
        freshness_field = None
        if field is not None:
            if not isinstance(field, models.FreshnessFieldSpecClass):
                raise SDKUsageError(
                    f"Expected FreshnessFieldSpecClass for freshness assertion, got {type(field).__name__}"
                )
            freshness_field = field

        return models.AssertionEvaluationParametersClass(
            type=models.AssertionEvaluationParametersTypeClass.DATASET_FRESHNESS,
            datasetFreshnessParameters=models.DatasetFreshnessAssertionParametersClass(
                sourceType=source_type, field=freshness_field
            ),
        )

    def _convert_assertion_source_type_and_field(
        self,
    ) -> tuple[str, Optional[FieldSpecType]]:
        """
        Convert detection mechanism into source type and field specification for freshness assertions.

        Returns:
            A tuple of (source_type, field) where field may be None.
            Note that the source_type is a string, not a models.DatasetFreshnessSourceTypeClass (or other assertion source type) since
            the source type is not an enum in the code generated from the DatasetFreshnessSourceType enum in the PDL.

        Raises:
            SDKNotYetSupportedError: If the detection mechanism is not supported.
            SDKUsageError: If the field (column) is not found in the dataset,
                and the detection mechanism requires a field. Also if the field
                is not an allowed type for the detection mechanism, or if a high
                watermark column is combined with a fixed interval schedule.
        """
        source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
        field = None

        if isinstance(self.detection_mechanism, _InformationSchema):
            source_type = models.DatasetFreshnessSourceTypeClass.INFORMATION_SCHEMA
        elif isinstance(self.detection_mechanism, _DataHubOperation):
            source_type = models.DatasetFreshnessSourceTypeClass.DATAHUB_OPERATION
        elif isinstance(self.detection_mechanism, _AuditLog):
            source_type = models.DatasetFreshnessSourceTypeClass.AUDIT_LOG
        elif isinstance(self.detection_mechanism, _LastModifiedColumn):
            source_type = models.DatasetFreshnessSourceTypeClass.FIELD_VALUE
            field = self._create_field_spec(
                self.detection_mechanism.column_name,
                LAST_MODIFIED_ALLOWED_FIELD_TYPES,
                "last modified column",
                models.FreshnessFieldKindClass.LAST_MODIFIED,
                self._get_schema_field_spec,
                self._validate_field_type,
            )
        elif isinstance(self.detection_mechanism, _HighWatermarkColumn):
            # Value comparison for the same reason as in __init__: the check
            # type may be a plain string, so identity is not a reliable test.
            if (
                self.freshness_schedule_check_type
                == models.FreshnessAssertionScheduleTypeClass.FIXED_INTERVAL
            ):
                raise SDKUsageError(
                    "Fixed interval freshness assertions cannot have a high watermark column provided."
                )
            source_type = models.DatasetFreshnessSourceTypeClass.FIELD_VALUE
            field = self._create_field_spec(
                self.detection_mechanism.column_name,
                HIGH_WATERMARK_ALLOWED_FIELD_TYPES,
                "high watermark column",
                models.FreshnessFieldKindClass.HIGH_WATERMARK,
                self._get_schema_field_spec,
                self._validate_field_type,
            )
        else:
            raise SDKNotYetSupportedError(
                f"Detection mechanism {self.detection_mechanism} not yet supported for freshness assertions"
            )

        return source_type, field