databricks-sdk 0.67.0__py3-none-any.whl → 0.69.0__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only and reflects the changes between the two published versions.
- databricks/sdk/__init__.py +14 -10
- databricks/sdk/_base_client.py +4 -1
- databricks/sdk/common/lro.py +17 -0
- databricks/sdk/common/types/__init__.py +0 -0
- databricks/sdk/common/types/fieldmask.py +39 -0
- databricks/sdk/config.py +62 -14
- databricks/sdk/credentials_provider.py +61 -12
- databricks/sdk/dbutils.py +5 -1
- databricks/sdk/errors/parser.py +8 -3
- databricks/sdk/mixins/files.py +1156 -111
- databricks/sdk/mixins/files_utils.py +293 -0
- databricks/sdk/oidc_token_supplier.py +80 -0
- databricks/sdk/retries.py +102 -2
- databricks/sdk/service/_internal.py +93 -1
- databricks/sdk/service/agentbricks.py +1 -1
- databricks/sdk/service/apps.py +264 -1
- databricks/sdk/service/billing.py +2 -3
- databricks/sdk/service/catalog.py +1026 -540
- databricks/sdk/service/cleanrooms.py +3 -3
- databricks/sdk/service/compute.py +21 -33
- databricks/sdk/service/dashboards.py +7 -3
- databricks/sdk/service/database.py +3 -2
- databricks/sdk/service/dataquality.py +1145 -0
- databricks/sdk/service/files.py +2 -1
- databricks/sdk/service/iam.py +2 -1
- databricks/sdk/service/iamv2.py +1 -1
- databricks/sdk/service/jobs.py +6 -9
- databricks/sdk/service/marketplace.py +3 -1
- databricks/sdk/service/ml.py +3 -1
- databricks/sdk/service/oauth2.py +1 -1
- databricks/sdk/service/pipelines.py +5 -6
- databricks/sdk/service/provisioning.py +544 -655
- databricks/sdk/service/qualitymonitorv2.py +1 -1
- databricks/sdk/service/serving.py +3 -1
- databricks/sdk/service/settings.py +5 -2
- databricks/sdk/service/settingsv2.py +1 -1
- databricks/sdk/service/sharing.py +12 -3
- databricks/sdk/service/sql.py +305 -70
- databricks/sdk/service/tags.py +1 -1
- databricks/sdk/service/vectorsearch.py +3 -1
- databricks/sdk/service/workspace.py +70 -17
- databricks/sdk/version.py +1 -1
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/METADATA +4 -2
- databricks_sdk-0.69.0.dist-info/RECORD +84 -0
- databricks_sdk-0.67.0.dist-info/RECORD +0 -79
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/WHEEL +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/LICENSE +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/licenses/NOTICE +0 -0
- {databricks_sdk-0.67.0.dist-info → databricks_sdk-0.69.0.dist-info}/top_level.txt +0 -0
databricks/sdk/service/dataquality.py (new file)
@@ -0,0 +1,1145 @@
# Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT.

from __future__ import annotations

import logging
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, Iterator, List, Optional

from databricks.sdk.service._internal import (_enum, _from_dict,
                                              _repeated_dict, _repeated_enum)

_LOG = logging.getLogger("databricks.sdk")


# all definitions in this file are in alphabetical order


class AggregationGranularity(Enum):
    """The granularity for aggregating data into time windows based on their timestamp."""

    AGGREGATION_GRANULARITY_1_DAY = "AGGREGATION_GRANULARITY_1_DAY"
    AGGREGATION_GRANULARITY_1_HOUR = "AGGREGATION_GRANULARITY_1_HOUR"
    AGGREGATION_GRANULARITY_1_MONTH = "AGGREGATION_GRANULARITY_1_MONTH"
    AGGREGATION_GRANULARITY_1_WEEK = "AGGREGATION_GRANULARITY_1_WEEK"
    AGGREGATION_GRANULARITY_1_YEAR = "AGGREGATION_GRANULARITY_1_YEAR"
    AGGREGATION_GRANULARITY_2_WEEKS = "AGGREGATION_GRANULARITY_2_WEEKS"
    AGGREGATION_GRANULARITY_30_MINUTES = "AGGREGATION_GRANULARITY_30_MINUTES"
    AGGREGATION_GRANULARITY_3_WEEKS = "AGGREGATION_GRANULARITY_3_WEEKS"
    AGGREGATION_GRANULARITY_4_WEEKS = "AGGREGATION_GRANULARITY_4_WEEKS"
    AGGREGATION_GRANULARITY_5_MINUTES = "AGGREGATION_GRANULARITY_5_MINUTES"


@dataclass
class AnomalyDetectionConfig:
    """Anomaly Detection Configurations."""

    def as_dict(self) -> dict:
        """Serializes the AnomalyDetectionConfig into a dictionary suitable for use as a JSON request body."""
        body = {}
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the AnomalyDetectionConfig into a shallow dictionary of its immediate attributes."""
        body = {}
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> AnomalyDetectionConfig:
        """Deserializes the AnomalyDetectionConfig from a dictionary."""
        return cls()


@dataclass
class CancelRefreshResponse:
    """Response to cancelling a refresh."""

    refresh: Optional[Refresh] = None
    """The refresh to cancel."""

    def as_dict(self) -> dict:
        """Serializes the CancelRefreshResponse into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.refresh:
            body["refresh"] = self.refresh.as_dict()
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the CancelRefreshResponse into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.refresh:
            body["refresh"] = self.refresh
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> CancelRefreshResponse:
        """Deserializes the CancelRefreshResponse from a dictionary."""
        return cls(refresh=_from_dict(d, "refresh", Refresh))


@dataclass
class CronSchedule:
    """The data quality monitoring workflow cron schedule."""

    quartz_cron_expression: str
    """The expression that determines when to run the monitor. See [examples].

    [examples]: https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html"""

    timezone_id: str
    """A Java timezone id. The schedule for a job will be resolved with respect to this timezone. See
    `Java TimeZone <http://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html>`_ for details.
    The timezone id (e.g., ``America/Los_Angeles``) in which to evaluate the quartz expression."""

    pause_status: Optional[CronSchedulePauseStatus] = None
    """Read only field that indicates whether the schedule is paused or not."""

    def as_dict(self) -> dict:
        """Serializes the CronSchedule into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.pause_status is not None:
            body["pause_status"] = self.pause_status.value
        if self.quartz_cron_expression is not None:
            body["quartz_cron_expression"] = self.quartz_cron_expression
        if self.timezone_id is not None:
            body["timezone_id"] = self.timezone_id
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the CronSchedule into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.pause_status is not None:
            body["pause_status"] = self.pause_status
        if self.quartz_cron_expression is not None:
            body["quartz_cron_expression"] = self.quartz_cron_expression
        if self.timezone_id is not None:
            body["timezone_id"] = self.timezone_id
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> CronSchedule:
        """Deserializes the CronSchedule from a dictionary."""
        return cls(
            pause_status=_enum(d, "pause_status", CronSchedulePauseStatus),
            quartz_cron_expression=d.get("quartz_cron_expression", None),
            timezone_id=d.get("timezone_id", None),
        )


class CronSchedulePauseStatus(Enum):
    """The data quality monitoring workflow cron schedule pause status."""

    CRON_SCHEDULE_PAUSE_STATUS_PAUSED = "CRON_SCHEDULE_PAUSE_STATUS_PAUSED"
    CRON_SCHEDULE_PAUSE_STATUS_UNPAUSED = "CRON_SCHEDULE_PAUSE_STATUS_UNPAUSED"


@dataclass
class DataProfilingConfig:
    """Data Profiling Configurations."""

    output_schema_id: str
    """ID of the schema where output tables are created."""

    assets_dir: Optional[str] = None
    """Field for specifying the absolute path to a custom directory to store data-monitoring assets.
    Normally prepopulated to a default user location via UI and Python APIs."""

    baseline_table_name: Optional[str] = None
    """Baseline table name. Baseline data is used to compute drift from the data in the monitored
    `table_name`. The baseline table and the monitored table shall have the same schema."""

    custom_metrics: Optional[List[DataProfilingCustomMetric]] = None
    """Custom metrics."""

    dashboard_id: Optional[str] = None
    """Id of dashboard that visualizes the computed metrics. This can be empty if the monitor is in
    PENDING state."""

    drift_metrics_table_name: Optional[str] = None
    """Table that stores drift metrics data. Format: `catalog.schema.table_name`."""

    effective_warehouse_id: Optional[str] = None
    """The warehouse for dashboard creation"""

    inference_log: Optional[InferenceLogConfig] = None
    """Configuration for monitoring inference log tables."""

    latest_monitor_failure_message: Optional[str] = None
    """The latest error message for a monitor failure."""

    monitor_version: Optional[int] = None
    """Represents the current monitor configuration version in use. The version will be represented in
    a numeric fashion (1,2,3...). The field has flexibility to take on negative values, which can
    indicate corrupted monitor_version numbers."""

    monitored_table_name: Optional[str] = None
    """Unity Catalog table to monitor. Format: `catalog.schema.table_name`"""

    notification_settings: Optional[NotificationSettings] = None
    """Field for specifying notification settings."""

    profile_metrics_table_name: Optional[str] = None
    """Table that stores profile metrics data. Format: `catalog.schema.table_name`."""

    schedule: Optional[CronSchedule] = None
    """The cron schedule."""

    skip_builtin_dashboard: Optional[bool] = None
    """Whether to skip creating a default dashboard summarizing data quality metrics."""

    slicing_exprs: Optional[List[str]] = None
    """List of column expressions to slice data with for targeted analysis. The data is grouped by each
    expression independently, resulting in a separate slice for each predicate and its complements.
    For example `slicing_exprs=[“col_1”, “col_2 > 10”]` will generate the following slices:
    two slices for `col_2 > 10` (True and False), and one slice per unique value in `col1`. For
    high-cardinality columns, only the top 100 unique values by frequency will generate slices."""

    snapshot: Optional[SnapshotConfig] = None
    """Configuration for monitoring snapshot tables."""

    status: Optional[DataProfilingStatus] = None
    """The data profiling monitor status."""

    time_series: Optional[TimeSeriesConfig] = None
    """Configuration for monitoring time series tables."""

    warehouse_id: Optional[str] = None
    """Optional argument to specify the warehouse for dashboard creation. If not specified, the first
    running warehouse will be used."""

    def as_dict(self) -> dict:
        """Serializes the DataProfilingConfig into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.assets_dir is not None:
            body["assets_dir"] = self.assets_dir
        if self.baseline_table_name is not None:
            body["baseline_table_name"] = self.baseline_table_name
        if self.custom_metrics:
            body["custom_metrics"] = [v.as_dict() for v in self.custom_metrics]
        if self.dashboard_id is not None:
            body["dashboard_id"] = self.dashboard_id
        if self.drift_metrics_table_name is not None:
            body["drift_metrics_table_name"] = self.drift_metrics_table_name
        if self.effective_warehouse_id is not None:
            body["effective_warehouse_id"] = self.effective_warehouse_id
        if self.inference_log:
            body["inference_log"] = self.inference_log.as_dict()
        if self.latest_monitor_failure_message is not None:
            body["latest_monitor_failure_message"] = self.latest_monitor_failure_message
        if self.monitor_version is not None:
            body["monitor_version"] = self.monitor_version
        if self.monitored_table_name is not None:
            body["monitored_table_name"] = self.monitored_table_name
        if self.notification_settings:
            body["notification_settings"] = self.notification_settings.as_dict()
        if self.output_schema_id is not None:
            body["output_schema_id"] = self.output_schema_id
        if self.profile_metrics_table_name is not None:
            body["profile_metrics_table_name"] = self.profile_metrics_table_name
        if self.schedule:
            body["schedule"] = self.schedule.as_dict()
        if self.skip_builtin_dashboard is not None:
            body["skip_builtin_dashboard"] = self.skip_builtin_dashboard
        if self.slicing_exprs:
            body["slicing_exprs"] = [v for v in self.slicing_exprs]
        if self.snapshot:
            body["snapshot"] = self.snapshot.as_dict()
        if self.status is not None:
            body["status"] = self.status.value
        if self.time_series:
            body["time_series"] = self.time_series.as_dict()
        if self.warehouse_id is not None:
            body["warehouse_id"] = self.warehouse_id
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the DataProfilingConfig into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.assets_dir is not None:
            body["assets_dir"] = self.assets_dir
        if self.baseline_table_name is not None:
            body["baseline_table_name"] = self.baseline_table_name
        if self.custom_metrics:
            body["custom_metrics"] = self.custom_metrics
        if self.dashboard_id is not None:
            body["dashboard_id"] = self.dashboard_id
        if self.drift_metrics_table_name is not None:
            body["drift_metrics_table_name"] = self.drift_metrics_table_name
        if self.effective_warehouse_id is not None:
            body["effective_warehouse_id"] = self.effective_warehouse_id
        if self.inference_log:
            body["inference_log"] = self.inference_log
        if self.latest_monitor_failure_message is not None:
            body["latest_monitor_failure_message"] = self.latest_monitor_failure_message
        if self.monitor_version is not None:
            body["monitor_version"] = self.monitor_version
        if self.monitored_table_name is not None:
            body["monitored_table_name"] = self.monitored_table_name
        if self.notification_settings:
            body["notification_settings"] = self.notification_settings
        if self.output_schema_id is not None:
            body["output_schema_id"] = self.output_schema_id
        if self.profile_metrics_table_name is not None:
            body["profile_metrics_table_name"] = self.profile_metrics_table_name
        if self.schedule:
            body["schedule"] = self.schedule
        if self.skip_builtin_dashboard is not None:
            body["skip_builtin_dashboard"] = self.skip_builtin_dashboard
        if self.slicing_exprs:
            body["slicing_exprs"] = self.slicing_exprs
        if self.snapshot:
            body["snapshot"] = self.snapshot
        if self.status is not None:
            body["status"] = self.status
        if self.time_series:
            body["time_series"] = self.time_series
        if self.warehouse_id is not None:
            body["warehouse_id"] = self.warehouse_id
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> DataProfilingConfig:
        """Deserializes the DataProfilingConfig from a dictionary."""
        return cls(
            assets_dir=d.get("assets_dir", None),
            baseline_table_name=d.get("baseline_table_name", None),
            custom_metrics=_repeated_dict(d, "custom_metrics", DataProfilingCustomMetric),
            dashboard_id=d.get("dashboard_id", None),
            drift_metrics_table_name=d.get("drift_metrics_table_name", None),
            effective_warehouse_id=d.get("effective_warehouse_id", None),
            inference_log=_from_dict(d, "inference_log", InferenceLogConfig),
            latest_monitor_failure_message=d.get("latest_monitor_failure_message", None),
            monitor_version=d.get("monitor_version", None),
            monitored_table_name=d.get("monitored_table_name", None),
            notification_settings=_from_dict(d, "notification_settings", NotificationSettings),
            output_schema_id=d.get("output_schema_id", None),
            profile_metrics_table_name=d.get("profile_metrics_table_name", None),
            schedule=_from_dict(d, "schedule", CronSchedule),
            skip_builtin_dashboard=d.get("skip_builtin_dashboard", None),
            slicing_exprs=d.get("slicing_exprs", None),
            snapshot=_from_dict(d, "snapshot", SnapshotConfig),
            status=_enum(d, "status", DataProfilingStatus),
            time_series=_from_dict(d, "time_series", TimeSeriesConfig),
            warehouse_id=d.get("warehouse_id", None),
        )


@dataclass
class DataProfilingCustomMetric:
    """Custom metric definition."""

    name: str
    """Name of the metric in the output tables."""

    definition: str
    """Jinja template for a SQL expression that specifies how to compute the metric. See [create metric
    definition].

    [create metric definition]: https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition"""

    input_columns: List[str]
    """A list of column names in the input table the metric should be computed for. Can use
    ``":table"`` to indicate that the metric needs information from multiple columns."""

    output_data_type: str
    """The output type of the custom metric."""

    type: DataProfilingCustomMetricType
    """The type of the custom metric."""

    def as_dict(self) -> dict:
        """Serializes the DataProfilingCustomMetric into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.definition is not None:
            body["definition"] = self.definition
        if self.input_columns:
            body["input_columns"] = [v for v in self.input_columns]
        if self.name is not None:
            body["name"] = self.name
        if self.output_data_type is not None:
            body["output_data_type"] = self.output_data_type
        if self.type is not None:
            body["type"] = self.type.value
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the DataProfilingCustomMetric into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.definition is not None:
            body["definition"] = self.definition
        if self.input_columns:
            body["input_columns"] = self.input_columns
        if self.name is not None:
            body["name"] = self.name
        if self.output_data_type is not None:
            body["output_data_type"] = self.output_data_type
        if self.type is not None:
            body["type"] = self.type
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> DataProfilingCustomMetric:
        """Deserializes the DataProfilingCustomMetric from a dictionary."""
        return cls(
            definition=d.get("definition", None),
            input_columns=d.get("input_columns", None),
            name=d.get("name", None),
            output_data_type=d.get("output_data_type", None),
            type=_enum(d, "type", DataProfilingCustomMetricType),
        )


class DataProfilingCustomMetricType(Enum):
    """The custom metric type."""

    DATA_PROFILING_CUSTOM_METRIC_TYPE_AGGREGATE = "DATA_PROFILING_CUSTOM_METRIC_TYPE_AGGREGATE"
    DATA_PROFILING_CUSTOM_METRIC_TYPE_DERIVED = "DATA_PROFILING_CUSTOM_METRIC_TYPE_DERIVED"
    DATA_PROFILING_CUSTOM_METRIC_TYPE_DRIFT = "DATA_PROFILING_CUSTOM_METRIC_TYPE_DRIFT"


class DataProfilingStatus(Enum):
    """The status of the data profiling monitor."""

    DATA_PROFILING_STATUS_ACTIVE = "DATA_PROFILING_STATUS_ACTIVE"
    DATA_PROFILING_STATUS_DELETE_PENDING = "DATA_PROFILING_STATUS_DELETE_PENDING"
    DATA_PROFILING_STATUS_ERROR = "DATA_PROFILING_STATUS_ERROR"
    DATA_PROFILING_STATUS_FAILED = "DATA_PROFILING_STATUS_FAILED"
    DATA_PROFILING_STATUS_PENDING = "DATA_PROFILING_STATUS_PENDING"


@dataclass
class InferenceLogConfig:
    """Inference log configuration."""

    problem_type: InferenceProblemType
    """Problem type the model aims to solve."""

    timestamp_column: str
    """Column for the timestamp."""

    granularities: List[AggregationGranularity]
    """List of granularities to use when aggregating data into time windows based on their timestamp."""

    prediction_column: str
    """Column for the prediction."""

    model_id_column: str
    """Column for the model identifier."""

    label_column: Optional[str] = None
    """Column for the label."""

    def as_dict(self) -> dict:
        """Serializes the InferenceLogConfig into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.granularities:
            body["granularities"] = [v.value for v in self.granularities]
        if self.label_column is not None:
            body["label_column"] = self.label_column
        if self.model_id_column is not None:
            body["model_id_column"] = self.model_id_column
        if self.prediction_column is not None:
            body["prediction_column"] = self.prediction_column
        if self.problem_type is not None:
            body["problem_type"] = self.problem_type.value
        if self.timestamp_column is not None:
            body["timestamp_column"] = self.timestamp_column
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the InferenceLogConfig into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.granularities:
            body["granularities"] = self.granularities
        if self.label_column is not None:
            body["label_column"] = self.label_column
        if self.model_id_column is not None:
            body["model_id_column"] = self.model_id_column
        if self.prediction_column is not None:
            body["prediction_column"] = self.prediction_column
        if self.problem_type is not None:
            body["problem_type"] = self.problem_type
        if self.timestamp_column is not None:
            body["timestamp_column"] = self.timestamp_column
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> InferenceLogConfig:
        """Deserializes the InferenceLogConfig from a dictionary."""
        return cls(
            granularities=_repeated_enum(d, "granularities", AggregationGranularity),
            label_column=d.get("label_column", None),
            model_id_column=d.get("model_id_column", None),
            prediction_column=d.get("prediction_column", None),
            problem_type=_enum(d, "problem_type", InferenceProblemType),
            timestamp_column=d.get("timestamp_column", None),
        )


class InferenceProblemType(Enum):
    """Inference problem type the model aims to solve."""

    INFERENCE_PROBLEM_TYPE_CLASSIFICATION = "INFERENCE_PROBLEM_TYPE_CLASSIFICATION"
    INFERENCE_PROBLEM_TYPE_REGRESSION = "INFERENCE_PROBLEM_TYPE_REGRESSION"


@dataclass
class ListMonitorResponse:
    """Response for listing Monitors."""

    monitors: Optional[List[Monitor]] = None

    next_page_token: Optional[str] = None

    def as_dict(self) -> dict:
        """Serializes the ListMonitorResponse into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.monitors:
            body["monitors"] = [v.as_dict() for v in self.monitors]
        if self.next_page_token is not None:
            body["next_page_token"] = self.next_page_token
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the ListMonitorResponse into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.monitors:
            body["monitors"] = self.monitors
        if self.next_page_token is not None:
            body["next_page_token"] = self.next_page_token
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ListMonitorResponse:
        """Deserializes the ListMonitorResponse from a dictionary."""
        return cls(monitors=_repeated_dict(d, "monitors", Monitor), next_page_token=d.get("next_page_token", None))


@dataclass
class ListRefreshResponse:
    """Response for listing refreshes."""

    next_page_token: Optional[str] = None

    refreshes: Optional[List[Refresh]] = None

    def as_dict(self) -> dict:
        """Serializes the ListRefreshResponse into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.next_page_token is not None:
            body["next_page_token"] = self.next_page_token
        if self.refreshes:
            body["refreshes"] = [v.as_dict() for v in self.refreshes]
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the ListRefreshResponse into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.next_page_token is not None:
            body["next_page_token"] = self.next_page_token
        if self.refreshes:
            body["refreshes"] = self.refreshes
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> ListRefreshResponse:
        """Deserializes the ListRefreshResponse from a dictionary."""
        return cls(next_page_token=d.get("next_page_token", None), refreshes=_repeated_dict(d, "refreshes", Refresh))


@dataclass
class Monitor:
    """Monitor for the data quality of unity catalog entities such as schema or table."""

    object_type: str
    """The type of the monitored object. Can be one of the following: `schema` or `table`."""

    object_id: str
    """The UUID of the request object. For example, schema id."""

    anomaly_detection_config: Optional[AnomalyDetectionConfig] = None
    """Anomaly Detection Configuration, applicable to `schema` object types."""

    data_profiling_config: Optional[DataProfilingConfig] = None
    """Data Profiling Configuration, applicable to `table` object types"""

    def as_dict(self) -> dict:
        """Serializes the Monitor into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.anomaly_detection_config:
            body["anomaly_detection_config"] = self.anomaly_detection_config.as_dict()
        if self.data_profiling_config:
            body["data_profiling_config"] = self.data_profiling_config.as_dict()
        if self.object_id is not None:
            body["object_id"] = self.object_id
        if self.object_type is not None:
            body["object_type"] = self.object_type
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the Monitor into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.anomaly_detection_config:
            body["anomaly_detection_config"] = self.anomaly_detection_config
        if self.data_profiling_config:
            body["data_profiling_config"] = self.data_profiling_config
        if self.object_id is not None:
            body["object_id"] = self.object_id
        if self.object_type is not None:
            body["object_type"] = self.object_type
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> Monitor:
        """Deserializes the Monitor from a dictionary."""
        return cls(
            anomaly_detection_config=_from_dict(d, "anomaly_detection_config", AnomalyDetectionConfig),
            data_profiling_config=_from_dict(d, "data_profiling_config", DataProfilingConfig),
            object_id=d.get("object_id", None),
            object_type=d.get("object_type", None),
        )


@dataclass
class NotificationDestination:
    """Destination of the data quality monitoring notification."""

    email_addresses: Optional[List[str]] = None
    """The list of email addresses to send the notification to. A maximum of 5 email addresses is
    supported."""

    def as_dict(self) -> dict:
        """Serializes the NotificationDestination into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.email_addresses:
            body["email_addresses"] = [v for v in self.email_addresses]
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the NotificationDestination into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.email_addresses:
            body["email_addresses"] = self.email_addresses
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> NotificationDestination:
        """Deserializes the NotificationDestination from a dictionary."""
        return cls(email_addresses=d.get("email_addresses", None))


@dataclass
class NotificationSettings:
    """Settings for sending notifications on the data quality monitoring."""

    on_failure: Optional[NotificationDestination] = None
    """Destinations to send notifications on failure/timeout."""

    def as_dict(self) -> dict:
        """Serializes the NotificationSettings into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.on_failure:
            body["on_failure"] = self.on_failure.as_dict()
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the NotificationSettings into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.on_failure:
            body["on_failure"] = self.on_failure
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> NotificationSettings:
        """Deserializes the NotificationSettings from a dictionary."""
        return cls(on_failure=_from_dict(d, "on_failure", NotificationDestination))


@dataclass
class Refresh:
    """The Refresh object gives information on a refresh of the data quality monitoring pipeline."""

    object_type: str
    """The type of the monitored object. Can be one of the following: `schema`or `table`."""

    object_id: str
    """The UUID of the request object. For example, table id."""

    end_time_ms: Optional[int] = None
    """Time when the refresh ended (milliseconds since 1/1/1970 UTC)."""

    message: Optional[str] = None
    """An optional message to give insight into the current state of the refresh (e.g. FAILURE
    messages)."""

    refresh_id: Optional[int] = None
    """Unique id of the refresh operation."""

    start_time_ms: Optional[int] = None
    """Time when the refresh started (milliseconds since 1/1/1970 UTC)."""

    state: Optional[RefreshState] = None
    """The current state of the refresh."""

    trigger: Optional[RefreshTrigger] = None
    """What triggered the refresh."""

    def as_dict(self) -> dict:
        """Serializes the Refresh into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.end_time_ms is not None:
            body["end_time_ms"] = self.end_time_ms
        if self.message is not None:
            body["message"] = self.message
        if self.object_id is not None:
            body["object_id"] = self.object_id
        if self.object_type is not None:
            body["object_type"] = self.object_type
        if self.refresh_id is not None:
            body["refresh_id"] = self.refresh_id
        if self.start_time_ms is not None:
            body["start_time_ms"] = self.start_time_ms
        if self.state is not None:
            body["state"] = self.state.value
        if self.trigger is not None:
            body["trigger"] = self.trigger.value
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the Refresh into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.end_time_ms is not None:
            body["end_time_ms"] = self.end_time_ms
        if self.message is not None:
            body["message"] = self.message
        if self.object_id is not None:
            body["object_id"] = self.object_id
        if self.object_type is not None:
            body["object_type"] = self.object_type
        if self.refresh_id is not None:
            body["refresh_id"] = self.refresh_id
        if self.start_time_ms is not None:
            body["start_time_ms"] = self.start_time_ms
        if self.state is not None:
            body["state"] = self.state
        if self.trigger is not None:
            body["trigger"] = self.trigger
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> Refresh:
        """Deserializes the Refresh from a dictionary."""
        return cls(
            end_time_ms=d.get("end_time_ms", None),
            message=d.get("message", None),
            object_id=d.get("object_id", None),
            object_type=d.get("object_type", None),
            refresh_id=d.get("refresh_id", None),
            start_time_ms=d.get("start_time_ms", None),
            state=_enum(d, "state", RefreshState),
            trigger=_enum(d, "trigger", RefreshTrigger),
        )


class RefreshState(Enum):
    """The state of the refresh."""

    MONITOR_REFRESH_STATE_CANCELED = "MONITOR_REFRESH_STATE_CANCELED"
    MONITOR_REFRESH_STATE_FAILED = "MONITOR_REFRESH_STATE_FAILED"
    MONITOR_REFRESH_STATE_PENDING = "MONITOR_REFRESH_STATE_PENDING"
    MONITOR_REFRESH_STATE_RUNNING = "MONITOR_REFRESH_STATE_RUNNING"
    MONITOR_REFRESH_STATE_SUCCESS = "MONITOR_REFRESH_STATE_SUCCESS"
    MONITOR_REFRESH_STATE_UNKNOWN = "MONITOR_REFRESH_STATE_UNKNOWN"


class RefreshTrigger(Enum):
    """The trigger of the refresh."""

    MONITOR_REFRESH_TRIGGER_DATA_CHANGE = "MONITOR_REFRESH_TRIGGER_DATA_CHANGE"
    MONITOR_REFRESH_TRIGGER_MANUAL = "MONITOR_REFRESH_TRIGGER_MANUAL"
    MONITOR_REFRESH_TRIGGER_SCHEDULE = "MONITOR_REFRESH_TRIGGER_SCHEDULE"
    MONITOR_REFRESH_TRIGGER_UNKNOWN = "MONITOR_REFRESH_TRIGGER_UNKNOWN"


@dataclass
class SnapshotConfig:
    """Snapshot analysis configuration."""

    def as_dict(self) -> dict:
        """Serializes the SnapshotConfig into a dictionary suitable for use as a JSON request body."""
        body = {}
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the SnapshotConfig into a shallow dictionary of its immediate attributes."""
        body = {}
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> SnapshotConfig:
        """Deserializes the SnapshotConfig from a dictionary."""
        return cls()


@dataclass
class TimeSeriesConfig:
    """Time series analysis configuration."""

    timestamp_column: str
    """Column for the timestamp."""

    granularities: List[AggregationGranularity]
    """List of granularities to use when aggregating data into time windows based on their timestamp."""

    def as_dict(self) -> dict:
        """Serializes the TimeSeriesConfig into a dictionary suitable for use as a JSON request body."""
        body = {}
        if self.granularities:
            body["granularities"] = [v.value for v in self.granularities]
        if self.timestamp_column is not None:
            body["timestamp_column"] = self.timestamp_column
        return body

    def as_shallow_dict(self) -> dict:
        """Serializes the TimeSeriesConfig into a shallow dictionary of its immediate attributes."""
        body = {}
        if self.granularities:
            body["granularities"] = self.granularities
        if self.timestamp_column is not None:
            body["timestamp_column"] = self.timestamp_column
        return body

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> TimeSeriesConfig:
        """Deserializes the TimeSeriesConfig from a dictionary."""
        return cls(
            granularities=_repeated_enum(d, "granularities", AggregationGranularity),
            timestamp_column=d.get("timestamp_column", None),
        )


class DataQualityAPI:
    """Manage the data quality of Unity Catalog objects (currently support `schema` and `table`)"""

    def __init__(self, api_client):
        self._api = api_client

    def cancel_refresh(self, object_type: str, object_id: str, refresh_id: int) -> CancelRefreshResponse:
        """Cancels a data quality monitor refresh. Currently only supported for the `table` `object_type`.

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.
        :param refresh_id: int
          Unique id of the refresh operation.

        :returns: :class:`CancelRefreshResponse`
        """

        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

        res = self._api.do(
            "POST",
            f"/api/data-quality/v1/monitors/{object_type}/{object_id}/refreshes/{refresh_id}/cancel",
            headers=headers,
        )
        return CancelRefreshResponse.from_dict(res)

    def create_monitor(self, monitor: Monitor) -> Monitor:
        """Create a data quality monitor on a Unity Catalog object. The caller must provide either
        `anomaly_detection_config` for a schema monitor or `data_profiling_config` for a table monitor.

        For the `table` `object_type`, the caller must either: 1. be an owner of the table's parent catalog,
        have **USE_SCHEMA** on the table's parent schema, and have **SELECT** access on the table 2. have
        **USE_CATALOG** on the table's parent catalog, be an owner of the table's parent schema, and have
        **SELECT** access on the table. 3. have the following permissions: - **USE_CATALOG** on the table's
        parent catalog - **USE_SCHEMA** on the table's parent schema - be an owner of the table.

        Workspace assets, such as the dashboard, will be created in the workspace where this call was made.

        :param monitor: :class:`Monitor`
          The monitor to create.

        :returns: :class:`Monitor`
        """
        body = monitor.as_dict()
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

        res = self._api.do("POST", "/api/data-quality/v1/monitors", body=body, headers=headers)
        return Monitor.from_dict(res)

    def create_refresh(self, object_type: str, object_id: str, refresh: Refresh) -> Refresh:
        """Creates a refresh. Currently only supported for the `table` `object_type`.

        The caller must either: 1. be an owner of the table's parent catalog 2. have **USE_CATALOG** on the
        table's parent catalog and be an owner of the table's parent schema 3. have the following permissions:
        - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an
        owner of the table

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema`or `table`.
        :param object_id: str
          The UUID of the request object. For example, table id.
        :param refresh: :class:`Refresh`
          The refresh to create

        :returns: :class:`Refresh`
        """
        body = refresh.as_dict()
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

        res = self._api.do(
            "POST", f"/api/data-quality/v1/monitors/{object_type}/{object_id}/refreshes", body=body, headers=headers
        )
        return Refresh.from_dict(res)

    def delete_monitor(self, object_type: str, object_id: str):
        """Delete a data quality monitor on Unity Catalog object.

        For the `table` `object_type`, the caller must either: 1. be an owner of the table's parent catalog 2.
        have **USE_CATALOG** on the table's parent catalog and be an owner of the table's parent schema 3.
        have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on
        the table's parent schema - be an owner of the table.

        Note that the metric tables and dashboard will not be deleted as part of this call; those assets must
        be manually cleaned up (if desired).

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.


        """

        headers = {
            "Accept": "application/json",
        }

        self._api.do("DELETE", f"/api/data-quality/v1/monitors/{object_type}/{object_id}", headers=headers)

    def delete_refresh(self, object_type: str, object_id: str, refresh_id: int):
        """(Unimplemented) Delete a refresh

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.
        :param refresh_id: int
          Unique id of the refresh operation.


        """

        headers = {
            "Accept": "application/json",
        }

        self._api.do(
            "DELETE", f"/api/data-quality/v1/monitors/{object_type}/{object_id}/refreshes/{refresh_id}", headers=headers
        )

    def get_monitor(self, object_type: str, object_id: str) -> Monitor:
        """Read a data quality monitor on Unity Catalog object.

        For the `table` `object_type`, the caller must either: 1. be an owner of the table's parent catalog 2.
        have **USE_CATALOG** on the table's parent catalog and be an owner of the table's parent schema. 3.
        have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on
        the table's parent schema - **SELECT** privilege on the table.

        The returned information includes configuration values, as well as information on assets created by
        the monitor. Some information (e.g., dashboard) may be filtered out if the caller is in a different
        workspace than where the monitor was created.

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.

        :returns: :class:`Monitor`
        """

        headers = {
            "Accept": "application/json",
        }

        res = self._api.do("GET", f"/api/data-quality/v1/monitors/{object_type}/{object_id}", headers=headers)
        return Monitor.from_dict(res)

    def get_refresh(self, object_type: str, object_id: str, refresh_id: int) -> Refresh:
        """Get data quality monitor refresh.

        For the `table` `object_type`, the caller must either: 1. be an owner of the table's parent catalog 2.
        have **USE_CATALOG** on the table's parent catalog and be an owner of the table's parent schema 3.
        have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on
        the table's parent schema - **SELECT** privilege on the table.

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.
        :param refresh_id: int
          Unique id of the refresh operation.

        :returns: :class:`Refresh`
        """

        headers = {
            "Accept": "application/json",
        }

        res = self._api.do(
            "GET", f"/api/data-quality/v1/monitors/{object_type}/{object_id}/refreshes/{refresh_id}", headers=headers
        )
        return Refresh.from_dict(res)

    def list_monitor(self, *, page_size: Optional[int] = None, page_token: Optional[str] = None) -> Iterator[Monitor]:
        """(Unimplemented) List data quality monitors.

        :param page_size: int (optional)
        :param page_token: str (optional)

        :returns: Iterator over :class:`Monitor`
        """

        query = {}
        if page_size is not None:
            query["page_size"] = page_size
        if page_token is not None:
            query["page_token"] = page_token
        headers = {
            "Accept": "application/json",
        }

        while True:
            json = self._api.do("GET", "/api/data-quality/v1/monitors", query=query, headers=headers)
            if "monitors" in json:
                for v in json["monitors"]:
                    yield Monitor.from_dict(v)
            if "next_page_token" not in json or not json["next_page_token"]:
                return
            query["page_token"] = json["next_page_token"]

    def list_refresh(
        self, object_type: str, object_id: str, *, page_size: Optional[int] = None, page_token: Optional[str] = None
    ) -> Iterator[Refresh]:
        """List data quality monitor refreshes.

        For the `table` `object_type`, the caller must either: 1. be an owner of the table's parent catalog 2.
        have **USE_CATALOG** on the table's parent catalog and be an owner of the table's parent schema 3.
        have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on
        the table's parent schema - **SELECT** privilege on the table.

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.
        :param page_size: int (optional)
        :param page_token: str (optional)

        :returns: Iterator over :class:`Refresh`
        """

        query = {}
        if page_size is not None:
            query["page_size"] = page_size
        if page_token is not None:
            query["page_token"] = page_token
        headers = {
            "Accept": "application/json",
        }

        while True:
            json = self._api.do(
                "GET",
                f"/api/data-quality/v1/monitors/{object_type}/{object_id}/refreshes",
                query=query,
                headers=headers,
            )
            if "refreshes" in json:
                for v in json["refreshes"]:
                    yield Refresh.from_dict(v)
            if "next_page_token" not in json or not json["next_page_token"]:
                return
            query["page_token"] = json["next_page_token"]

    def update_monitor(self, object_type: str, object_id: str, monitor: Monitor, update_mask: str) -> Monitor:
        """Update a data quality monitor on Unity Catalog object.

        For the `table` `object_type`, The caller must either: 1. be an owner of the table's parent catalog 2.
        have **USE_CATALOG** on the table's parent catalog and be an owner of the table's parent schema 3.
        have the following permissions: - **USE_CATALOG** on the table's parent catalog - **USE_SCHEMA** on
        the table's parent schema - be an owner of the table.

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.
        :param monitor: :class:`Monitor`
          The monitor to update.
        :param update_mask: str
          The field mask to specify which fields to update as a comma-separated list. Example value:
          `data_profiling_config.custom_metrics,data_profiling_config.schedule.quartz_cron_expression`

        :returns: :class:`Monitor`
        """
        body = monitor.as_dict()
        query = {}
        if update_mask is not None:
            query["update_mask"] = update_mask
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

        res = self._api.do(
            "PATCH", f"/api/data-quality/v1/monitors/{object_type}/{object_id}", query=query, body=body, headers=headers
        )
        return Monitor.from_dict(res)

    def update_refresh(
        self, object_type: str, object_id: str, refresh_id: int, refresh: Refresh, update_mask: str
    ) -> Refresh:
        """(Unimplemented) Update a refresh

        :param object_type: str
          The type of the monitored object. Can be one of the following: `schema` or `table`.
        :param object_id: str
          The UUID of the request object. For example, schema id.
        :param refresh_id: int
          Unique id of the refresh operation.
        :param refresh: :class:`Refresh`
          The refresh to update.
        :param update_mask: str
          The field mask to specify which fields to update.

        :returns: :class:`Refresh`
        """
        body = refresh.as_dict()
        query = {}
        if update_mask is not None:
            query["update_mask"] = update_mask
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
        }

        res = self._api.do(
            "PATCH",
            f"/api/data-quality/v1/monitors/{object_type}/{object_id}/refreshes/{refresh_id}",
            query=query,
            body=body,
            headers=headers,
        )
        return Refresh.from_dict(res)