dasl-client 1.0.23__tar.gz → 1.0.24__tar.gz
This diff shows the changes between the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of dasl-client might be problematic.
- {dasl_client-1.0.23 → dasl_client-1.0.24}/PKG-INFO +7 -23
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/__init__.py +1 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/client.py +240 -23
- dasl_client-1.0.24/dasl_client/exec_rule.py +92 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/helpers.py +1 -1
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/preset_development/errors.py +42 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/preset_development/preview_engine.py +106 -25
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/preset_development/preview_parameters.py +206 -94
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/preset_development/stage.py +87 -24
- dasl_client-1.0.24/dasl_client/regions.json +3 -0
- dasl_client-1.0.24/dasl_client/regions.py +18 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/datasource.py +51 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/rule.py +33 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/workspace_config.py +13 -1
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client.egg-info/PKG-INFO +7 -23
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client.egg-info/SOURCES.txt +4 -9
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client.egg-info/requires.txt +1 -1
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client.egg-info/top_level.txt +0 -1
- {dasl_client-1.0.23 → dasl_client-1.0.24}/pyproject.toml +9 -3
- dasl_client-1.0.23/README.md +0 -19
- dasl_client-1.0.23/test/__init__.py +0 -0
- dasl_client-1.0.23/test/conftest.py +0 -18
- dasl_client-1.0.23/test/constants.py +0 -10
- dasl_client-1.0.23/test/test_api_changes.py +0 -137
- dasl_client-1.0.23/test/test_api_surface.py +0 -306
- dasl_client-1.0.23/test/test_databricks_secret_auth.py +0 -119
- dasl_client-1.0.23/test/test_marshaling.py +0 -921
- {dasl_client-1.0.23 → dasl_client-1.0.24}/LICENSE +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/auth/__init__.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/auth/auth.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/conn/__init__.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/conn/client_identifier.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/conn/conn.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/errors/__init__.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/errors/errors.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/preset_development/__init__.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/__init__.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/admin_config.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/content.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/dbui.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/helpers.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/types/types.py +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client.egg-info/dependency_links.txt +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/setup.cfg +0 -0
- {dasl_client-1.0.23 → dasl_client-1.0.24}/setup.py +0 -0
{dasl_client-1.0.23 → dasl_client-1.0.24}/PKG-INFO

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: dasl_client
-Version: 1.0.23
+Version: 1.0.24
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
@@ -8,27 +8,11 @@ Author-email: Antimatter Team <support@antimatter.io>
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: dasl_api==0.1.
+Requires-Dist: dasl_api==0.1.24
 Requires-Dist: databricks-sdk>=0.41.0
 Requires-Dist: pydantic>=2
 Requires-Dist: typing_extensions>=4.10.0
-
-
-
-
-
-## Requirements
-Python:
-- wheel
-- setuptools
-- asl_api
-
-Other:
-- Earthly
-
-## Build
-
-To build manually here:
-```bash
-python setup.py sdist bdist_wheel
-```
+Dynamic: author
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-python
{dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/client.py

@@ -1,5 +1,5 @@
 from copy import deepcopy
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from time import sleep
 from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar
 from pydantic import Field
@@ -8,6 +8,14 @@ from pyspark.sql import DataFrame
 from dasl_api import (
     CoreV1Api,
     DbuiV1Api,
+    DbuiV1QueryGenerateRequest,
+    DbuiV1QueryGenerateRequestTimeRange,
+    DbuiV1QueryGenerateStatus,
+    DbuiV1QueryLookupRequest,
+    DbuiV1QueryLookupResult,
+    DbuiV1QueryHistogramRequest,
+    DbuiV1QueryHistogramResult,
+    DbuiV1QueryLookupRequestPagination,
     ContentV1Api,
     WorkspaceV1Api,
     WorkspaceV1CreateWorkspaceRequest,
@@ -22,6 +30,8 @@ from dasl_client.auth.auth import (
 from dasl_client.conn.conn import get_base_conn
 from dasl_client.errors.errors import ConflictError, error_handler
 from .helpers import Helpers
+from .exec_rule import ExecRule
+from .regions import Regions
 
 from .types import (
     AdminConfig,
@@ -67,7 +77,8 @@ class Client:
         service_principal_id: str,
         service_principal_secret: str,
         workspace_url: Optional[str] = None,
-
+        region: str = Helpers.default_region,
+        dasl_host: Optional[str] = None,
     ) -> "Client":
         """
         Register a new workspace and return a client for it.
@@ -85,11 +96,15 @@ class Client:
             being registered. If you omit this value, it will be inferred
             if you are running within a Databricks notebook. Otherwise, an
             exception will be raised.
+        :param region: The name of the DASL region.
         :param dasl_host: The URL of the DASL server. This value should
-            not generally be specified
-
+            not generally be specified. When specified, this value
+            overrides region.
         :returns: Client for the newly created workspace.
         """
+        if dasl_host is None:
+            dasl_host = Regions.lookup(region)
+
         with error_handler():
             if workspace_url is None:
                 workspace_url = Helpers.current_workspace_url()
@@ -120,7 +135,8 @@ class Client:
     def for_workspace(
         workspace_url: Optional[str] = None,
         service_account_token: Optional[str] = None,
-
+        region: str = Helpers.default_region,
+        dasl_host: Optional[str] = None,
     ) -> "Client":
         """
         Create a client for the argument workspace, if specified, or
@@ -133,11 +149,15 @@ class Client:
         :param service_account_token: Antimatter service account token.
             If provided, the client will use this token for auth instead
             of (automatic) secret-based auth.
+        :param region: The name of the DASL region.
         :param dasl_host: The URL of the DASL server. This value should
-            not generally be specified
-
+            not generally be specified. When specified, this value
+            overrides region.
         :returns: Client for the existing workspace.
         """
+        if dasl_host is None:
+            dasl_host = Regions.lookup(region)
+
         with error_handler():
             if workspace_url is None:
                 workspace_url = Helpers.current_workspace_url()
@@ -166,7 +186,8 @@ class Client:
         service_principal_secret: str,
         workspace_url: Optional[str] = None,
         service_account_token: Optional[str] = None,
-
+        region: str = Helpers.default_region,
+        dasl_host: Optional[str] = None,
     ) -> "Client":
         """
         Initialize a new client for the workspace associated with the
@@ -197,9 +218,10 @@ class Client:
             If provided, the client will use this token for auth instead
             of (automatic) secret-based auth. Ignored if the workspace
             doesn't exist.
+        :param region: The name of the DASL region.
         :param dasl_host: The URL of the DASL server. This value should
-            not generally be specified
-
+            not generally be specified. When specified, this value
+            overrides region.
         :returns: Client for the newly created or existing workspace.
         """
         try:
@@ -209,11 +231,12 @@ class Client:
                 service_principal_id,
                 service_principal_secret,
                 workspace_url,
+                region,
                 dasl_host,
             )
         except ConflictError:
             result = Client.for_workspace(
-                workspace_url, service_account_token, dasl_host
+                workspace_url, service_account_token, region, dasl_host
             )
             result.put_admin_config(
                 AdminConfig(
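The region plumbing above means callers can now omit dasl_host entirely and let the client resolve it from regions.json via Regions.lookup. A minimal sketch of how a caller might pick this up, assuming Client is importable from dasl_client.client, the call runs inside a Databricks notebook (so workspace_url can be inferred), and "my-region" stands in for a real region name:

    from dasl_client.client import Client

    # Resolve the DASL host from the region (hypothetical region name).
    client = Client.for_workspace(region="my-region")

    # An explicit dasl_host still wins over whatever the region resolves to.
    client = Client.for_workspace(dasl_host="https://dasl.example.com")

Leaving region unset falls back to Helpers.default_region, so existing call sites keep working unchanged.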
@@ -572,21 +595,27 @@ class Client:
         return Rule.from_api_obj(result)
 
     def exec_rule(
-        self,
-
+        self,
+        spark,
+        rule_in: Rule,
+    ) -> ExecRule:
         """
         Locally execute a Rule. Must be run from within a Databricks
         notebook or else an exception will be raised. This is intended
         to facilitate Rule development.
 
+        :param spark: Spark context from Databricks notebook. Will be
+            injected into the execution environment for use by the
+            Rule notebook.
         :param rule_in: The specification of the Rule to execute.
-        :
-
-
-
-
+        :returns ExecRule: A class containing various information and
+            functionality relating to the execution. See the docs for
+            ExecRule for additional details, but note that you must
+            call its cleanup function or tables created just for this
+            request will leak.
         """
         Helpers.ensure_databricks()
+
         with error_handler():
             result = self._core_client().core_v1_render_rule(
                 self._workspace(),
@@ -602,9 +631,8 @@ class Client:
                 f"%pip install {result.notebook_utils_path}"
             )
 
-
-
-            return namespace["generate"](df)
+            exec(result.content, {"spark": spark})
+            return ExecRule(spark, result.tables)
 
     def adhoc_transform(
         self,
@@ -637,9 +665,9 @@ class Client:
                 self._workspace(), status.id
             )
 
-            if status.status == "
+            if status.status == "failed":
                 raise Exception(f"adhoc transform failed with {status.error}")
-            elif status.status == "
+            elif status.status == "succeeded":
                 return TransformResponse.from_api_obj(status.result)
 
         raise Exception("timed out waiting for adhoc transform result")
@@ -711,3 +739,192 @@ class Client:
         """
         with error_handler():
             self._content_client().content_v1_preset_purge_cache(self._workspace())
+
+    def generate_query(
+        self,
+        sql: str,
+        warehouse: Optional[str] = None,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
+    ) -> str:
+        """
+        Generate a query from the given SQL.
+
+        :param sql: The SQL to use to create the query data set.
+        :param warehouse: The SQL warehouse used to execute the SQL. If
+            omitted, the default SQL warehouse specified in the workspace
+            config will be used.
+        :param start_date: The optional starting date to filter by for
+            the provided sql used to create the data set. Only rows with
+            their time column (see the time_col parameter) greater than
+            or equal to this value will be included in the data set. You
+            must specify a value for this parameter if you wish to filter
+            by time. Valid values include actual timestamps and computed
+            timestamps (such as now()).
+        :param end_date: The optional ending date to filter by for the
+            provided sql used to create the data set. The same caveats
+            apply as with the start_date parameter. However, this parameter
+            is not required and if omitted when a start_date is provided,
+            the current date will be used.
+        :returns str: The ID of the query generation operation. This value
+            can be used with get_query_status to track the progress of
+            the generation process, and eventually to perform lookups
+            on the completed query.
+        """
+        time_range = None
+        if start_date is not None or end_date is not None:
+            time_range = DbuiV1QueryGenerateRequestTimeRange(
+                startDate=start_date,
+                endDate=end_date,
+            )
+
+        req = DbuiV1QueryGenerateRequest(
+            warehouse=warehouse,
+            sql=sql,
+            timeRange=time_range,
+        )
+
+        with error_handler():
+            return (
+                self._dbui_client()
+                .dbui_v1_query_generate(
+                    self._workspace(),
+                    req,
+                )
+                .id
+            )
+
+    def get_query_status(
+        self,
+        id: str,
+    ) -> DbuiV1QueryGenerateStatus:
+        """
+        Check the status of a query generation operation. Since generation
+        happens in the background, it is up to the caller to check the
+        status until the return value's status member is either equal to
+        "succeeded" or "failed".
+
+        :param id: The id of the query generation operation.
+        :returns DbuiV1QueryGenerateStatus: The important field is
+            status (as used in the example code).
+
+        The following example demonstrates usage of the API.
+
+        Example:
+            id = client.generate_query("SELECT now() as time")
+            result = None
+            while True:
+                time.sleep(3)
+                status = client.get_query_status(id)
+                if status.status == "failed":
+                    raise Exception("query failed")
+                if status.status == "succeeded":
+                    break
+        """
+        with error_handler():
+            return self._dbui_client().dbui_v1_query_generate_status(
+                self._workspace(),
+                id,
+            )
+
+    def query_lookup(
+        self,
+        id: str,
+        warehouse: Optional[str] = None,
+        pagination: Optional[DbuiV1QueryLookupRequestPagination] = None,
+        start_value: Optional[str] = None,
+        row_count: Optional[int] = None,
+        refinements: Optional[List[str]] = None,
+    ) -> DbuiV1QueryLookupResult:
+        """
+        Perform a lookup on a query, which applies refinements to the
+        query and returns the results.
+
+        :param id: The query ID returned from query_generate and
+            get_query_status.
+        :param warehouse: The optional SQL warehouse ID to use to compute
+            the results. If not specified, uses the default SQL warehouse
+            configured for the workspace.
+        :param pagination: A sequence of fields and a direction that can
+            be applied to a lookup request. If 'fetchPreceding' is true,
+            the prior n rows up to the first row that matches the provided
+            fields will be returned. Otherwise, the n rows following the
+            first row that matches the provided fields will be returned.
+        :param start_value: An optional start value to constrain the data
+            being returned. This will be applied to the primary ordering
+            column if provided, before any refinements.
+        :param row_count: The maximum number of rows to include in a page.
+            Defaults to 1000, and must be in the range [1,1000].
+        :param refinements: Pipeline filters to be applied to the result.
+            Any SQL which is valid as a pipeline stage (i.e. coming between
+            |> symbols) is valid here, such as ORDER BY id, or WHERE
+            column = 'value'.
+        """
+        with error_handler():
+            return self._dbui_client().dbui_v1_query_lookup(
+                self._workspace(),
+                id,
+                DbuiV1QueryLookupRequest(
+                    warehouse=warehouse,
+                    startValue=start_value,
+                    pagination=pagination,
+                    rowCount=row_count,
+                    refinements=refinements,
+                ),
+            )
+
+    def query_histogram(
+        self,
+        id: str,
+        interval: str,
+        warehouse: Optional[str] = None,
+        start_date: str = None,
+        end_date: Optional[str] = None,
+        refinements: Optional[List[str]] = None,
+    ) -> DbuiV1QueryHistogramResult:
+        """
+        Compute a histogram over a query, applying refinements and
+        bucketing rows by the given time interval.
+
+        :param id: The query ID returned from query_generate and
+            get_query_status.
+        :param warehouse: The optional SQL warehouse ID to use to compute
+            the results. If not specified, uses the default SQL warehouse
+            configured for the workspace.
+        :param start_date: The start date filter. The resulting frequency
+            map will be restricted to rows where the time column value
+            is greater than or equal to this value. Valid values include
+            literal timestamps and function calls such as now().
+        :param end_date: The optional end date filter. If specified, the
+            resulting frequency map will contain only rows where the time
+            column value is less than or equal to this value.
+        :param interval: The duration of each interval in the resulting
+            frequency map. This must be an interval string in the format:
+            '1 day', '3 minutes 2 seconds', '2 weeks'.
+        :param refinements: Pipeline filters to be applied to the result.
+            Any SQL which is valid as a pipeline stage (i.e. coming between
+            |> symbols) is valid here, such as ORDER BY id, or WHERE
+            column = 'value'.
+        """
+        with error_handler():
+            return self._dbui_client().dbui_v1_query_histogram(
+                self._workspace(),
+                id,
+                DbuiV1QueryHistogramRequest(
+                    warehouse=warehouse,
+                    startDate=start_date,
+                    endDate=end_date,
+                    interval=interval,
+                    refinements=refinements,
+                ),
+            )
+
+    def query_cancel(self, id: str) -> None:
+        """
+        Cancel an existing query.
+
+        :param id: The query ID returned from query_generate and
+            get_query_status.
+        """
+        with error_handler():
+            return self._dbui_client().dbui_v1_query_cancel(self._workspace(), id)
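Taken together, generate_query, get_query_status, query_lookup, query_histogram, and query_cancel form a generate, poll, then inspect workflow. A sketch of that flow based on the docstrings above; the SQL, dates, interval, column names, and refinement strings are illustrative only, and client is assumed to be an already-constructed Client:

    import time

    # Start background generation of the query data set.
    query_id = client.generate_query(
        "SELECT * FROM main.security.events",  # illustrative SQL
        start_date="2025-01-01",               # illustrative literal timestamp
        end_date="now()",
    )

    # Poll until generation succeeds or fails.
    while True:
        status = client.get_query_status(query_id)
        if status.status == "failed":
            raise Exception("query generation failed")
        if status.status == "succeeded":
            break
        time.sleep(3)

    # Page through results with pipe-syntax refinements, then bucket by hour.
    page = client.query_lookup(query_id, row_count=100, refinements=["ORDER BY time"])
    histogram = client.query_histogram(query_id, interval="1 hour", start_date="2025-01-01")

    # A generation that is no longer needed can be abandoned early:
    # client.query_cancel(query_id)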
dasl_client-1.0.24/dasl_client/exec_rule.py

@@ -0,0 +1,92 @@
+from pyspark.sql import DataFrame
+from typing import List, Optional
+
+from dasl_api import CoreV1RenderedRuleTables
+
+
+class ExecRule:
+    """
+    ExecRule result object allowing access to and clean up of tables
+    created as part of the rule rendering endpoint. While the table
+    names are exposed as attributes, there are helper functions for
+    fetching the contents of the most common tables (i.e. notables
+    and observables). In general, it won't be necessary to access
+    these attributes. Note that you must call the cleanup function
+    when you are done with an instance of this class or else tables
+    created as part of rendering and running the rule will be left
+    orphaned in your workspace.
+
+    Attributes:
+        notables_table (str):
+            name of table where notables for the rule execution
+            can be found.
+        observables_table (str):
+            name of table where observables for the rule execution
+            can be found.
+        opals_table (str):
+            name of table where operational alerts for the rule execution
+            can be found.
+        data_metrics_table (str):
+            name of table where rule metrics for the rule execution
+            can be found.
+        stream_metrics_table (str):
+            name of table where stream metrics for the rule execution
+            can be found.
+        observables_acc_table (str):
+            name of table where observables aggregation for the rule
+            execution can be found.
+    """
+
+    def __init__(self, spark, tables: CoreV1RenderedRuleTables):
+        self.spark = spark
+        self.notables_table = tables.notables
+        self.observables_table = tables.observables
+        self.opals_table = tables.operational_alerts
+        self.data_metrics_table = tables.data_metrics
+        self.stream_metrics_table = tables.stream_metrics
+        self.observables_acc_table = tables.observables_accumulation
+
+    def _all_tables(self) -> List[str]:
+        return [
+            self.notables_table,
+            self.observables_table,
+            self.opals_table,
+            self.data_metrics_table,
+            self.stream_metrics_table,
+            self.observables_acc_table,
+        ]
+
+    def cleanup(self):
+        """
+        Clean up when done with this ExecRule instance. This method
+        cleans up temporarily allocated tables used to store the
+        results of the rule execution. Unless you need to preserve
+        the results for some reason, you must call this method or
+        the temporary tables will be orphaned in your workspace.
+        """
+        for table in self._all_tables():
+            self.spark.sql(f"DROP TABLE IF EXISTS {table}")
+
+    def notables(self, limit: Optional[int] = None) -> DataFrame:
+        """
+        Return the contents of the notables table.
+
+        :param limit: optional limit to the number of rows returned.
+        :returns: DataFrame containing the notables table rows.
+        """
+        query = f"SELECT * FROM {self.notables_table}"
+        if limit is not None:
+            query = f"{query} LIMIT {limit}"
+        return self.spark.sql(query)
+
+    def observables(self, limit: Optional[int] = None) -> DataFrame:
+        """
+        Return the contents of the observables table.
+
+        :param limit: optional limit to the number of rows returned.
+        :returns: DataFrame containing the observables table rows.
+        """
+        query = f"SELECT * FROM {self.observables_table}"
+        if limit is not None:
+            query = f"{query} LIMIT {limit}"
+        return self.spark.sql(query)
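Because ExecRule.cleanup must run or the temporary result tables are orphaned, wrapping the execution in try/finally is the natural usage pattern. A sketch assuming a Databricks notebook where spark is already defined, client is a constructed Client, and rule is a dasl_client.types.Rule built elsewhere:

    # Render and execute the rule locally; returns an ExecRule handle.
    execution = client.exec_rule(spark, rule)
    try:
        notables_df = execution.notables(limit=10)        # DataFrame of notables
        observables_df = execution.observables(limit=10)  # DataFrame of observables
        notables_df.show()
    finally:
        # Drop the temporary tables created for this run.
        execution.cleanup()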
{dasl_client-1.0.23 → dasl_client-1.0.24}/dasl_client/preset_development/errors.py

@@ -75,6 +75,25 @@ class MalformedFieldError(PresetError):
         super().__init__(message)
 
 
+class InvalidLiteralError(PresetError):
+    def __init__(self, stage: str, stage_name: str, field_name: str):
+        self.stage = stage
+        self.stage_name = stage_name
+        self.field_name = field_name
+        message = f"Literal can only be type string in {stage} stage {stage_name}'s field specification named {field_name}."
+        super().__init__(message)
+
+
+class InvalidFromError(PresetError):
+    def __init__(self, stage: str, stage_name: str, field_name: str, reason: str):
+        self.stage = stage
+        self.stage_name = stage_name
+        self.field_name = field_name
+        self.reason = reason
+        message = f"{reason} in {stage} stage {stage_name}'s field specification named {field_name}."
+        super().__init__(message)
+
+
 class MissingFieldNameError(PresetError):
     def __init__(self, stage: str, stage_name: str):
         self.stage = stage
@@ -107,6 +126,21 @@ class AutoloaderMissingFieldError(PresetError):
         super().__init__(message)
 
 
+class MissingBronzeTablesError(PresetError):
+    def __init__(
+        self,
+        message: str = "Bronze tables mode selected, but no bronze table definitions provided.",
+    ):
+        super().__init__(message)
+
+
+class MissingBronzeTableFieldError(PresetError):
+    def __init__(self, field_name: str):
+        self.field_name = field_name
+        message = f"A bronze table definition is missing a field {field_name} in provided definitions."
+        super().__init__(message)
+
+
 class UnknownGoldTableError(PresetError):
     def __init__(self, table_name: str, schema: str):
         self.table_name = table_name
@@ -145,6 +179,14 @@ class MissingUtilityConfigurationFieldError(PresetError):
         super().__init__(message)
 
 
+class DisallowedUtilityConfigurationError(PresetError):
+    def __init__(self, operation: str, stage: str):
+        self.operation = operation
+        self.stage = stage
+        message = f"The {operation} utility is disallowed in the {stage} stage."
+        super().__init__(message)
+
+
 class AssertionFailedError(PresetError):
     def __init__(self, expr: str, assertion_message: str, df: DataFrame):
         # Get the Databricks built-in functions out the namespace.