dasl-client 1.0.13__tar.gz → 1.0.16__tar.gz
This diff shows the contents of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of dasl-client was flagged as possibly problematic by the registry diff tool.
- {dasl_client-1.0.13 → dasl_client-1.0.16}/PKG-INFO +3 -3
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/client.py +60 -9
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/helpers.py +6 -3
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/errors.py +4 -1
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_engine.py +23 -7
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_parameters.py +31 -6
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/stage.py +87 -20
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/content.py +4 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/dbui.py +87 -3
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/PKG-INFO +3 -3
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/SOURCES.txt +4 -0
- dasl_client-1.0.16/dasl_client.egg-info/requires.txt +4 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/top_level.txt +1 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/pyproject.toml +3 -3
- dasl_client-1.0.16/test/__init__.py +0 -0
- dasl_client-1.0.16/test/conftest.py +18 -0
- dasl_client-1.0.16/test/constants.py +10 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_api_surface.py +1 -24
- dasl_client-1.0.16/test/test_databricks_secret_auth.py +116 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_marshaling.py +5 -6
- dasl_client-1.0.13/dasl_client.egg-info/requires.txt +0 -4
- {dasl_client-1.0.13 → dasl_client-1.0.16}/LICENSE +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/README.md +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/__init__.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/auth/__init__.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/auth/auth.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/conn/__init__.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/conn/client_identifier.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/conn/conn.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/errors/__init__.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/errors/errors.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/__init__.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/__init__.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/admin_config.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/datasource.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/helpers.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/rule.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/types.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/workspace_config.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/dependency_links.txt +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/setup.cfg +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/setup.py +0 -0
- {dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_api_changes.py +0 -0

{dasl_client-1.0.13 → dasl_client-1.0.16}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dasl_client
-Version: 1.0.
+Version: 1.0.16
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
@@ -8,10 +8,10 @@ Author-email: Antimatter Team <support@antimatter.io>
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: dasl_api==0.1.
+Requires-Dist: dasl_api==0.1.17
 Requires-Dist: databricks-sdk>=0.41.0
 Requires-Dist: pydantic>=2
-Requires-Dist: typing_extensions
+Requires-Dist: typing_extensions>=4.10.0
 
 # DASL Client Library
 

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/client.py
@@ -1,5 +1,9 @@
 from copy import deepcopy
-from
+from datetime import datetime, timedelta
+from time import sleep
+from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar
+from pydantic import Field
+from pyspark.sql import DataFrame
 
 from dasl_api import (
     CoreV1Api,
@@ -9,8 +13,6 @@ from dasl_api import (
     WorkspaceV1CreateWorkspaceRequest,
     api,
 )
-from pydantic import Field
-
 from dasl_client.auth.auth import (
     Authorization,
     DatabricksSecretAuth,
@@ -569,10 +571,46 @@ class Client:
         )
         return Rule.from_api_obj(result)
 
+    def exec_rule(
+        self, rule_in: Rule, df: DataFrame
+    ) -> Tuple[DataFrame, Optional[DataFrame]]:
+        """
+        Locally execute a Rule. Must be run from within a Databricks
+        notebook or else an exception will be raised. This is intended
+        to facilitate Rule development.
+
+        :param rule_in: The specification of the Rule to execute.
+        :param df: The DataFrame to use as the input to the Rule.
+        :returns Tuple[DataFrame, Optional[DataFrame]]: The first
+            element of the tuple contains the notables produced by
+            the rule, and the second element contains the observables
+            or None if no observables were produced.
+        """
+        Helpers.ensure_databricks()
+        with error_handler():
+            result = self._core_client().core_v1_render_rule(
+                self._workspace(),
+                rule_in.to_api_obj(),
+            )
+
+        try:
+            import notebook_utils
+        except ImportError as e:
+            raise ImportError(
+                "Package 'notebook_utils' not found. "
+                "Install it within this this notebook using "
+                f"%pip install {result.notebook_utils_path}"
+            )
+
+        namespace = {}
+        exec(result.content, namespace)
+        return namespace["generate"](df)
+
     def adhoc_transform(
         self,
         warehouse: str,
         request: TransformRequest,
+        timeout: timedelta = timedelta(minutes=5),
     ) -> TransformResponse:
         """
         Run a sequence of ADHOC transforms against a SQL warehouse to
@@ -583,16 +621,29 @@ class Client:
         :return: a TransformResponse object containing the results
             after running the transforms.
         :raises: NotFoundError if the rule does not exist
+        :raises: Exception for a server-side error or timeout
         """
         with error_handler():
-
-            self.
-
-
-                request.to_api_obj(),
-            )
+            status = self._dbui_client().dbui_v1_transform(
+                self._workspace(),
+                warehouse,
+                request.to_api_obj(),
             )
 
+            begin = datetime.now()
+            while datetime.now() - begin < timeout:
+                sleep(5)
+                status = self._dbui_client().dbui_v1_transform_status(
+                    self._workspace(), status.id
+                )
+
+                if status.status == "failure":
+                    raise Exception(f"adhoc transform failed with {status.error}")
+                elif status.status == "success":
+                    return TransformResponse.from_api_obj(status.result)
+
+            raise Exception("timed out waiting for adhoc transform result")
+
     def get_observable_events(
         self,
         warehouse: str,
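
For orientation, a minimal usage sketch of the client additions above (exec_rule and the polling adhoc_transform), assuming a configured client inside a Databricks notebook; rule_spec, input_df, warehouse_id and transform_request are hypothetical placeholders, not names from the package:

    # Hypothetical sketch only; not part of the packaged code.
    from datetime import timedelta
    from dasl_client.client import Client

    client = Client.for_workspace(
        workspace_url="https://example.cloud.databricks.com",  # placeholder
        dasl_host="https://api.prod.sl.antimatter.io",
    )

    # exec_rule renders the Rule server-side, then executes the rendered
    # code locally against the supplied DataFrame (notebook only).
    notables, observables = client.exec_rule(rule_spec, input_df)

    # adhoc_transform now polls every 5 seconds until success, failure,
    # or the timeout elapses (default 5 minutes).
    response = client.adhoc_transform(
        warehouse_id, transform_request, timeout=timedelta(minutes=2)
    )
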

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/helpers.py
@@ -6,15 +6,18 @@ class Helpers:
     default_dasl_host = "https://api.prod.sl.antimatter.io"
 
     @staticmethod
-    def
-        # This import raises an exception if outside a notebook context, so only
-        # import if this method is called
+    def ensure_databricks():
         if "DATABRICKS_RUNTIME_VERSION" not in os.environ:
             raise Exception(
                 "attempted to access databricks context outside "
                 + "of databricks notebook"
             )
 
+    @staticmethod
+    def databricks_context():
+        # This import raises an exception if outside a notebook context, so only
+        # import if this method is called
+        Helpers.ensure_databricks()
         from databricks.sdk.runtime import dbutils
 
         return dbutils.notebook.entry_point.getDbutils().notebook().getContext()
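
A small sketch of how the split above can be used: ensure_databricks() only checks the DATABRICKS_RUNTIME_VERSION environment variable, so it works as a cheap guard before databricks_context() performs the lazy dbutils import (the import path below is an assumption):

    # Sketch only, assuming `from dasl_client.helpers import Helpers`.
    import os

    if "DATABRICKS_RUNTIME_VERSION" in os.environ:
        ctx = Helpers.databricks_context()  # safe: the guard already passed
    else:
        ctx = None  # outside a notebook, ensure_databricks() would raise
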

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/errors.py
@@ -149,7 +149,10 @@ class AssertionFailedError(PresetError):
     def __init__(self, expr: str, assertion_message: str, df: DataFrame):
         # Get the Databricks built-in functions out the namespace.
         ipython = get_ipython()
-
+        if ipython is not None:
+            display = ipython.user_ns["display"]
+        else:
+            display = lambda x: x.show()
 
         self.expr = expr
         self.assertion_message = assertion_message

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_engine.py
@@ -54,10 +54,13 @@ class PreviewEngine:
             self._preset.get("silver", None), self._pretransform_name
         )
 
+        self._bronze = None
         self._pre = None
         self._silver = []
         self._gold = []
-        self._result_df_map
+        self._result_df_map: Tuple[
+            DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]
+        ] = (None, {}, {})
 
     def _validate_pretransform_name(
         self, silver: Dict[str, str], pretransform_name: str
@@ -181,8 +184,12 @@ class PreviewEngine:
 
         # Get the Databricks built-in functions out the namespace.
         ipython = get_ipython()
-
-
+        if ipython is not None:
+            displayHTML = ipython.user_ns["displayHTML"]
+            display = ipython.user_ns["display"]
+        else:
+            displayHTML = lambda x: print(x)
+            display = lambda x: x.show()
 
         def d(txt, lvl) -> None:
             displayHTML(
@@ -245,7 +252,7 @@ class PreviewEngine:
             raise UnknownGoldTableError(name, gold_table_schema)
 
         # Performs the type check.
-        delta_df = self._spark.table(f"{gold_table_schema}
+        delta_df = self._spark.table(f"`{gold_table_schema}`.`{name}`").limit(0)
         unioned_df = delta_df.unionByName(df, allowMissingColumns=True)
 
         # Now we check no new columns.
@@ -286,7 +293,7 @@ class PreviewEngine:
         d("Resultant gold table preview", 3)
         display(unioned_df)
 
-    def evaluate(self, gold_table_schema: str) -> None:
+    def evaluate(self, gold_table_schema: str, display: bool = True) -> None:
         """
         Evaluates the loaded preset YAML using the input datasource configuration to load
         records. Finally, checks that the output from the Gold stages is compatible with
@@ -303,7 +310,9 @@ class PreviewEngine:
         ):
             if not any(
                 row.databaseName == schema_name
-                for row in self._spark.sql(
+                for row in self._spark.sql(
+                    f"SHOW SCHEMAS IN `{catalog_name}`"
+                ).collect()
             ):
                 raise InvalidGoldTableSchemaError(
                     gold_table_schema,
@@ -340,5 +349,12 @@ class PreviewEngine:
         self._compile_stages()
 
         with self._ds_params as df:
+            self._bronze = df
             self._result_df_map = self._run(df)
-
+            if display:
+                self._render_output(df, self._result_df_map, gold_table_schema)
+
+    def results(
+        self,
+    ) -> Tuple[DataFrame, DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]]:
+        return self._bronze, *self._result_df_map
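
For context, a hedged sketch of the new evaluate(display=...) flag and results() accessor shown above; the engine construction is omitted and the unpacked names are interpretations of the return type (bronze input, pretransform output, then the silver and gold DataFrame maps):

    # Sketch only: run the preview without rendering notebook output,
    # then pull the intermediate DataFrames programmatically.
    engine.evaluate("my_gold_schema", display=False)  # placeholder schema name
    bronze_df, pre_df, silver_dfs, gold_dfs = engine.results()
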

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_parameters.py
RENAMED
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from pyspark.sql import DataFrame, SparkSession
 from pyspark.sql.types import *
 from pyspark.sql.dataframe import DataFrame
@@ -115,6 +117,7 @@ class PreviewParameters:
         self._table = None
 
         self._pretransform_name = None
+        self._bronze_pre_transform: Optional[List[str]] = None
 
         self._df = None
 
@@ -166,10 +169,13 @@ class PreviewParameters:
             self._record_limit
         )
 
+        if self._bronze_pre_transform is not None:
+            stream_df = stream_df.selectExpr(*self._bronze_pre_transform)
+
         query = (
             stream_df.writeStream.format("memory")
             .queryName("batch_data")
-            .trigger(
+            .trigger(availableNow=True)
             .start()
         )
 
@@ -193,12 +199,17 @@ class PreviewParameters:
 
         # Get the Databricks built-in functions out the namespace.
         ipython = get_ipython()
-
+        if ipython is not None:
+            dbutils = ipython.user_ns["dbutils"]
 
-
-
-
-
+            dbutils.fs.rm(
+                f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+                recurse=True,
+            )
+        else:
+            print(
+                f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}"
+            )
 
     def from_input(self):
         """
@@ -286,6 +297,10 @@ class PreviewParameters:
         Returns:
             PreviewParameters: The current instance with updated configuration.
         """
+        if file_format.lower() == "jsonl":
+            self._autoloader_format = "json"
+            self.set_autoloader_multiline(False)
+            return self
         self._autoloader_format = file_format
         return self
 
@@ -350,6 +365,16 @@ class PreviewParameters:
         self._pretransform_name = pretransform_name
         return self
 
+    def set_bronze_pre_transform(self, expr: List[str]):
+        """
+        Sets a pre-transform expression that will run before data is written to bronze
+
+        Returns:
+            PreviewParameters: The current instance with updated configuration.
+        """
+        self._bronze_pre_transform = expr
+        return self
+
     def set_date_range(self, column: str, start_time: str, end_time: str):
         """
         Set the TIMESTAMP column and date range to use as the input data filter to
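
To illustrate the bronze pre-transform hook added above, a sketch assuming an existing PreviewParameters instance named params (the expressions are placeholders): set_bronze_pre_transform takes a list of SQL select expressions that are applied via selectExpr before the preview stream is staged, and passing "jsonl" to the file-format setter now maps to the "json" autoloader format with multiline disabled.

    # Sketch only; the expression list is a placeholder.
    params = params.set_bronze_pre_transform(
        ["*", "current_timestamp() AS ingested_at"]
    )
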

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/stage.py
@@ -150,17 +150,19 @@ class Stage:
         if target_col not in existing_columns:
             raise ReferencedColumnMissingError("jsonExtract", target_col)
         schema = self._spark.sql(
-            f"SELECT schema_of_json_agg({target_col}) AS sc FROM {{df}}",
+            f"SELECT schema_of_json_agg({self.auto_backtick(target_col)}) AS sc FROM {{df}}",
+            df=df,
         ).collect()[0][0]
         extract_df = self._spark.createDataFrame(data=[], schema=schema)
         columns = extract_df.columns
         columns = [
-            f"extract.{col} AS {col}"
+            self.auto_backtick(f"extract.{col}") + f" AS {self.auto_backtick(col)}"
             for col in columns
             if col not in omit_fields and col not in existing_columns
         ]
         columns += [
-            f"extract.{col}
+            self.auto_backtick(f"extract.{col}")
+            + f" AS {self.auto_backtick(duplicate_prefix + col)}"
             for col in columns
             if col not in omit_fields and col in existing_columns
         ]
@@ -176,7 +178,10 @@ class Stage:
             A DataFrame with the resultant operation's records.
         """
         return (
-            df.selectExpr(
+            df.selectExpr(
+                "*",
+                f"from_json({self.auto_backtick(target_col)}, '{schema}') AS extract",
+            )
             .selectExpr("*", *columns)
             .drop("extract")
         )
@@ -198,7 +203,10 @@ class Stage:
         """
         extract_df = self._spark.createDataFrame(data=[], schema=schema)
         schema = extract_df.drop(omit_fields).schema.simpleString()
-        return df.selectExpr(
+        return df.selectExpr(
+            "*",
+            f"from_json({self.auto_backtick(target_col)}, '{schema}') AS {self.auto_backtick(name)}",
+        )
 
     def preserved_columns(
         self, df: DataFrame
@@ -224,7 +232,7 @@ class Stage:
         duplicate_prefix = self._duplicate_prefix()
         column_names = self._column_names()
         duplicate_renames = [
-            f"{col} AS {duplicate_prefix
+            f"{self.auto_backtick(col)} AS {self.auto_backtick(duplicate_prefix + col)}"
             for col in preserved_columns
             if col in column_names
         ]
@@ -296,14 +304,46 @@ class Stage:
         """
         if field.get("from", None):
             # check that the from column exists in the df?
-            return f"{field['from']} AS {name}"
+            return f"{self.auto_backtick(field['from'])} AS {self.auto_backtick(name)}"
         elif field.get("literal", None):
-            return f"'{field['literal']}' AS {name}"
-        elif field.get("expr", None):
-            return f"{field['expr']} AS {name}"
+            return f"'{field['literal']}' AS {self.auto_backtick(name)}"
+        elif field.get("expr", None) is not None:
+            return f"{field['expr']} AS {self.auto_backtick(name)}"
         else:
             return ""
 
+    def is_backtick_escaped(self, name: str) -> bool:
+        """
+        check if a given (column) name is backtick escaped or not
+        :param name: column name
+        :return: bool
+        """
+        return name.startswith("`") and name.endswith("`")
+
+    def auto_backtick(self, name: str) -> str:
+        """
+        auto-backtick given name in case it isn't already backtick escaped.
+        if the name contains dots it will get split and each component backticked individually.
+        Returns the name wrapped in backticks or the passed name if it already had backticks.
+        :param name: column name
+        :return: str
+        """
+        if self.is_backtick_escaped(name):
+            return name
+        parts = name.split(".")
+        return ".".join(list(map(lambda s: f"`{s}`", parts)))
+
+    def force_apply_backticks(self, name: str) -> str:
+        """
+        forces application of backticks to the given (column) name as a single unit
+        if it already has backticks this is a noop
+        :param name: column name
+        :return: str
+        """
+        if self.is_backtick_escaped(name):
+            return name
+        return f"`{name}`"
+
     def process_node(self, name: str, node: Node) -> str:
         """
         Processes a single node in a tree of nodes.
@@ -319,7 +359,7 @@ class Stage:
                 child_expr = self.process_node(child_name, child_node)
                 fields_list.append(f"{child_expr}")
             joined_fields = ",\n".join(fields_list)
-            return f"struct(\n{joined_fields}\n) AS {name}"
+            return f"struct(\n{joined_fields}\n) AS {self.auto_backtick(name)}"
         else:
             return ""
 
@@ -341,12 +381,22 @@ class Stage:
         """
         Renders a list of field specifications containing both simple and
         STRUCT references into valid, STRUCT cognicient, SELECT expressions.
+        if a nested field is wrapped in backticks it will be treated as a simple field
+        for example field of name `col.with.dots` will NOT be treated as nested field.
 
         Returns:
            The SQL expression.
        """
-        simple_fields = [
-
+        simple_fields = [
+            f
+            for f in fields
+            if "." not in f["name"] or self.is_backtick_escaped(f["name"])
+        ]
+        nested_fields = [
+            f
+            for f in fields
+            if "." in f["name"] and not self.is_backtick_escaped(f["name"])
+        ]
 
         result_parts = []
         for field in simple_fields:
@@ -358,7 +408,7 @@ class Stage:
             nested_str = self.parse_to_string(tree)
             result_parts.append(nested_str)
 
-        return [p for p in result_parts if p]
+        return [p for p in result_parts if p is not None and len(p) > 0]
 
     def select_expr(self, df: DataFrame) -> str:
         """
@@ -379,8 +429,12 @@ class Stage:
         if should_preserve:
             if embed_col := preserve.get("embedColumn", None):
                 preserved_columns = self.preserved_columns_embed_column(df)
+                # preserved_columns is obtained from df.columns
+                # applying backticks to all of them is OK here
+                # since they will never use "obj.key" to reference nested fields of structs
+                # so we just go ahead and apply backticks to all across the board
                 select_fields += [
-                    f"struct({', '.join(preserved_columns)}) AS {embed_col}"
+                    f"struct({', '.join(list(map(lambda x: self.force_apply_backticks(x), preserved_columns)))}) AS {self.auto_backtick(embed_col)}"
                 ]
             else:
                 (
@@ -388,8 +442,13 @@ class Stage:
                     duplicate_renames,
                     column_names,
                 ) = self.preserved_columns(df)
-
-                select_fields +=
+                # see note above: same here - apply backticks to all columns across the board
+                select_fields += list(
+                    map(lambda x: self.force_apply_backticks(x), preserved_columns)
+                )
+                select_fields += list(
+                    map(lambda x: self.force_apply_backticks(x), duplicate_renames)
+                )
 
         return ["*"] + select_fields if self._stage == "temp_fields" else select_fields
 
@@ -475,7 +534,9 @@ class Stage:
             df = (
                 df.alias("tmp")
                 .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
-                .selectExpr(
+                .selectExpr(
+                    "tmp.*", f"{select} AS {self.auto_backtick(field.get('name'))}"
+                )
             )
         elif csv := join.get("withCSV", None):
             if path := csv.get("path", None):
@@ -485,7 +546,10 @@ class Stage:
             df = (
                 df.alias("tmp")
                 .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
-                .selectExpr(
+                .selectExpr(
+                    "tmp.*",
+                    f"{select} AS {self.auto_backtick(field.get('name'))}",
+                )
             )
         else:
             raise MissingJoinFieldError("withTable or withCSV (please supply 1)")
@@ -500,7 +564,10 @@ class Stage:
         """
         for field in self._fields:
             if field.get("alias", None):
-                df = df.selectExpr(
+                df = df.selectExpr(
+                    "*",
+                    f"{self.auto_backtick(field.get('alias'))} AS {self.auto_backtick(field.get('name'))}",
+                )
         return df
 
     def run_assertions(self, df: DataFrame) -> DataFrame:
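
To make the new backtick helpers concrete, the values below follow directly from the implementations shown (stage is an assumed Stage instance):

    # auto_backtick splits on dots and quotes each component, so nested
    # struct references remain valid; already-quoted names pass through.
    stage.auto_backtick("user.name")        # -> `user`.`name`
    stage.auto_backtick("`col.with.dots`")  # -> `col.with.dots` (unchanged)

    # force_apply_backticks quotes the whole name as a single unit.
    stage.force_apply_backticks("col.with.dots")  # -> `col.with.dots`
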

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/content.py
@@ -173,6 +173,8 @@ class DataSourcePreset(BaseModel):
     name: Optional[str] = None
     author: Optional[str] = None
     description: Optional[str] = None
+    title: Optional[str] = None
+    icon_url: Optional[str] = None
     autoloader: Optional[PresetAutoloader] = None
     silver: Optional[SilverPreset] = None
     gold: Optional[List[GoldPreset]] = None
@@ -188,6 +190,8 @@ class DataSourcePreset(BaseModel):
             name=obj.name,
             author=obj.author,
             description=obj.description,
+            title=obj.title,
+            icon_url=obj.icon_url,
             autoloader=PresetAutoloader.from_api_obj(obj.autoloader),
             silver=SilverPreset.from_api_obj(obj.silver),
             gold=[GoldPreset.from_api_obj(item) for item in obj.gold],
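
A hedged sketch of the two new optional metadata fields on DataSourcePreset (the values are placeholders; other fields are left at their defaults):

    # Sketch only.
    preset = DataSourcePreset(
        name="aws_cloudtrail",
        title="AWS CloudTrail",
        icon_url="https://example.com/cloudtrail.png",
    )
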

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/dbui.py
@@ -12,6 +12,8 @@ from dasl_api import (
     DbuiV1TransformRequestTransformsInnerPresetOverrides,
     DbuiV1TransformResponse,
     DbuiV1TransformResponseStagesInner,
+    ContentV1DatasourcePresetAutoloaderCloudFiles,
+    DbuiV1TransformRequestAutoloaderInput,
 )
 
 from .datasource import DataSource, FieldSpec, FieldUtils
@@ -131,7 +133,7 @@ class TransformRequest(BaseModel):
     Attributes:
         input (TransformRequest.Input):
             The input block containing the columns metadata and data.
-        autoloader_input (
+        autoloader_input (Autoloader):
             The autoloader input configuration.
         use_preset (str):
             Indicates which preset to use for the transforms.
@@ -172,6 +174,86 @@ class TransformRequest(BaseModel):
                 data=self.data,
             )
 
+    class Autoloader(BaseModel):
+        """
+        Autoloader configuration for the DataSource.
+
+        Attributes:
+            format (Optional[str]):
+                The format of the data (e.g., json, parquet, csv, etc.).
+            location (str):
+                External location for the volume in Unity Catalog.
+            schema_file (Optional[str]):
+                An optional file containing the schema of the data source.
+            cloud_files (Optional[Autoloader.CloudFiles]):
+                CloudFiles configuration.
+        """
+
+        class CloudFiles(BaseModel):
+            """
+            CloudFiles configuration for the Autoloader.
+
+            Attributes:
+                schema_hints_file (Optional[str]):
+                schema_hints (Optional[str]):
+            """
+
+            schema_hints_file: Optional[str] = None
+            schema_hints: Optional[str] = None
+
+            @staticmethod
+            def from_api_obj(
+                obj: Optional[ContentV1DatasourcePresetAutoloaderCloudFiles],
+            ) -> "TransformRequest.Autoloader.CloudFiles":
+                if obj is None:
+                    return None
+                return TransformRequest.Autoloader.CloudFiles(
+                    schema_hints_file=obj.schema_hints_file,
+                    schema_hints=obj.schema_hints,
+                )
+
+            def to_api_obj(self) -> ContentV1DatasourcePresetAutoloaderCloudFiles:
+                return ContentV1DatasourcePresetAutoloaderCloudFiles(
+                    schema_hints_file=self.schema_hints_file,
+                    schema_hints=self.schema_hints,
+                )
+
+        format: Optional[str] = None
+        location: str
+        schema_file: Optional[str] = None
+        schema: Optional[str] = None
+        cloud_files: Optional["TransformRequest.Autoloader.CloudFiles"] = None
+        row_count: Optional[int] = None
+        row_offset: Optional[int] = None
+
+        @staticmethod
+        def from_api_obj(
+            obj: Optional[DbuiV1TransformRequestAutoloaderInput],
+        ) -> "Optional[TransformRequest.Autoloader]":
+            if obj is None:
+                return None
+            return TransformRequest.Autoloader(
+                format=obj.format,
+                location=obj.location,
+                schema_file=obj.schema_file,
+                cloud_files=TransformRequest.Autoloader.CloudFiles.from_api_obj(
+                    obj.cloud_files
+                ),
+                row_count=obj.row_count,
+                row_offset=obj.row_offset,
+            )
+
+        def to_api_obj(self) -> DbuiV1TransformRequestAutoloaderInput:
+            return DbuiV1TransformRequestAutoloaderInput(
+                format=self.format,
+                location=self.location,
+                schemaFile=self.schema_file,
+                schema=self.schema_file,
+                cloudFiles=Helpers.maybe(lambda o: o.to_api_obj(), self.cloud_files),
+                rowCount=self.row_count,
+                rowOffset=self.row_offset,
+            )
+
     class Transform(BaseModel):
         """
         A transform configuration to apply to the data.
@@ -273,7 +355,7 @@ class TransformRequest(BaseModel):
         )
 
     input: Optional["TransformRequest.Input"] = None
-    autoloader_input: Optional[
+    autoloader_input: Optional["TransformRequest.Autoloader"] = None
     use_preset: Optional[str] = None
     transforms: List["TransformRequest.Transform"]
 
@@ -281,7 +363,9 @@ class TransformRequest(BaseModel):
     def from_api_obj(obj: DbuiV1TransformRequest) -> "TransformRequest":
         return TransformRequest(
             input=TransformRequest.Input.from_api_obj(obj.input),
-            autoloader_input=
+            autoloader_input=TransformRequest.Autoloader.from_api_obj(
+                obj.autoloader_input
+            ),
             use_preset=obj.use_preset,
             transforms=[
                 TransformRequest.Transform.from_api_obj(item) for item in obj.transforms
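
For reference, a hedged sketch constructing the new TransformRequest.Autoloader block defined above (the field values are placeholders; the same shape appears in the updated marshaling test further down):

    # Sketch only: build an autoloader input for an ad hoc transform request.
    autoloader = TransformRequest.Autoloader(
        format="csv",
        location="s3://bucket/data",  # external location (placeholder)
        schema_file="schema.json",
        cloud_files=TransformRequest.Autoloader.CloudFiles(
            schema_hints="hint1, hint2",
        ),
        row_count=100,
        row_offset=0,
    )
    request = TransformRequest(
        autoloader_input=autoloader,
        transforms=[],  # transforms omitted in this sketch
    )
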

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dasl_client
-Version: 1.0.
+Version: 1.0.16
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
@@ -8,10 +8,10 @@ Author-email: Antimatter Team <support@antimatter.io>
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: dasl_api==0.1.
+Requires-Dist: dasl_api==0.1.17
 Requires-Dist: databricks-sdk>=0.41.0
 Requires-Dist: pydantic>=2
-Requires-Dist: typing_extensions
+Requires-Dist: typing_extensions>=4.10.0
 
 # DASL Client Library
 

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/SOURCES.txt
@@ -31,6 +31,10 @@ dasl_client/types/helpers.py
 dasl_client/types/rule.py
 dasl_client/types/types.py
 dasl_client/types/workspace_config.py
+test/__init__.py
+test/conftest.py
+test/constants.py
 test/test_api_changes.py
 test/test_api_surface.py
+test/test_databricks_secret_auth.py
 test/test_marshaling.py

{dasl_client-1.0.13 → dasl_client-1.0.16}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "dasl_client"
-version = "1.0.
+version = "1.0.16"
 description = "The DASL client library used for interacting with the DASL workspace"
 readme = "README.md"
 authors = [
@@ -13,8 +13,8 @@ authors = [
 requires-python = ">=3.8"
 
 dependencies = [
-    "dasl_api==0.1.
+    "dasl_api==0.1.17",
     "databricks-sdk>=0.41.0",
     "pydantic>=2",
-    "typing_extensions
+    "typing_extensions>=4.10.0",
 ]

dasl_client-1.0.16/test/__init__.py
File without changes

dasl_client-1.0.16/test/conftest.py
@@ -0,0 +1,18 @@
+import pytest
+
+from dasl_client import Client
+
+from .constants import *
+
+
+@pytest.fixture(scope="session")
+def api_client():
+    client = Client.new_workspace(
+        admin_email="test@antimatter.io",
+        app_client_id=app_client_id,
+        service_principal_id=databricks_client_id,
+        service_principal_secret=databricks_client_secret,
+        workspace_url=databricks_host,
+        dasl_host=dasl_host,
+    )
+    yield client

dasl_client-1.0.16/test/constants.py
@@ -0,0 +1,10 @@
+import os
+from urllib.parse import urlparse
+
+dasl_host = os.environ["DASL_API_URL"]
+databricks_host = os.environ["DASL_DATABRICKS_HOST"]
+databricks_client_id = os.environ["DASL_DATABRICKS_CLIENT_ID"]
+databricks_client_secret = os.environ["DASL_DATABRICKS_CLIENT_SECRET"]
+workspace = urlparse(databricks_host).hostname
+app_client_id = "22853b93-68ba-4ae2-8e41-976417f501dd"
+alternate_app_client_id = "335ac0d3-e0ea-4732-ba93-0277423b5029"

{dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_api_surface.py
@@ -1,29 +1,6 @@
-import os
-import pytest
-from urllib.parse import urlparse
-
 from dasl_client import *
 
-
-databricks_host = os.environ["DATABRICKS_HOST"]
-databricks_client_id = os.environ["DATABRICKS_CLIENT_ID"]
-databricks_client_secret = os.environ["DATABRICKS_CLIENT_SECRET"]
-workspace = urlparse(databricks_host).hostname
-app_client_id = "22853b93-68ba-4ae2-8e41-976417f501dd"
-alternate_app_client_id = "335ac0d3-e0ea-4732-ba93-0277423b5029"
-
-
-@pytest.fixture(scope="session")
-def api_client():
-    client = Client.new_workspace(
-        admin_email="test@antimatter.io",
-        app_client_id=app_client_id,
-        service_principal_id=databricks_client_id,
-        service_principal_secret=databricks_client_secret,
-        workspace_url=databricks_host,
-        dasl_host=dasl_host,
-    )
-    yield client
+from .constants import *
 
 
 def test_admin_config(api_client):

dasl_client-1.0.16/test/test_databricks_secret_auth.py
@@ -0,0 +1,116 @@
+import base64
+import datetime
+import os
+import time
+
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service import jobs, workspace as dbworkspace
+
+from .constants import *
+
+pylib_volume_path = os.environ["PYLIB_VOLUME_PATH"]
+pylib_wheel_path = os.environ["PYLIB_WHEEL_PATH"]
+
+
+def test_secret_auth(api_client):
+    # making sure it's even possible to get a config
+    api_client.get_config()
+
+    # need to do an API operation using databricks secret auth.
+    notebook_data = f"""
+%pip install {pylib_wheel_path}
+dbutils.library.restartPython()
+# COMMAND ----------
+from dasl_client.client import Client
+
+Client.for_workspace(
+    workspace_url="{databricks_host}",
+    dasl_host="{dasl_host}",
+).get_config()
+# COMMAND ----------
+dbutils.notebook.exit("SUCCESS")
+"""
+    print(f"notebook_data={notebook_data}")
+
+    wsc = WorkspaceClient()
+    wsc.workspace.mkdirs(path=pylib_volume_path)
+
+    notebook_path = f"{pylib_volume_path}/test_secret_auth_notebook"
+    wsc.workspace.import_(
+        path=notebook_path,
+        format=dbworkspace.ImportFormat.SOURCE,
+        language=dbworkspace.Language.PYTHON,
+        content=base64.b64encode(notebook_data.encode("utf-8")).decode("utf-8"),
+        overwrite=True,
+    )
+
+    job_id = None
+    try:
+        job_id = wsc.jobs.create(
+            name="run test_secret_auth notebook",
+            tasks=[
+                jobs.Task(
+                    task_key="run_notebook",
+                    notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
+                ),
+            ],
+        ).job_id
+
+        wsc.jobs.run_now(job_id=job_id)
+
+        logs = []
+        start = datetime.datetime.now()
+        complete = False
+        while not complete:
+            elapsed = datetime.datetime.now() - start
+            if elapsed > datetime.timedelta(seconds=300):
+                raise Exception(f"timed out waiting for job")
+
+            time.sleep(5)
+
+            status, logs = fetch_latest_run_status_and_logs(wsc, job_id)
+            print(f"logs={logs}")
+
+            if status == jobs.TerminationCodeCode.RUN_EXECUTION_ERROR:
+                raise Exception(f"job terminated with error")
+
+            complete = status == jobs.TerminationCodeCode.SUCCESS
+
+        print(logs)
+        assert len(logs) == 1
+        assert logs[0] == "SUCCESS"
+    finally:
+        wsc.workspace.delete(pylib_volume_path, recursive=True)
+        if job_id is not None:
+            wsc.jobs.delete(job_id=job_id)
+
+
+def fetch_latest_run_status_and_logs(
+    wsc: WorkspaceClient,
+    job_id: str,
+):
+    runs = list(wsc.jobs.list_runs(job_id=job_id, expand_tasks=True))
+    if not runs:
+        return "No runs found", None
+
+    # Find the latest run based on the start time
+    latest_run = max(runs, key=lambda r: r.start_time)
+    if latest_run.status.termination_details is None:
+        return "No runs found", None
+    status = latest_run.status.termination_details.code
+    logs = []
+    for task in latest_run.tasks:
+        output = wsc.jobs.get_run_output(task.run_id)
+        if output.error is not None:
+            logs.append(output.error)
+        elif output.logs is not None:
+            logs.append(output.logs)
+        elif output.notebook_output is not None:
+            logs.append(output.notebook_output.result)
+        elif output.run_job_output is not None:
+            raise Exception("Nested jobs are not supported")
+        elif output.sql_output is not None:
+            raise Exception("SQL jobs are unsupported")
+        else:
+            logs.append("")
+    return status, logs

{dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_marshaling.py
@@ -1,9 +1,6 @@
-import os
-
 from dasl_client import *
 
-
-workspace = databricks_host.split("//")[1]
+from .constants import *
 
 
 def test_workspace_config_marshal_unmarshal():
@@ -701,13 +698,15 @@ def test_transform_request_marshal_unmarshal():
             ],
             data=[{"col1": "1", "col2": "a"}, {"col1": "2", "col2": "b"}],
         ),
-        autoloader_input=
+        autoloader_input=TransformRequest.Autoloader(
             format="csv",
             location="s3://bucket/data",
            schema_file="schema.json",
-            cloud_files=
+            cloud_files=TransformRequest.Autoloader.CloudFiles(
                schema_hints_file="hints_file.csv", schema_hints="hint1, hint2"
            ),
+            row_count=1,
+            row_offset=5,
        ),
        use_preset="preset_value",
        transforms=[

(The remaining diff entries are "File without changes" markers for the files listed above with +0 -0.)