dasl-client 1.0.14__py3-none-any.whl → 1.0.17__py3-none-any.whl
This diff shows the changes between package versions as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of dasl-client might be problematic.
- dasl_client/client.py +30 -6
- dasl_client/preset_development/errors.py +4 -1
- dasl_client/preset_development/preview_engine.py +23 -7
- dasl_client/preset_development/preview_parameters.py +31 -6
- dasl_client/preset_development/stage.py +87 -20
- dasl_client/types/dbui.py +15 -9
- {dasl_client-1.0.14.dist-info → dasl_client-1.0.17.dist-info}/METADATA +2 -3
- {dasl_client-1.0.14.dist-info → dasl_client-1.0.17.dist-info}/RECORD +18 -11
- {dasl_client-1.0.14.dist-info → dasl_client-1.0.17.dist-info}/top_level.txt +1 -0
- test/__init__.py +0 -0
- test/conftest.py +18 -0
- test/constants.py +10 -0
- test/test_api_changes.py +125 -0
- test/test_api_surface.py +300 -0
- test/test_databricks_secret_auth.py +116 -0
- test/test_marshaling.py +912 -0
- {dasl_client-1.0.14.dist-info → dasl_client-1.0.17.dist-info}/LICENSE +0 -0
- {dasl_client-1.0.14.dist-info → dasl_client-1.0.17.dist-info}/WHEEL +0 -0
dasl_client/client.py
CHANGED

@@ -1,4 +1,6 @@
 from copy import deepcopy
+from datetime import datetime, timedelta
+from time import sleep
 from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar
 from pydantic import Field
 from pyspark.sql import DataFrame
@@ -608,6 +610,7 @@ class Client:
         self,
         warehouse: str,
         request: TransformRequest,
+        timeout: timedelta = timedelta(minutes=5),
     ) -> TransformResponse:
         """
         Run a sequence of ADHOC transforms against a SQL warehouse to
@@ -618,16 +621,29 @@ class Client:
         :return: a TransformResponse object containing the results
             after running the transforms.
         :raises: NotFoundError if the rule does not exist
+        :raises: Exception for a server-side error or timeout
         """
         with error_handler():
-            …
-            self.…
-            …
-            …
-                request.to_api_obj(),
-            )
+            status = self._dbui_client().dbui_v1_transform(
+                self._workspace(),
+                warehouse,
+                request.to_api_obj(),
             )

+            begin = datetime.now()
+            while datetime.now() - begin < timeout:
+                sleep(5)
+                status = self._dbui_client().dbui_v1_transform_status(
+                    self._workspace(), status.id
+                )
+
+                if status.status == "failure":
+                    raise Exception(f"adhoc transform failed with {status.error}")
+                elif status.status == "success":
+                    return TransformResponse.from_api_obj(status.result)
+
+            raise Exception("timed out waiting for adhoc transform result")
+
     def get_observable_events(
         self,
         warehouse: str,
@@ -687,3 +703,11 @@ class Client:
                 self._workspace(), name
             )
         )
+
+    def purge_preset_cache(self)-> None:
+        """
+        Purge the datasource cache presets. This will cause the DASL workspace
+        to fetch presets from provided sources.
+        """
+        with error_handler():
+            self._content_client().content_v1_preset_purge_cache(self._workspace())

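The adhoc transform call is now asynchronous on the server: the client submits the request, then polls the transform status every five seconds until it succeeds, fails, or the supplied timeout elapses. A minimal standalone sketch of the same poll-until-terminal pattern (the helper name and the shape of the status object are illustrative, not part of the client API):

from datetime import datetime, timedelta
from time import sleep


def wait_for_terminal_status(fetch_status, timeout=timedelta(minutes=5), interval=5):
    """Poll fetch_status() until it reports success or failure, or the timeout elapses."""
    begin = datetime.now()
    while datetime.now() - begin < timeout:
        sleep(interval)
        status = fetch_status()  # assumed to expose .status, .result and .error
        if status.status == "failure":
            raise Exception(f"operation failed with {status.error}")
        elif status.status == "success":
            return status.result
    raise Exception("timed out waiting for result")
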
dasl_client/preset_development/errors.py
CHANGED

@@ -149,7 +149,10 @@ class AssertionFailedError(PresetError):
     def __init__(self, expr: str, assertion_message: str, df: DataFrame):
         # Get the Databricks built-in functions out the namespace.
         ipython = get_ipython()
-        …
+        if ipython is not None:
+            display = ipython.user_ns["display"]
+        else:
+            display = lambda x: x.show()

         self.expr = expr
         self.assertion_message = assertion_message

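The exception class no longer assumes it is running inside a Databricks notebook: display is pulled from the IPython user namespace when one exists, otherwise it falls back to DataFrame.show(). A small sketch of that resolution pattern, assuming IPython is installed (outside a notebook, get_ipython() returns None):

from IPython import get_ipython


def resolve_display():
    """Return the notebook's display() helper if present, else fall back to DataFrame.show()."""
    ipython = get_ipython()
    if ipython is not None and "display" in ipython.user_ns:
        return ipython.user_ns["display"]
    return lambda df: df.show()
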
dasl_client/preset_development/preview_engine.py
CHANGED

@@ -54,10 +54,13 @@ class PreviewEngine:
             self._preset.get("silver", None), self._pretransform_name
         )

+        self._bronze = None
         self._pre = None
         self._silver = []
         self._gold = []
-        self._result_df_map …
+        self._result_df_map: Tuple[
+            DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]
+        ] = (None, {}, {})

     def _validate_pretransform_name(
         self, silver: Dict[str, str], pretransform_name: str
@@ -181,8 +184,12 @@ class PreviewEngine:

         # Get the Databricks built-in functions out the namespace.
         ipython = get_ipython()
-        …
-        …
+        if ipython is not None:
+            displayHTML = ipython.user_ns["displayHTML"]
+            display = ipython.user_ns["display"]
+        else:
+            displayHTML = lambda x: print(x)
+            display = lambda x: x.show()

         def d(txt, lvl) -> None:
             displayHTML(
@@ -245,7 +252,7 @@ class PreviewEngine:
             raise UnknownGoldTableError(name, gold_table_schema)

         # Performs the type check.
-        delta_df = self._spark.table(f"{gold_table_schema}…
+        delta_df = self._spark.table(f"`{gold_table_schema}`.`{name}`").limit(0)
         unioned_df = delta_df.unionByName(df, allowMissingColumns=True)

         # Now we check no new columns.
@@ -286,7 +293,7 @@ class PreviewEngine:
         d("Resultant gold table preview", 3)
         display(unioned_df)

-    def evaluate(self, gold_table_schema: str) -> None:
+    def evaluate(self, gold_table_schema: str, display: bool = True) -> None:
         """
         Evaluates the loaded preset YAML using the input datasource configuration to load
         records. Finally, checks that the output from the Gold stages is compatible with
@@ -303,7 +310,9 @@ class PreviewEngine:
         ):
             if not any(
                 row.databaseName == schema_name
-                for row in self._spark.sql(…
+                for row in self._spark.sql(
+                    f"SHOW SCHEMAS IN `{catalog_name}`"
+                ).collect()
             ):
                 raise InvalidGoldTableSchemaError(
                     gold_table_schema,
@@ -340,5 +349,12 @@ class PreviewEngine:
         self._compile_stages()

         with self._ds_params as df:
+            self._bronze = df
             self._result_df_map = self._run(df)
-            …
+            if display:
+                self._render_output(df, self._result_df_map, gold_table_schema)
+
+    def results(
+        self,
+    ) -> Tuple[DataFrame, DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]]:
+        return self._bronze, *self._result_df_map

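Together, the new display flag on evaluate() and the results() accessor let a preset preview run headlessly and expose its intermediate DataFrames. A hedged usage sketch (construction of the engine is elided, and the schema name is a placeholder):

# engine: an already-constructed PreviewEngine
engine.evaluate(gold_table_schema="my_catalog.my_schema", display=False)
bronze_df, pretransform_df, silver_dfs, gold_dfs = engine.results()
print(sorted(silver_dfs), sorted(gold_dfs))  # names of the produced silver/gold outputs
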
dasl_client/preset_development/preview_parameters.py
CHANGED

@@ -1,3 +1,5 @@
+from typing import Optional
+
 from pyspark.sql import DataFrame, SparkSession
 from pyspark.sql.types import *
 from pyspark.sql.dataframe import DataFrame
@@ -115,6 +117,7 @@ class PreviewParameters:
         self._table = None

         self._pretransform_name = None
+        self._bronze_pre_transform: Optional[List[str]] = None

         self._df = None

@@ -166,10 +169,13 @@ class PreviewParameters:
             self._record_limit
         )

+        if self._bronze_pre_transform is not None:
+            stream_df = stream_df.selectExpr(*self._bronze_pre_transform)
+
         query = (
             stream_df.writeStream.format("memory")
             .queryName("batch_data")
-            .trigger(…
+            .trigger(availableNow=True)
             .start()
         )

@@ -193,12 +199,17 @@ class PreviewParameters:

         # Get the Databricks built-in functions out the namespace.
         ipython = get_ipython()
-        …
+        if ipython is not None:
+            dbutils = ipython.user_ns["dbutils"]

-        …
-        …
-        …
-        …
+            dbutils.fs.rm(
+                f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+                recurse=True,
+            )
+        else:
+            print(
+                f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}"
+            )

     def from_input(self):
         """
@@ -286,6 +297,10 @@ class PreviewParameters:
         Returns:
             PreviewParameters: The current instance with updated configuration.
         """
+        if file_format.lower() == "jsonl":
+            self._autoloader_format = "json"
+            self.set_autoloader_multiline(False)
+            return self
         self._autoloader_format = file_format
         return self

@@ -350,6 +365,16 @@ class PreviewParameters:
         self._pretransform_name = pretransform_name
         return self

+    def set_bronze_pre_transform(self, expr: List[str]):
+        """
+        Sets a pre-transform expression that will run before data is written to bronze
+
+        Returns:
+            PreviewParameters: The current instance with updated configuration.
+        """
+        self._bronze_pre_transform = expr
+        return self
+
     def set_date_range(self, column: str, start_time: str, end_time: str):
         """
         Set the TIMESTAMP column and date range to use as the input data filter to

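set_bronze_pre_transform() takes a list of Spark SQL SELECT expressions that are applied to the autoloader stream before it lands in the in-memory bronze table, and like the other setters it returns self for chaining. A hedged usage sketch (the params object and expression values are placeholders):

# params: an already-constructed PreviewParameters
params = params.set_bronze_pre_transform(
    ["*", "to_timestamp(eventTime) AS event_ts"]
).set_date_range("event_ts", "2024-01-01", "2024-01-31")
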
dasl_client/preset_development/stage.py
CHANGED

@@ -150,17 +150,19 @@ class Stage:
         if target_col not in existing_columns:
             raise ReferencedColumnMissingError("jsonExtract", target_col)
         schema = self._spark.sql(
-            f"SELECT schema_of_json_agg({target_col}) AS sc FROM {{df}}",
+            f"SELECT schema_of_json_agg({self.auto_backtick(target_col)}) AS sc FROM {{df}}",
+            df=df,
         ).collect()[0][0]
         extract_df = self._spark.createDataFrame(data=[], schema=schema)
         columns = extract_df.columns
         columns = [
-            f"extract.{col} AS {col}"
+            self.auto_backtick(f"extract.{col}") + f" AS {self.auto_backtick(col)}"
             for col in columns
             if col not in omit_fields and col not in existing_columns
         ]
         columns += [
-            f"extract.{col}…
+            self.auto_backtick(f"extract.{col}")
+            + f" AS {self.auto_backtick(duplicate_prefix + col)}"
             for col in columns
             if col not in omit_fields and col in existing_columns
         ]
@@ -176,7 +178,10 @@ class Stage:
             A DataFrame with the resultant operation's records.
         """
         return (
-            df.selectExpr(…
+            df.selectExpr(
+                "*",
+                f"from_json({self.auto_backtick(target_col)}, '{schema}') AS extract",
+            )
             .selectExpr("*", *columns)
             .drop("extract")
         )
@@ -198,7 +203,10 @@ class Stage:
         """
         extract_df = self._spark.createDataFrame(data=[], schema=schema)
         schema = extract_df.drop(omit_fields).schema.simpleString()
-        return df.selectExpr(…
+        return df.selectExpr(
+            "*",
+            f"from_json({self.auto_backtick(target_col)}, '{schema}') AS {self.auto_backtick(name)}",
+        )

     def preserved_columns(
         self, df: DataFrame
@@ -224,7 +232,7 @@ class Stage:
         duplicate_prefix = self._duplicate_prefix()
         column_names = self._column_names()
         duplicate_renames = [
-            f"{col} AS {duplicate_prefix…
+            f"{self.auto_backtick(col)} AS {self.auto_backtick(duplicate_prefix + col)}"
             for col in preserved_columns
             if col in column_names
         ]
@@ -296,14 +304,46 @@ class Stage:
         """
         if field.get("from", None):
             # check that the from column exists in the df?
-            return f"{field['from']} AS {name}"
+            return f"{self.auto_backtick(field['from'])} AS {self.auto_backtick(name)}"
         elif field.get("literal", None):
-            return f"'{field['literal']}' AS {name}"
-        elif field.get("expr", None):
-            return f"{field['expr']} AS {name}"
+            return f"'{field['literal']}' AS {self.auto_backtick(name)}"
+        elif field.get("expr", None) is not None:
+            return f"{field['expr']} AS {self.auto_backtick(name)}"
         else:
             return ""

+    def is_backtick_escaped(self, name: str) -> bool:
+        """
+        check if a given (column) name is backtick escaped or not
+        :param name: column name
+        :return: bool
+        """
+        return name.startswith("`") and name.endswith("`")
+
+    def auto_backtick(self, name: str) -> str:
+        """
+        auto-backtick given name in case it isn't already backtick escaped.
+        if the name contains dots it will get split and each component backticked individually.
+        Returns the name wrapped in backticks or the passed name if it already had backticks.
+        :param name: column name
+        :return: str
+        """
+        if self.is_backtick_escaped(name):
+            return name
+        parts = name.split(".")
+        return ".".join(list(map(lambda s: f"`{s}`", parts)))
+
+    def force_apply_backticks(self, name: str) -> str:
+        """
+        forces application of backticks to the given (column) name as a single unit
+        if it already has backticks this is a noop
+        :param name: column name
+        :return: str
+        """
+        if self.is_backtick_escaped(name):
+            return name
+        return f"`{name}`"
+
     def process_node(self, name: str, node: Node) -> str:
         """
         Processes a single node in a tree of nodes.
@@ -319,7 +359,7 @@ class Stage:
                 child_expr = self.process_node(child_name, child_node)
                 fields_list.append(f"{child_expr}")
             joined_fields = ",\n".join(fields_list)
-            return f"struct(\n{joined_fields}\n) AS {name}"
+            return f"struct(\n{joined_fields}\n) AS {self.auto_backtick(name)}"
         else:
             return ""

@@ -341,12 +381,22 @@ class Stage:
         """
         Renders a list of field specifications containing both simple and
         STRUCT references into valid, STRUCT cognicient, SELECT expressions.
+        if a nested field is wrapped in backticks it will be treated as a simple field
+        for example field of name `col.with.dots` will NOT be treated as nested field.

         Returns:
             The SQL expression.
         """
-        simple_fields = [
-            …
+        simple_fields = [
+            f
+            for f in fields
+            if "." not in f["name"] or self.is_backtick_escaped(f["name"])
+        ]
+        nested_fields = [
+            f
+            for f in fields
+            if "." in f["name"] and not self.is_backtick_escaped(f["name"])
+        ]

         result_parts = []
         for field in simple_fields:
@@ -358,7 +408,7 @@ class Stage:
             nested_str = self.parse_to_string(tree)
             result_parts.append(nested_str)

-        return [p for p in result_parts if p]
+        return [p for p in result_parts if p is not None and len(p) > 0]

     def select_expr(self, df: DataFrame) -> str:
         """
@@ -379,8 +429,12 @@ class Stage:
         if should_preserve:
             if embed_col := preserve.get("embedColumn", None):
                 preserved_columns = self.preserved_columns_embed_column(df)
+                # preserved_columns is obtained from df.columns
+                # applying backticks to all of them is OK here
+                # since they will never use "obj.key" to reference nested fields of structs
+                # so we just go ahead and apply backticks to all across the board
                 select_fields += [
-                    f"struct({', '.join(preserved_columns)}) AS {embed_col}"
+                    f"struct({', '.join(list(map(lambda x: self.force_apply_backticks(x), preserved_columns)))}) AS {self.auto_backtick(embed_col)}"
                 ]
             else:
                 (
@@ -388,8 +442,13 @@ class Stage:
                     duplicate_renames,
                     column_names,
                 ) = self.preserved_columns(df)
-                …
-                select_fields += …
+                # see note above: same here - apply backticks to all columns across the board
+                select_fields += list(
+                    map(lambda x: self.force_apply_backticks(x), preserved_columns)
+                )
+                select_fields += list(
+                    map(lambda x: self.force_apply_backticks(x), duplicate_renames)
+                )

         return ["*"] + select_fields if self._stage == "temp_fields" else select_fields

@@ -475,7 +534,9 @@ class Stage:
             df = (
                 df.alias("tmp")
                 .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
-                .selectExpr(…
+                .selectExpr(
+                    "tmp.*", f"{select} AS {self.auto_backtick(field.get('name'))}"
+                )
             )
         elif csv := join.get("withCSV", None):
             if path := csv.get("path", None):
@@ -485,7 +546,10 @@ class Stage:
                 df = (
                     df.alias("tmp")
                     .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
-                    .selectExpr(…
+                    .selectExpr(
+                        "tmp.*",
+                        f"{select} AS {self.auto_backtick(field.get('name'))}",
+                    )
                 )
             else:
                 raise MissingJoinFieldError("withTable or withCSV (please supply 1)")
@@ -500,7 +564,10 @@ class Stage:
         """
         for field in self._fields:
             if field.get("alias", None):
-                df = df.selectExpr(…
+                df = df.selectExpr(
+                    "*",
+                    f"{self.auto_backtick(field.get('alias'))} AS {self.auto_backtick(field.get('name'))}",
+                )
         return df

     def run_assertions(self, df: DataFrame) -> DataFrame:

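The three new helpers are the heart of this change: a column name that is not already wrapped in backticks gets each dot-separated component escaped (auto_backtick), or is escaped as one literal unit (force_apply_backticks), so generated Spark SQL keeps working when names contain spaces or other special characters. A standalone sketch of the same logic as free functions, with example results:

def is_backtick_escaped(name: str) -> bool:
    return name.startswith("`") and name.endswith("`")


def auto_backtick(name: str) -> str:
    # Escape each dot-separated component unless the caller already escaped the name.
    if is_backtick_escaped(name):
        return name
    return ".".join(f"`{part}`" for part in name.split("."))


def force_apply_backticks(name: str) -> str:
    # Escape the whole name as a single unit; dots are treated as part of the name.
    if is_backtick_escaped(name):
        return name
    return f"`{name}`"


assert auto_backtick("extract.user id") == "`extract`.`user id`"
assert auto_backtick("`col.with.dots`") == "`col.with.dots`"
assert force_apply_backticks("col.with.dots") == "`col.with.dots`"
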
dasl_client/types/dbui.py
CHANGED

@@ -11,7 +11,8 @@ from dasl_api import (
     DbuiV1TransformRequestTransformsInner,
     DbuiV1TransformRequestTransformsInnerPresetOverrides,
     DbuiV1TransformResponse,
-    DbuiV1TransformResponseStagesInner,
+    DbuiV1TransformResponseStagesInner,
+    ContentV1DatasourcePresetAutoloaderCloudFiles,
     DbuiV1TransformRequestAutoloaderInput,
 )

@@ -202,7 +203,7 @@ class TransformRequest(BaseModel):

         @staticmethod
         def from_api_obj(
-            …
+            obj: Optional[ContentV1DatasourcePresetAutoloaderCloudFiles],
         ) -> "TransformRequest.Autoloader.CloudFiles":
             if obj is None:
                 return None
@@ -220,14 +221,14 @@ class TransformRequest(BaseModel):
         format: Optional[str] = None
         location: str
         schema_file: Optional[str] = None
-        …
+        var_schema: Optional[str] = None
         cloud_files: Optional["TransformRequest.Autoloader.CloudFiles"] = None
         row_count: Optional[int] = None
         row_offset: Optional[int] = None

         @staticmethod
         def from_api_obj(
-            …
+            obj: Optional[DbuiV1TransformRequestAutoloaderInput],
         ) -> "Optional[TransformRequest.Autoloader]":
             if obj is None:
                 return None
@@ -235,22 +236,25 @@ class TransformRequest(BaseModel):
                 format=obj.format,
                 location=obj.location,
                 schema_file=obj.schema_file,
-                …
+                var_schema=obj.var_schema,
+                cloud_files=TransformRequest.Autoloader.CloudFiles.from_api_obj(
+                    obj.cloud_files
+                ),
                 row_count=obj.row_count,
                 row_offset=obj.row_offset,
             )
+
         def to_api_obj(self) -> DbuiV1TransformRequestAutoloaderInput:
             return DbuiV1TransformRequestAutoloaderInput(
                 format=self.format,
                 location=self.location,
                 schemaFile=self.schema_file,
-                schema=self.…
+                schema=self.var_schema,
                 cloudFiles=Helpers.maybe(lambda o: o.to_api_obj(), self.cloud_files),
                 rowCount=self.row_count,
-                rowOffset=self.row_offset
+                rowOffset=self.row_offset,
             )

-
     class Transform(BaseModel):
         """
         A transform configuration to apply to the data.

@@ -360,7 +364,9 @@ class TransformRequest(BaseModel):
     def from_api_obj(obj: DbuiV1TransformRequest) -> "TransformRequest":
         return TransformRequest(
             input=TransformRequest.Input.from_api_obj(obj.input),
-            autoloader_input=TransformRequest.Autoloader.from_api_obj(…
+            autoloader_input=TransformRequest.Autoloader.from_api_obj(
+                obj.autoloader_input
+            ),
             use_preset=obj.use_preset,
             transforms=[
                 TransformRequest.Transform.from_api_obj(item) for item in obj.transforms

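The Autoloader model now carries the user-supplied schema string in a var_schema field (presumably named that way to avoid clashing with attributes pydantic already defines on BaseModel), and to_api_obj() maps it back to the schema parameter of DbuiV1TransformRequestAutoloaderInput. A hedged construction sketch (field values are placeholders):

autoloader = TransformRequest.Autoloader(
    location="s3://my-bucket/raw/",
    format="json",
    var_schema="event STRING, ts TIMESTAMP",
)
api_obj = autoloader.to_api_obj()  # var_schema is forwarded as schema= on the API object
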
{dasl_client-1.0.14.dist-info → dasl_client-1.0.17.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dasl_client
-Version: 1.0.14
+Version: 1.0.17
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
@@ -11,8 +11,7 @@ License-File: LICENSE
 Requires-Dist: dasl-api ==0.1.17
 Requires-Dist: databricks-sdk >=0.41.0
 Requires-Dist: pydantic >=2
-Requires-Dist: …
-Requires-Dist: typing-extensions ==4.10.0
+Requires-Dist: typing-extensions >=4.10.0

 # DASL Client Library

{dasl_client-1.0.14.dist-info → dasl_client-1.0.17.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 dasl_client/__init__.py,sha256=E6gOgO8qg96Y38JKA-4LyNBvc2ytQPEfhdniYsCWBxA,127
-dasl_client/client.py,sha256=…
+dasl_client/client.py,sha256=vt2qY0BkhE-hbbntfg6FmyC7c-HJkHGzni8zbSD6aXQ,27819
 dasl_client/helpers.py,sha256=L7ycxrqyG28glRRGZgsrVBdCJzXYCW7DB0hAvupGMuA,1118
 dasl_client/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dasl_client/auth/auth.py,sha256=yTeijYYpfJVJ_wYyq0U6kAntg4xz5MzIR37_CpVR57k,7277
@@ -9,21 +9,28 @@ dasl_client/conn/conn.py,sha256=7o-2qeoVhzULQGW5l6OLYE5dZ60_8OU080ebV_-AW9Q,1433
 dasl_client/errors/__init__.py,sha256=lpH2HGF5kCRTk6MxpPEyY9ulTvsLBFKb4NnLuFFLZZA,40
 dasl_client/errors/errors.py,sha256=u-B8dR8zlxdNVeEdHi6UozX178jwJJ5ZJOGl9YjONRc,4008
 dasl_client/preset_development/__init__.py,sha256=9yC4gmQfombvYLThzo0pSfT5JMolfNVWFVQIuIg_XUA,131
-dasl_client/preset_development/errors.py,sha256=…
-dasl_client/preset_development/preview_engine.py,sha256=…
-dasl_client/preset_development/preview_parameters.py,sha256=…
-dasl_client/preset_development/stage.py,sha256=…
+dasl_client/preset_development/errors.py,sha256=jsqBFMZtl7uHi6O9bBHnOt0UQ4WM9KN9x0uYtf5c268,5482
+dasl_client/preset_development/preview_engine.py,sha256=eMt-Uu3_ocLMuEsZAoCdQfzDI_YvM5gUfiPFfRzpyvU,14714
+dasl_client/preset_development/preview_parameters.py,sha256=YjSJ00mEUcqF5KkJEPW6Wif8I4iaMIMxJeUSuyIS4x0,14640
+dasl_client/preset_development/stage.py,sha256=b4AE7aOocVG1WLKJLb69efNn1qwHulophBa_i58wMog,23089
 dasl_client/types/__init__.py,sha256=GsXC3eWuv21VTLPLPH9pzM95JByaKnKrPjJkh2rlZfQ,170
 dasl_client/types/admin_config.py,sha256=Kmx3Kuai9_LWMeO2NpWasRUgLihYSEXGtuYVfG0FkjU,2200
 dasl_client/types/content.py,sha256=uZAO-Vm_orvqsH2CkiwBUHgn6fWGXR90hOGKc256lcA,7442
 dasl_client/types/datasource.py,sha256=-ABmBh5yZwHeY-PKQMnNCNa9FSzod5n1O817m8ZCL6o,52519
-dasl_client/types/dbui.py,sha256=…
+dasl_client/types/dbui.py,sha256=VYRcxwVgoAfcuY7IlPdgfQ-R21Xmz-QWa3DdJgXIHM4,16413
 dasl_client/types/helpers.py,sha256=gLGTvrssAKrdkQT9h80twEosld2egwhvj-zAudxWFPs,109
 dasl_client/types/rule.py,sha256=BqhWhT8Eh95UXNytd0PxVcjqYuWQcdN1tfKjUB4Tk74,25781
 dasl_client/types/types.py,sha256=DeUOfdYGOhUGEy7yKOfo0OYTXYRrs57yYgNLUbu7Tlc,8806
 dasl_client/types/workspace_config.py,sha256=RThg_THS_4leITWdzBPTWdR2ytq5Uk36m6nIOUMzFCM,24878
-…
-…
-…
-…
-…
+test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+test/conftest.py,sha256=ZfNULJxVQ609GHxw9UsreTcbQMl3gbcTP_DKT1oySwQ,440
+test/constants.py,sha256=ed3xiemWDJVBlHDwn-iQToCbcaXD3AN-5r8HkURCqBs,438
+test/test_api_changes.py,sha256=-SjOG5wKPD_cpTukkfNkALebph06_yxBYsqxtQVnedc,3466
+test/test_api_surface.py,sha256=XXAfJxYddRpYV1cmmVH2dI5LS9NLzd-sp-6jOnOgBTs,10613
+test/test_databricks_secret_auth.py,sha256=P1seBBHOLcCzJPLdRZlJZxeG62GUFKFbjsY8c7gTT_8,3613
+test/test_marshaling.py,sha256=WrouTCgcye_ytVOBMMseeEvUGZjbwibPJmEvGSK2LL4,37928
+dasl_client-1.0.17.dist-info/LICENSE,sha256=M35UepUPyKmFkvENlkweeaMElheQqNoM5Emh8ADO-rs,4
+dasl_client-1.0.17.dist-info/METADATA,sha256=T7ngJt7bv-rI9M9Y9hyIfq2wTPkMbtKcK0vVWNH1Ew8,741
+dasl_client-1.0.17.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+dasl_client-1.0.17.dist-info/top_level.txt,sha256=943P5S_qILHKZYxAvxPUeqOzM2yV18d5SBVKxzPw2OE,17
+dasl_client-1.0.17.dist-info/RECORD,,

test/__init__.py
ADDED
File without changes

test/conftest.py
ADDED

@@ -0,0 +1,18 @@
+import pytest
+
+from dasl_client import Client
+
+from .constants import *
+
+
+@pytest.fixture(scope="session")
+def api_client():
+    client = Client.new_workspace(
+        admin_email="test@antimatter.io",
+        app_client_id=app_client_id,
+        service_principal_id=databricks_client_id,
+        service_principal_secret=databricks_client_secret,
+        workspace_url=databricks_host,
+        dasl_host=dasl_host,
+    )
+    yield client

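The session-scoped api_client fixture builds one Client per test run; tests receive it simply by naming it as an argument. A hedged sketch of a test module using it (the test itself is hypothetical):

# test/test_smoke.py (hypothetical)
def test_api_client_fixture(api_client):
    # pytest injects the session-scoped Client constructed in conftest.py
    assert api_client is not None
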
test/constants.py
ADDED

@@ -0,0 +1,10 @@
+import os
+from urllib.parse import urlparse
+
+dasl_host = os.environ["DASL_API_URL"]
+databricks_host = os.environ["DASL_DATABRICKS_HOST"]
+databricks_client_id = os.environ["DASL_DATABRICKS_CLIENT_ID"]
+databricks_client_secret = os.environ["DASL_DATABRICKS_CLIENT_SECRET"]
+workspace = urlparse(databricks_host).hostname
+app_client_id = "22853b93-68ba-4ae2-8e41-976417f501dd"
+alternate_app_client_id = "335ac0d3-e0ea-4732-ba93-0277423b5029"

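constants.py reads its configuration from environment variables at import time, so they must be set before pytest collects the test package. A minimal sketch of driving the suite programmatically with placeholder values (the real values come from your DASL and Databricks deployment):

import os

import pytest

os.environ.setdefault("DASL_API_URL", "https://dasl.example.com")
os.environ.setdefault("DASL_DATABRICKS_HOST", "https://example.cloud.databricks.com")
os.environ.setdefault("DASL_DATABRICKS_CLIENT_ID", "<service-principal-id>")
os.environ.setdefault("DASL_DATABRICKS_CLIENT_SECRET", "<service-principal-secret>")

pytest.main(["test/"])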