dasl-client 1.0.14__tar.gz → 1.0.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dasl-client might be problematic.

Files changed (42)
  1. {dasl_client-1.0.14 → dasl_client-1.0.16}/PKG-INFO +2 -3
  2. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/client.py +22 -6
  3. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/errors.py +4 -1
  4. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/preview_engine.py +23 -7
  5. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/preview_parameters.py +31 -6
  6. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/stage.py +87 -20
  7. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/dbui.py +12 -7
  8. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/PKG-INFO +2 -3
  9. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/SOURCES.txt +4 -0
  10. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/requires.txt +1 -2
  11. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/top_level.txt +1 -0
  12. {dasl_client-1.0.14 → dasl_client-1.0.16}/pyproject.toml +2 -3
  13. dasl_client-1.0.16/test/__init__.py +0 -0
  14. dasl_client-1.0.16/test/conftest.py +18 -0
  15. dasl_client-1.0.16/test/constants.py +10 -0
  16. {dasl_client-1.0.14 → dasl_client-1.0.16}/test/test_api_surface.py +1 -24
  17. dasl_client-1.0.16/test/test_databricks_secret_auth.py +116 -0
  18. {dasl_client-1.0.14 → dasl_client-1.0.16}/test/test_marshaling.py +5 -6
  19. {dasl_client-1.0.14 → dasl_client-1.0.16}/LICENSE +0 -0
  20. {dasl_client-1.0.14 → dasl_client-1.0.16}/README.md +0 -0
  21. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/__init__.py +0 -0
  22. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/auth/__init__.py +0 -0
  23. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/auth/auth.py +0 -0
  24. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/conn/__init__.py +0 -0
  25. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/conn/client_identifier.py +0 -0
  26. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/conn/conn.py +0 -0
  27. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/errors/__init__.py +0 -0
  28. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/errors/errors.py +0 -0
  29. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/helpers.py +0 -0
  30. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/__init__.py +0 -0
  31. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/__init__.py +0 -0
  32. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/admin_config.py +0 -0
  33. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/content.py +0 -0
  34. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/datasource.py +0 -0
  35. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/helpers.py +0 -0
  36. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/rule.py +0 -0
  37. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/types.py +0 -0
  38. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/workspace_config.py +0 -0
  39. {dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/dependency_links.txt +0 -0
  40. {dasl_client-1.0.14 → dasl_client-1.0.16}/setup.cfg +0 -0
  41. {dasl_client-1.0.14 → dasl_client-1.0.16}/setup.py +0 -0
  42. {dasl_client-1.0.14 → dasl_client-1.0.16}/test/test_api_changes.py +0 -0
{dasl_client-1.0.14 → dasl_client-1.0.16}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dasl_client
- Version: 1.0.14
+ Version: 1.0.16
  Summary: The DASL client library used for interacting with the DASL workspace
  Home-page: https://github.com/antimatter/asl
  Author: Antimatter Team
@@ -11,8 +11,7 @@ License-File: LICENSE
  Requires-Dist: dasl_api==0.1.17
  Requires-Dist: databricks-sdk>=0.41.0
  Requires-Dist: pydantic>=2
- Requires-Dist: pyspark>=3.5.5
- Requires-Dist: typing_extensions==4.10.0
+ Requires-Dist: typing_extensions>=4.10.0

  # DASL Client Library

{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/client.py

@@ -1,4 +1,6 @@
  from copy import deepcopy
+ from datetime import datetime, timedelta
+ from time import sleep
  from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar
  from pydantic import Field
  from pyspark.sql import DataFrame
@@ -608,6 +610,7 @@ class Client:
  self,
  warehouse: str,
  request: TransformRequest,
+ timeout: timedelta = timedelta(minutes=5),
  ) -> TransformResponse:
  """
  Run a sequence of ADHOC transforms against a SQL warehouse to
@@ -618,16 +621,29 @@
  :return: a TransformResponse object containing the results
  after running the transforms.
  :raises: NotFoundError if the rule does not exist
+ :raises: Exception for a server-side error or timeout
  """
  with error_handler():
- return TransformResponse.from_api_obj(
- self._dbui_client().dbui_v1_transform(
- self._workspace(),
- warehouse,
- request.to_api_obj(),
- )
+ status = self._dbui_client().dbui_v1_transform(
+ self._workspace(),
+ warehouse,
+ request.to_api_obj(),
  )

+ begin = datetime.now()
+ while datetime.now() - begin < timeout:
+ sleep(5)
+ status = self._dbui_client().dbui_v1_transform_status(
+ self._workspace(), status.id
+ )
+
+ if status.status == "failure":
+ raise Exception(f"adhoc transform failed with {status.error}")
+ elif status.status == "success":
+ return TransformResponse.from_api_obj(status.result)
+
+ raise Exception("timed out waiting for adhoc transform result")
+
  def get_observable_events(
  self,
  warehouse: str,
{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/errors.py

@@ -149,7 +149,10 @@ class AssertionFailedError(PresetError):
  def __init__(self, expr: str, assertion_message: str, df: DataFrame):
  # Get the Databricks built-in functions out the namespace.
  ipython = get_ipython()
- display = ipython.user_ns["display"]
+ if ipython is not None:
+ display = ipython.user_ns["display"]
+ else:
+ display = lambda x: x.show()

  self.expr = expr
  self.assertion_message = assertion_message
{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/preview_engine.py

@@ -54,10 +54,13 @@ class PreviewEngine:
  self._preset.get("silver", None), self._pretransform_name
  )

+ self._bronze = None
  self._pre = None
  self._silver = []
  self._gold = []
- self._result_df_map = {}
+ self._result_df_map: Tuple[
+ DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]
+ ] = (None, {}, {})

  def _validate_pretransform_name(
  self, silver: Dict[str, str], pretransform_name: str
@@ -181,8 +184,12 @@ class PreviewEngine:

  # Get the Databricks built-in functions out the namespace.
  ipython = get_ipython()
- displayHTML = ipython.user_ns["displayHTML"]
- display = ipython.user_ns["display"]
+ if ipython is not None:
+ displayHTML = ipython.user_ns["displayHTML"]
+ display = ipython.user_ns["display"]
+ else:
+ displayHTML = lambda x: print(x)
+ display = lambda x: x.show()

  def d(txt, lvl) -> None:
  displayHTML(
@@ -245,7 +252,7 @@ class PreviewEngine:
  raise UnknownGoldTableError(name, gold_table_schema)

  # Performs the type check.
- delta_df = self._spark.table(f"{gold_table_schema}.{name}").limit(0)
+ delta_df = self._spark.table(f"`{gold_table_schema}`.`{name}`").limit(0)
  unioned_df = delta_df.unionByName(df, allowMissingColumns=True)

  # Now we check no new columns.
@@ -286,7 +293,7 @@ class PreviewEngine:
  d("Resultant gold table preview", 3)
  display(unioned_df)

- def evaluate(self, gold_table_schema: str) -> None:
+ def evaluate(self, gold_table_schema: str, display: bool = True) -> None:
  """
  Evaluates the loaded preset YAML using the input datasource configuration to load
  records. Finally, checks that the output from the Gold stages is compatible with
@@ -303,7 +310,9 @@
  ):
  if not any(
  row.databaseName == schema_name
- for row in self._spark.sql(f"SHOW SCHEMAS IN {catalog_name}").collect()
+ for row in self._spark.sql(
+ f"SHOW SCHEMAS IN `{catalog_name}`"
+ ).collect()
  ):
  raise InvalidGoldTableSchemaError(
  gold_table_schema,
@@ -340,5 +349,12 @@ class PreviewEngine:
  self._compile_stages()

  with self._ds_params as df:
+ self._bronze = df
  self._result_df_map = self._run(df)
- self._render_output(df, self._result_df_map, gold_table_schema)
+ if display:
+ self._render_output(df, self._result_df_map, gold_table_schema)
+
+ def results(
+ self,
+ ) -> Tuple[DataFrame, DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]]:
+ return self._bronze, *self._result_df_map
{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/preview_parameters.py

@@ -1,3 +1,5 @@
+ from typing import Optional
+
  from pyspark.sql import DataFrame, SparkSession
  from pyspark.sql.types import *
  from pyspark.sql.dataframe import DataFrame
@@ -115,6 +117,7 @@ class PreviewParameters:
  self._table = None

  self._pretransform_name = None
+ self._bronze_pre_transform: Optional[List[str]] = None

  self._df = None

@@ -166,10 +169,13 @@ class PreviewParameters:
  self._record_limit
  )

+ if self._bronze_pre_transform is not None:
+ stream_df = stream_df.selectExpr(*self._bronze_pre_transform)
+
  query = (
  stream_df.writeStream.format("memory")
  .queryName("batch_data")
- .trigger(once=True)
+ .trigger(availableNow=True)
  .start()
  )

@@ -193,12 +199,17 @@ class PreviewParameters:

  # Get the Databricks built-in functions out the namespace.
  ipython = get_ipython()
- dbutils = ipython.user_ns["dbutils"]
+ if ipython is not None:
+ dbutils = ipython.user_ns["dbutils"]

- dbutils.fs.rm(
- f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
- recurse=True,
- )
+ dbutils.fs.rm(
+ f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+ recurse=True,
+ )
+ else:
+ print(
+ f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}"
+ )

  def from_input(self):
  """
@@ -286,6 +297,10 @@ class PreviewParameters:
  Returns:
  PreviewParameters: The current instance with updated configuration.
  """
+ if file_format.lower() == "jsonl":
+ self._autoloader_format = "json"
+ self.set_autoloader_multiline(False)
+ return self
  self._autoloader_format = file_format
  return self

@@ -350,6 +365,16 @@ class PreviewParameters:
  self._pretransform_name = pretransform_name
  return self

+ def set_bronze_pre_transform(self, expr: List[str]):
+ """
+ Sets a pre-transform expression that will run before data is written to bronze
+
+ Returns:
+ PreviewParameters: The current instance with updated configuration.
+ """
+ self._bronze_pre_transform = expr
+ return self
+
  def set_date_range(self, column: str, start_time: str, end_time: str):
  """
  Set the TIMESTAMP column and date range to use as the input data filter to
{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/preset_development/stage.py

@@ -150,17 +150,19 @@ class Stage:
  if target_col not in existing_columns:
  raise ReferencedColumnMissingError("jsonExtract", target_col)
  schema = self._spark.sql(
- f"SELECT schema_of_json_agg({target_col}) AS sc FROM {{df}}", df=df
+ f"SELECT schema_of_json_agg({self.auto_backtick(target_col)}) AS sc FROM {{df}}",
+ df=df,
  ).collect()[0][0]
  extract_df = self._spark.createDataFrame(data=[], schema=schema)
  columns = extract_df.columns
  columns = [
- f"extract.{col} AS {col}"
+ self.auto_backtick(f"extract.{col}") + f" AS {self.auto_backtick(col)}"
  for col in columns
  if col not in omit_fields and col not in existing_columns
  ]
  columns += [
- f"extract.{col} AS {duplicate_prefix}{col}"
+ self.auto_backtick(f"extract.{col}")
+ + f" AS {self.auto_backtick(duplicate_prefix + col)}"
  for col in columns
  if col not in omit_fields and col in existing_columns
  ]
@@ -176,7 +178,10 @@ class Stage:
  A DataFrame with the resultant operation's records.
  """
  return (
- df.selectExpr("*", f"from_json({target_col}, '{schema}') AS extract")
+ df.selectExpr(
+ "*",
+ f"from_json({self.auto_backtick(target_col)}, '{schema}') AS extract",
+ )
  .selectExpr("*", *columns)
  .drop("extract")
  )
@@ -198,7 +203,10 @@ class Stage:
  """
  extract_df = self._spark.createDataFrame(data=[], schema=schema)
  schema = extract_df.drop(omit_fields).schema.simpleString()
- return df.selectExpr("*", f"from_json({target_col}, '{schema}') AS {name}")
+ return df.selectExpr(
+ "*",
+ f"from_json({self.auto_backtick(target_col)}, '{schema}') AS {self.auto_backtick(name)}",
+ )

  def preserved_columns(
  self, df: DataFrame
@@ -224,7 +232,7 @@ class Stage:
  duplicate_prefix = self._duplicate_prefix()
  column_names = self._column_names()
  duplicate_renames = [
- f"{col} AS {duplicate_prefix}{col}"
+ f"{self.auto_backtick(col)} AS {self.auto_backtick(duplicate_prefix + col)}"
  for col in preserved_columns
  if col in column_names
  ]
@@ -296,14 +304,46 @@ class Stage:
  """
  if field.get("from", None):
  # check that the from column exists in the df?
- return f"{field['from']} AS {name}"
+ return f"{self.auto_backtick(field['from'])} AS {self.auto_backtick(name)}"
  elif field.get("literal", None):
- return f"'{field['literal']}' AS {name}"
- elif field.get("expr", None):
- return f"{field['expr']} AS {name}"
+ return f"'{field['literal']}' AS {self.auto_backtick(name)}"
+ elif field.get("expr", None) is not None:
+ return f"{field['expr']} AS {self.auto_backtick(name)}"
  else:
  return ""

+ def is_backtick_escaped(self, name: str) -> bool:
+ """
+ check if a given (column) name is backtick escaped or not
+ :param name: column name
+ :return: bool
+ """
+ return name.startswith("`") and name.endswith("`")
+
+ def auto_backtick(self, name: str) -> str:
+ """
+ auto-backtick given name in case it isn't already backtick escaped.
+ if the name contains dots it will get split and each component backticked individually.
+ Returns the name wrapped in backticks or the passed name if it already had backticks.
+ :param name: column name
+ :return: str
+ """
+ if self.is_backtick_escaped(name):
+ return name
+ parts = name.split(".")
+ return ".".join(list(map(lambda s: f"`{s}`", parts)))
+
+ def force_apply_backticks(self, name: str) -> str:
+ """
+ forces application of backticks to the given (column) name as a single unit
+ if it already has backticks this is a noop
+ :param name: column name
+ :return: str
+ """
+ if self.is_backtick_escaped(name):
+ return name
+ return f"`{name}`"
+
  def process_node(self, name: str, node: Node) -> str:
  """
  Processes a single node in a tree of nodes.
@@ -319,7 +359,7 @@ class Stage:
  child_expr = self.process_node(child_name, child_node)
  fields_list.append(f"{child_expr}")
  joined_fields = ",\n".join(fields_list)
- return f"struct(\n{joined_fields}\n) AS {name}"
+ return f"struct(\n{joined_fields}\n) AS {self.auto_backtick(name)}"
  else:
  return ""

@@ -341,12 +381,22 @@ class Stage:
  """
  Renders a list of field specifications containing both simple and
  STRUCT references into valid, STRUCT cognicient, SELECT expressions.
+ if a nested field is wrapped in backticks it will be treated as a simple field
+ for example field of name `col.with.dots` will NOT be treated as nested field.

  Returns:
  The SQL expression.
  """
- simple_fields = [f for f in fields if "." not in f["name"]]
- nested_fields = [f for f in fields if "." in f["name"]]
+ simple_fields = [
+ f
+ for f in fields
+ if "." not in f["name"] or self.is_backtick_escaped(f["name"])
+ ]
+ nested_fields = [
+ f
+ for f in fields
+ if "." in f["name"] and not self.is_backtick_escaped(f["name"])
+ ]

  result_parts = []
  for field in simple_fields:
@@ -358,7 +408,7 @@ class Stage:
  nested_str = self.parse_to_string(tree)
  result_parts.append(nested_str)

- return [p for p in result_parts if p]
+ return [p for p in result_parts if p is not None and len(p) > 0]

  def select_expr(self, df: DataFrame) -> str:
  """
@@ -379,8 +429,12 @@ class Stage:
  if should_preserve:
  if embed_col := preserve.get("embedColumn", None):
  preserved_columns = self.preserved_columns_embed_column(df)
+ # preserved_columns is obtained from df.columns
+ # applying backticks to all of them is OK here
+ # since they will never use "obj.key" to reference nested fields of structs
+ # so we just go ahead and apply backticks to all across the board
  select_fields += [
- f"struct({', '.join(preserved_columns)}) AS {embed_col}"
+ f"struct({', '.join(list(map(lambda x: self.force_apply_backticks(x), preserved_columns)))}) AS {self.auto_backtick(embed_col)}"
  ]
  else:
  (
@@ -388,8 +442,13 @@ class Stage:
  duplicate_renames,
  column_names,
  ) = self.preserved_columns(df)
- select_fields += preserved_columns
- select_fields += duplicate_renames
+ # see note above: same here - apply backticks to all columns across the board
+ select_fields += list(
+ map(lambda x: self.force_apply_backticks(x), preserved_columns)
+ )
+ select_fields += list(
+ map(lambda x: self.force_apply_backticks(x), duplicate_renames)
+ )

  return ["*"] + select_fields if self._stage == "temp_fields" else select_fields

@@ -475,7 +534,9 @@ class Stage:
  df = (
  df.alias("tmp")
  .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
- .selectExpr("tmp.*", f"{select} AS {field.get('name')}")
+ .selectExpr(
+ "tmp.*", f"{select} AS {self.auto_backtick(field.get('name'))}"
+ )
  )
  elif csv := join.get("withCSV", None):
  if path := csv.get("path", None):
@@ -485,7 +546,10 @@ class Stage:
  df = (
  df.alias("tmp")
  .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
- .selectExpr("tmp.*", f"{select} AS {field.get('name')}")
+ .selectExpr(
+ "tmp.*",
+ f"{select} AS {self.auto_backtick(field.get('name'))}",
+ )
  )
  else:
  raise MissingJoinFieldError("withTable or withCSV (please supply 1)")
@@ -500,7 +564,10 @@ class Stage:
  """
  for field in self._fields:
  if field.get("alias", None):
- df = df.selectExpr("*", f"{field.get('alias')} AS {field.get('name')}")
+ df = df.selectExpr(
+ "*",
+ f"{self.auto_backtick(field.get('alias'))} AS {self.auto_backtick(field.get('name'))}",
+ )
  return df

  def run_assertions(self, df: DataFrame) -> DataFrame:
{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client/types/dbui.py

@@ -11,7 +11,8 @@ from dasl_api (
  DbuiV1TransformRequestTransformsInner,
  DbuiV1TransformRequestTransformsInnerPresetOverrides,
  DbuiV1TransformResponse,
- DbuiV1TransformResponseStagesInner, ContentV1DatasourcePresetAutoloaderCloudFiles,
+ DbuiV1TransformResponseStagesInner,
+ ContentV1DatasourcePresetAutoloaderCloudFiles,
  DbuiV1TransformRequestAutoloaderInput,
  )

@@ -202,7 +203,7 @@ class TransformRequest(BaseModel):

  @staticmethod
  def from_api_obj(
- obj: Optional[ContentV1DatasourcePresetAutoloaderCloudFiles],
+ obj: Optional[ContentV1DatasourcePresetAutoloaderCloudFiles],
  ) -> "TransformRequest.Autoloader.CloudFiles":
  if obj is None:
  return None
@@ -227,7 +228,7 @@ class TransformRequest(BaseModel):

  @staticmethod
  def from_api_obj(
- obj: Optional[DbuiV1TransformRequestAutoloaderInput],
+ obj: Optional[DbuiV1TransformRequestAutoloaderInput],
  ) -> "Optional[TransformRequest.Autoloader]":
  if obj is None:
  return None
@@ -235,10 +236,13 @@ class TransformRequest(BaseModel):
  format=obj.format,
  location=obj.location,
  schema_file=obj.schema_file,
- cloud_files=TransformRequest.Autoloader.CloudFiles.from_api_obj(obj.cloud_files),
+ cloud_files=TransformRequest.Autoloader.CloudFiles.from_api_obj(
+ obj.cloud_files
+ ),
  row_count=obj.row_count,
  row_offset=obj.row_offset,
  )
+
  def to_api_obj(self) -> DbuiV1TransformRequestAutoloaderInput:
  return DbuiV1TransformRequestAutoloaderInput(
  format=self.format,
@@ -247,10 +251,9 @@ class TransformRequest(BaseModel):
  schema=self.schema_file,
  cloudFiles=Helpers.maybe(lambda o: o.to_api_obj(), self.cloud_files),
  rowCount=self.row_count,
- rowOffset=self.row_offset
+ rowOffset=self.row_offset,
  )

-
  class Transform(BaseModel):
  """
  A transform configuration to apply to the data.
@@ -360,7 +363,9 @@ class TransformRequest(BaseModel):
  def from_api_obj(obj: DbuiV1TransformRequest) -> "TransformRequest":
  return TransformRequest(
  input=TransformRequest.Input.from_api_obj(obj.input),
- autoloader_input=TransformRequest.Autoloader.from_api_obj(obj.autoloader_input),
+ autoloader_input=TransformRequest.Autoloader.from_api_obj(
+ obj.autoloader_input
+ ),
  use_preset=obj.use_preset,
  transforms=[
  TransformRequest.Transform.from_api_obj(item) for item in obj.transforms
{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dasl_client
- Version: 1.0.14
+ Version: 1.0.16
  Summary: The DASL client library used for interacting with the DASL workspace
  Home-page: https://github.com/antimatter/asl
  Author: Antimatter Team
@@ -11,8 +11,7 @@ License-File: LICENSE
  Requires-Dist: dasl_api==0.1.17
  Requires-Dist: databricks-sdk>=0.41.0
  Requires-Dist: pydantic>=2
- Requires-Dist: pyspark>=3.5.5
- Requires-Dist: typing_extensions==4.10.0
+ Requires-Dist: typing_extensions>=4.10.0

  # DASL Client Library

{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/SOURCES.txt

@@ -31,6 +31,10 @@ dasl_client/types/helpers.py
  dasl_client/types/rule.py
  dasl_client/types/types.py
  dasl_client/types/workspace_config.py
+ test/__init__.py
+ test/conftest.py
+ test/constants.py
  test/test_api_changes.py
  test/test_api_surface.py
+ test/test_databricks_secret_auth.py
  test/test_marshaling.py
{dasl_client-1.0.14 → dasl_client-1.0.16}/dasl_client.egg-info/requires.txt

@@ -1,5 +1,4 @@
  dasl_api==0.1.17
  databricks-sdk>=0.41.0
  pydantic>=2
- pyspark>=3.5.5
- typing_extensions==4.10.0
+ typing_extensions>=4.10.0
{dasl_client-1.0.14 → dasl_client-1.0.16}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "dasl_client"
- version = "1.0.14"
+ version = "1.0.16"
  description = "The DASL client library used for interacting with the DASL workspace"
  readme = "README.md"
  authors = [
@@ -16,6 +16,5 @@ dependencies = [
  "dasl_api==0.1.17",
  "databricks-sdk>=0.41.0",
  "pydantic>=2",
- "pyspark>=3.5.5",
- "typing_extensions==4.10.0",
+ "typing_extensions>=4.10.0",
  ]
dasl_client-1.0.16/test/__init__.py: new empty file (no content to diff)
dasl_client-1.0.16/test/conftest.py (new file)

@@ -0,0 +1,18 @@
+ import pytest
+
+ from dasl_client import Client
+
+ from .constants import *
+
+
+ @pytest.fixture(scope="session")
+ def api_client():
+ client = Client.new_workspace(
+ admin_email="test@antimatter.io",
+ app_client_id=app_client_id,
+ service_principal_id=databricks_client_id,
+ service_principal_secret=databricks_client_secret,
+ workspace_url=databricks_host,
+ dasl_host=dasl_host,
+ )
+ yield client
dasl_client-1.0.16/test/constants.py (new file)

@@ -0,0 +1,10 @@
+ import os
+ from urllib.parse import urlparse
+
+ dasl_host = os.environ["DASL_API_URL"]
+ databricks_host = os.environ["DASL_DATABRICKS_HOST"]
+ databricks_client_id = os.environ["DASL_DATABRICKS_CLIENT_ID"]
+ databricks_client_secret = os.environ["DASL_DATABRICKS_CLIENT_SECRET"]
+ workspace = urlparse(databricks_host).hostname
+ app_client_id = "22853b93-68ba-4ae2-8e41-976417f501dd"
+ alternate_app_client_id = "335ac0d3-e0ea-4732-ba93-0277423b5029"
{dasl_client-1.0.14 → dasl_client-1.0.16}/test/test_api_surface.py

@@ -1,29 +1,6 @@
- import os
- import pytest
- from urllib.parse import urlparse
-
  from dasl_client import *

- dasl_host = os.environ["DASL_API_URL"]
- databricks_host = os.environ["DATABRICKS_HOST"]
- databricks_client_id = os.environ["DATABRICKS_CLIENT_ID"]
- databricks_client_secret = os.environ["DATABRICKS_CLIENT_SECRET"]
- workspace = urlparse(databricks_host).hostname
- app_client_id = "22853b93-68ba-4ae2-8e41-976417f501dd"
- alternate_app_client_id = "335ac0d3-e0ea-4732-ba93-0277423b5029"
-
-
- @pytest.fixture(scope="session")
- def api_client():
- client = Client.new_workspace(
- admin_email="test@antimatter.io",
- app_client_id=app_client_id,
- service_principal_id=databricks_client_id,
- service_principal_secret=databricks_client_secret,
- workspace_url=databricks_host,
- dasl_host=dasl_host,
- )
- yield client
+ from .constants import *


  def test_admin_config(api_client):
dasl_client-1.0.16/test/test_databricks_secret_auth.py (new file)

@@ -0,0 +1,116 @@
+ import base64
+ import datetime
+ import os
+ import time
+
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.service import jobs, workspace as dbworkspace
+
+ from .constants import *
+
+ pylib_volume_path = os.environ["PYLIB_VOLUME_PATH"]
+ pylib_wheel_path = os.environ["PYLIB_WHEEL_PATH"]
+
+
+ def test_secret_auth(api_client):
+ # making sure it's even possible to get a config
+ api_client.get_config()
+
+ # need to do an API operation using databricks secret auth.
+ notebook_data = f"""
+ %pip install {pylib_wheel_path}
+ dbutils.library.restartPython()
+ # COMMAND ----------
+ from dasl_client.client import Client
+
+ Client.for_workspace(
+ workspace_url="{databricks_host}",
+ dasl_host="{dasl_host}",
+ ).get_config()
+ # COMMAND ----------
+ dbutils.notebook.exit("SUCCESS")
+ """
+ print(f"notebook_data={notebook_data}")
+
+ wsc = WorkspaceClient()
+ wsc.workspace.mkdirs(path=pylib_volume_path)
+
+ notebook_path = f"{pylib_volume_path}/test_secret_auth_notebook"
+ wsc.workspace.import_(
+ path=notebook_path,
+ format=dbworkspace.ImportFormat.SOURCE,
+ language=dbworkspace.Language.PYTHON,
+ content=base64.b64encode(notebook_data.encode("utf-8")).decode("utf-8"),
+ overwrite=True,
+ )
+
+ job_id = None
+ try:
+ job_id = wsc.jobs.create(
+ name="run test_secret_auth notebook",
+ tasks=[
+ jobs.Task(
+ task_key="run_notebook",
+ notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
+ ),
+ ],
+ ).job_id
+
+ wsc.jobs.run_now(job_id=job_id)
+
+ logs = []
+ start = datetime.datetime.now()
+ complete = False
+ while not complete:
+ elapsed = datetime.datetime.now() - start
+ if elapsed > datetime.timedelta(seconds=300):
+ raise Exception(f"timed out waiting for job")
+
+ time.sleep(5)
+
+ status, logs = fetch_latest_run_status_and_logs(wsc, job_id)
+ print(f"logs={logs}")
+
+ if status == jobs.TerminationCodeCode.RUN_EXECUTION_ERROR:
+ raise Exception(f"job terminated with error")
+
+ complete = status == jobs.TerminationCodeCode.SUCCESS
+
+ print(logs)
+ assert len(logs) == 1
+ assert logs[0] == "SUCCESS"
+ finally:
+ wsc.workspace.delete(pylib_volume_path, recursive=True)
+ if job_id is not None:
+ wsc.jobs.delete(job_id=job_id)
+
+
+ def fetch_latest_run_status_and_logs(
+ wsc: WorkspaceClient,
+ job_id: str,
+ ):
+ runs = list(wsc.jobs.list_runs(job_id=job_id, expand_tasks=True))
+ if not runs:
+ return "No runs found", None
+
+ # Find the latest run based on the start time
+ latest_run = max(runs, key=lambda r: r.start_time)
+ if latest_run.status.termination_details is None:
+ return "No runs found", None
+ status = latest_run.status.termination_details.code
+ logs = []
+ for task in latest_run.tasks:
+ output = wsc.jobs.get_run_output(task.run_id)
+ if output.error is not None:
+ logs.append(output.error)
+ elif output.logs is not None:
+ logs.append(output.logs)
+ elif output.notebook_output is not None:
+ logs.append(output.notebook_output.result)
+ elif output.run_job_output is not None:
+ raise Exception("Nested jobs are not supported")
+ elif output.sql_output is not None:
+ raise Exception("SQL jobs are unsupported")
+ else:
+ logs.append("")
+ return status, logs
{dasl_client-1.0.14 → dasl_client-1.0.16}/test/test_marshaling.py

@@ -1,9 +1,6 @@
- import os
-
  from dasl_client import *

- databricks_host = os.environ["DATABRICKS_HOST"]
- workspace = databricks_host.split("//")[1]
+ from .constants import *


  def test_workspace_config_marshal_unmarshal():
@@ -701,13 +698,15 @@ def test_transform_request_marshal_unmarshal():
  ],
  data=[{"col1": "1", "col2": "a"}, {"col1": "2", "col2": "b"}],
  ),
- autoloader_input=DataSource.Autoloader(
+ autoloader_input=TransformRequest.Autoloader(
  format="csv",
  location="s3://bucket/data",
  schema_file="schema.json",
- cloud_files=DataSource.Autoloader.CloudFiles(
+ cloud_files=TransformRequest.Autoloader.CloudFiles(
  schema_hints_file="hints_file.csv", schema_hints="hint1, hint2"
  ),
+ row_count=1,
+ row_offset=5,
  ),
  use_preset="preset_value",
  transforms=[
The remaining files listed above have no content changes.