dasl-client 1.0.13__tar.gz → 1.0.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dasl-client might be problematic.

Files changed (43)
  1. {dasl_client-1.0.13 → dasl_client-1.0.16}/PKG-INFO +3 -3
  2. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/client.py +60 -9
  3. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/helpers.py +6 -3
  4. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/errors.py +4 -1
  5. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_engine.py +23 -7
  6. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_parameters.py +31 -6
  7. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/stage.py +87 -20
  8. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/content.py +4 -0
  9. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/dbui.py +87 -3
  10. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/PKG-INFO +3 -3
  11. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/SOURCES.txt +4 -0
  12. dasl_client-1.0.16/dasl_client.egg-info/requires.txt +4 -0
  13. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/top_level.txt +1 -0
  14. {dasl_client-1.0.13 → dasl_client-1.0.16}/pyproject.toml +3 -3
  15. dasl_client-1.0.16/test/__init__.py +0 -0
  16. dasl_client-1.0.16/test/conftest.py +18 -0
  17. dasl_client-1.0.16/test/constants.py +10 -0
  18. {dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_api_surface.py +1 -24
  19. dasl_client-1.0.16/test/test_databricks_secret_auth.py +116 -0
  20. {dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_marshaling.py +5 -6
  21. dasl_client-1.0.13/dasl_client.egg-info/requires.txt +0 -4
  22. {dasl_client-1.0.13 → dasl_client-1.0.16}/LICENSE +0 -0
  23. {dasl_client-1.0.13 → dasl_client-1.0.16}/README.md +0 -0
  24. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/__init__.py +0 -0
  25. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/auth/__init__.py +0 -0
  26. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/auth/auth.py +0 -0
  27. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/conn/__init__.py +0 -0
  28. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/conn/client_identifier.py +0 -0
  29. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/conn/conn.py +0 -0
  30. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/errors/__init__.py +0 -0
  31. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/errors/errors.py +0 -0
  32. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/__init__.py +0 -0
  33. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/__init__.py +0 -0
  34. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/admin_config.py +0 -0
  35. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/datasource.py +0 -0
  36. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/helpers.py +0 -0
  37. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/rule.py +0 -0
  38. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/types.py +0 -0
  39. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/workspace_config.py +0 -0
  40. {dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/dependency_links.txt +0 -0
  41. {dasl_client-1.0.13 → dasl_client-1.0.16}/setup.cfg +0 -0
  42. {dasl_client-1.0.13 → dasl_client-1.0.16}/setup.py +0 -0
  43. {dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_api_changes.py +0 -0

{dasl_client-1.0.13 → dasl_client-1.0.16}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dasl_client
- Version: 1.0.13
+ Version: 1.0.16
  Summary: The DASL client library used for interacting with the DASL workspace
  Home-page: https://github.com/antimatter/asl
  Author: Antimatter Team
@@ -8,10 +8,10 @@ Author-email: Antimatter Team <support@antimatter.io>
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: dasl_api==0.1.15
+ Requires-Dist: dasl_api==0.1.17
  Requires-Dist: databricks-sdk>=0.41.0
  Requires-Dist: pydantic>=2
- Requires-Dist: typing_extensions==4.10.0
+ Requires-Dist: typing_extensions>=4.10.0

  # DASL Client Library


{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/client.py
@@ -1,5 +1,9 @@
  from copy import deepcopy
- from typing import Any, Callable, Iterator, List, Optional, TypeVar
+ from datetime import datetime, timedelta
+ from time import sleep
+ from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar
+ from pydantic import Field
+ from pyspark.sql import DataFrame

  from dasl_api import (
  CoreV1Api,
@@ -9,8 +13,6 @@ from dasl_api import (
  WorkspaceV1CreateWorkspaceRequest,
  api,
  )
- from pydantic import Field
-
  from dasl_client.auth.auth import (
  Authorization,
  DatabricksSecretAuth,
@@ -569,10 +571,46 @@ class Client:
  )
  return Rule.from_api_obj(result)

+ def exec_rule(
+ self, rule_in: Rule, df: DataFrame
+ ) -> Tuple[DataFrame, Optional[DataFrame]]:
+ """
+ Locally execute a Rule. Must be run from within a Databricks
+ notebook or else an exception will be raised. This is intended
+ to facilitate Rule development.
+
+ :param rule_in: The specification of the Rule to execute.
+ :param df: The DataFrame to use as the input to the Rule.
+ :returns Tuple[DataFrame, Optional[DataFrame]]: The first
+ element of the tuple contains the notables produced by
+ the rule, and the second element contains the observables
+ or None if no observables were produced.
+ """
+ Helpers.ensure_databricks()
+ with error_handler():
+ result = self._core_client().core_v1_render_rule(
+ self._workspace(),
+ rule_in.to_api_obj(),
+ )
+
+ try:
+ import notebook_utils
+ except ImportError as e:
+ raise ImportError(
+ "Package 'notebook_utils' not found. "
+ "Install it within this this notebook using "
+ f"%pip install {result.notebook_utils_path}"
+ )
+
+ namespace = {}
+ exec(result.content, namespace)
+ return namespace["generate"](df)
+
  def adhoc_transform(
  self,
  warehouse: str,
  request: TransformRequest,
+ timeout: timedelta = timedelta(minutes=5),
  ) -> TransformResponse:
  """
  Run a sequence of ADHOC transforms against a SQL warehouse to
@@ -583,16 +621,29 @@ class Client:
  :return: a TransformResponse object containing the results
  after running the transforms.
  :raises: NotFoundError if the rule does not exist
+ :raises: Exception for a server-side error or timeout
  """
  with error_handler():
- return TransformResponse.from_api_obj(
- self._dbui_client().dbui_v1_transform(
- self._workspace(),
- warehouse,
- request.to_api_obj(),
- )
+ status = self._dbui_client().dbui_v1_transform(
+ self._workspace(),
+ warehouse,
+ request.to_api_obj(),
  )

+ begin = datetime.now()
+ while datetime.now() - begin < timeout:
+ sleep(5)
+ status = self._dbui_client().dbui_v1_transform_status(
+ self._workspace(), status.id
+ )
+
+ if status.status == "failure":
+ raise Exception(f"adhoc transform failed with {status.error}")
+ elif status.status == "success":
+ return TransformResponse.from_api_obj(status.result)
+
+ raise Exception("timed out waiting for adhoc transform result")
+
  def get_observable_events(
  self,
  warehouse: str,
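
To make the client.py changes above concrete, here is a hedged usage sketch: exec_rule renders a Rule server-side and executes the generated code locally (Databricks notebook only, with the notebook_utils package %pip-installed first), while adhoc_transform now polls for a result and accepts an optional timeout. Client and TransformRequest are imported the way the tests in this diff do; the rule object, warehouse ID, and DataFrame are placeholders.

```python
from datetime import timedelta

from pyspark.sql import DataFrame

from dasl_client import Client, TransformRequest  # exported names used by the tests in this diff


def preview_rule_and_transform(client: Client, rule, input_df: DataFrame) -> None:
    """Hedged sketch of the new/changed Client calls in 1.0.16."""
    # exec_rule: Databricks notebook only; raises ImportError with a %pip hint
    # if the server-provided notebook_utils package is not installed.
    notables_df, observables_df = client.exec_rule(rule, input_df)
    notables_df.show(5)
    if observables_df is not None:
        observables_df.show(5)

    # adhoc_transform now polls every 5 seconds until success or failure,
    # or raises after the new optional timeout (default 5 minutes).
    response = client.adhoc_transform(
        warehouse="my-warehouse-id",              # SQL warehouse ID (placeholder)
        request=TransformRequest(transforms=[]),  # minimal request for illustration
        timeout=timedelta(minutes=2),
    )
    print(response)
```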

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/helpers.py
@@ -6,15 +6,18 @@ class Helpers:
  default_dasl_host = "https://api.prod.sl.antimatter.io"

  @staticmethod
- def databricks_context():
- # This import raises an exception if outside a notebook context, so only
- # import if this method is called
+ def ensure_databricks():
  if "DATABRICKS_RUNTIME_VERSION" not in os.environ:
  raise Exception(
  "attempted to access databricks context outside "
  + "of databricks notebook"
  )

+ @staticmethod
+ def databricks_context():
+ # This import raises an exception if outside a notebook context, so only
+ # import if this method is called
+ Helpers.ensure_databricks()
  from databricks.sdk.runtime import dbutils

  return dbutils.notebook.entry_point.getDbutils().notebook().getContext()

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/errors.py
@@ -149,7 +149,10 @@ class AssertionFailedError(PresetError):
  def __init__(self, expr: str, assertion_message: str, df: DataFrame):
  # Get the Databricks built-in functions out the namespace.
  ipython = get_ipython()
- display = ipython.user_ns["display"]
+ if ipython is not None:
+ display = ipython.user_ns["display"]
+ else:
+ display = lambda x: x.show()

  self.expr = expr
  self.assertion_message = assertion_message

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_engine.py
@@ -54,10 +54,13 @@ class PreviewEngine:
  self._preset.get("silver", None), self._pretransform_name
  )

+ self._bronze = None
  self._pre = None
  self._silver = []
  self._gold = []
- self._result_df_map = {}
+ self._result_df_map: Tuple[
+ DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]
+ ] = (None, {}, {})

  def _validate_pretransform_name(
  self, silver: Dict[str, str], pretransform_name: str
@@ -181,8 +184,12 @@ class PreviewEngine:

  # Get the Databricks built-in functions out the namespace.
  ipython = get_ipython()
- displayHTML = ipython.user_ns["displayHTML"]
- display = ipython.user_ns["display"]
+ if ipython is not None:
+ displayHTML = ipython.user_ns["displayHTML"]
+ display = ipython.user_ns["display"]
+ else:
+ displayHTML = lambda x: print(x)
+ display = lambda x: x.show()

  def d(txt, lvl) -> None:
  displayHTML(
@@ -245,7 +252,7 @@ class PreviewEngine:
  raise UnknownGoldTableError(name, gold_table_schema)

  # Performs the type check.
- delta_df = self._spark.table(f"{gold_table_schema}.{name}").limit(0)
+ delta_df = self._spark.table(f"`{gold_table_schema}`.`{name}`").limit(0)
  unioned_df = delta_df.unionByName(df, allowMissingColumns=True)

  # Now we check no new columns.
@@ -286,7 +293,7 @@ class PreviewEngine:
  d("Resultant gold table preview", 3)
  display(unioned_df)

- def evaluate(self, gold_table_schema: str) -> None:
+ def evaluate(self, gold_table_schema: str, display: bool = True) -> None:
  """
  Evaluates the loaded preset YAML using the input datasource configuration to load
  records. Finally, checks that the output from the Gold stages is compatible with
@@ -303,7 +310,9 @@ class PreviewEngine:
  ):
  if not any(
  row.databaseName == schema_name
- for row in self._spark.sql(f"SHOW SCHEMAS IN {catalog_name}").collect()
+ for row in self._spark.sql(
+ f"SHOW SCHEMAS IN `{catalog_name}`"
+ ).collect()
  ):
  raise InvalidGoldTableSchemaError(
  gold_table_schema,
@@ -340,5 +349,12 @@ class PreviewEngine:
  self._compile_stages()

  with self._ds_params as df:
+ self._bronze = df
  self._result_df_map = self._run(df)
- self._render_output(df, self._result_df_map, gold_table_schema)
+ if display:
+ self._render_output(df, self._result_df_map, gold_table_schema)
+
+ def results(
+ self,
+ ) -> Tuple[DataFrame, DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]]:
+ return self._bronze, *self._result_df_map
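
The evaluate(display=...) flag and the new results() accessor above suggest a programmatic preview flow roughly like the sketch below. The PreviewEngine constructor is not part of this diff, so the engine is taken as a parameter, and the dictionary keys are assumed to be silver/gold table names.

```python
from typing import Dict, Tuple

from pyspark.sql import DataFrame

from dasl_client.preset_development.preview_engine import PreviewEngine


def run_preview(
    engine: PreviewEngine, gold_table_schema: str
) -> Tuple[DataFrame, DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]]:
    """Evaluate a preset without notebook rendering and return its DataFrames."""
    # display=False (new in this release) skips the displayHTML/display output path.
    engine.evaluate(gold_table_schema, display=False)
    # results() (also new) returns: bronze input, pretransform output,
    # plus the silver and gold DataFrame maps captured by the last run.
    return engine.results()
```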

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/preview_parameters.py
@@ -1,3 +1,5 @@
+ from typing import Optional
+
  from pyspark.sql import DataFrame, SparkSession
  from pyspark.sql.types import *
  from pyspark.sql.dataframe import DataFrame
@@ -115,6 +117,7 @@ class PreviewParameters:
  self._table = None

  self._pretransform_name = None
+ self._bronze_pre_transform: Optional[List[str]] = None

  self._df = None

@@ -166,10 +169,13 @@ class PreviewParameters:
  self._record_limit
  )

+ if self._bronze_pre_transform is not None:
+ stream_df = stream_df.selectExpr(*self._bronze_pre_transform)
+
  query = (
  stream_df.writeStream.format("memory")
  .queryName("batch_data")
- .trigger(once=True)
+ .trigger(availableNow=True)
  .start()
  )

@@ -193,12 +199,17 @@ class PreviewParameters:

  # Get the Databricks built-in functions out the namespace.
  ipython = get_ipython()
- dbutils = ipython.user_ns["dbutils"]
+ if ipython is not None:
+ dbutils = ipython.user_ns["dbutils"]

- dbutils.fs.rm(
- f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
- recurse=True,
- )
+ dbutils.fs.rm(
+ f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+ recurse=True,
+ )
+ else:
+ print(
+ f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}"
+ )

  def from_input(self):
  """
@@ -286,6 +297,10 @@ class PreviewParameters:
  Returns:
  PreviewParameters: The current instance with updated configuration.
  """
+ if file_format.lower() == "jsonl":
+ self._autoloader_format = "json"
+ self.set_autoloader_multiline(False)
+ return self
  self._autoloader_format = file_format
  return self

@@ -350,6 +365,16 @@ class PreviewParameters:
  self._pretransform_name = pretransform_name
  return self

+ def set_bronze_pre_transform(self, expr: List[str]):
+ """
+ Sets a pre-transform expression that will run before data is written to bronze
+
+ Returns:
+ PreviewParameters: The current instance with updated configuration.
+ """
+ self._bronze_pre_transform = expr
+ return self
+
  def set_date_range(self, column: str, start_time: str, end_time: str):
  """
  Set the TIMESTAMP column and date range to use as the input data filter to
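
A hedged sketch of the new PreviewParameters pieces above: set_bronze_pre_transform stores SQL expressions applied via selectExpr before the stream is written to the in-memory bronze table, "jsonl" is now mapped to the json autoloader format with multiline disabled, and the memory sink uses trigger(availableNow=True). Only setters visible in this diff are used below; constructing the PreviewParameters instance itself is out of scope, and the column names are placeholders.

```python
from typing import List

from dasl_client.preset_development.preview_parameters import PreviewParameters


def with_bronze_pre_transform(params: PreviewParameters, exprs: List[str]) -> PreviewParameters:
    """Attach the new bronze pre-transform and a date-range filter (sketch)."""
    # New in this release: expressions run via selectExpr on the autoloader
    # stream before it is written to the in-memory "batch_data" table.
    params.set_bronze_pre_transform(exprs)
    # Existing setter (shown as context in this diff).
    params.set_date_range("event_time", "2024-01-01T00:00:00", "2024-01-31T00:00:00")
    return params


# Example expressions: keep every column and add a normalized timestamp column.
example_exprs = ["*", "to_timestamp(raw_time) AS event_time"]
```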

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/preset_development/stage.py
@@ -150,17 +150,19 @@ class Stage:
  if target_col not in existing_columns:
  raise ReferencedColumnMissingError("jsonExtract", target_col)
  schema = self._spark.sql(
- f"SELECT schema_of_json_agg({target_col}) AS sc FROM {{df}}", df=df
+ f"SELECT schema_of_json_agg({self.auto_backtick(target_col)}) AS sc FROM {{df}}",
+ df=df,
  ).collect()[0][0]
  extract_df = self._spark.createDataFrame(data=[], schema=schema)
  columns = extract_df.columns
  columns = [
- f"extract.{col} AS {col}"
+ self.auto_backtick(f"extract.{col}") + f" AS {self.auto_backtick(col)}"
  for col in columns
  if col not in omit_fields and col not in existing_columns
  ]
  columns += [
- f"extract.{col} AS {duplicate_prefix}{col}"
+ self.auto_backtick(f"extract.{col}")
+ + f" AS {self.auto_backtick(duplicate_prefix + col)}"
  for col in columns
  if col not in omit_fields and col in existing_columns
  ]
@@ -176,7 +178,10 @@ class Stage:
  A DataFrame with the resultant operation's records.
  """
  return (
- df.selectExpr("*", f"from_json({target_col}, '{schema}') AS extract")
+ df.selectExpr(
+ "*",
+ f"from_json({self.auto_backtick(target_col)}, '{schema}') AS extract",
+ )
  .selectExpr("*", *columns)
  .drop("extract")
  )
@@ -198,7 +203,10 @@ class Stage:
  """
  extract_df = self._spark.createDataFrame(data=[], schema=schema)
  schema = extract_df.drop(omit_fields).schema.simpleString()
- return df.selectExpr("*", f"from_json({target_col}, '{schema}') AS {name}")
+ return df.selectExpr(
+ "*",
+ f"from_json({self.auto_backtick(target_col)}, '{schema}') AS {self.auto_backtick(name)}",
+ )

  def preserved_columns(
  self, df: DataFrame
@@ -224,7 +232,7 @@ class Stage:
  duplicate_prefix = self._duplicate_prefix()
  column_names = self._column_names()
  duplicate_renames = [
- f"{col} AS {duplicate_prefix}{col}"
+ f"{self.auto_backtick(col)} AS {self.auto_backtick(duplicate_prefix + col)}"
  for col in preserved_columns
  if col in column_names
  ]
@@ -296,14 +304,46 @@ class Stage:
  """
  if field.get("from", None):
  # check that the from column exists in the df?
- return f"{field['from']} AS {name}"
+ return f"{self.auto_backtick(field['from'])} AS {self.auto_backtick(name)}"
  elif field.get("literal", None):
- return f"'{field['literal']}' AS {name}"
- elif field.get("expr", None):
- return f"{field['expr']} AS {name}"
+ return f"'{field['literal']}' AS {self.auto_backtick(name)}"
+ elif field.get("expr", None) is not None:
+ return f"{field['expr']} AS {self.auto_backtick(name)}"
  else:
  return ""

+ def is_backtick_escaped(self, name: str) -> bool:
+ """
+ check if a given (column) name is backtick escaped or not
+ :param name: column name
+ :return: bool
+ """
+ return name.startswith("`") and name.endswith("`")
+
+ def auto_backtick(self, name: str) -> str:
+ """
+ auto-backtick given name in case it isn't already backtick escaped.
+ if the name contains dots it will get split and each component backticked individually.
+ Returns the name wrapped in backticks or the passed name if it already had backticks.
+ :param name: column name
+ :return: str
+ """
+ if self.is_backtick_escaped(name):
+ return name
+ parts = name.split(".")
+ return ".".join(list(map(lambda s: f"`{s}`", parts)))
+
+ def force_apply_backticks(self, name: str) -> str:
+ """
+ forces application of backticks to the given (column) name as a single unit
+ if it already has backticks this is a noop
+ :param name: column name
+ :return: str
+ """
+ if self.is_backtick_escaped(name):
+ return name
+ return f"`{name}`"
+
  def process_node(self, name: str, node: Node) -> str:
  """
  Processes a single node in a tree of nodes.
@@ -319,7 +359,7 @@ class Stage:
  child_expr = self.process_node(child_name, child_node)
  fields_list.append(f"{child_expr}")
  joined_fields = ",\n".join(fields_list)
- return f"struct(\n{joined_fields}\n) AS {name}"
+ return f"struct(\n{joined_fields}\n) AS {self.auto_backtick(name)}"
  else:
  return ""

@@ -341,12 +381,22 @@ class Stage:
  """
  Renders a list of field specifications containing both simple and
  STRUCT references into valid, STRUCT cognicient, SELECT expressions.
+ if a nested field is wrapped in backticks it will be treated as a simple field
+ for example field of name `col.with.dots` will NOT be treated as nested field.

  Returns:
  The SQL expression.
  """
- simple_fields = [f for f in fields if "." not in f["name"]]
- nested_fields = [f for f in fields if "." in f["name"]]
+ simple_fields = [
+ f
+ for f in fields
+ if "." not in f["name"] or self.is_backtick_escaped(f["name"])
+ ]
+ nested_fields = [
+ f
+ for f in fields
+ if "." in f["name"] and not self.is_backtick_escaped(f["name"])
+ ]

  result_parts = []
  for field in simple_fields:
@@ -358,7 +408,7 @@ class Stage:
  nested_str = self.parse_to_string(tree)
  result_parts.append(nested_str)

- return [p for p in result_parts if p]
+ return [p for p in result_parts if p is not None and len(p) > 0]

  def select_expr(self, df: DataFrame) -> str:
  """
@@ -379,8 +429,12 @@ class Stage:
  if should_preserve:
  if embed_col := preserve.get("embedColumn", None):
  preserved_columns = self.preserved_columns_embed_column(df)
+ # preserved_columns is obtained from df.columns
+ # applying backticks to all of them is OK here
+ # since they will never use "obj.key" to reference nested fields of structs
+ # so we just go ahead and apply backticks to all across the board
  select_fields += [
- f"struct({', '.join(preserved_columns)}) AS {embed_col}"
+ f"struct({', '.join(list(map(lambda x: self.force_apply_backticks(x), preserved_columns)))}) AS {self.auto_backtick(embed_col)}"
  ]
  else:
  (
@@ -388,8 +442,13 @@ class Stage:
  duplicate_renames,
  column_names,
  ) = self.preserved_columns(df)
- select_fields += preserved_columns
- select_fields += duplicate_renames
+ # see note above: same here - apply backticks to all columns across the board
+ select_fields += list(
+ map(lambda x: self.force_apply_backticks(x), preserved_columns)
+ )
+ select_fields += list(
+ map(lambda x: self.force_apply_backticks(x), duplicate_renames)
+ )

  return ["*"] + select_fields if self._stage == "temp_fields" else select_fields

@@ -475,7 +534,9 @@ class Stage:
  df = (
  df.alias("tmp")
  .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
- .selectExpr("tmp.*", f"{select} AS {field.get('name')}")
+ .selectExpr(
+ "tmp.*", f"{select} AS {self.auto_backtick(field.get('name'))}"
+ )
  )
  elif csv := join.get("withCSV", None):
  if path := csv.get("path", None):
@@ -485,7 +546,10 @@ class Stage:
  df = (
  df.alias("tmp")
  .join(df_joined, on=[df[lhs] == df_joined[rhs]], how="left")
- .selectExpr("tmp.*", f"{select} AS {field.get('name')}")
+ .selectExpr(
+ "tmp.*",
+ f"{select} AS {self.auto_backtick(field.get('name'))}",
+ )
  )
  else:
  raise MissingJoinFieldError("withTable or withCSV (please supply 1)")
@@ -500,7 +564,10 @@ class Stage:
  """
  for field in self._fields:
  if field.get("alias", None):
- df = df.selectExpr("*", f"{field.get('alias')} AS {field.get('name')}")
+ df = df.selectExpr(
+ "*",
+ f"{self.auto_backtick(field.get('alias'))} AS {self.auto_backtick(field.get('name'))}",
+ )
  return df

  def run_assertions(self, df: DataFrame) -> DataFrame:
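
The quoting rules introduced above are easiest to see in isolation. This standalone sketch mirrors the three new Stage helpers as plain string functions (the logic does not depend on self): already-escaped names pass through, auto_backtick quotes each dot-separated component, and force_apply_backticks quotes the whole name as a single unit.

```python
def is_backtick_escaped(name: str) -> bool:
    """True if the column name is already wrapped in backticks."""
    return name.startswith("`") and name.endswith("`")


def auto_backtick(name: str) -> str:
    """Quote each dot-separated component, unless the name is already escaped."""
    if is_backtick_escaped(name):
        return name
    return ".".join(f"`{part}`" for part in name.split("."))


def force_apply_backticks(name: str) -> str:
    """Quote the whole name as a single unit, unless it is already escaped."""
    if is_backtick_escaped(name):
        return name
    return f"`{name}`"


assert auto_backtick("user.name") == "`user`.`name`"          # nested struct reference
assert auto_backtick("`col.with.dots`") == "`col.with.dots`"  # left untouched
assert force_apply_backticks("col with spaces") == "`col with spaces`"
```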

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/content.py
@@ -173,6 +173,8 @@ class DataSourcePreset(BaseModel):
  name: Optional[str] = None
  author: Optional[str] = None
  description: Optional[str] = None
+ title: Optional[str] = None
+ icon_url: Optional[str] = None
  autoloader: Optional[PresetAutoloader] = None
  silver: Optional[SilverPreset] = None
  gold: Optional[List[GoldPreset]] = None
@@ -188,6 +190,8 @@ class DataSourcePreset(BaseModel):
  name=obj.name,
  author=obj.author,
  description=obj.description,
+ title=obj.title,
+ icon_url=obj.icon_url,
  autoloader=PresetAutoloader.from_api_obj(obj.autoloader),
  silver=SilverPreset.from_api_obj(obj.silver),
  gold=[GoldPreset.from_api_obj(item) for item in obj.gold],

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client/types/dbui.py
@@ -12,6 +12,8 @@ from dasl_api import (
  DbuiV1TransformRequestTransformsInnerPresetOverrides,
  DbuiV1TransformResponse,
  DbuiV1TransformResponseStagesInner,
+ ContentV1DatasourcePresetAutoloaderCloudFiles,
+ DbuiV1TransformRequestAutoloaderInput,
  )

  from .datasource import DataSource, FieldSpec, FieldUtils
@@ -131,7 +133,7 @@ class TransformRequest(BaseModel):
  Attributes:
  input (TransformRequest.Input):
  The input block containing the columns metadata and data.
- autoloader_input (DataSource.Autoloader):
+ autoloader_input (Autoloader):
  The autoloader input configuration.
  use_preset (str):
  Indicates which preset to use for the transforms.
@@ -172,6 +174,86 @@ class TransformRequest(BaseModel):
  data=self.data,
  )

+ class Autoloader(BaseModel):
+ """
+ Autoloader configuration for the DataSource.
+
+ Attributes:
+ format (Optional[str]):
+ The format of the data (e.g., json, parquet, csv, etc.).
+ location (str):
+ External location for the volume in Unity Catalog.
+ schema_file (Optional[str]):
+ An optional file containing the schema of the data source.
+ cloud_files (Optional[Autoloader.CloudFiles]):
+ CloudFiles configuration.
+ """
+
+ class CloudFiles(BaseModel):
+ """
+ CloudFiles configuration for the Autoloader.
+
+ Attributes:
+ schema_hints_file (Optional[str]):
+ schema_hints (Optional[str]):
+ """
+
+ schema_hints_file: Optional[str] = None
+ schema_hints: Optional[str] = None
+
+ @staticmethod
+ def from_api_obj(
+ obj: Optional[ContentV1DatasourcePresetAutoloaderCloudFiles],
+ ) -> "TransformRequest.Autoloader.CloudFiles":
+ if obj is None:
+ return None
+ return TransformRequest.Autoloader.CloudFiles(
+ schema_hints_file=obj.schema_hints_file,
+ schema_hints=obj.schema_hints,
+ )
+
+ def to_api_obj(self) -> ContentV1DatasourcePresetAutoloaderCloudFiles:
+ return ContentV1DatasourcePresetAutoloaderCloudFiles(
+ schema_hints_file=self.schema_hints_file,
+ schema_hints=self.schema_hints,
+ )
+
+ format: Optional[str] = None
+ location: str
+ schema_file: Optional[str] = None
+ schema: Optional[str] = None
+ cloud_files: Optional["TransformRequest.Autoloader.CloudFiles"] = None
+ row_count: Optional[int] = None
+ row_offset: Optional[int] = None
+
+ @staticmethod
+ def from_api_obj(
+ obj: Optional[DbuiV1TransformRequestAutoloaderInput],
+ ) -> "Optional[TransformRequest.Autoloader]":
+ if obj is None:
+ return None
+ return TransformRequest.Autoloader(
+ format=obj.format,
+ location=obj.location,
+ schema_file=obj.schema_file,
+ cloud_files=TransformRequest.Autoloader.CloudFiles.from_api_obj(
+ obj.cloud_files
+ ),
+ row_count=obj.row_count,
+ row_offset=obj.row_offset,
+ )
+
+ def to_api_obj(self) -> DbuiV1TransformRequestAutoloaderInput:
+ return DbuiV1TransformRequestAutoloaderInput(
+ format=self.format,
+ location=self.location,
+ schemaFile=self.schema_file,
+ schema=self.schema_file,
+ cloudFiles=Helpers.maybe(lambda o: o.to_api_obj(), self.cloud_files),
+ rowCount=self.row_count,
+ rowOffset=self.row_offset,
+ )
+
  class Transform(BaseModel):
  """
  A transform configuration to apply to the data.
@@ -273,7 +355,7 @@ class TransformRequest(BaseModel):
  )

  input: Optional["TransformRequest.Input"] = None
- autoloader_input: Optional[DataSource.Autoloader] = None
+ autoloader_input: Optional["TransformRequest.Autoloader"] = None
  use_preset: Optional[str] = None
  transforms: List["TransformRequest.Transform"]

@@ -281,7 +363,9 @@ class TransformRequest(BaseModel):
  def from_api_obj(obj: DbuiV1TransformRequest) -> "TransformRequest":
  return TransformRequest(
  input=TransformRequest.Input.from_api_obj(obj.input),
- autoloader_input=DataSource.Autoloader.from_api_obj(obj.autoloader_input),
+ autoloader_input=TransformRequest.Autoloader.from_api_obj(
+ obj.autoloader_input
+ ),
  use_preset=obj.use_preset,
  transforms=[
  TransformRequest.Transform.from_api_obj(item) for item in obj.transforms
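
Tying the new model above together: autoloader_input now takes a TransformRequest.Autoloader (with the extra row_count/row_offset fields) instead of DataSource.Autoloader, which is exactly the substitution made in test_marshaling.py later in this diff. A hedged construction sketch with placeholder values; to_api_obj on the full request is assumed from the marshal/unmarshal test rather than shown in this hunk.

```python
from dasl_client import TransformRequest  # exported name used by the tests in this diff

request = TransformRequest(
    autoloader_input=TransformRequest.Autoloader(
        format="csv",
        location="s3://bucket/data",          # external location (placeholder)
        schema_file="schema.json",
        cloud_files=TransformRequest.Autoloader.CloudFiles(
            schema_hints_file="hints_file.csv",
            schema_hints="hint1, hint2",
        ),
        row_count=100,   # new optional field
        row_offset=0,    # new optional field
    ),
    use_preset="preset_value",
    transforms=[],       # real requests carry TransformRequest.Transform entries
)

api_obj = request.to_api_obj()  # assumed: DbuiV1TransformRequest ready for dbui_v1_transform
```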

{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dasl_client
- Version: 1.0.13
+ Version: 1.0.16
  Summary: The DASL client library used for interacting with the DASL workspace
  Home-page: https://github.com/antimatter/asl
  Author: Antimatter Team
@@ -8,10 +8,10 @@ Author-email: Antimatter Team <support@antimatter.io>
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: dasl_api==0.1.15
+ Requires-Dist: dasl_api==0.1.17
  Requires-Dist: databricks-sdk>=0.41.0
  Requires-Dist: pydantic>=2
- Requires-Dist: typing_extensions==4.10.0
+ Requires-Dist: typing_extensions>=4.10.0

  # DASL Client Library


{dasl_client-1.0.13 → dasl_client-1.0.16}/dasl_client.egg-info/SOURCES.txt
@@ -31,6 +31,10 @@ dasl_client/types/helpers.py
  dasl_client/types/rule.py
  dasl_client/types/types.py
  dasl_client/types/workspace_config.py
+ test/__init__.py
+ test/conftest.py
+ test/constants.py
  test/test_api_changes.py
  test/test_api_surface.py
+ test/test_databricks_secret_auth.py
  test/test_marshaling.py

dasl_client-1.0.16/dasl_client.egg-info/requires.txt
@@ -0,0 +1,4 @@
+ dasl_api==0.1.17
+ databricks-sdk>=0.41.0
+ pydantic>=2
+ typing_extensions>=4.10.0

{dasl_client-1.0.13 → dasl_client-1.0.16}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "dasl_client"
- version = "1.0.13"
+ version = "1.0.16"
  description = "The DASL client library used for interacting with the DASL workspace"
  readme = "README.md"
  authors = [
@@ -13,8 +13,8 @@ authors = [
  requires-python = ">=3.8"

  dependencies = [
- "dasl_api==0.1.15",
+ "dasl_api==0.1.17",
  "databricks-sdk>=0.41.0",
  "pydantic>=2",
- "typing_extensions==4.10.0"
+ "typing_extensions>=4.10.0",
  ]
File without changes

dasl_client-1.0.16/test/conftest.py
@@ -0,0 +1,18 @@
+ import pytest
+
+ from dasl_client import Client
+
+ from .constants import *
+
+
+ @pytest.fixture(scope="session")
+ def api_client():
+ client = Client.new_workspace(
+ admin_email="test@antimatter.io",
+ app_client_id=app_client_id,
+ service_principal_id=databricks_client_id,
+ service_principal_secret=databricks_client_secret,
+ workspace_url=databricks_host,
+ dasl_host=dasl_host,
+ )
+ yield client

dasl_client-1.0.16/test/constants.py
@@ -0,0 +1,10 @@
+ import os
+ from urllib.parse import urlparse
+
+ dasl_host = os.environ["DASL_API_URL"]
+ databricks_host = os.environ["DASL_DATABRICKS_HOST"]
+ databricks_client_id = os.environ["DASL_DATABRICKS_CLIENT_ID"]
+ databricks_client_secret = os.environ["DASL_DATABRICKS_CLIENT_SECRET"]
+ workspace = urlparse(databricks_host).hostname
+ app_client_id = "22853b93-68ba-4ae2-8e41-976417f501dd"
+ alternate_app_client_id = "335ac0d3-e0ea-4732-ba93-0277423b5029"

{dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_api_surface.py
@@ -1,29 +1,6 @@
- import os
- import pytest
- from urllib.parse import urlparse
-
  from dasl_client import *

- dasl_host = os.environ["DASL_API_URL"]
- databricks_host = os.environ["DATABRICKS_HOST"]
- databricks_client_id = os.environ["DATABRICKS_CLIENT_ID"]
- databricks_client_secret = os.environ["DATABRICKS_CLIENT_SECRET"]
- workspace = urlparse(databricks_host).hostname
- app_client_id = "22853b93-68ba-4ae2-8e41-976417f501dd"
- alternate_app_client_id = "335ac0d3-e0ea-4732-ba93-0277423b5029"
-
-
- @pytest.fixture(scope="session")
- def api_client():
- client = Client.new_workspace(
- admin_email="test@antimatter.io",
- app_client_id=app_client_id,
- service_principal_id=databricks_client_id,
- service_principal_secret=databricks_client_secret,
- workspace_url=databricks_host,
- dasl_host=dasl_host,
- )
- yield client
+ from .constants import *


  def test_admin_config(api_client):

dasl_client-1.0.16/test/test_databricks_secret_auth.py
@@ -0,0 +1,116 @@
+ import base64
+ import datetime
+ import os
+ import time
+
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.service import jobs, workspace as dbworkspace
+
+ from .constants import *
+
+ pylib_volume_path = os.environ["PYLIB_VOLUME_PATH"]
+ pylib_wheel_path = os.environ["PYLIB_WHEEL_PATH"]
+
+
+ def test_secret_auth(api_client):
+ # making sure it's even possible to get a config
+ api_client.get_config()
+
+ # need to do an API operation using databricks secret auth.
+ notebook_data = f"""
+ %pip install {pylib_wheel_path}
+ dbutils.library.restartPython()
+ # COMMAND ----------
+ from dasl_client.client import Client
+
+ Client.for_workspace(
+ workspace_url="{databricks_host}",
+ dasl_host="{dasl_host}",
+ ).get_config()
+ # COMMAND ----------
+ dbutils.notebook.exit("SUCCESS")
+ """
+ print(f"notebook_data={notebook_data}")
+
+ wsc = WorkspaceClient()
+ wsc.workspace.mkdirs(path=pylib_volume_path)
+
+ notebook_path = f"{pylib_volume_path}/test_secret_auth_notebook"
+ wsc.workspace.import_(
+ path=notebook_path,
+ format=dbworkspace.ImportFormat.SOURCE,
+ language=dbworkspace.Language.PYTHON,
+ content=base64.b64encode(notebook_data.encode("utf-8")).decode("utf-8"),
+ overwrite=True,
+ )
+
+ job_id = None
+ try:
+ job_id = wsc.jobs.create(
+ name="run test_secret_auth notebook",
+ tasks=[
+ jobs.Task(
+ task_key="run_notebook",
+ notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
+ ),
+ ],
+ ).job_id
+
+ wsc.jobs.run_now(job_id=job_id)
+
+ logs = []
+ start = datetime.datetime.now()
+ complete = False
+ while not complete:
+ elapsed = datetime.datetime.now() - start
+ if elapsed > datetime.timedelta(seconds=300):
+ raise Exception(f"timed out waiting for job")
+
+ time.sleep(5)
+
+ status, logs = fetch_latest_run_status_and_logs(wsc, job_id)
+ print(f"logs={logs}")
+
+ if status == jobs.TerminationCodeCode.RUN_EXECUTION_ERROR:
+ raise Exception(f"job terminated with error")
+
+ complete = status == jobs.TerminationCodeCode.SUCCESS
+
+ print(logs)
+ assert len(logs) == 1
+ assert logs[0] == "SUCCESS"
+ finally:
+ wsc.workspace.delete(pylib_volume_path, recursive=True)
+ if job_id is not None:
+ wsc.jobs.delete(job_id=job_id)
+
+
+ def fetch_latest_run_status_and_logs(
+ wsc: WorkspaceClient,
+ job_id: str,
+ ):
+ runs = list(wsc.jobs.list_runs(job_id=job_id, expand_tasks=True))
+ if not runs:
+ return "No runs found", None
+
+ # Find the latest run based on the start time
+ latest_run = max(runs, key=lambda r: r.start_time)
+ if latest_run.status.termination_details is None:
+ return "No runs found", None
+ status = latest_run.status.termination_details.code
+ logs = []
+ for task in latest_run.tasks:
+ output = wsc.jobs.get_run_output(task.run_id)
+ if output.error is not None:
+ logs.append(output.error)
+ elif output.logs is not None:
+ logs.append(output.logs)
+ elif output.notebook_output is not None:
+ logs.append(output.notebook_output.result)
+ elif output.run_job_output is not None:
+ raise Exception("Nested jobs are not supported")
+ elif output.sql_output is not None:
+ raise Exception("SQL jobs are unsupported")
+ else:
+ logs.append("")
+ return status, logs

{dasl_client-1.0.13 → dasl_client-1.0.16}/test/test_marshaling.py
@@ -1,9 +1,6 @@
- import os
-
  from dasl_client import *

- databricks_host = os.environ["DATABRICKS_HOST"]
- workspace = databricks_host.split("//")[1]
+ from .constants import *


  def test_workspace_config_marshal_unmarshal():
@@ -701,13 +698,15 @@ def test_transform_request_marshal_unmarshal():
  ],
  data=[{"col1": "1", "col2": "a"}, {"col1": "2", "col2": "b"}],
  ),
- autoloader_input=DataSource.Autoloader(
+ autoloader_input=TransformRequest.Autoloader(
  format="csv",
  location="s3://bucket/data",
  schema_file="schema.json",
- cloud_files=DataSource.Autoloader.CloudFiles(
+ cloud_files=TransformRequest.Autoloader.CloudFiles(
  schema_hints_file="hints_file.csv", schema_hints="hint1, hint2"
  ),
+ row_count=1,
+ row_offset=5,
  ),
  use_preset="preset_value",
  transforms=[

dasl_client-1.0.13/dasl_client.egg-info/requires.txt
@@ -1,4 +0,0 @@
- dasl_api==0.1.15
- databricks-sdk>=0.41.0
- pydantic>=2
- typing_extensions==4.10.0
File without changes
File without changes
File without changes
File without changes