dasl-client 1.0.24__tar.gz → 1.0.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dasl-client might be problematic.

Files changed (38)
  1. {dasl_client-1.0.24 → dasl_client-1.0.26}/PKG-INFO +3 -2
  2. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/client.py +65 -3
  3. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/conn/conn.py +3 -1
  4. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/errors.py +20 -0
  5. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/preview_engine.py +136 -42
  6. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/stage.py +23 -2
  7. dasl_client-1.0.26/dasl_client/regions.json +3 -0
  8. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/regions.py +1 -1
  9. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/datasource.py +4 -0
  10. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/rule.py +29 -1
  11. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/workspace_config.py +69 -24
  12. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client.egg-info/PKG-INFO +3 -2
  13. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client.egg-info/requires.txt +2 -1
  14. {dasl_client-1.0.24 → dasl_client-1.0.26}/pyproject.toml +3 -2
  15. dasl_client-1.0.24/dasl_client/regions.json +0 -3
  16. {dasl_client-1.0.24 → dasl_client-1.0.26}/LICENSE +0 -0
  17. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/__init__.py +0 -0
  18. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/auth/__init__.py +0 -0
  19. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/auth/auth.py +0 -0
  20. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/conn/__init__.py +0 -0
  21. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/conn/client_identifier.py +0 -0
  22. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/errors/__init__.py +0 -0
  23. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/errors/errors.py +0 -0
  24. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/exec_rule.py +0 -0
  25. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/helpers.py +0 -0
  26. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/__init__.py +0 -0
  27. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/preview_parameters.py +0 -0
  28. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/__init__.py +0 -0
  29. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/admin_config.py +0 -0
  30. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/content.py +0 -0
  31. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/dbui.py +0 -0
  32. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/helpers.py +0 -0
  33. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/types.py +0 -0
  34. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client.egg-info/SOURCES.txt +0 -0
  35. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client.egg-info/dependency_links.txt +0 -0
  36. {dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client.egg-info/top_level.txt +0 -0
  37. {dasl_client-1.0.24 → dasl_client-1.0.26}/setup.cfg +0 -0
  38. {dasl_client-1.0.24 → dasl_client-1.0.26}/setup.py +0 -0
{dasl_client-1.0.24 → dasl_client-1.0.26}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dasl_client
-Version: 1.0.24
+Version: 1.0.26
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
@@ -8,10 +8,11 @@ Author-email: Antimatter Team <support@antimatter.io>
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: dasl_api==0.1.24
+Requires-Dist: dasl_api==0.1.25
 Requires-Dist: databricks-sdk>=0.41.0
 Requires-Dist: pydantic>=2
 Requires-Dist: typing_extensions>=4.10.0
+Requires-Dist: pyyaml==6.0.2
 Dynamic: author
 Dynamic: home-page
 Dynamic: license-file
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/client.py

@@ -8,6 +8,8 @@ from pyspark.sql import DataFrame
 from dasl_api import (
     CoreV1Api,
     DbuiV1Api,
+    DbuiV1QueryExtendRequest,
+    DbuiV1QueryExtendRequestTimeRange,
     DbuiV1QueryGenerateRequest,
     DbuiV1QueryGenerateRequestTimeRange,
     DbuiV1QueryGenerateStatus,
@@ -597,7 +599,7 @@ class Client:
     def exec_rule(
         self,
         spark,
-        rule_in: Rule,
+        rule_in: Rule | str,
     ) -> ExecRule:
         """
         Locally execute a Rule. Must be run from within a Databricks
@@ -607,19 +609,25 @@ class Client:
         :param spark: Spark context from Databricks notebook. Will be
             injected into the execution environment for use by the
             Rule notebook.
-        :param rule_in: The specification of the Rule to execute.
+        :param rule_in:
+            The specification of the Rule to execute. If specified as
+            a string, it should be in YAML format.
         :returns ExecRule: A class containing various information and
            functionality relating to the execution. See the docs for
            ExecRule for additional details, but note that you must
            call its cleanup function or tables created just for this
            request will leak.
         """
+        rule = rule_in
+        if isinstance(rule_in, str):
+            rule = Rule.from_yaml_str(rule_in)
+
         Helpers.ensure_databricks()

         with error_handler():
             result = self._core_client().core_v1_render_rule(
                 self._workspace(),
-                rule_in.to_api_obj(),
+                rule.to_api_obj(),
             )

         try:
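With this change exec_rule accepts either a Rule object or a YAML string, parsing the string through Rule.from_yaml_str before rendering. A minimal sketch of the new call pattern; `client` (an authenticated Client), `spark` (the notebook Spark session), and `rule_yaml` (a single-document YAML Rule spec held in a string) are assumed to exist and are not defined in the package diff itself:

    # Sketch only: `client`, `spark`, and `rule_yaml` come from the caller's notebook.
    execution = client.exec_rule(spark, rule_yaml)  # a str argument is parsed via Rule.from_yaml_str
    try:
        ...  # inspect the execution result here
    finally:
        # Per the docstring, call ExecRule's cleanup function when done, otherwise
        # the tables created for this request will leak.
        ...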
@@ -794,6 +802,60 @@ class Client:
                 .id
             )

+    def extend_query(
+        self,
+        id: str,
+        warehouse: Optional[str] = None,
+        start_date: Optional[str] = None,
+        end_date: Optional[str] = None,
+    ) -> str:
+        """
+        Extend an existing query to cover a larger time range. If the query
+        is ordered by time and contains no aggregations, this will add the
+        additional data to the existing underlying query, returning the
+        existing ID. If the existing table cannot be extended, a new table
+        will be created to cover the updated time range.
+
+        :param id: The ID of the query to extend.
+        :param warehouse: The SQL warehouse used to execute the SQL. If
+            omitted, the default SQL warehouse specified in the workspace
+            config will be used.
+        :param start_date: An optional starting date to extend the existing
+            query by. If not provided, the current start date of the query
+            will be used.
+        :param end_date: An optional end date to extend the existing
+            query by. If not provided, the current end date of the query
+            will be used.
+        :returns str: The ID of the query generation operation. This value
+            can be used with get_query_status to track the progress of
+            the generation process, and eventually to perform lookups
+            on the completed query. If the current query could be extended,
+            this ID will be the same as the one provided. If a new query had
+            to be generated, the new ID is returned.
+        """
+        time_range = None
+        if start_date is not None or end_date is not None:
+            time_range = DbuiV1QueryExtendRequestTimeRange(
+                startDate=start_date,
+                endDate=end_date,
+            )
+
+        req = DbuiV1QueryExtendRequest(
+            warehouse=warehouse,
+            timeRange=time_range,
+        )
+
+        with error_handler():
+            return (
+                self._dbui_client()
+                .dbui_v1_query_extend(
+                    self._workspace(),
+                    id,
+                    req,
+                )
+                .id
+            )
+
     def get_query_status(
         self,
         id: str,
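The new extend_query call slots into the existing query-generation flow. A short sketch, assuming `client` is an authenticated Client and `query_id` came from an earlier query-generation call; the date strings are illustrative and their expected format is whatever the DASL API accepts:

    # Assumed: `client` and `query_id` already exist; dates below are placeholders.
    new_id = client.extend_query(
        id=query_id,
        start_date="2024-01-01",
        end_date="2024-02-01",
    )
    # new_id equals query_id if the query was extended in place; otherwise it is the
    # ID of a newly generated query. Either way it can be polled with get_query_status.
    status = client.get_query_status(new_id)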
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/conn/conn.py

@@ -19,7 +19,9 @@ def get_base_conn(enable_retries: bool = True, host: Optional[str] = None) -> Ap
     :return: An API conn without any auth
     """
     if host is None:
-        host = os.getenv("DASL_API_URL", "https://api.prod.sl.antimatter.io")
+        host = os.getenv(
+            "DASL_API_URL", "https://api.sl.us-east-1.cloud.databricks.com"
+        )
     config = Configuration(host=host)
     if enable_retries:
         # configure retries with backup for all HTTP verbs; we do not limit this to only
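The default API host moves from api.prod.sl.antimatter.io to the Databricks-hosted endpoint, while the resolution order stays the same: an explicit host argument wins, then the DASL_API_URL environment variable, then the baked-in default. A sketch, assuming the import path follows the file layout shown in the file list:

    import os
    from dasl_client.conn.conn import get_base_conn

    # Either override via the environment before any connection is built...
    os.environ["DASL_API_URL"] = "https://api.sl.us-east-1.cloud.databricks.com"
    conn = get_base_conn()

    # ...or pass the host explicitly, which takes precedence over DASL_API_URL.
    conn = get_base_conn(host="https://api.sl.us-east-1.cloud.databricks.com")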
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/errors.py

@@ -9,6 +9,26 @@ class PresetError(Exception):
     pass


+class StageExecutionException(PresetError):
+    def __init__(
+        self,
+        medallion_layer="unknown",
+        exception_map: Dict[str, List[str]] = {},
+        verbose: bool = False,
+    ):
+        self.exception_map = exception_map
+        message = (
+            f"Field specification errors encountered in {medallion_layer} stage.\n\n"
+        )
+        for table, exceptions in exception_map.items():
+            message += f"Table: {table}\n"
+            count = 1
+            for exception in exceptions:
+                message += f"Exception {count}:\n{exception.split('JVM')[0] if not verbose else exception}\n\n"
+                count += 1
+        super().__init__(message)
+
+
 class InvalidGoldTableSchemaError(PresetError):
     def __init__(self, schema: str, additional_message: str = ""):
         self.schema = schema
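For reference, the exception message is assembled per table from the collected error strings, and non-verbose mode trims anything after "JVM" in each entry. A small illustration with made-up table and error text, assuming the class is importable from the module shown above:

    from dasl_client.preset_development.errors import StageExecutionException

    err = StageExecutionException(
        medallion_layer="silver",
        exception_map={"my_table": ["expression: bad_col\ncannot resolve 'bad_col'"]},
    )
    print(err)
    # Field specification errors encountered in silver stage.
    #
    # Table: my_table
    # Exception 1:
    # expression: bad_col
    # cannot resolve 'bad_col'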
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/preview_engine.py

@@ -49,6 +49,7 @@ class PreviewEngine:
         """
         self._spark = spark
         self._ds_params = ds_params
+        self.__stage_exception = {}
         self._preset = yaml.safe_load(preset_yaml_str)
         self._pretransform_name = ds_params._pretransform_name

@@ -129,7 +130,7 @@ class PreviewEngine:
         if missing_keys:
             raise MissingSilverKeysError(missing_keys)

-    def _compile_stages(self) -> None:
+    def _compile_stages(self, force_evaluation: bool = False) -> None:
         """
         Creates Stage objects, setting silver pretransform to None if not provided.
         """
@@ -160,15 +161,21 @@ class PreviewEngine:
                 break

         self._silver = [
-            Stage(self._spark, "silver transform", table)
+            Stage(
+                self._spark,
+                "silver transform",
+                table,
+                force_evaluation=force_evaluation,
+            )
             for table in self._preset.get("silver", {}).get("transform", [])
         ]
         self._gold = [
-            Stage(self._spark, "gold", table) for table in self._preset.get("gold", [])
+            Stage(self._spark, "gold", table, force_evaluation=force_evaluation)
+            for table in self._preset.get("gold", [])
         ]

     def _run(
-        self, df: DataFrame
+        self, df: DataFrame, verbose: bool = False
     ) -> Tuple[DataFrame, Dict[str, DataFrame], Dict[str, DataFrame]]:
         """
         Runs all stages, in medallion stage order. This allows prior stage outputs to feed
@@ -232,6 +239,14 @@ class PreviewEngine:
         for table in self._silver:
             silver_output_map[table._name] = table.run(df)

+        # Check for silver stage exceptions.
+        # NOTE: These exception lists only get populated if force_evaluation is enabled.
+        for table in self._silver:
+            if exceptions := table.get_exceptions():
+                self.__stage_exception[table._name] = exceptions
+        if self.__stage_exception:
+            raise StageExecutionException("silver", self.__stage_exception, verbose)
+
         gold_output_map = {}
         for table in self._gold:
             # We store as gold_name/silver_input to prevent clobbering on duplicate gold table use.
@@ -239,12 +254,92 @@ class PreviewEngine:
                 silver_output_map[table._input]
             )

+        # Check for gold stage exceptions.
+        # NOTE: These exception lists only get populated if force_evaluation is enabled.
+        for table in self._gold:
+            if exceptions := table.get_exceptions():
+                self.__stage_exception[table._name] = exceptions
+        if self.__stage_exception:
+            raise StageExecutionException("gold", self.__stage_exception, verbose)
+
         return (
             (df, silver_output_map, gold_output_map, pre_bronze_output)
             if self._pre_silver
             else (None, silver_output_map, gold_output_map, pre_bronze_output)
         )

+    def __get_sql_type(self, data_type) -> str:
+        """
+        Helper to convert Spark data type objects to SQL type strings.
+        """
+        if isinstance(data_type, StringType):
+            return "STRING"
+        elif isinstance(data_type, IntegerType):
+            return "INT"
+        elif isinstance(data_type, LongType):
+            return "BIGINT"
+        elif isinstance(data_type, FloatType):
+            return "FLOAT"
+        elif isinstance(data_type, DoubleType):
+            return "DOUBLE"
+        elif isinstance(data_type, BooleanType):
+            return "BOOLEAN"
+        elif isinstance(data_type, TimestampType):
+            return "TIMESTAMP"
+        elif isinstance(data_type, DateType):
+            return "DATE"
+        elif isinstance(data_type, ArrayType):
+            return f"ARRAY<{self.__get_sql_type(data_type.elementType)}>"
+        elif isinstance(data_type, MapType):
+            return f"MAP<{self.__get_sql_type(data_type.keyType)}, {self.__get_sql_type(data_type.valueType)}>"
+        elif isinstance(data_type, StructType):
+            fields = ", ".join(
+                [
+                    f"{field.name}: {self.__get_sql_type(field.dataType)}"
+                    for field in data_type.fields
+                ]
+            )
+            return f"STRUCT<{fields}>"
+        elif isinstance(data_type, VariantType):
+            return f"VARIANT"
+        else:
+            return f"UNKNOWN ({data_type})"
+
+    def __format_gold_column_merge_exception(
+        self,
+        columns: Dict[str, List[Exception]],
+        gold_df: DataFrame,
+        verbose: bool = False,
+    ):
+        """
+        Formatter for various exceptions that occur during the merge of gold tables.
+        """
+        missing_column_flag = False
+        for column, info in columns.items():
+            # RANT: it is annoying, but basically every exception comes back from the
+            # query analyzer as pyspark.errors.exceptions.connect.AnalysisException,
+            # so we are forced into this awkward string search.
+            str_e = str(info["exception"])
+            str_e = str_e.split("JVM")[0] if not verbose else str_e
+            if "LEGACY_ERROR_TEMP_DELTA_0007" in str_e:
+                print(
+                    f"-> Column \"{column}\" of type \"{self.__get_sql_type(info['type'])}\" does not exist in gold table \"{info['table']}\"."
+                )
+                missing_column_flag = True
+            elif "DELTA_FAILED_TO_MERGE_FIELDS" in str_e:
+                print(
+                    f"-> Column \"{column}\" of type \"{self.__get_sql_type(info['type'])}\" is not compatible with gold table \"{info['table']}\"'s \"{column}\" of type \"{self.__get_sql_type(gold_df.schema[column].dataType)}\""
+                )
+            else:
+                print(
+                    f"-> Column \"{column}\" raised the following unformatted exception when appending to gold table \"{info['table']}\":\n{str_e}"
+                )
+
+        if missing_column_flag:
+            print(
+                f"\nA write to 1 or more non-existent columns occurred - available columns are: {', '.join(gold_df.columns)}"
+            )
+
     def _render_output(
         self,
         input_df: DataFrame,
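The private __get_sql_type helper renders Spark types as SQL-style type strings for the messages printed above. A standalone illustration of the mapping it produces for a nested type (not calling the name-mangled method directly; the expected strings follow from the branches shown in the diff):

    from pyspark.sql.types import ArrayType, StringType, StructField, StructType

    schema = StructType([StructField("tags", ArrayType(StringType()))])
    # Per the branches above, the field type renders as  ARRAY<STRING>
    # and the enclosing struct renders as                STRUCT<tags: ARRAY<STRING>>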
@@ -253,6 +348,7 @@ class PreviewEngine:
         ],
         gold_table_catalog: str,
         gold_table_schema: str,
+        verbose: bool = False,
     ) -> None:
         """
         Displays formatted HTML output from executed Stages' DataFrames.
@@ -278,31 +374,6 @@ class PreviewEngine:
             """
         )

-        def check_struct_compatibility(
-            target_field: StructField, df_field: StructField, prefix=""
-        ):
-            if not (
-                isinstance(target_field.dataType, StructType)
-                and isinstance(df_field.dataType, StructType)
-            ):
-                return
-
-            target_fields = {
-                field.name: field for field in target_field.dataType.fields
-            }
-            for field in df_field.dataType.fields:
-                if field.name not in target_fields:
-                    raise GoldTableCompatibilityError(
-                        f"Extra field found in gold stage output STRUCT column {prefix}{target_field.name}: {field.name}"
-                    )
-                else:
-                    if isinstance(field.dataType, StructType):
-                        check_struct_compatibility(
-                            target_fields[field.name],
-                            field,
-                            prefix=prefix + target_field.name + ".",
-                        )
-
         (pre_silver, silver, gold, pre_bronze) = stage_dataframes
         d("Autoloader Input", 1)
         display(input_df)
@@ -343,17 +414,33 @@ class PreviewEngine:
             self._ds_params.add_gold_schema_table(full_name)

             # Perform the type checks by trying to insert data into the table
-            try:
-                df.write.mode("append").save(
-                    f"{self._ds_params.get_autoloader_temp_schema_location()}/{full_name}"
-                )
-            except Exception as e:
-                raise GoldTableCompatibilityError(
-                    f"Preset gold table '{full_name}' did not match the gold schema for {fqn_gold_table_name}: {repr(e)}"
-                )

-            d("Resultant gold table preview", 3)
-            display(df)
+            df_columns = df.columns
+            df_single_columns = {}
+            df_append_exceptions = {}
+            for column in df_columns:
+                df_single_columns[column] = df.select(column)
+            for column, df_single_column in df_single_columns.items():
+                try:
+                    df_single_column.write.mode("append").save(
+                        f"{self._ds_params.get_autoloader_temp_schema_location()}/{full_name}"
+                    )
+                except Exception as e:
+                    df_append_exceptions[column] = {
+                        "type": df_single_column.schema[column].dataType,
+                        "exception": e,
+                        "table": name,
+                    }
+
+            self.__format_gold_column_merge_exception(
+                df_append_exceptions, delta_df, verbose
+            )
+
+            if not df_append_exceptions:
+                # All good; display the output.
+                d("Resultant gold table preview", 3)
+                unioned_df = delta_df.unionByName(df, allowMissingColumns=True)
+                display(unioned_df)

     def is_backtick_escaped(self, name: str) -> bool:
         """
@@ -374,7 +461,13 @@ class PreviewEngine:
             return name
         return f"`{name}`"

-    def evaluate(self, gold_table_schema: str, display: bool = True) -> None:
+    def evaluate(
+        self,
+        gold_table_schema: str,
+        display: bool = True,
+        force_evaluation: bool = False,
+        verbose: bool = False,
+    ) -> None:
         """
         Evaluates the loaded preset YAML using the input datasource configuration to load
         records. Finally, checks that the output from the Gold stages is compatible with
@@ -429,16 +522,17 @@ class PreviewEngine:
                 schema_hints_file
             )

-        self._compile_stages()
+        self._compile_stages(force_evaluation=force_evaluation)

         with self._ds_params as df:
-            self._result_df_map = self._run(df)
+            self._result_df_map = self._run(df, verbose)
             if display:
                 self._render_output(
                     df,
                     self._result_df_map,
                     self.force_apply_backticks(catalog_name),
                     self.force_apply_backticks(schema_name),
+                    verbose,
                 )

     def results(
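Taken together, the preview-engine changes let a caller opt into eager, per-expression evaluation and receive a consolidated error report instead of a single opaque Spark failure. A sketch, assuming `engine` is an already constructed PreviewEngine and `gold_schema` is the gold table schema argument normally passed to evaluate:

    from dasl_client.preset_development.errors import StageExecutionException

    try:
        engine.evaluate(
            gold_table_schema=gold_schema,
            force_evaluation=True,  # eagerly collect() each field expression
            verbose=False,          # keep only the part of each error before "JVM"
        )
    except StageExecutionException as err:
        # Maps table name -> list of "expression: ...\n<error>" strings.
        for table, problems in err.exception_map.items():
            print(table, len(problems))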
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/preset_development/stage.py

@@ -98,7 +98,13 @@ class Stage:
             "Malformed column name referenced",
         )

-    def __init__(self, spark: SparkSession, stage: str, table: Dict[str, any]):
+    def __init__(
+        self,
+        spark: SparkSession,
+        stage: str,
+        table: Dict[str, any],
+        force_evaluation: bool = False,
+    ):
         """
         Initializes a Stage object that encapsulates all operations required for a single
         table within a stage.
@@ -115,11 +121,13 @@ class Stage:
         """
         self._spark = spark
         self._stage = stage
+        self.__force_evaluation = force_evaluation
         self._name = table.get("name", "")
         self._filter = table.get("filter", "")
         self._postFilter = table.get("postFilter", "")
         self._utils = table.get("utils", {})
         self._input = table.get("input", None)
+        self.__exceptions = []

         # The dasl_id does not exist before bronze or when dealing with temp fields.
         fields = (
@@ -134,6 +142,12 @@ class Stage:

         self.__validate_field_spec(self._fields, self._stage)

+    def get_exceptions(self) -> List[str]:
+        """
+        Get the list of exceptions encountered during field spec evaluation.
+        """
+        return self.__exceptions
+
     def _referenced_columns(self) -> List[str]:
         """
         Get a list of columns referenced in the table's field specifications.
@@ -569,7 +583,14 @@ class Stage:
             if field.get("join", None):
                 joins_cols += [field["name"]]

-        return df.selectExpr(select_fields + joins_cols)
+        expressions = select_fields + joins_cols
+        if self.__force_evaluation:
+            for expression in expressions:
+                try:
+                    df.selectExpr(expression).collect()  # Collect to make unlazy.
+                except Exception as e:
+                    self.__exceptions += [f"expression: {expression}\n{str(e)}"]
+        return df.selectExpr(expressions)

     def run_joins(self, df: DataFrame) -> DataFrame:
         """
dasl_client-1.0.26/dasl_client/regions.json (new file)

@@ -0,0 +1,3 @@
+{
+  "us-east-1": "https://api.sl.us-east-1.cloud.databricks.com"
+}
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/regions.py

@@ -9,7 +9,7 @@ class Regions:
     @staticmethod
     def lookup(name: str) -> str:
         try:
-            return _data(name)
+            return _data[name]
         except KeyError as e:
             raise ValueError(f"unknown region {name}") from e
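This one-character change fixes a latent bug: _data is a mapping, so _data(name) would raise TypeError rather than the KeyError the handler expects; indexing restores the intended ValueError for unknown regions. With the bundled regions.json shown above, behaviour looks like this:

    from dasl_client.regions import Regions

    Regions.lookup("us-east-1")   # "https://api.sl.us-east-1.cloud.databricks.com"
    Regions.lookup("eu-west-1")   # raises ValueError: unknown region eu-west-1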
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/datasource.py

@@ -1201,6 +1201,7 @@ class DataSource(BaseModel):
             The name of the originator of the data.
         source_type (Optional[str]):
             The type of data being imported.
+        epoch (Optional[int]):
         schedule (Optional[Schedule]):
             The schedule for data ingestion.
         custom (Optional[DataSource.CustomNotebook]):
@@ -1353,6 +1354,7 @@ class DataSource(BaseModel):
     metadata: Optional[Metadata] = None
     source: Optional[str] = None
     source_type: Optional[str] = None
+    epoch: Optional[int] = None
     schedule: Optional[Schedule] = None
     custom: Optional["DataSource.CustomNotebook"] = None
     primary_key: Optional["DataSource.PrimaryKey"] = None
@@ -1371,6 +1373,7 @@ class DataSource(BaseModel):
             metadata=Metadata.from_api_obj(obj.metadata),
             source=obj.spec.source,
             source_type=obj.spec.source_type,
+            epoch=obj.spec.epoch,
             schedule=Schedule.from_api_obj(obj.spec.schedule),
             custom=DataSource.CustomNotebook.from_api_obj(obj.spec.custom),
             primary_key=DataSource.PrimaryKey.from_api_obj(obj.spec.primary_key),
@@ -1393,6 +1396,7 @@ class DataSource(BaseModel):
             spec=CoreV1DataSourceSpec(
                 source=self.source,
                 source_type=self.source_type,
+                epoch=self.epoch,
                 schedule=Helpers.maybe(to_api_obj, self.schedule),
                 custom=Helpers.maybe(to_api_obj, self.custom),
                 primary_key=Helpers.maybe(to_api_obj, self.primary_key),
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/rule.py

@@ -1,6 +1,7 @@
 from pydantic import BaseModel
-from typing import Dict, List, Optional, Union
+from typing import Dict, Iterable, List, Optional, Union
 from datetime import datetime, timezone
+import yaml

 from dasl_api import (
     CoreV1Rule,
@@ -37,6 +38,8 @@ class Rule(BaseModel):
             The rule configuration metadata.
         schedule (Schedule):
             The rule schedule.
+        compute_mode (Optional[str]):
+            The compute mode to use for this rule's job.
         input (Rule.Input):
             The rule input configuration.
         observables (Optional[List[Rule.Observable]]):
@@ -613,10 +616,12 @@ class Rule(BaseModel):
         Attributes:
             summary (Optional[str]):
             context (Optional[Dict[str, str]]):
+            default_context (Optional[bool]):
         """

         summary: Optional[str] = None
         context: Optional[Dict[str, str]] = None
+        default_context: Optional[bool] = False

         @staticmethod
         def from_api_obj(obj: Optional[CoreV1RuleSpecOutput]) -> "Rule.Output":
@@ -625,12 +630,14 @@ class Rule(BaseModel):
             return Rule.Output(
                 summary=obj.summary,
                 context=obj.context,
+                default_context=obj.default_context,
             )

         def to_api_obj(self) -> CoreV1RuleSpecOutput:
             return CoreV1RuleSpecOutput(
                 summary=self.summary,
                 context=self.context,
+                default_context=self.default_context,
             )

     class Collate(BaseModel):
@@ -673,6 +680,7 @@ class Rule(BaseModel):
     metadata: Optional[Metadata] = None
     rule_metadata: Optional["Rule.RuleMetadata"] = None
     schedule: Schedule
+    compute_mode: Optional[str] = None
     input: "Rule.Input"
     observables: Optional[List["Rule.Observable"]] = None
     output: "Rule.Output"
@@ -690,6 +698,7 @@ class Rule(BaseModel):
             metadata=Metadata.from_api_obj(obj.metadata),
             rule_metadata=Rule.RuleMetadata.from_api_obj(obj.spec.metadata),
             schedule=Schedule.from_api_obj(obj.spec.schedule),
+            compute_mode=obj.spec.compute_mode,
             input=Rule.Input.from_api_obj(obj.spec.input),
             observables=observables,
             output=Rule.Output.from_api_obj(obj.spec.output),
@@ -697,6 +706,24 @@ class Rule(BaseModel):
             status=ResourceStatus.from_api_obj(obj.status),
         )

+    @staticmethod
+    def from_yaml_str(s: str) -> "Rule":
+        docs = yaml.safe_load_all(s)
+        docs = list(docs)
+
+        if not docs:
+            raise ValueError("YAML is empty")
+        if len(docs) > 1:
+            raise ValueError(f"Expected a single YAML document, got {len(docs)}")
+
+        data = docs[0]
+        if not isinstance(data, dict):
+            raise ValueError(
+                f"Expected a mapping at top-level, got {type(data).__name__}"
+            )
+
+        return Rule.model_validate(data)
+
     def to_api_obj(self) -> CoreV1Rule:
         observables = None
         if self.observables is not None:
@@ -709,6 +736,7 @@ class Rule(BaseModel):
             spec=CoreV1RuleSpec(
                 metadata=Helpers.maybe(to_api_obj, self.rule_metadata),
                 schedule=self.schedule.to_api_obj(),
+                compute_mode=self.compute_mode,
                 input=self.input.to_api_obj(),
                 observables=observables,
                 output=self.output.to_api_obj(),
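from_yaml_str is strict about its input: exactly one YAML document, and that document must be a mapping, before pydantic validation runs. Illustrative failure cases follow directly from the checks above (a valid mapping then goes through Rule.model_validate, which still enforces required fields such as schedule, input, and output):

    from dasl_client.types.rule import Rule

    Rule.from_yaml_str("")                 # ValueError: YAML is empty
    Rule.from_yaml_str("- a\n- b")         # ValueError: Expected a mapping at top-level, got list
    Rule.from_yaml_str("a: 1\n---\nb: 2")  # ValueError: Expected a single YAML document, got 2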
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client/types/workspace_config.py

@@ -19,6 +19,7 @@ from dasl_api import (
     WorkspaceV1WorkspaceConfigSpecSystemTablesConfig,
     WorkspaceV1DefaultConfig,
     WorkspaceV1DefaultConfigComputeGroupOverridesValue,
+    WorkspaceV1ExportConfigSlackConfigToken,
 )

 from .helpers import Helpers
@@ -104,18 +105,53 @@ class ExportConfig(BaseModel):
             destination=self.destination.to_api_obj(),
         )

+    class SlackToken(BaseModel):
+        """
+        Configuration settings for accessing a Slack token.
+
+        Attributes:
+            value (Optional[str]):
+            scope (Optional[str]):
+            key (Optional[str]):
+        """
+
+        value: Optional[str] = None
+        scope: Optional[str] = None
+        key: Optional[str] = None
+
+        @staticmethod
+        def from_api_obj(
+            obj: Optional[WorkspaceV1ExportConfigSlackConfigToken],
+        ) -> "ExportConfig.WebhookDestination":
+            if obj is None:
+                return None
+            return ExportConfig.SlackToken(
+                value=obj.value,
+                scope=obj.scope,
+                key=obj.key,
+            )
+
+        def to_api_obj(self) -> WorkspaceV1ExportConfigSlackConfigToken:
+            return WorkspaceV1ExportConfigSlackConfigToken(
+                value=self.value,
+                scope=self.scope,
+                key=self.key,
+            )
+
     class SlackConfig(BaseModel):
         """
         Configuration settings for exporting to Slack.

         Attributes:
-            token (Optional[ExportConfig.WebhookDestination]):
+            token (Optional[ExportConfig.SlackToken]):
             channel (Optional[str]):
+            url (Optional[str]):
             message (Optional[str]):
         """

-        token: Optional["ExportConfig.WebhookDestination"] = None
+        token: Optional["ExportConfig.SlackToken"] = None
         channel: Optional[str] = None
+        url: Optional[str] = None
         message: Optional[str] = None

         @staticmethod
@@ -125,8 +161,9 @@ class ExportConfig(BaseModel):
             if obj is None:
                 return None
             return ExportConfig.SlackConfig(
-                token=ExportConfig.WebhookDestination.from_api_obj(obj.token),
+                token=ExportConfig.SlackToken.from_api_obj(obj.token),
                 channel=obj.channel,
+                url=obj.url,
                 message=obj.message,
             )

@@ -137,6 +174,7 @@ class ExportConfig(BaseModel):
             return WorkspaceV1ExportConfigSlackConfig(
                 token=token,
                 channel=self.channel,
+                url=self.url,
                 message=self.message,
             )

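The Slack export now carries its credential in a dedicated SlackToken, with either an inline value or a scope/key pair (which reads like a secret-store reference), plus an optional url on the config itself. A sketch with illustrative values only:

    from dasl_client.types.workspace_config import ExportConfig

    slack = ExportConfig.SlackConfig(
        token=ExportConfig.SlackToken(scope="dasl", key="slack-bot-token"),  # or value="xoxb-..."
        channel="#alerts",
        message="New DASL findings",
    )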
@@ -301,9 +339,12 @@ class RulesConfig(BaseModel):
         checkpoint_location (Optional[str]):
             The location to store checkpoints for streaming writes. If
             not provided, the daslStoragePath will be used.
+        default_compute_mode (Optional[str]):
+            The default compute mode to use for rule jobs.
     """

     checkpoint_location: Optional[str] = None
+    default_compute_mode: Optional[str] = None

     @staticmethod
     def from_api_obj(
@@ -314,11 +355,13 @@ class RulesConfig(BaseModel):

         return RulesConfig(
             checkpoint_location=obj.checkpoint_location,
+            default_compute_mode=obj.default_compute_mode,
         )

     def to_api_obj(self) -> WorkspaceV1WorkspaceConfigSpecRules:
         return WorkspaceV1WorkspaceConfigSpecRules(
             checkpoint_location=self.checkpoint_location,
+            default_compute_mode=self.default_compute_mode,
         )


@@ -332,18 +375,18 @@ class DefaultConfig(BaseModel):
     for alternatives.

     Attributes:
-        datasources (Optional[DefaultConfig.Config]):
+        datasources (Optional[DefaultConfig.ResourceConfig]):
             Configuration that applies to DataSources. May be omitted.
-        transforms (Optional[DefaultConfig.Config]):
+        transforms (Optional[DefaultConfig.ResourceConfig]):
             Configuration that applies to Transforms. May be omitted.
-        rules (Optional[DefaultConfig.Config]):
+        rules (Optional[DefaultConfig.ResourceConfig]):
             Configuration that applies to Rules. May be omitted.
-        var_global (Optional[DefaultConfig.Config]):
+        var_global (Optional[DefaultConfig.ResourceConfig]):
             Configuration that applies globally to resources without a
             resource-specific configuration specified. Must be specified.
     """

-    class Config(BaseModel):
+    class ResourceConfig(BaseModel):
         """
         Default configuration for a specific resource type.

@@ -364,7 +407,7 @@ class DefaultConfig(BaseModel):
             checkpoint_location (Optional[str]):
                 The location to store checkpoints for streaming writes. If
                 not provided, the daslStoragePath will be used.
-            compute_group_overrides (Optional[Dict[str, DefaultConfig.Config.ComputeGroupOverrides]]):
+            compute_group_overrides (Optional[Dict[str, DefaultConfig.ResourceConfig.ComputeGroupOverrides]]):
                 Overrides for the maximum number of resources that can be
                 placed into a single job, keyed by the compute group name.
         """
@@ -384,9 +427,9 @@ class DefaultConfig(BaseModel):

             @staticmethod
             def from_api_obj(
-                obj: Optional["DefaultConfig.Config.ComputeGroupOverrides"],
-            ) -> "DefaultConfig.Config.ComputeGroupOverrides":
-                return DefaultConfig.Config.ComputeGroupOverrides(
+                obj: Optional["DefaultConfig.ResourceConfig.ComputeGroupOverrides"],
+            ) -> "DefaultConfig.ResourceConfig.ComputeGroupOverrides":
+                return DefaultConfig.ResourceConfig.ComputeGroupOverrides(
                     max_resources_per_job=obj.max_resources_per_job,
                 )

@@ -403,23 +446,25 @@ class DefaultConfig(BaseModel):
         default_max_resources_per_job: Optional[int] = None
         checkpoint_location: Optional[str] = None
         compute_group_overrides: Optional[
-            Dict[str, "DefaultConfig.Config.ComputeGroupOverrides"]
+            Dict[str, "DefaultConfig.ResourceConfig.ComputeGroupOverrides"]
         ] = None

         @staticmethod
         def from_api_obj(
             obj: Optional[WorkspaceV1DefaultConfig],
-        ) -> "DefaultConfig.Config":
+        ) -> "DefaultConfig.ResourceConfig":
             if obj is None:
                 return None

             compute_group_overrides = None
             if obj.compute_group_overrides is not None:
                 compute_group_overrides = {
-                    key: DefaultConfig.Config.ComputeGroupOverrides.from_api_obj(value)
+                    key: DefaultConfig.ResourceConfig.ComputeGroupOverrides.from_api_obj(
+                        value
+                    )
                     for key, value in obj.compute_group_overrides.items()
                 }
-            return DefaultConfig.Config(
+            return DefaultConfig.ResourceConfig(
                 notebook_location=obj.notebook_location,
                 bronze_schema=obj.bronze_schema,
                 silver_schema=obj.silver_schema,
@@ -448,10 +493,10 @@ class DefaultConfig(BaseModel):
                 compute_group_overrides=compute_group_overrides,
             )

-    datasources: Optional["DefaultConfig.Config"] = None
-    transforms: Optional["DefaultConfig.Config"] = None
-    rules: Optional["DefaultConfig.Config"] = None
-    var_global: Optional["DefaultConfig.Config"] = None
+    datasources: Optional["DefaultConfig.ResourceConfig"] = None
+    transforms: Optional["DefaultConfig.ResourceConfig"] = None
+    rules: Optional["DefaultConfig.ResourceConfig"] = None
+    var_global: Optional["DefaultConfig.ResourceConfig"] = None

     @staticmethod
     def from_api_obj(
@@ -461,10 +506,10 @@ class DefaultConfig(BaseModel):
             return None

         return DefaultConfig(
-            datasources=DefaultConfig.Config.from_api_obj(obj.datasources),
-            transforms=DefaultConfig.Config.from_api_obj(obj.transforms),
-            rules=DefaultConfig.Config.from_api_obj(obj.rules),
-            var_global=DefaultConfig.Config.from_api_obj(obj.var_global),
+            datasources=DefaultConfig.ResourceConfig.from_api_obj(obj.datasources),
+            transforms=DefaultConfig.ResourceConfig.from_api_obj(obj.transforms),
+            rules=DefaultConfig.ResourceConfig.from_api_obj(obj.rules),
+            var_global=DefaultConfig.ResourceConfig.from_api_obj(obj.var_global),
         )

     def to_api_obj(self) -> WorkspaceV1WorkspaceConfigSpecDefaultConfig:
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dasl_client
-Version: 1.0.24
+Version: 1.0.26
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
@@ -8,10 +8,11 @@ Author-email: Antimatter Team <support@antimatter.io>
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: dasl_api==0.1.24
+Requires-Dist: dasl_api==0.1.25
 Requires-Dist: databricks-sdk>=0.41.0
 Requires-Dist: pydantic>=2
 Requires-Dist: typing_extensions>=4.10.0
+Requires-Dist: pyyaml==6.0.2
 Dynamic: author
 Dynamic: home-page
 Dynamic: license-file
{dasl_client-1.0.24 → dasl_client-1.0.26}/dasl_client.egg-info/requires.txt

@@ -1,4 +1,5 @@
-dasl_api==0.1.24
+dasl_api==0.1.25
 databricks-sdk>=0.41.0
 pydantic>=2
 typing_extensions>=4.10.0
+pyyaml==6.0.2
{dasl_client-1.0.24 → dasl_client-1.0.26}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "dasl_client"
-version = "1.0.24"
+version = "1.0.26"
 description = "The DASL client library used for interacting with the DASL workspace"
 readme = "README.md"
 authors = [
@@ -13,10 +13,11 @@ authors = [
 requires-python = ">=3.8"

 dependencies = [
-    "dasl_api==0.1.24",
+    "dasl_api==0.1.25",
     "databricks-sdk>=0.41.0",
     "pydantic>=2",
     "typing_extensions>=4.10.0",
+    "pyyaml==6.0.2",
 ]

 [tool.setuptools]
dasl_client-1.0.24/dasl_client/regions.json (removed)

@@ -1,3 +0,0 @@
-{
-  "us-east-1": "https://api.prod.sl.antimatter.io"
-}
The remaining files (16-38 in the list above) contain no changes.