dasl-client 1.0.22__py3-none-any.whl → 1.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

dasl_client/preset_development/preview_engine.py CHANGED
@@ -240,60 +240,40 @@ class PreviewEngine:
             d(f"{name}", 2)
             display(df)
         d("Gold", 1)
-        for name, df in gold.items():
-            d(f"{name}", 2)
+        for full_name, df in gold.items():
+            d(f"{full_name}", 2)
             d("Stage output", 3)
             display(df)
 
             # NOTE: Name is stored as Gold_name/Silver_input. So we need to get just the Gold table
             # name that we are comparing the dataframe metadata to.
-            name = name.split("/")[0]
+            name = full_name.split("/")[0]
             fqn_gold_table_name = f"{self.force_apply_backticks(gold_table_catalog)}.{self.force_apply_backticks(gold_table_schema)}.{self.force_apply_backticks(name)}"
 
             if not self._spark.catalog.tableExists(f"{fqn_gold_table_name}"):
                 raise UnknownGoldTableError(name, gold_table_schema)
 
-            # Performs the type check.
+            # Create a temporary table to perform the type check
             delta_df = self._spark.table(f"{fqn_gold_table_name}").limit(0)
-            unioned_df = delta_df.unionByName(df, allowMissingColumns=True)
+            delta_df.write.mode("overwrite").save(
+                f"{self._ds_params.get_autoloader_temp_schema_location()}/{full_name}"
+            )
 
-            # Now we check no new columns.
-            if not set(df.columns).issubset(delta_df.columns):
-                raise GoldTableCompatibilityError(
-                    f"Extra columns provided: {', '.join([col for col in df.columns if col not in delta_df.columns])}"
-                )
+            # Update the params to indicate we've added a testing temp gold table
+            self._ds_params.add_gold_schema_table(full_name)
 
-            # Now we check no new fields in STRUCT columns.
-            for field in delta_df.schema.fields:
-                if isinstance(field.dataType, StructType) and field.name in df.columns:
-                    # Retrieve the corresponding field from the DataFrame's schema.
-                    df_field = next(f for f in df.schema.fields if f.name == field.name)
-                    check_struct_compatibility(field, df_field)
-
-            # Check nullable columns exist, and data what we are inserting is set.
-            non_nullable_cols = [
-                field.name for field in delta_df.schema.fields if not field.nullable
-            ]
-            null_checks = [
-                sum_(when(col_(col).isNull(), 1).otherwise(0)).alias(col)
-                for col in non_nullable_cols
-            ]
-            null_counts = df.select(null_checks).collect()[0].asDict()
-            cols_with_nulls = []
+            # Perform the type checks by trying to insert data into the table
             try:
-                cols_with_nulls = [
-                    col_name for col_name, count in null_counts.items() if count > 0
-                ]
-            except TypeError:
-                # There were no records returned and so null_counts == None.
-                pass
-            if cols_with_nulls:
+                df.write.mode("append").save(
+                    f"{self._ds_params.get_autoloader_temp_schema_location()}/{full_name}"
+                )
+            except Exception as e:
                 raise GoldTableCompatibilityError(
-                    f"Record with null data found for non-nullable columns: {', '.join([col for col in cols_with_nulls])}"
+                    f"Preset gold table '{full_name}' did not match the gold schema for {fqn_gold_table_name}: {repr(e)}"
                 )
 
             d("Resultant gold table preview", 3)
-            display(unioned_df)
+            display(df)
 
     def is_backtick_escaped(self, name: str) -> bool:
         """
dasl_client/preset_development/preview_parameters.py CHANGED
@@ -97,6 +97,7 @@ class PreviewParameters:
         self._mode = None # [input, autoloader]
         self._record_limit = 10
         self._autoloader_temp_schema_location = "dbfs:/tmp/schemas"
+        self._gold_test_schemas = []
 
         self._time_column = None
         self._start_time = None
@@ -206,10 +207,21 @@ class PreviewParameters:
                 f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
                 recurse=True,
             )
+            for gold_test_schema in self._gold_test_schemas:
+                dbutils.fs.rm(
+                    f"{self._autoloader_temp_schema_location}/{gold_test_schema}",
+                    recurse=True,
+                )
         else:
-            print(
-                f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}"
-            )
+            leaked_lines = [
+                f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+                *[
+                    f"{self._autoloader_temp_schema_location}/{x}"
+                    for x in self._gold_test_schemas
+                ],
+            ]
+            print(", ".join(leaked_lines))
+        self._gold_test_schemas = []
 
     def from_input(self):
         """
@@ -253,6 +265,15 @@ class PreviewParameters:
         self._autoloader_temp_schema_location = path
         return self
 
+    def get_autoloader_temp_schema_location(self) -> str:
+        """
+        Get the location for the autoloader's streaming mode schema to be created.
+
+        Returns:
+            str: The location for the autoloader's streaming mode schema to be created.
+        """
+        return self._autoloader_temp_schema_location
+
     def set_data_schema(self, schema: StructType):
         """
         Set the input schema for "input" mode. For example:
@@ -409,3 +430,10 @@ class PreviewParameters:
         """
         self._table = table_name
         return self
+
+    def add_gold_schema_table(self, gold_schema_table_name: str):
+        """
+        Add a gold schema temporary table name that will need to be cleaned
+        up at the end of the run.
+        """
+        self._gold_test_schemas.append(gold_schema_table_name)
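
The preview_parameters.py changes give the engine a registry of temp gold tables so cleanup can remove them alongside the schema directory, and so the leak warning lists them when cleanup is skipped. Generically the pattern is: register every temp location as it is created, then remove them all (or report the leaks) at the end of the run. A hedged sketch under that reading; the class and method names are illustrative, not dasl-client APIs:

from typing import Callable, List


class TempLocationTracker:
    def __init__(self, base: str) -> None:
        self.base = base
        self._paths: List[str] = []

    def register(self, name: str) -> str:
        # Mirrors add_gold_schema_table(): remember the path for later cleanup.
        path = f"{self.base}/{name}"
        self._paths.append(path)
        return path

    def cleanup(self, remove: Callable[[str], None]) -> None:
        # On Databricks, remove would be e.g. lambda p: dbutils.fs.rm(p, recurse=True);
        # taking it as a parameter keeps the tracker platform-agnostic.
        for path in self._paths:
            remove(path)
        self._paths = []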
dasl_client/types/workspace_config.py CHANGED
@@ -8,12 +8,14 @@ from dasl_api import (
     WorkspaceV1ExportConfigWebhookConfigDestination,
     WorkspaceV1WorkspaceConfig,
     WorkspaceV1WorkspaceConfigSpec,
+    WorkspaceV1WorkspaceConfigSpecDatasources,
     WorkspaceV1WorkspaceConfigSpecDefaultConfig,
     WorkspaceV1WorkspaceConfigSpecDetectionRuleMetadata,
     WorkspaceV1WorkspaceConfigSpecManagedRetentionInner,
     WorkspaceV1WorkspaceConfigSpecManagedRetentionInnerOverridesInner,
     WorkspaceV1WorkspaceConfigSpecObservables,
     WorkspaceV1WorkspaceConfigSpecObservablesKindsInner,
+    WorkspaceV1WorkspaceConfigSpecRules,
     WorkspaceV1WorkspaceConfigSpecSystemTablesConfig,
     WorkspaceV1DefaultConfig,
     WorkspaceV1DefaultConfigComputeGroupOverridesValue,
@@ -238,12 +240,91 @@ class WorkspaceConfigObservables(BaseModel):
     )
 
 
+class DatasourcesConfig(BaseModel):
+    """
+    Configuration settings used by Datasources.
+
+    Attributes:
+        bronze_schema (Optional[str]):
+            Name of the bronze schema in the catalog.
+        silver_schema (Optional[str]):
+            Name of the silver schema in the catalog.
+        gold_schema (Optional[str]):
+            Name of the gold schema in the catalog.
+        catalog_name (Optional[str]):
+            The catalog name to use as the resource's default.
+        checkpoint_location (Optional[str]):
+            The base checkpoint location to use in Rule notebooks.
+    """
+
+    catalog_name: Optional[str] = None
+    bronze_schema: Optional[str] = None
+    silver_schema: Optional[str] = None
+    gold_schema: Optional[str] = None
+    checkpoint_location: Optional[str] = None
+
+    @staticmethod
+    def from_api_obj(
+        obj: Optional[WorkspaceV1WorkspaceConfigSpecDatasources],
+    ) -> Optional["DatasourcesConfig"]:
+        if obj is None:
+            return None
+
+        return DatasourcesConfig(
+            catalog_name=obj.catalog_name,
+            bronze_schema=obj.bronze_schema,
+            silver_schema=obj.silver_schema,
+            gold_schema=obj.gold_schema,
+            checkpoint_location=obj.checkpoint_location,
+        )
+
+    def to_api_obj(self) -> WorkspaceV1WorkspaceConfigSpecDatasources:
+        return WorkspaceV1WorkspaceConfigSpecDatasources(
+            catalog_name=self.catalog_name,
+            bronze_schema=self.bronze_schema,
+            silver_schema=self.silver_schema,
+            gold_schema=self.gold_schema,
+            checkpoint_location=self.checkpoint_location,
+        )
+
+
+class RulesConfig(BaseModel):
+    """
+    Configuration settings used by Rules.
+
+    Attributes:
+        checkpoint_location (Optional[str]):
+            The location to store checkpoints for streaming writes. If
+            not provided, the daslStoragePath will be used.
+    """
+
+    checkpoint_location: Optional[str] = None
+
+    @staticmethod
+    def from_api_obj(
+        obj: Optional[WorkspaceV1WorkspaceConfigSpecRules],
+    ) -> Optional["RulesConfig"]:
+        if obj is None:
+            return None
+
+        return RulesConfig(
+            checkpoint_location=obj.checkpoint_location,
+        )
+
+    def to_api_obj(self) -> WorkspaceV1WorkspaceConfigSpecRules:
+        return WorkspaceV1WorkspaceConfigSpecRules(
+            checkpoint_location=self.checkpoint_location,
+        )
+
+
 class DefaultConfig(BaseModel):
     """
-    Configuration of the schemas, notebook storage locations, checkpoint
-    storage locations, and so forth, for each concrete resource type and
-    a global fallback that applies to resources which do not have a
-    specified DefaultConfig.
+    (DEPRECATED) Configuration of the schemas, notebook storage locations,
+    checkpoint storage locations, and so forth, for each concrete resource
+    type and a global fallback that applies to resources which do not have a
+    specified DefaultConfig. While it does still work, this field is
+    deprecated and should not be used; see DatasourcesConfig and RulesConfig
+    for alternatives.
 
     Attributes:
         datasources (Optional[DefaultConfig.Config]):
@@ -369,8 +450,11 @@ class DefaultConfig(BaseModel):
 
     @staticmethod
     def from_api_obj(
-        obj: WorkspaceV1WorkspaceConfigSpecDefaultConfig,
-    ) -> "DefaultConfig":
+        obj: Optional[WorkspaceV1WorkspaceConfigSpecDefaultConfig],
+    ) -> Optional["DefaultConfig"]:
+        if obj is None:
+            return None
+
         return DefaultConfig(
             datasources=DefaultConfig.Config.from_api_obj(obj.datasources),
             transforms=DefaultConfig.Config.from_api_obj(obj.transforms),
@@ -579,8 +663,15 @@ class WorkspaceConfig(BaseModel):
         dasl_custom_presets_path (Optional[str]):
            An optional path to a directory containing user defined presets.
         default_config (Optional[DefaultConfig]):
-            Configuration settings regarding storage of bronze, silver, and
-            gold tables and related assets for each resource type.
+            (DEPRECATED) Configuration settings regarding storage of bronze,
+            silver, and gold tables and related assets for each resource type.
+        default_custom_notebook_location (Optional[str]):
+            The storage location for custom user-provided notebooks. Also
+            used as the prefix for relative paths to custom notebooks.
+        datasources (Optional[DatasourcesConfig]):
+            Configuration items that apply specifically to datasources.
+        rules (Optional[RulesConfig]):
+            Configuration items that apply specifically to rules.
         managed_retention (Optional[List[ManagedRetention]]):
             Configuration of regular cleanup (i.e. pruning) jobs for various
             catalogs, schemas, and tables.
@@ -598,6 +689,9 @@ class WorkspaceConfig(BaseModel):
     dasl_storage_path: Optional[str] = None
     dasl_custom_presets_path: Optional[str] = None
     default_config: Optional[DefaultConfig] = None
+    default_custom_notebook_location: Optional[str] = None
+    datasources: Optional[DatasourcesConfig] = None
+    rules: Optional[RulesConfig] = None
     managed_retention: Optional[List[ManagedRetention]] = None
     status: Optional[ResourceStatus] = None
 
@@ -628,6 +722,9 @@ class WorkspaceConfig(BaseModel):
            dasl_storage_path=spec.dasl_storage_path,
            dasl_custom_presets_path=spec.dasl_custom_presets_path,
            default_config=DefaultConfig.from_api_obj(spec.default_config),
+            default_custom_notebook_location=spec.default_custom_notebook_location,
+            datasources=DatasourcesConfig.from_api_obj(spec.datasources),
+            rules=RulesConfig.from_api_obj(spec.rules),
            managed_retention=managed_retention,
            status=ResourceStatus.from_api_obj(obj.status),
        )
@@ -658,6 +755,9 @@ class WorkspaceConfig(BaseModel):
                dasl_storage_path=self.dasl_storage_path,
                dasl_custom_presets_path=self.dasl_custom_presets_path,
                default_config=Helpers.maybe(to_api_obj, self.default_config),
+                default_custom_notebook_location=self.default_custom_notebook_location,
+                datasources=Helpers.maybe(to_api_obj, self.datasources),
+                rules=Helpers.maybe(to_api_obj, self.rules),
                managed_retention=managed_retention,
            ),
            status=Helpers.maybe(to_api_obj, self.status),
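
Taken together, the workspace_config.py hunks move workspace configuration from the deprecated per-resource-type DefaultConfig to top-level datasources and rules blocks, plus a default_custom_notebook_location. A hedged construction sketch: the field names come from this diff, while the values and any omitted required fields are illustrative.

# Sketch only: field names per this diff; values and omitted fields illustrative.
config = WorkspaceConfig(
    default_custom_notebook_location="/Shared/dasl/notebooks",
    datasources=DatasourcesConfig(
        catalog_name="prod",
        bronze_schema="bronze",
        silver_schema="silver",
        gold_schema="gold",
        checkpoint_location="/Volumes/prod/dasl/checkpoints",
    ),
    rules=RulesConfig(
        checkpoint_location="/Volumes/prod/dasl/rule_checkpoints",
    ),
    # default_config=... is still accepted, but deprecated in favor of the above.
)
# Marshaling follows the hunks above: optional sub-configs go through
# Helpers.maybe(to_api_obj, ...) when building the API spec.
api_obj = config.to_api_obj()

The test changes below exercise exactly this shape against a live workspace.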
dasl_client-1.0.22.dist-info/METADATA → dasl_client-1.0.23.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dasl_client
-Version: 1.0.22
+Version: 1.0.23
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
dasl_client-1.0.22.dist-info/RECORD → dasl_client-1.0.23.dist-info/RECORD RENAMED
@@ -10,8 +10,8 @@ dasl_client/errors/__init__.py,sha256=lpH2HGF5kCRTk6MxpPEyY9ulTvsLBFKb4NnLuFFLZZ
 dasl_client/errors/errors.py,sha256=u-B8dR8zlxdNVeEdHi6UozX178jwJJ5ZJOGl9YjONRc,4008
 dasl_client/preset_development/__init__.py,sha256=9yC4gmQfombvYLThzo0pSfT5JMolfNVWFVQIuIg_XUA,131
 dasl_client/preset_development/errors.py,sha256=jsqBFMZtl7uHi6O9bBHnOt0UQ4WM9KN9x0uYtf5c268,5482
-dasl_client/preset_development/preview_engine.py,sha256=D8e3Ohds0KtyPec-iWJknh9GvlPbwHufOFF1gtj62kE,15735
-dasl_client/preset_development/preview_parameters.py,sha256=YjSJ00mEUcqF5KkJEPW6Wif8I4iaMIMxJeUSuyIS4x0,14640
+dasl_client/preset_development/preview_engine.py,sha256=mNDLuuTVXKenRa-jhr5-xQtonsLGIC6ZyD3asSFFf8A,14745
+dasl_client/preset_development/preview_parameters.py,sha256=h0DwYFKdA3qAvBJD5Kww21uOPpsfYS9DVF1ssJ1m6Gs,15743
 dasl_client/preset_development/stage.py,sha256=2FPOZvb_bCVpjrY5TsYB05BD4KYbrhgfAe9uZCQFkOk,23397
 dasl_client/types/__init__.py,sha256=GsXC3eWuv21VTLPLPH9pzM95JByaKnKrPjJkh2rlZfQ,170
 dasl_client/types/admin_config.py,sha256=Kmx3Kuai9_LWMeO2NpWasRUgLihYSEXGtuYVfG0FkjU,2200
@@ -21,16 +21,16 @@ dasl_client/types/dbui.py,sha256=k2WXNjfrEjXa-5iBlZ17pvFAs_jgbd-ir5NJl5sXvpA,160
 dasl_client/types/helpers.py,sha256=gLGTvrssAKrdkQT9h80twEosld2egwhvj-zAudxWFPs,109
 dasl_client/types/rule.py,sha256=BqhWhT8Eh95UXNytd0PxVcjqYuWQcdN1tfKjUB4Tk74,25781
 dasl_client/types/types.py,sha256=DeUOfdYGOhUGEy7yKOfo0OYTXYRrs57yYgNLUbu7Tlc,8806
-dasl_client/types/workspace_config.py,sha256=RThg_THS_4leITWdzBPTWdR2ytq5Uk36m6nIOUMzFCM,24878
+dasl_client/types/workspace_config.py,sha256=sknQcLjZ7efwn2iBOVwxBj4oqO6xVwaBZVEbmU-UJbc,28661
 test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/conftest.py,sha256=ZfNULJxVQ609GHxw9UsreTcbQMl3gbcTP_DKT1oySwQ,440
 test/constants.py,sha256=ed3xiemWDJVBlHDwn-iQToCbcaXD3AN-5r8HkURCqBs,438
 test/test_api_changes.py,sha256=RzLauhCkwLmf_gK5yZZ7R7TI9803XCGr-YCyv_jSc94,3827
-test/test_api_surface.py,sha256=nOxoxg9mVSpHLtEDiK98qbAarXsUzC3zTIUZ4e4KLAI,10940
-test/test_databricks_secret_auth.py,sha256=P1seBBHOLcCzJPLdRZlJZxeG62GUFKFbjsY8c7gTT_8,3613
-test/test_marshaling.py,sha256=DLy5C1lBAon9oD55tzrh98cbcii6OmpTPP4CBm4cvu0,37816
-dasl_client-1.0.22.dist-info/LICENSE,sha256=M35UepUPyKmFkvENlkweeaMElheQqNoM5Emh8ADO-rs,4
-dasl_client-1.0.22.dist-info/METADATA,sha256=pmJrFcebmhZ5GXiENQrjQoR_aVYmLIoZNBO5xItlaDs,741
-dasl_client-1.0.22.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-dasl_client-1.0.22.dist-info/top_level.txt,sha256=943P5S_qILHKZYxAvxPUeqOzM2yV18d5SBVKxzPw2OE,17
-dasl_client-1.0.22.dist-info/RECORD,,
+test/test_api_surface.py,sha256=SnJQtaWIfeuTcySFeGfj6cVDksmLyIa8BmblB0t-ZJg,11014
+test/test_databricks_secret_auth.py,sha256=w0ZX23j9gDbJtZuNdZus3joUmdD5U2rX8Qrs0vbqMd4,3736
+test/test_marshaling.py,sha256=ltMuJCqBMpqMpchQ1ZjdO3vrGk_ef1NR3yd07SHV7gU,38212
+dasl_client-1.0.23.dist-info/LICENSE,sha256=M35UepUPyKmFkvENlkweeaMElheQqNoM5Emh8ADO-rs,4
+dasl_client-1.0.23.dist-info/METADATA,sha256=LMDZqT-XHfQgsccfU8YwbAeatbsPl9TFekjj0Nyk7us,741
+dasl_client-1.0.23.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+dasl_client-1.0.23.dist-info/top_level.txt,sha256=943P5S_qILHKZYxAvxPUeqOzM2yV18d5SBVKxzPw2OE,17
+dasl_client-1.0.23.dist-info/RECORD,,
test/test_api_surface.py CHANGED
@@ -47,13 +47,15 @@ def test_workspace_config(api_client):
            catalog_name="automated_test_cases",
            var_schema="default",
        ),
-        default_config=DefaultConfig(
-            var_global=DefaultConfig.Config(
-                bronze_schema="bronze",
-                silver_schema="silver",
-                gold_schema="gold",
-                catalog_name="automated_test_cases",
-            ),
+        default_custom_notebook_location="/Users/test/notebooks",
+        datasources=DatasourcesConfig(
+            bronze_schema="bronze",
+            silver_schema="silver",
+            gold_schema="gold",
+            catalog_name="automated_test_cases",
+        ),
+        rules=RulesConfig(
+            checkpoint_location="/Users/test/checkpoints",
        ),
    )
 
@@ -69,7 +71,7 @@ def test_workspace_config(api_client):
 
    assert api_client.get_config() == base_workspace_config
 
-    base_workspace_config.default_config.var_global.bronze_schema = "bronze_new"
+    base_workspace_config.datasources.bronze_schema = "bronze_new"
    api_client.put_config(base_workspace_config)
    got = api_client.get_config()
    base_workspace_config.metadata.modified_timestamp = got.metadata.modified_timestamp
test/test_databricks_secret_auth.py CHANGED
@@ -10,6 +10,7 @@ from .constants import *
 
 pylib_volume_path = os.environ["PYLIB_VOLUME_PATH"]
 pylib_wheel_path = os.environ["PYLIB_WHEEL_PATH"]
+api_wheel_path = os.environ["API_WHEEL_PATH"]
 
 
 def test_secret_auth(api_client):
@@ -18,6 +19,8 @@ def test_secret_auth(api_client):
 
    # need to do an API operation using databricks secret auth.
    notebook_data = f"""
+%pip uninstall -y dasl-client dasl-api
+%pip install {api_wheel_path}
 %pip install {pylib_wheel_path}
 dbutils.library.restartPython()
 # COMMAND ----------
test/test_marshaling.py CHANGED
@@ -72,6 +72,17 @@ def test_workspace_config_marshal_unmarshal():
            relationships=["rel1", "rel2"],
        ),
        dasl_storage_path="/random/storage/path",
+        default_custom_notebook_location="/tmp/notebooks",
+        datasources=DatasourcesConfig(
+            catalog_name="test_catalog",
+            bronze_schema="bronze",
+            silver_schema="silver",
+            gold_schema="gold",
+            checkpoint_location="/tmp/checkpoints",
+        ),
+        rules=RulesConfig(
+            checkpoint_location="/tmp/checkpoints",
+        ),
        default_config=DefaultConfig(
            datasources=DefaultConfig.Config(
                notebook_location="notebook_ds",