dasl-client 1.0.22__py3-none-any.whl → 1.0.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

dasl_client/preset_development/preview_engine.py CHANGED
@@ -240,60 +240,40 @@ class PreviewEngine:
             d(f"{name}", 2)
             display(df)
         d("Gold", 1)
-        for name, df in gold.items():
-            d(f"{name}", 2)
+        for full_name, df in gold.items():
+            d(f"{full_name}", 2)
             d("Stage output", 3)
             display(df)
 
             # NOTE: Name is stored as Gold_name/Silver_input. So we need to get just the Gold table
             # name that we are comparing the dataframe metadata to.
-            name = name.split("/")[0]
+            name = full_name.split("/")[0]
             fqn_gold_table_name = f"{self.force_apply_backticks(gold_table_catalog)}.{self.force_apply_backticks(gold_table_schema)}.{self.force_apply_backticks(name)}"
 
             if not self._spark.catalog.tableExists(f"{fqn_gold_table_name}"):
                 raise UnknownGoldTableError(name, gold_table_schema)
 
-            # Performs the type check.
+            # Create a temporary table to perform the type check
             delta_df = self._spark.table(f"{fqn_gold_table_name}").limit(0)
-            unioned_df = delta_df.unionByName(df, allowMissingColumns=True)
+            delta_df.write.mode("overwrite").save(
+                f"{self._ds_params.get_autoloader_temp_schema_location()}/{full_name}"
+            )
 
-            # Now we check no new columns.
-            if not set(df.columns).issubset(delta_df.columns):
-                raise GoldTableCompatibilityError(
-                    f"Extra columns provided: {', '.join([col for col in df.columns if col not in delta_df.columns])}"
-                )
+            # Update the params to indicate we've added a testing temp gold table
+            self._ds_params.add_gold_schema_table(full_name)
 
-            # Now we check no new fields in STRUCT columns.
-            for field in delta_df.schema.fields:
-                if isinstance(field.dataType, StructType) and field.name in df.columns:
-                    # Retrieve the corresponding field from the DataFrame's schema.
-                    df_field = next(f for f in df.schema.fields if f.name == field.name)
-                    check_struct_compatibility(field, df_field)
-
-            # Check nullable columns exist, and data what we are inserting is set.
-            non_nullable_cols = [
-                field.name for field in delta_df.schema.fields if not field.nullable
-            ]
-            null_checks = [
-                sum_(when(col_(col).isNull(), 1).otherwise(0)).alias(col)
-                for col in non_nullable_cols
-            ]
-            null_counts = df.select(null_checks).collect()[0].asDict()
-            cols_with_nulls = []
+            # Perform the type checks by trying to insert data into the table
             try:
-                cols_with_nulls = [
-                    col_name for col_name, count in null_counts.items() if count > 0
-                ]
-            except TypeError:
-                # There were no records returned and so null_counts == None.
-                pass
-            if cols_with_nulls:
+                df.write.mode("append").save(
+                    f"{self._ds_params.get_autoloader_temp_schema_location()}/{full_name}"
+                )
+            except Exception as e:
                 raise GoldTableCompatibilityError(
-                    f"Record with null data found for non-nullable columns: {', '.join([col for col in cols_with_nulls])}"
+                    f"Preset gold table '{full_name}' did not match the gold schema for {fqn_gold_table_name}: {repr(e)}"
                 )
 
             d("Resultant gold table preview", 3)
-            display(unioned_df)
+            display(df)
 
     def is_backtick_escaped(self, name: str) -> bool:
         """
dasl_client/preset_development/preview_parameters.py CHANGED
@@ -97,6 +97,7 @@ class PreviewParameters:
         self._mode = None # [input, autoloader]
         self._record_limit = 10
         self._autoloader_temp_schema_location = "dbfs:/tmp/schemas"
+        self._gold_test_schemas = []
 
         self._time_column = None
         self._start_time = None
@@ -206,10 +207,21 @@ class PreviewParameters:
                 f"{self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
                 recurse=True,
             )
+            for gold_test_schema in self._gold_test_schemas:
+                dbutils.fs.rm(
+                    f"{self._autoloader_temp_schema_location}/{gold_test_schema}",
+                    recurse=True,
+                )
         else:
-            print(
-                f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}"
-            )
+            leaked_lines = [
+                f"FYI, we are leaking temp data {self._autoloader_temp_schema_location}/{self._schema_uuid_str}",
+                *[
+                    f"{self._autoloader_temp_schema_location}/{x}"
+                    for x in self._gold_test_schemas
+                ],
+            ]
+            print(", ".join(leaked_lines))
+        self._gold_test_schemas = []
 
     def from_input(self):
         """
@@ -253,6 +265,15 @@ class PreviewParameters:
         self._autoloader_temp_schema_location = path
         return self
 
+    def get_autoloader_temp_schema_location(self) -> str:
+        """
+        Get the location for the autoloader's streaming mode schema to be created.
+
+        Returns:
+            str: The location for the autoloader's streaming mode schema to be created.
+        """
+        return self._autoloader_temp_schema_location
+
     def set_data_schema(self, schema: StructType):
         """
         Set the input schema for "input" mode. For example:
@@ -409,3 +430,10 @@ class PreviewParameters:
         """
         self._table = table_name
         return self
+
+    def add_gold_schema_table(self, gold_schema_table_name: str):
+        """
+        Add a gold schema temporary table name that will need to be cleaned
+        up at the end of the run.
+        """
+        self._gold_test_schemas.append(gold_schema_table_name)
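
The preview_parameters.py changes give the engine a registry of temp gold tables so cleanup can remove them alongside the schema directory, and so the leak warning lists them when cleanup is skipped. Generically the pattern is: register every temp location as it is created, then remove them all (or report the leaks) at the end of the run. A hedged sketch under that reading; the class and method names are illustrative, not dasl-client APIs:

from typing import Callable, List


class TempLocationTracker:
    def __init__(self, base: str) -> None:
        self.base = base
        self._paths: List[str] = []

    def register(self, name: str) -> str:
        # Mirrors add_gold_schema_table(): remember the path for later cleanup.
        path = f"{self.base}/{name}"
        self._paths.append(path)
        return path

    def cleanup(self, remove: Callable[[str], None]) -> None:
        # On Databricks, remove would be e.g. lambda p: dbutils.fs.rm(p, recurse=True);
        # taking it as a parameter keeps the tracker platform-agnostic.
        for path in self._paths:
            remove(path)
        self._paths = []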
dasl_client/types/workspace_config.py CHANGED
@@ -8,12 +8,14 @@ from dasl_api import (
     WorkspaceV1ExportConfigWebhookConfigDestination,
     WorkspaceV1WorkspaceConfig,
     WorkspaceV1WorkspaceConfigSpec,
+    WorkspaceV1WorkspaceConfigSpecDatasources,
     WorkspaceV1WorkspaceConfigSpecDefaultConfig,
     WorkspaceV1WorkspaceConfigSpecDetectionRuleMetadata,
     WorkspaceV1WorkspaceConfigSpecManagedRetentionInner,
     WorkspaceV1WorkspaceConfigSpecManagedRetentionInnerOverridesInner,
     WorkspaceV1WorkspaceConfigSpecObservables,
     WorkspaceV1WorkspaceConfigSpecObservablesKindsInner,
+    WorkspaceV1WorkspaceConfigSpecRules,
     WorkspaceV1WorkspaceConfigSpecSystemTablesConfig,
     WorkspaceV1DefaultConfig,
     WorkspaceV1DefaultConfigComputeGroupOverridesValue,
@@ -238,12 +240,91 @@ class WorkspaceConfigObservables(BaseModel):
     )
 
 
+class DatasourcesConfig(BaseModel):
+    """
+    Configuration settings used by Datasources.
+
+    Attributes:
+        bronze_schema (Optional[str]):
+            Name of the bronze schema in the catalog.
+        silver_schema (Optional[str]):
+            Name of the silver schema in the catalog.
+        gold_schema (Optional[str]):
+            Name of the gold schema in the catalog.
+        catalog_name (Optional[str]):
+            The catalog name to use as the resource's default.
+        checkpoint_location (Optional[str]):
+            The base checkpoint location to use in Rule notebooks.
+    """
+
+    catalog_name: Optional[str] = None
+    bronze_schema: Optional[str] = None
+    silver_schema: Optional[str] = None
+    gold_schema: Optional[str] = None
+    checkpoint_location: Optional[str] = None
+
+    @staticmethod
+    def from_api_obj(
+        obj: Optional[WorkspaceV1WorkspaceConfigSpecDatasources],
+    ) -> Optional["DatasourcesConfig"]:
+        if obj is None:
+            return None
+
+        return DatasourcesConfig(
+            catalog_name=obj.catalog_name,
+            bronze_schema=obj.bronze_schema,
+            silver_schema=obj.silver_schema,
+            gold_schema=obj.gold_schema,
+            checkpoint_location=obj.checkpoint_location,
+        )
+
+    def to_api_obj(self) -> WorkspaceV1WorkspaceConfigSpecDatasources:
+        return WorkspaceV1WorkspaceConfigSpecDatasources(
+            catalog_name=self.catalog_name,
+            bronze_schema=self.bronze_schema,
+            silver_schema=self.silver_schema,
+            gold_schema=self.gold_schema,
+            checkpoint_location=self.checkpoint_location,
+        )
+
+
+class RulesConfig(BaseModel):
+    """
+    Configuration settings used by Rules.
+
+    Attributes:
+        checkpoint_location (Optional[str]):
+            The location to store checkpoints for streaming writes. If
+            not provided, the daslStoragePath will be used.
+    """
+
+    checkpoint_location: Optional[str] = None
+
+    @staticmethod
+    def from_api_obj(
+        obj: Optional[WorkspaceV1WorkspaceConfigSpecRules],
+    ) -> Optional["RulesConfig"]:
+        if obj is None:
+            return None
+
+        return RulesConfig(
+            checkpoint_location=obj.checkpoint_location,
+        )
+
+    def to_api_obj(self) -> WorkspaceV1WorkspaceConfigSpecRules:
+        return WorkspaceV1WorkspaceConfigSpecRules(
+            checkpoint_location=self.checkpoint_location,
+        )
+
+
 class DefaultConfig(BaseModel):
     """
-    Configuration of the schemas, notebook storage locations, checkpoint
-    storage locations, and so forth, for each concrete resource type and
-    a global fallback that applies to resources which do not have a
-    specified DefaultConfig.
+    (DEPRECATED) Configuration of the schemas, notebook storage locations,
+    checkpoint storage locations, and so forth, for each concrete resource
+    type and a global fallback that applies to resources which do not have a
+    specified DefaultConfig. While it does still work, this field is
+    deprecated and should not be used; see DatasourcesConfig and RulesConfig
+    for alternatives.
 
     Attributes:
         datasources (Optional[DefaultConfig.Config]):
@@ -369,8 +450,11 @@ class DefaultConfig(BaseModel):
 
     @staticmethod
     def from_api_obj(
-        obj: WorkspaceV1WorkspaceConfigSpecDefaultConfig,
-    ) -> "DefaultConfig":
+        obj: Optional[WorkspaceV1WorkspaceConfigSpecDefaultConfig],
+    ) -> Optional["DefaultConfig"]:
+        if obj is None:
+            return None
+
         return DefaultConfig(
             datasources=DefaultConfig.Config.from_api_obj(obj.datasources),
             transforms=DefaultConfig.Config.from_api_obj(obj.transforms),
@@ -579,8 +663,15 @@ class WorkspaceConfig(BaseModel):
         dasl_custom_presets_path (Optional[str]):
            An optional path to a directory containing user defined presets.
         default_config (Optional[DefaultConfig]):
-            Configuration settings regarding storage of bronze, silver, and
-            gold tables and related assets for each resource type.
+            (DEPRECATED) Configuration settings regarding storage of bronze,
+            silver, and gold tables and related assets for each resource type.
+        default_custom_notebook_location (Optional[str]):
+            The storage location for custom user-provided notebooks. Also
+            used as the prefix for relative paths to custom notebooks.
+        datasources (Optional[DatasourcesConfig]):
+            Configuration items that apply specifically to datasources.
+        rules (Optional[RulesConfig]):
+            Configuration items that apply specifically to rules.
         managed_retention (Optional[List[ManagedRetention]]):
             Configuration of regular cleanup (i.e. pruning) jobs for various
             catalogs, schemas, and tables.
@@ -598,6 +689,9 @@ class WorkspaceConfig(BaseModel):
     dasl_storage_path: Optional[str] = None
     dasl_custom_presets_path: Optional[str] = None
     default_config: Optional[DefaultConfig] = None
+    default_custom_notebook_location: Optional[str] = None
+    datasources: Optional[DatasourcesConfig] = None
+    rules: Optional[RulesConfig] = None
     managed_retention: Optional[List[ManagedRetention]] = None
     status: Optional[ResourceStatus] = None
 
@@ -628,6 +722,9 @@ class WorkspaceConfig(BaseModel):
            dasl_storage_path=spec.dasl_storage_path,
            dasl_custom_presets_path=spec.dasl_custom_presets_path,
            default_config=DefaultConfig.from_api_obj(spec.default_config),
+            default_custom_notebook_location=spec.default_custom_notebook_location,
+            datasources=DatasourcesConfig.from_api_obj(spec.datasources),
+            rules=RulesConfig.from_api_obj(spec.rules),
            managed_retention=managed_retention,
            status=ResourceStatus.from_api_obj(obj.status),
        )
@@ -658,6 +755,9 @@ class WorkspaceConfig(BaseModel):
                dasl_storage_path=self.dasl_storage_path,
                dasl_custom_presets_path=self.dasl_custom_presets_path,
                default_config=Helpers.maybe(to_api_obj, self.default_config),
+                default_custom_notebook_location=self.default_custom_notebook_location,
+                datasources=Helpers.maybe(to_api_obj, self.datasources),
+                rules=Helpers.maybe(to_api_obj, self.rules),
                managed_retention=managed_retention,
            ),
            status=Helpers.maybe(to_api_obj, self.status),
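
Taken together, the workspace_config.py hunks move workspace configuration from the deprecated per-resource-type DefaultConfig to top-level datasources and rules blocks, plus a default_custom_notebook_location. A hedged construction sketch: the field names come from this diff, while the values and any omitted required fields are illustrative.

# Sketch only: field names per this diff; values and omitted fields illustrative.
config = WorkspaceConfig(
    default_custom_notebook_location="/Shared/dasl/notebooks",
    datasources=DatasourcesConfig(
        catalog_name="prod",
        bronze_schema="bronze",
        silver_schema="silver",
        gold_schema="gold",
        checkpoint_location="/Volumes/prod/dasl/checkpoints",
    ),
    rules=RulesConfig(
        checkpoint_location="/Volumes/prod/dasl/rule_checkpoints",
    ),
    # default_config=... is still accepted, but deprecated in favor of the above.
)
# Marshaling follows the hunks above: optional sub-configs go through
# Helpers.maybe(to_api_obj, ...) when building the API spec.
api_obj = config.to_api_obj()

The test changes below exercise exactly this shape against a live workspace.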
dasl_client-1.0.22.dist-info/METADATA → dasl_client-1.0.23.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dasl_client
-Version: 1.0.22
+Version: 1.0.23
 Summary: The DASL client library used for interacting with the DASL workspace
 Home-page: https://github.com/antimatter/asl
 Author: Antimatter Team
dasl_client-1.0.22.dist-info/RECORD → dasl_client-1.0.23.dist-info/RECORD RENAMED
@@ -10,8 +10,8 @@ dasl_client/errors/__init__.py,sha256=lpH2HGF5kCRTk6MxpPEyY9ulTvsLBFKb4NnLuFFLZZ
 dasl_client/errors/errors.py,sha256=u-B8dR8zlxdNVeEdHi6UozX178jwJJ5ZJOGl9YjONRc,4008
 dasl_client/preset_development/__init__.py,sha256=9yC4gmQfombvYLThzo0pSfT5JMolfNVWFVQIuIg_XUA,131
 dasl_client/preset_development/errors.py,sha256=jsqBFMZtl7uHi6O9bBHnOt0UQ4WM9KN9x0uYtf5c268,5482
-dasl_client/preset_development/preview_engine.py,sha256=D8e3Ohds0KtyPec-iWJknh9GvlPbwHufOFF1gtj62kE,15735
-dasl_client/preset_development/preview_parameters.py,sha256=YjSJ00mEUcqF5KkJEPW6Wif8I4iaMIMxJeUSuyIS4x0,14640
+dasl_client/preset_development/preview_engine.py,sha256=mNDLuuTVXKenRa-jhr5-xQtonsLGIC6ZyD3asSFFf8A,14745
+dasl_client/preset_development/preview_parameters.py,sha256=h0DwYFKdA3qAvBJD5Kww21uOPpsfYS9DVF1ssJ1m6Gs,15743
 dasl_client/preset_development/stage.py,sha256=2FPOZvb_bCVpjrY5TsYB05BD4KYbrhgfAe9uZCQFkOk,23397
 dasl_client/types/__init__.py,sha256=GsXC3eWuv21VTLPLPH9pzM95JByaKnKrPjJkh2rlZfQ,170
 dasl_client/types/admin_config.py,sha256=Kmx3Kuai9_LWMeO2NpWasRUgLihYSEXGtuYVfG0FkjU,2200
@@ -21,16 +21,16 @@ dasl_client/types/dbui.py,sha256=k2WXNjfrEjXa-5iBlZ17pvFAs_jgbd-ir5NJl5sXvpA,160
 dasl_client/types/helpers.py,sha256=gLGTvrssAKrdkQT9h80twEosld2egwhvj-zAudxWFPs,109
 dasl_client/types/rule.py,sha256=BqhWhT8Eh95UXNytd0PxVcjqYuWQcdN1tfKjUB4Tk74,25781
 dasl_client/types/types.py,sha256=DeUOfdYGOhUGEy7yKOfo0OYTXYRrs57yYgNLUbu7Tlc,8806
-dasl_client/types/workspace_config.py,sha256=RThg_THS_4leITWdzBPTWdR2ytq5Uk36m6nIOUMzFCM,24878
+dasl_client/types/workspace_config.py,sha256=sknQcLjZ7efwn2iBOVwxBj4oqO6xVwaBZVEbmU-UJbc,28661
 test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/conftest.py,sha256=ZfNULJxVQ609GHxw9UsreTcbQMl3gbcTP_DKT1oySwQ,440
 test/constants.py,sha256=ed3xiemWDJVBlHDwn-iQToCbcaXD3AN-5r8HkURCqBs,438
 test/test_api_changes.py,sha256=RzLauhCkwLmf_gK5yZZ7R7TI9803XCGr-YCyv_jSc94,3827
-test/test_api_surface.py,sha256=nOxoxg9mVSpHLtEDiK98qbAarXsUzC3zTIUZ4e4KLAI,10940
-test/test_databricks_secret_auth.py,sha256=P1seBBHOLcCzJPLdRZlJZxeG62GUFKFbjsY8c7gTT_8,3613
-test/test_marshaling.py,sha256=DLy5C1lBAon9oD55tzrh98cbcii6OmpTPP4CBm4cvu0,37816
-dasl_client-1.0.22.dist-info/LICENSE,sha256=M35UepUPyKmFkvENlkweeaMElheQqNoM5Emh8ADO-rs,4
-dasl_client-1.0.22.dist-info/METADATA,sha256=pmJrFcebmhZ5GXiENQrjQoR_aVYmLIoZNBO5xItlaDs,741
-dasl_client-1.0.22.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-dasl_client-1.0.22.dist-info/top_level.txt,sha256=943P5S_qILHKZYxAvxPUeqOzM2yV18d5SBVKxzPw2OE,17
-dasl_client-1.0.22.dist-info/RECORD,,
+test/test_api_surface.py,sha256=SnJQtaWIfeuTcySFeGfj6cVDksmLyIa8BmblB0t-ZJg,11014
+test/test_databricks_secret_auth.py,sha256=w0ZX23j9gDbJtZuNdZus3joUmdD5U2rX8Qrs0vbqMd4,3736
+test/test_marshaling.py,sha256=ltMuJCqBMpqMpchQ1ZjdO3vrGk_ef1NR3yd07SHV7gU,38212
+dasl_client-1.0.23.dist-info/LICENSE,sha256=M35UepUPyKmFkvENlkweeaMElheQqNoM5Emh8ADO-rs,4
+dasl_client-1.0.23.dist-info/METADATA,sha256=LMDZqT-XHfQgsccfU8YwbAeatbsPl9TFekjj0Nyk7us,741
+dasl_client-1.0.23.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+dasl_client-1.0.23.dist-info/top_level.txt,sha256=943P5S_qILHKZYxAvxPUeqOzM2yV18d5SBVKxzPw2OE,17
+dasl_client-1.0.23.dist-info/RECORD,,
test/test_api_surface.py CHANGED
@@ -47,13 +47,15 @@ def test_workspace_config(api_client):
            catalog_name="automated_test_cases",
            var_schema="default",
        ),
-        default_config=DefaultConfig(
-            var_global=DefaultConfig.Config(
-                bronze_schema="bronze",
-                silver_schema="silver",
-                gold_schema="gold",
-                catalog_name="automated_test_cases",
-            ),
+        default_custom_notebook_location="/Users/test/notebooks",
+        datasources=DatasourcesConfig(
+            bronze_schema="bronze",
+            silver_schema="silver",
+            gold_schema="gold",
+            catalog_name="automated_test_cases",
+        ),
+        rules=RulesConfig(
+            checkpoint_location="/Users/test/checkpoints",
        ),
    )
 
@@ -69,7 +71,7 @@ def test_workspace_config(api_client):
 
    assert api_client.get_config() == base_workspace_config
 
-    base_workspace_config.default_config.var_global.bronze_schema = "bronze_new"
+    base_workspace_config.datasources.bronze_schema = "bronze_new"
    api_client.put_config(base_workspace_config)
    got = api_client.get_config()
    base_workspace_config.metadata.modified_timestamp = got.metadata.modified_timestamp
test/test_databricks_secret_auth.py CHANGED
@@ -10,6 +10,7 @@ from .constants import *
 
 pylib_volume_path = os.environ["PYLIB_VOLUME_PATH"]
 pylib_wheel_path = os.environ["PYLIB_WHEEL_PATH"]
+api_wheel_path = os.environ["API_WHEEL_PATH"]
 
 
 def test_secret_auth(api_client):
@@ -18,6 +19,8 @@ def test_secret_auth(api_client):
 
    # need to do an API operation using databricks secret auth.
    notebook_data = f"""
+%pip uninstall -y dasl-client dasl-api
+%pip install {api_wheel_path}
 %pip install {pylib_wheel_path}
 dbutils.library.restartPython()
 # COMMAND ----------
test/test_marshaling.py CHANGED
@@ -72,6 +72,17 @@ def test_workspace_config_marshal_unmarshal():
            relationships=["rel1", "rel2"],
        ),
        dasl_storage_path="/random/storage/path",
+        default_custom_notebook_location="/tmp/notebooks",
+        datasources=DatasourcesConfig(
+            catalog_name="test_catalog",
+            bronze_schema="bronze",
+            silver_schema="silver",
+            gold_schema="gold",
+            checkpoint_location="/tmp/checkpoints",
+        ),
+        rules=RulesConfig(
+            checkpoint_location="/tmp/checkpoints",
+        ),
        default_config=DefaultConfig(
            datasources=DefaultConfig.Config(
                notebook_location="notebook_ds",