dasl-client 1.0.14__py3-none-any.whl → 1.0.17__py3-none-any.whl

This diff shows the content of publicly available package versions as published to their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of dasl-client might be problematic.

@@ -0,0 +1,125 @@
+import inspect
+import json
+
+import pytest
+
+from datetime import datetime
+from hashlib import md5
+from typing import Optional, Type, Union
+
+from dasl_api.models import *
+from pydantic import BaseModel
+from pydantic.fields import FieldInfo
+
+
+checked_dasl_types = {
+    # Resources
+    WorkspaceV1AdminConfig: "admin_config.json",
+    CoreV1DataSource: "data_source.json",
+    CoreV1Rule: "rule.json",
+    WorkspaceV1WorkspaceConfig: "workspace_config.json",
+    ContentV1DatasourcePreset: "datasource_preset.json",
+    # Data
+    DbuiV1ObservableEventsList: "observable_events_list.json",
+}
+
+
+simple_types = [
+    bool,
+    int,
+    float,
+    str,
+    datetime,
+]
+
+
+def is_simple_type(tpe: Type) -> bool:
+    return tpe in simple_types
+
+
+def is_dasl_api_type(tpe: Type) -> bool:
+    if tpe.__name__ in globals():
+        return "dasl_api" in globals()[tpe.__name__].__module__
+    return False
+
+
+def dasl_model_to_dict(tpe: Type[BaseModel]) -> dict:
+    decorators = getattr(
+        getattr(tpe, "__pydantic_decorators__", None), "field_validators", {}
+    )
+    return {
+        "name": tpe.__name__,
+        "fields": [
+            field_to_dict(name, field, decorators)
+            for name, field in tpe.model_fields.items()
+        ],
+    }
+
+
+def field_to_dict(name: str, field: FieldInfo, validators: dict) -> dict:
+    d = {
+        "name": name,
+        "alias": field.alias,
+        "is_required": field.is_required(),
+        "is_nullable": is_nullable(field.annotation),
+        "is_sequence": is_sequence(field.annotation),
+        "validation_hash": field_validation_hash(name, validators),
+    }
+    field_type: Union[*simple_types, BaseModel] = inner_type(field.annotation)
+    if is_simple_type(field_type):
+        d["type"] = field_type.__name__
+    elif is_dasl_api_type(field_type):
+        d["type"] = dasl_model_to_dict(field_type)
+    else:
+        raise Exception(
+            f"unsupported field type {field_type} encountered while converting field - {name}: {field}"
+        )
+    return d
+
+
+def is_sequence(tpe: Type) -> bool:
+    seq_types = [list, set, frozenset, tuple]
+    if tpe in seq_types:
+        return True
+    if hasattr(tpe, "__origin__"):
+        if tpe.__origin__ in seq_types:
+            return True
+    if hasattr(tpe, "__args__"):
+        return is_sequence(tpe.__args__[0])
+    return False
+
+
+def is_nullable(tpe: Type) -> bool:
+    return hasattr(tpe, "__args__") and type(None) in tpe.__args__
+
+
+def field_validation_hash(field_name: str, validators: dict) -> Optional[str]:
+    for validator in validators.values():
+        if hasattr(validator, "info") and hasattr(validator.info, "fields"):
+            if field_name in validator.info.fields:
+                return md5(
+                    inspect.getsource(validator.func).encode("utf-8")
+                ).hexdigest()
+    return None
+
+
+def inner_type(tpe: Type) -> Type:
+    if hasattr(tpe, "__args__"):
+        return inner_type(tpe.__args__[0])
+    return tpe
+
+
+def dasl_model_to_string(tpe: Type[BaseModel]) -> str:
+    d = dasl_model_to_dict(tpe)
+    return json.dumps(d, indent=2, sort_keys=True)
+
+
+@pytest.mark.parametrize(
+    "tpe",
+    checked_dasl_types.keys(),
+    ids=[f"{tpe.__name__} model is unchanged" for tpe in checked_dasl_types.keys()],
+)
+def test_api_model_for_changes(tpe):
+    with open(f"test/expected_api_models/{checked_dasl_types[tpe]}", "r") as f:
+        expected_val = f.read()
+    assert dasl_model_to_string(tpe) == expected_val
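
The test above flattens each generated pydantic model into a canonical JSON description (field names, aliases, nullability, validator source hashes) and compares it against a fixture on disk, so any upstream change to the dasl_api models fails loudly. When a model change is intentional, the fixtures have to be rewritten; below is a minimal sketch of a regeneration helper reusing dasl_model_to_string and checked_dasl_types from the test module. The script and the module name test_api_models are assumptions for illustration, not part of the package.

# regenerate_fixtures.py -- hypothetical helper, not shipped with dasl-client.
import os

# assumes the test module shown above is importable as test_api_models
from test_api_models import checked_dasl_types, dasl_model_to_string

if __name__ == "__main__":
    os.makedirs("test/expected_api_models", exist_ok=True)
    for tpe, filename in checked_dasl_types.items():
        path = f"test/expected_api_models/{filename}"
        with open(path, "w") as f:
            # write exactly what test_api_model_for_changes will assert against
            f.write(dasl_model_to_string(tpe))
        print(f"wrote {path} for {tpe.__name__}")
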
@@ -0,0 +1,300 @@
+from dasl_client import *
+
+from .constants import *
+
+
+def test_admin_config(api_client):
+    base_admin_config = AdminConfig(
+        workspace_url=databricks_host,
+        app_client_id=app_client_id,
+        service_principal_id=databricks_client_id,
+        service_principal_secret="********",
+    )
+
+    ac = api_client.get_admin_config()
+    assert ac == base_admin_config
+
+    other = AdminConfig(
+        workspace_url=databricks_host,
+        app_client_id=alternate_app_client_id,
+        service_principal_id=databricks_client_id,
+        service_principal_secret=databricks_client_secret,
+    )
+    api_client.put_admin_config(other)
+
+    assert api_client.get_admin_config() == AdminConfig(
+        workspace_url=databricks_host,
+        app_client_id=alternate_app_client_id,
+        service_principal_id=databricks_client_id,
+        service_principal_secret="********",
+    )
+
+    ac.service_principal_secret = databricks_client_secret
+    api_client.put_admin_config(ac)
+    assert api_client.get_admin_config() == base_admin_config
+
+
+def test_workspace_config(api_client):
+    base_workspace_config = WorkspaceConfig(
+        metadata=Metadata(
+            name="config",
+            workspace=workspace,
+            client_of_origin=get_client_identifier(),
+        ),
+        dasl_storage_path="/Volumes/automated_test_cases/default/test",
+        system_tables_config=SystemTablesConfig(
+            catalog_name="automated_test_cases",
+            var_schema="default",
+        ),
+        default_config=DefaultConfig(
+            var_global=DefaultConfig.Config(
+                bronze_schema="bronze",
+                silver_schema="silver",
+                gold_schema="gold",
+                catalog_name="automated_test_cases",
+            ),
+        ),
+    )
+
+    api_client.put_config(base_workspace_config)
+    got = api_client.get_config()
+
+    # the server is going to populate created_timestamp, modified_timestamp,
+    # version, and resource_status, so copy those over before comparing.
+    base_workspace_config.metadata.created_timestamp = got.metadata.created_timestamp
+    base_workspace_config.metadata.modified_timestamp = got.metadata.modified_timestamp
+    base_workspace_config.metadata.version = got.metadata.version
+    base_workspace_config.metadata.resource_status = got.metadata.resource_status
+
+    assert api_client.get_config() == base_workspace_config
+
+    base_workspace_config.default_config.var_global.bronze_schema = "bronze_new"
+    api_client.put_config(base_workspace_config)
+    got = api_client.get_config()
+    base_workspace_config.metadata.modified_timestamp = got.metadata.modified_timestamp
+    base_workspace_config.metadata.version = got.metadata.version
+    base_workspace_config.metadata.resource_status = got.metadata.resource_status
+
+    assert api_client.get_config() == base_workspace_config
+
+
+def test_minimal_data_source(api_client):
+    base_data_source = DataSource(
+        source="test",
+        schedule=Schedule(
+            at_least_every="2h",
+            enabled=True,
+        ),
+        bronze=BronzeSpec(
+            bronze_table="test_bronze_table",
+            skip_bronze_loading=False,
+        ),
+        silver=SilverSpec(),
+        gold=GoldSpec(),
+    )
+
+    base_ds_1 = api_client.create_datasource("test_1", base_data_source)
+    assert base_ds_1.source == base_data_source.source
+    assert base_ds_1.schedule == base_data_source.schedule
+    assert base_ds_1.bronze == base_data_source.bronze
+    assert base_ds_1.silver == base_data_source.silver
+    assert base_ds_1.gold == base_data_source.gold
+
+    got = api_client.get_datasource("test_1")
+    listed = []
+    for ds in api_client.list_datasources():
+        listed.append(ds)
+    assert len(listed) == 1
+    assert listed[0] == got
+
+    # the server is going to populate created_timestamp, modified_timestamp,
+    # version, and resource_status, so copy those over before comparing.
+    base_ds_1.metadata.created_timestamp = got.metadata.created_timestamp
+    base_ds_1.metadata.created_by = got.metadata.created_by
+    base_ds_1.metadata.modified_timestamp = got.metadata.modified_timestamp
+    base_ds_1.metadata.version = got.metadata.version
+    base_ds_1.metadata.resource_status = got.metadata.resource_status
+    assert api_client.get_datasource("test_1") == base_ds_1
+
+    base_ds_2 = api_client.create_datasource("test_2", base_data_source)
+    assert base_ds_2.source == base_data_source.source
+    assert base_ds_2.schedule == base_data_source.schedule
+    assert base_ds_2.bronze == base_data_source.bronze
+    assert base_ds_2.silver == base_data_source.silver
+    assert base_ds_2.gold == base_data_source.gold
+
+    got_2 = api_client.get_datasource("test_2")
+    listed = []
+    for ds in api_client.list_datasources():
+        listed.append(ds)
+    assert len(listed) == 2
+    assert listed[0] == got
+    assert listed[1] == got_2
+
+    base_ds_2.metadata.created_timestamp = got_2.metadata.created_timestamp
+    base_ds_2.metadata.created_by = got_2.metadata.created_by
+    base_ds_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
+    base_ds_2.metadata.version = got_2.metadata.version
+    base_ds_2.metadata.resource_status = got_2.metadata.resource_status
+    assert api_client.get_datasource("test_2") == base_ds_2
+
+    base_ds_2.bronze.bronze_table = "test_2"
+    api_client.replace_datasource("test_2", base_ds_2)
+
+    got_2 = api_client.get_datasource("test_2")
+    base_ds_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
+    base_ds_2.metadata.version = got_2.metadata.version
+    base_ds_2.metadata.resource_status = got_2.metadata.resource_status
+
+    assert api_client.get_datasource("test_2") == base_ds_2
+
+    api_client.delete_datasource("test_1")
+    listed = [
+        item
+        for item in api_client.list_datasources()
+        if item.metadata.resource_status != "deletionPending"
+    ]
+    assert len(listed) == 1
+    assert listed[0] == base_ds_2
+
+
+def test_minimal_rule(api_client):
+    base_rule = Rule(
+        schedule=Schedule(
+            at_least_every="2h",
+            enabled=True,
+        ),
+        input=Rule.Input(
+            stream=Rule.Input.Stream(
+                tables=[
+                    Rule.Input.Stream.Table(
+                        name="test",
+                    ),
+                ],
+            ),
+        ),
+        output=Rule.Output(
+            summary="test",
+        ),
+    )
+
+    base_rule_1 = api_client.create_rule("test_0", base_rule)
+    assert base_rule_1.schedule == base_rule.schedule
+    assert base_rule_1.input == base_rule.input
+    assert base_rule_1.output == base_rule.output
+
+    got = api_client.get_rule("test_0")
+    listed = []
+    for rule in api_client.list_rules():
+        listed.append(rule)
+    assert len(listed) == 1
+    assert listed[0] == got
+
+    # the server is going to populate created_timestamp, modified_timestamp,
+    # version, and resource_status, so copy those over before comparing.
+    base_rule_1.metadata.created_timestamp = got.metadata.created_timestamp
+    base_rule_1.metadata.created_by = got.metadata.created_by
+    base_rule_1.metadata.modified_timestamp = got.metadata.modified_timestamp
+    base_rule_1.metadata.version = got.metadata.version
+    base_rule_1.metadata.resource_status = got.metadata.resource_status
+    assert api_client.get_rule("test_0") == base_rule_1
+
+    base_rule_2 = api_client.create_rule("test_1", base_rule)
+    assert base_rule_2.schedule == base_rule.schedule
+    assert base_rule_2.input == base_rule.input
+    assert base_rule_2.output == base_rule.output
+
+    got_2 = api_client.get_rule("test_1")
+    listed = []
+    for rule in api_client.list_rules():
+        listed.append(rule)
+    assert len(listed) == 2
+    assert listed[0] == got
+    assert listed[1] == got_2
+
+    base_rule_2.metadata.created_timestamp = got_2.metadata.created_timestamp
+    base_rule_2.metadata.created_by = got_2.metadata.created_by
+    base_rule_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
+    base_rule_2.metadata.version = got_2.metadata.version
+    base_rule_2.metadata.resource_status = got_2.metadata.resource_status
+    assert api_client.get_rule("test_1") == base_rule_2
+
+    base_rule_2.input.stream.tables[0].name = "test_1"
+    api_client.replace_rule("test_1", base_rule_2)
+
+    got_2 = api_client.get_rule("test_1")
+    base_rule_2.metadata.modified_timestamp = got_2.metadata.modified_timestamp
+    base_rule_2.metadata.version = got_2.metadata.version
+    base_rule_2.metadata.resource_status = got_2.metadata.resource_status
+
+    assert api_client.get_rule("test_1") == base_rule_2
+
+    api_client.delete_rule("test_0")
+    listed = [
+        item
+        for item in api_client.list_rules()
+        if item.metadata.resource_status != "deletionPending"
+    ]
+    assert len(listed) == 1
+    assert listed[0] == base_rule_2
+
+
+def test_list_pagination(api_client):
+    base_rule = Rule(
+        schedule=Schedule(
+            at_least_every="2h",
+            enabled=True,
+        ),
+        input=Rule.Input(
+            stream=Rule.Input.Stream(
+                tables=[
+                    Rule.Input.Stream.Table(
+                        name="test",
+                    ),
+                ],
+            ),
+        ),
+        output=Rule.Output(
+            summary="test",
+        ),
+    )
+
+    # create (remainder of) 10 rules for the test
+    for i in range(8):
+        api_client.create_rule(f"test_{i+2}", base_rule)
+
+    # ensure all rules are returned for a list call with no params
+    listed = []
+    for rule in api_client.list_rules():
+        listed.append(rule)
+    assert len(listed) == 10
+
+    for i in range(10):
+        assert listed[i] == api_client.get_rule(f"test_{i}")
+
+    # ensure the first 5 rules are returned when limit=5
+    listed = []
+    for rule in api_client.list_rules(limit=5):
+        listed.append(rule)
+    assert len(listed) == 5
+
+    for i in range(5):
+        assert listed[i] == api_client.get_rule(f"test_{i}")
+
+    # ensure the last 5 rules are returned when limit=5, cursor=test_4
+    listed = []
+    for rule in api_client.list_rules(cursor="test_4", limit=5):
+        listed.append(rule)
+    assert len(listed) == 5
+
+    for i in range(5):
+        assert listed[i] == api_client.get_rule(f"test_{i+5}")
+
+    # ensure the last 9 rules are returned when cursor=test_0
+    listed = []
+    for rule in api_client.list_rules(cursor="test_0"):
+        listed.append(rule)
+    assert len(listed) == 9
+
+    for i in range(9):
+        assert listed[i] == api_client.get_rule(f"test_{i+1}")
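
Every test in this file takes an api_client fixture and star-imports names such as databricks_host and workspace from a .constants module, neither of which appears in this diff. A minimal conftest.py sketch of what such a fixture could look like, assuming Client.for_workspace is the entry point (the secret-auth notebook in the next file uses it) and with environment-variable names invented for illustration:

# conftest.py -- hypothetical sketch; the real fixture is not shown in this diff.
import os

import pytest

from dasl_client.client import Client


@pytest.fixture(scope="session")
def api_client():
    # Client.for_workspace matches the call made in the secret-auth notebook
    # below; the environment variable names here are assumptions.
    return Client.for_workspace(
        workspace_url=os.environ["DATABRICKS_HOST"],
        dasl_host=os.environ["DASL_HOST"],
    )
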
@@ -0,0 +1,116 @@
+import base64
+import datetime
+import os
+import time
+
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service import jobs, workspace as dbworkspace
+
+from .constants import *
+
+pylib_volume_path = os.environ["PYLIB_VOLUME_PATH"]
+pylib_wheel_path = os.environ["PYLIB_WHEEL_PATH"]
+
+
+def test_secret_auth(api_client):
+    # making sure it's even possible to get a config
+    api_client.get_config()
+
+    # need to do an API operation using databricks secret auth.
+    notebook_data = f"""
+%pip install {pylib_wheel_path}
+dbutils.library.restartPython()
+# COMMAND ----------
+from dasl_client.client import Client
+
+Client.for_workspace(
+    workspace_url="{databricks_host}",
+    dasl_host="{dasl_host}",
+).get_config()
+# COMMAND ----------
+dbutils.notebook.exit("SUCCESS")
+"""
+    print(f"notebook_data={notebook_data}")
+
+    wsc = WorkspaceClient()
+    wsc.workspace.mkdirs(path=pylib_volume_path)
+
+    notebook_path = f"{pylib_volume_path}/test_secret_auth_notebook"
+    wsc.workspace.import_(
+        path=notebook_path,
+        format=dbworkspace.ImportFormat.SOURCE,
+        language=dbworkspace.Language.PYTHON,
+        content=base64.b64encode(notebook_data.encode("utf-8")).decode("utf-8"),
+        overwrite=True,
+    )
+
+    job_id = None
+    try:
+        job_id = wsc.jobs.create(
+            name="run test_secret_auth notebook",
+            tasks=[
+                jobs.Task(
+                    task_key="run_notebook",
+                    notebook_task=jobs.NotebookTask(notebook_path=notebook_path),
+                ),
+            ],
+        ).job_id
+
+        wsc.jobs.run_now(job_id=job_id)
+
+        logs = []
+        start = datetime.datetime.now()
+        complete = False
+        while not complete:
+            elapsed = datetime.datetime.now() - start
+            if elapsed > datetime.timedelta(seconds=300):
+                raise Exception("timed out waiting for job")
+
+            time.sleep(5)
+
+            status, logs = fetch_latest_run_status_and_logs(wsc, job_id)
+            print(f"logs={logs}")
+
+            if status == jobs.TerminationCodeCode.RUN_EXECUTION_ERROR:
+                raise Exception("job terminated with error")
+
+            complete = status == jobs.TerminationCodeCode.SUCCESS
+
+        print(logs)
+        assert len(logs) == 1
+        assert logs[0] == "SUCCESS"
+    finally:
+        wsc.workspace.delete(pylib_volume_path, recursive=True)
+        if job_id is not None:
+            wsc.jobs.delete(job_id=job_id)
+
+
+def fetch_latest_run_status_and_logs(
+    wsc: WorkspaceClient,
+    job_id: str,
+):
+    runs = list(wsc.jobs.list_runs(job_id=job_id, expand_tasks=True))
+    if not runs:
+        return "No runs found", None
+
+    # Find the latest run based on the start time
+    latest_run = max(runs, key=lambda r: r.start_time)
+    if latest_run.status.termination_details is None:
+        return "No runs found", None
+    status = latest_run.status.termination_details.code
+    logs = []
+    for task in latest_run.tasks:
+        output = wsc.jobs.get_run_output(task.run_id)
+        if output.error is not None:
+            logs.append(output.error)
+        elif output.logs is not None:
+            logs.append(output.logs)
+        elif output.notebook_output is not None:
+            logs.append(output.notebook_output.result)
+        elif output.run_job_output is not None:
+            raise Exception("Nested jobs are not supported")
+        elif output.sql_output is not None:
+            raise Exception("SQL jobs are unsupported")
+        else:
+            logs.append("")
+    return status, logs
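
test_secret_auth uploads a generated notebook, runs it as a single-task job, and polls the run's termination code every five seconds until SUCCESS or a 300-second timeout. Because this module reads PYLIB_VOLUME_PATH and PYLIB_WHEEL_PATH at import time, the suite cannot even be collected without them; a hedged launcher sketch follows (the script is not part of the package, and all paths are placeholders):

# run_integration_tests.py -- hypothetical launcher, not shipped with dasl-client.
import os
import sys

import pytest

# Read at import time by the secret-auth test module above; placeholder values.
os.environ.setdefault("PYLIB_VOLUME_PATH", "/Workspace/Shared/dasl-client-tests")
os.environ.setdefault(
    "PYLIB_WHEEL_PATH",
    "/Volumes/main/default/wheels/dasl_client-1.0.17-py3-none-any.whl",
)

if __name__ == "__main__":
    # WorkspaceClient() additionally needs standard Databricks auth settings,
    # e.g. DATABRICKS_HOST and DATABRICKS_TOKEN, in the environment.
    sys.exit(pytest.main(["-v", "test/"]))
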