dasl-client 1.0.23__tar.gz → 1.0.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)
  1. {dasl_client-1.0.23 → dasl_client-1.0.25}/PKG-INFO +7 -23
  2. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/__init__.py +1 -0
  3. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/client.py +240 -23
  4. dasl_client-1.0.25/dasl_client/exec_rule.py +92 -0
  5. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/helpers.py +1 -1
  6. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/preset_development/errors.py +42 -0
  7. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/preset_development/preview_engine.py +106 -25
  8. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/preset_development/preview_parameters.py +206 -94
  9. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/preset_development/stage.py +87 -24
  10. dasl_client-1.0.25/dasl_client/regions.json +3 -0
  11. dasl_client-1.0.25/dasl_client/regions.py +18 -0
  12. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/datasource.py +51 -0
  13. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/rule.py +33 -0
  14. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/workspace_config.py +13 -1
  15. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client.egg-info/PKG-INFO +7 -23
  16. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client.egg-info/SOURCES.txt +4 -9
  17. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client.egg-info/requires.txt +1 -1
  18. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client.egg-info/top_level.txt +0 -1
  19. {dasl_client-1.0.23 → dasl_client-1.0.25}/pyproject.toml +9 -3
  20. dasl_client-1.0.23/README.md +0 -19
  21. dasl_client-1.0.23/test/__init__.py +0 -0
  22. dasl_client-1.0.23/test/conftest.py +0 -18
  23. dasl_client-1.0.23/test/constants.py +0 -10
  24. dasl_client-1.0.23/test/test_api_changes.py +0 -137
  25. dasl_client-1.0.23/test/test_api_surface.py +0 -306
  26. dasl_client-1.0.23/test/test_databricks_secret_auth.py +0 -119
  27. dasl_client-1.0.23/test/test_marshaling.py +0 -921
  28. {dasl_client-1.0.23 → dasl_client-1.0.25}/LICENSE +0 -0
  29. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/auth/__init__.py +0 -0
  30. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/auth/auth.py +0 -0
  31. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/conn/__init__.py +0 -0
  32. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/conn/client_identifier.py +0 -0
  33. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/conn/conn.py +0 -0
  34. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/errors/__init__.py +0 -0
  35. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/errors/errors.py +0 -0
  36. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/preset_development/__init__.py +0 -0
  37. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/__init__.py +0 -0
  38. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/admin_config.py +0 -0
  39. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/content.py +0 -0
  40. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/dbui.py +0 -0
  41. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/helpers.py +0 -0
  42. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/types/types.py +0 -0
  43. {dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client.egg-info/dependency_links.txt +0 -0
  44. {dasl_client-1.0.23 → dasl_client-1.0.25}/setup.cfg +0 -0
  45. {dasl_client-1.0.23 → dasl_client-1.0.25}/setup.py +0 -0
{dasl_client-1.0.23 → dasl_client-1.0.25}/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: dasl_client
- Version: 1.0.23
+ Version: 1.0.25
  Summary: The DASL client library used for interacting with the DASL workspace
  Home-page: https://github.com/antimatter/asl
  Author: Antimatter Team
@@ -8,27 +8,11 @@ Author-email: Antimatter Team <support@antimatter.io>
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: dasl_api==0.1.20
+ Requires-Dist: dasl_api==0.1.24
  Requires-Dist: databricks-sdk>=0.41.0
  Requires-Dist: pydantic>=2
  Requires-Dist: typing_extensions>=4.10.0
-
- # DASL Client Library
-
- This client library is used for interacting with the DASL services in python.
-
- ## Requirements
- Python:
- - wheel
- - setuptools
- - asl_api
-
- Other:
- - Earthly
-
- ## Build
-
- To build manually here:
- ```bash
- python setup.py sdist bdist_wheel
- ```
+ Dynamic: author
+ Dynamic: home-page
+ Dynamic: license-file
+ Dynamic: requires-python
{dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/__init__.py

@@ -3,3 +3,4 @@ from dasl_api.api import *
  from .errors import *
  from .client import Client
  from .types import *
+ from .regions import *
{dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/client.py

@@ -1,5 +1,5 @@
  from copy import deepcopy
- from datetime import datetime, timedelta
+ from datetime import datetime, timedelta, timezone
  from time import sleep
  from typing import Any, Callable, Iterator, List, Optional, Tuple, TypeVar
  from pydantic import Field
@@ -8,6 +8,14 @@ from pyspark.sql import DataFrame
  from dasl_api import (
  CoreV1Api,
  DbuiV1Api,
+ DbuiV1QueryGenerateRequest,
+ DbuiV1QueryGenerateRequestTimeRange,
+ DbuiV1QueryGenerateStatus,
+ DbuiV1QueryLookupRequest,
+ DbuiV1QueryLookupResult,
+ DbuiV1QueryHistogramRequest,
+ DbuiV1QueryHistogramResult,
+ DbuiV1QueryLookupRequestPagination,
  ContentV1Api,
  WorkspaceV1Api,
  WorkspaceV1CreateWorkspaceRequest,
@@ -22,6 +30,8 @@ from dasl_client.auth.auth import (
  from dasl_client.conn.conn import get_base_conn
  from dasl_client.errors.errors import ConflictError, error_handler
  from .helpers import Helpers
+ from .exec_rule import ExecRule
+ from .regions import Regions

  from .types import (
  AdminConfig,
@@ -67,7 +77,8 @@ class Client:
  service_principal_id: str,
  service_principal_secret: str,
  workspace_url: Optional[str] = None,
- dasl_host: str = Helpers.default_dasl_host,
+ region: str = Helpers.default_region,
+ dasl_host: Optional[str] = None,
  ) -> "Client":
  """
  Register a new workspace and return a client for it.
@@ -85,11 +96,15 @@
  being registered. If you omit this value, it will be inferred
  if you are running within a Databricks notebook. Otherwise, an
  exception will be raised.
+ :param region: The name of the DASL region.
  :param dasl_host: The URL of the DASL server. This value should
- not generally be specified unless you are testing against
- an alternative environment.
+ not generally be specified. When specified, this value
+ overrides region.
  :returns: Client for the newly created workspace.
  """
+ if dasl_host is None:
+ dasl_host = Regions.lookup(region)
+
  with error_handler():
  if workspace_url is None:
  workspace_url = Helpers.current_workspace_url()
@@ -120,7 +135,8 @@
  def for_workspace(
  workspace_url: Optional[str] = None,
  service_account_token: Optional[str] = None,
- dasl_host: str = Helpers.default_dasl_host,
+ region: str = Helpers.default_region,
+ dasl_host: Optional[str] = None,
  ) -> "Client":
  """
  Create a client for the argument workspace, if specified, or
@@ -133,11 +149,15 @@
  :param service_account_token: Antimatter service account token.
  If provided, the client will use this token for auth instead
  of (automatic) secret-based auth.
+ :param region: The name of the DASL region.
  :param dasl_host: The URL of the DASL server. This value should
- not generally be specified unless you are testing against
- an alternative environment.
+ not generally be specified. When specified, this value
+ overrides region.
  :returns: Client for the existing workspace.
  """
+ if dasl_host is None:
+ dasl_host = Regions.lookup(region)
+
  with error_handler():
  if workspace_url is None:
  workspace_url = Helpers.current_workspace_url()
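
For orientation, a minimal sketch of how the new region parameter might be used. It is not part of the diff: it assumes the code runs inside a Databricks notebook (so workspace_url can be inferred) and that Regions is importable from the package root via the new `from .regions import *` export.

```python
from dasl_client import Client, Regions

# Default behaviour: the DASL host is resolved from the region name
# (Helpers.default_region is "us-east-1" in this release).
client = Client.for_workspace(region="us-east-1")

# The same lookup done explicitly; an explicit dasl_host overrides region.
host = Regions.lookup("us-east-1")
client = Client.for_workspace(dasl_host=host)
```
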
@@ -166,7 +186,8 @@
  service_principal_secret: str,
  workspace_url: Optional[str] = None,
  service_account_token: Optional[str] = None,
- dasl_host: str = Helpers.default_dasl_host,
+ region: str = Helpers.default_region,
+ dasl_host: Optional[str] = None,
  ) -> "Client":
  """
  Initialize a new client for the workspace associated with the
@@ -197,9 +218,10 @@
  If provided, the client will use this token for auth instead
  of (automatic) secret-based auth. Ignored if the workspace
  doesn't exist.
+ :param region: The name of the DASL region.
  :param dasl_host: The URL of the DASL server. This value should
- not generally be specified unless you are testing against
- an alternative environment.
+ not generally be specified. When specified, this value
+ overrides region.
  :returns: Client for the newly created or existing workspace.
  """
  try:
@@ -209,11 +231,12 @@
  service_principal_id,
  service_principal_secret,
  workspace_url,
+ region,
  dasl_host,
  )
  except ConflictError:
  result = Client.for_workspace(
- workspace_url, service_account_token, dasl_host
+ workspace_url, service_account_token, region, dasl_host
  )
  result.put_admin_config(
  AdminConfig(
@@ -572,21 +595,27 @@
  return Rule.from_api_obj(result)

  def exec_rule(
- self, rule_in: Rule, df: DataFrame
- ) -> Tuple[DataFrame, Optional[DataFrame]]:
+ self,
+ spark,
+ rule_in: Rule,
+ ) -> ExecRule:
  """
  Locally execute a Rule. Must be run from within a Databricks
  notebook or else an exception will be raised. This is intended
  to facilitate Rule development.

+ :param spark: Spark context from Databricks notebook. Will be
+ injected into the execution environment for use by the
+ Rule notebook.
  :param rule_in: The specification of the Rule to execute.
- :param df: The DataFrame to use as the input to the Rule.
- :returns Tuple[DataFrame, Optional[DataFrame]]: The first
- element of the tuple contains the notables produced by
- the rule, and the second element contains the observables
- or None if no observables were produced.
+ :returns ExecRule: A class containing various information and
+ functionality relating to the execution. See the docs for
+ ExecRule for additional details, but note that you must
+ call its cleanup function or tables created just for this
+ request will leak.
  """
  Helpers.ensure_databricks()
+
  with error_handler():
  result = self._core_client().core_v1_render_rule(
  self._workspace(),
@@ -602,9 +631,8 @@
  f"%pip install {result.notebook_utils_path}"
  )

- namespace = {}
- exec(result.content, namespace)
- return namespace["generate"](df)
+ exec(result.content, {"spark": spark})
+ return ExecRule(spark, result.tables)

  def adhoc_transform(
  self,
@@ -637,9 +665,9 @@
  self._workspace(), status.id
  )

- if status.status == "failure":
+ if status.status == "failed":
  raise Exception(f"adhoc transform failed with {status.error}")
- elif status.status == "success":
+ elif status.status == "succeeded":
  return TransformResponse.from_api_obj(status.result)

  raise Exception("timed out waiting for adhoc transform result")
@@ -711,3 +739,192 @@
  """
  with error_handler():
  self._content_client().content_v1_preset_purge_cache(self._workspace())
+
+ def generate_query(
+ self,
+ sql: str,
+ warehouse: Optional[str] = None,
+ start_date: Optional[str] = None,
+ end_date: Optional[str] = None,
+ ) -> str:
+ """
+ Generate a query from the given SQL.
+
+ :param sql: The SQL to use to create the query data set.
+ :param warehouse: The SQL warehouse used to execute the SQL. If
+ omitted, the default SQL warehouse specified in the workspace
+ config will be used.
+ :param start_date: The optional starting date to filter by for
+ the provided sql used to create the data set. Only rows with
+ their time column (see the time_col parameter) greater than
+ or equal to this value will be included in the data set. You
+ must specify a value for this parameter if you wish to filter
+ by time. Valid values include actual timestamps and computed
+ timestamps (such as now()).
+ :param end_date: The optional ending date to filter by for the
+ provided sql used to create the data set. The same caveats
+ apply as with the start_date parameter. However, this parameter
+ is not required and if omitted when a start_date is provided,
+ the current date will be used.
+ :returns str: The ID of the query generation operation. This value
+ can be used with get_query_status to track the progress of
+ the generation process, and eventually to perform lookups
+ on the completed query.
+ """
+ time_range = None
+ if start_date is not None or end_date is not None:
+ time_range = DbuiV1QueryGenerateRequestTimeRange(
+ startDate=start_date,
+ endDate=end_date,
+ )
+
+ req = DbuiV1QueryGenerateRequest(
+ warehouse=warehouse,
+ sql=sql,
+ timeRange=time_range,
+ )
+
+ with error_handler():
+ return (
+ self._dbui_client()
+ .dbui_v1_query_generate(
+ self._workspace(),
+ req,
+ )
+ .id
+ )
+
+ def get_query_status(
+ self,
+ id: str,
+ ) -> DbuiV1QueryGenerateStatus:
+ """
+ Check the status of a query generation operation. Since generation
+ happens in the background, it is up to the caller to check the
+ status until the return value's status member is either equal to
+ "succeeded" or "failed".
+
+ :param id: The id of the query generation operation.
+ :returns DbuiV1QueryGenerateStatus: The important field is
+ status (as used in the example code).
+
+ The following example demonstrates usage of the API.
+
+ Example:
+ id = client.generate_query("SELECT now() as time")
+ result = None
+ while True:
+ time.sleep(3)
+ status = client.get_query_status(id)
+ if status.status == "failed":
+ raise Exception("query failed")
+ if status.status == "succeeded":
+ break
+ """
+ with error_handler():
+ return self._dbui_client().dbui_v1_query_generate_status(
+ self._workspace(),
+ id,
+ )
+
+ def query_lookup(
+ self,
+ id: str,
+ warehouse: Optional[str] = None,
+ pagination: Optional[DbuiV1QueryLookupRequestPagination] = None,
+ start_value: Optional[str] = None,
+ row_count: Optional[int] = None,
+ refinements: Optional[List[str]] = None,
+ ) -> DbuiV1QueryLookupResult:
+ """
+ Perform a lookup on a query, which applies refinements to the
+ query and returns the results.
+
+ :param id: The query ID returned from query_generate and
+ get_query_status.
+ :param warehouse: The optional SQL warehouse ID to use to compute
+ the results. If not specified, uses the default SQL warehouse
+ configured for the workspace.
+ :param pagination: A sequence of fields and a direction that can
+ be applied to a lookup request. If 'fetchPreceding' is true,
+ the prior n rows up to the first row that matches the provided
+ fields will be returned. Otherwise, the n rows following the
+ first row that matches the provided fields will be returned.
+ :param start_value: An optional start value to constrain the data
+ being returned. This will be applied to the primary ordering
+ column if provided, before any refinements.
+ :param row_count: The maximum number of rows to include in a page.
+ Defaults to 1000, and must be in the range [1,1000].
+ :param refinements: Pipeline filters to be applied to the result.
+ Any SQL which is valid as a pipeline stage (i.e. coming between
+ |> symbols) is valid here, such as ORDER BY id, or WHERE
+ column = 'value'.
+ """
+ with error_handler():
+ return self._dbui_client().dbui_v1_query_lookup(
+ self._workspace(),
+ id,
+ DbuiV1QueryLookupRequest(
+ warehouse=warehouse,
+ startValue=start_value,
+ pagination=pagination,
+ rowCount=row_count,
+ refinements=refinements,
+ ),
+ )
+
+ def query_histogram(
+ self,
+ id: str,
+ interval: str,
+ warehouse: Optional[str] = None,
+ start_date: str = None,
+ end_date: Optional[str] = None,
+ refinements: Optional[List[str]] = None,
+ ) -> DbuiV1QueryHistogramResult:
+ """
+ Compute a histogram over a query, applying refinements and
+ returning a frequency map of rows per interval.
+
+ :param id: The query ID returned from query_generate and
+ get_query_status.
+ :param warehouse: The optional SQL warehouse ID to use to compute
+ the results. If not specified, uses the default SQL warehouse
+ configured for the workspace.
+ :param start_date: The start date filter. The resulting frequency
+ map will be restricted to rows where the time column value
+ is greater than or equal to this value. Valid values include
+ literal timestamps and function calls such as now().
+ :param end_date: The optional end date filter. If specified, the
+ resulting frequency map will contain only rows where the time
+ column value is less than or equal to this value.
+ :param interval: The duration of each interval in the resulting
+ frequency map. This must be an interval string in the format:
+ '1 day', '3 minutes 2 seconds', '2 weeks'.
+ :param refinements: Pipeline filters to be applied to the result.
+ Any SQL which is valid as a pipeline stage (i.e. coming between
+ |> symbols) is valid here, such as ORDER BY id, or WHERE
+ column = 'value'.
+ """
+ with error_handler():
+ return self._dbui_client().dbui_v1_query_histogram(
+ self._workspace(),
+ id,
+ DbuiV1QueryHistogramRequest(
+ warehouse=warehouse,
+ startDate=start_date,
+ endDate=end_date,
+ interval=interval,
+ refinements=refinements,
+ ),
+ )
+
+ def query_cancel(self, id: str) -> None:
+ """
+ Cancel an existing query.
+
+ :param id: The query ID returned from query_generate and
+ get_query_status.
+ """
+ with error_handler():
+ return self._dbui_client().dbui_v1_query_cancel(self._workspace(), id)
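
Taken together, the new client.py endpoints form a generate / poll / read workflow. The sketch below is not part of the diff: the table name, interval, and refinement strings are hypothetical, and it assumes `client` was created as in the earlier example.

```python
import time

# Hypothetical end-to-end use of the new query API (illustrative values).
query_id = client.generate_query("SELECT * FROM main.security.events")

# Generation runs in the background; poll until it settles.
while True:
    time.sleep(3)
    status = client.get_query_status(query_id)
    if status.status == "failed":
        raise Exception(f"query generation failed: {status}")
    if status.status == "succeeded":
        break

# Read a page of results, optionally refined with pipeline-stage SQL.
page = client.query_lookup(query_id, row_count=100, refinements=["ORDER BY time"])

# Bucket the same query into a frequency map of one-hour intervals.
hist = client.query_histogram(query_id, interval="1 hour")

# Discard the query when finished with it.
client.query_cancel(query_id)
```
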
dasl_client-1.0.25/dasl_client/exec_rule.py

@@ -0,0 +1,92 @@
+ from pyspark.sql import DataFrame
+ from typing import List, Optional
+
+ from dasl_api import CoreV1RenderedRuleTables
+
+
+ class ExecRule:
+ """
+ ExecRule result object allowing access to and clean up of tables
+ created as part of the rule rendering endpoint. While the table
+ names are exposed as attributes, there are helper functions for
+ fetching the contents of the most common tables (i.e. notables
+ and observables). In general, it won't be necessary to access
+ these attributes. Note that you must call the cleanup function
+ when you are done with an instance of this class or else tables
+ created as part of rendering and running the rule will be left
+ orphaned in your workspace.
+
+ Attributes:
+ notables_table (str):
+ name of table where notables for the rule execution
+ can be found.
+ observables_table (str):
+ name of table where observables for the rule execution
+ can be found.
+ opals_table (str):
+ name of table where operational alerts for the rule execution
+ can be found.
+ data_metrics_table (str):
+ name of table where rule metrics for the rule execution
+ can be found.
+ stream_metrics_table (str):
+ name of table where stream metrics for the rule execution
+ can be found.
+ observables_acc_table (str):
+ name of table where observables aggregation for the rule
+ execution can be found.
+ """
+
+ def __init__(self, spark, tables: CoreV1RenderedRuleTables):
+ self.spark = spark
+ self.notables_table = tables.notables
+ self.observables_table = tables.observables
+ self.opals_table = tables.operational_alerts
+ self.data_metrics_table = tables.data_metrics
+ self.stream_metrics_table = tables.stream_metrics
+ self.observables_acc_table = tables.observables_accumulation
+
+ def _all_tables(self) -> List[str]:
+ return [
+ self.notables_table,
+ self.observables_table,
+ self.opals_table,
+ self.data_metrics_table,
+ self.stream_metrics_table,
+ self.observables_acc_table,
+ ]
+
+ def cleanup(self):
+ """
+ Clean up when done with this ExecRule instance. This method
+ cleans up temporarily allocated tables used to store the
+ results of the rule execution. Unless you need to preserve
+ the results for some reason, you must call this method or
+ the temporary tables will be orphaned in your workspace.
+ """
+ for table in self._all_tables():
+ self.spark.sql(f"DROP TABLE IF EXISTS {table}")
+
+ def notables(self, limit: Optional[int] = None) -> DataFrame:
+ """
+ Return the contents of the notables table.
+
+ :param limit: optional limit to the number of rows returned.
+ :returns: DataFrame containing the notables table rows.
+ """
+ query = f"SELECT * FROM {self.notables_table}"
+ if limit is not None:
+ query = f"{query} LIMIT {limit}"
+ return self.spark.sql(query)
+
+ def observables(self, limit: Optional[int] = None) -> DataFrame:
+ """
+ Return the contents of the observables table.
+
+ :param limit: optional limit to the number of rows returned.
+ :returns: DataFrame containing the observables table rows.
+ """
+ query = f"SELECT * FROM {self.observables_table}"
+ if limit is not None:
+ query = f"{query} LIMIT {limit}"
+ return self.spark.sql(query)
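
A short sketch of how the reworked exec_rule / ExecRule pair might be used; it is not part of the diff, and it assumes `spark` is the notebook's SparkSession and `rule` is a Rule you have already constructed or fetched through the client.

```python
# Hypothetical usage from a Databricks notebook; `client`, `spark`, and
# `rule` are assumed to exist already.
execution = client.exec_rule(spark, rule)
try:
    # DataFrames backed by the temporary tables named in the attributes above.
    notables_df = execution.notables(limit=100)
    observables_df = execution.observables(limit=100)
    notables_df.show()
finally:
    # Required: drops the temporary tables created for this render/run so
    # they are not left orphaned in the workspace.
    execution.cleanup()
```
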
{dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/helpers.py

@@ -3,7 +3,7 @@ import os


  class Helpers:
- default_dasl_host = "https://api.prod.sl.antimatter.io"
+ default_region = "us-east-1"

  @staticmethod
  def ensure_databricks():
{dasl_client-1.0.23 → dasl_client-1.0.25}/dasl_client/preset_development/errors.py

@@ -75,6 +75,25 @@ class MalformedFieldError(PresetError):
  super().__init__(message)


+ class InvalidLiteralError(PresetError):
+ def __init__(self, stage: str, stage_name: str, field_name: str):
+ self.stage = stage
+ self.stage_name = stage_name
+ self.field_name = field_name
+ message = f"Literal can only be type string in {stage} stage {stage_name}'s field specification named {field_name}."
+ super().__init__(message)
+
+
+ class InvalidFromError(PresetError):
+ def __init__(self, stage: str, stage_name: str, field_name: str, reason: str):
+ self.stage = stage
+ self.stage_name = stage_name
+ self.field_name = field_name
+ self.reason = reason
+ message = f"{reason} in {stage} stage {stage_name}'s field specification named {field_name}."
+ super().__init__(message)
+
+
  class MissingFieldNameError(PresetError):
  def __init__(self, stage: str, stage_name: str):
  self.stage = stage
@@ -107,6 +126,21 @@ class AutoloaderMissingFieldError(PresetError):
  super().__init__(message)


+ class MissingBronzeTablesError(PresetError):
+ def __init__(
+ self,
+ message: str = "Bronze tables mode selected, but no bronze table definitions provided.",
+ ):
+ super().__init__(message)
+
+
+ class MissingBronzeTableFieldError(PresetError):
+ def __init__(self, field_name: str):
+ self.field_name = field_name
+ message = f"A bronze table definition is missing a field {field_name} in provided definitions."
+ super().__init__(message)
+
+
  class UnknownGoldTableError(PresetError):
  def __init__(self, table_name: str, schema: str):
  self.table_name = table_name
@@ -145,6 +179,14 @@ class MissingUtilityConfigurationFieldError(PresetError):
  super().__init__(message)


+ class DisallowedUtilityConfigurationError(PresetError):
+ def __init__(self, operation: str, stage: str):
+ self.operation = operation
+ self.stage = stage
+ message = f"The {operation} utility is disallowed in the {stage} stage."
+ super().__init__(message)
+
+
  class AssertionFailedError(PresetError):
  def __init__(self, expr: str, assertion_message: str, df: DataFrame):
  # Get the Databricks built-in functions out the namespace.