databricks-labs-lakebridge 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. databricks/__init__.py +3 -0
  2. databricks/labs/__init__.py +3 -0
  3. databricks/labs/lakebridge/__about__.py +2 -0
  4. databricks/labs/lakebridge/__init__.py +11 -0
  5. databricks/labs/lakebridge/assessments/configure_assessment.py +194 -0
  6. databricks/labs/lakebridge/assessments/pipeline.py +188 -0
  7. databricks/labs/lakebridge/assessments/profiler_config.py +30 -0
  8. databricks/labs/lakebridge/base_install.py +12 -0
  9. databricks/labs/lakebridge/cli.py +449 -0
  10. databricks/labs/lakebridge/config.py +192 -0
  11. databricks/labs/lakebridge/connections/__init__.py +0 -0
  12. databricks/labs/lakebridge/connections/credential_manager.py +89 -0
  13. databricks/labs/lakebridge/connections/database_manager.py +98 -0
  14. databricks/labs/lakebridge/connections/env_getter.py +13 -0
  15. databricks/labs/lakebridge/contexts/__init__.py +0 -0
  16. databricks/labs/lakebridge/contexts/application.py +133 -0
  17. databricks/labs/lakebridge/coverage/__init__.py +0 -0
  18. databricks/labs/lakebridge/coverage/commons.py +223 -0
  19. databricks/labs/lakebridge/coverage/lakebridge_snow_transpilation_coverage.py +29 -0
  20. databricks/labs/lakebridge/coverage/local_report.py +9 -0
  21. databricks/labs/lakebridge/coverage/sqlglot_snow_transpilation_coverage.py +5 -0
  22. databricks/labs/lakebridge/coverage/sqlglot_tsql_transpilation_coverage.py +5 -0
  23. databricks/labs/lakebridge/deployment/__init__.py +0 -0
  24. databricks/labs/lakebridge/deployment/configurator.py +199 -0
  25. databricks/labs/lakebridge/deployment/dashboard.py +140 -0
  26. databricks/labs/lakebridge/deployment/installation.py +125 -0
  27. databricks/labs/lakebridge/deployment/job.py +147 -0
  28. databricks/labs/lakebridge/deployment/recon.py +145 -0
  29. databricks/labs/lakebridge/deployment/table.py +30 -0
  30. databricks/labs/lakebridge/deployment/upgrade_common.py +124 -0
  31. databricks/labs/lakebridge/discovery/table.py +36 -0
  32. databricks/labs/lakebridge/discovery/table_definition.py +23 -0
  33. databricks/labs/lakebridge/discovery/tsql_table_definition.py +185 -0
  34. databricks/labs/lakebridge/errors/exceptions.py +1 -0
  35. databricks/labs/lakebridge/helpers/__init__.py +0 -0
  36. databricks/labs/lakebridge/helpers/db_sql.py +24 -0
  37. databricks/labs/lakebridge/helpers/execution_time.py +20 -0
  38. databricks/labs/lakebridge/helpers/file_utils.py +64 -0
  39. databricks/labs/lakebridge/helpers/metastore.py +164 -0
  40. databricks/labs/lakebridge/helpers/recon_config_utils.py +176 -0
  41. databricks/labs/lakebridge/helpers/string_utils.py +62 -0
  42. databricks/labs/lakebridge/helpers/telemetry_utils.py +13 -0
  43. databricks/labs/lakebridge/helpers/validation.py +101 -0
  44. databricks/labs/lakebridge/install.py +849 -0
  45. databricks/labs/lakebridge/intermediate/__init__.py +0 -0
  46. databricks/labs/lakebridge/intermediate/dag.py +88 -0
  47. databricks/labs/lakebridge/intermediate/engine_adapter.py +0 -0
  48. databricks/labs/lakebridge/intermediate/root_tables.py +44 -0
  49. databricks/labs/lakebridge/jvmproxy.py +56 -0
  50. databricks/labs/lakebridge/lineage.py +42 -0
  51. databricks/labs/lakebridge/reconcile/__init__.py +0 -0
  52. databricks/labs/lakebridge/reconcile/compare.py +414 -0
  53. databricks/labs/lakebridge/reconcile/connectors/__init__.py +0 -0
  54. databricks/labs/lakebridge/reconcile/connectors/data_source.py +72 -0
  55. databricks/labs/lakebridge/reconcile/connectors/databricks.py +87 -0
  56. databricks/labs/lakebridge/reconcile/connectors/jdbc_reader.py +41 -0
  57. databricks/labs/lakebridge/reconcile/connectors/oracle.py +108 -0
  58. databricks/labs/lakebridge/reconcile/connectors/secrets.py +30 -0
  59. databricks/labs/lakebridge/reconcile/connectors/snowflake.py +173 -0
  60. databricks/labs/lakebridge/reconcile/connectors/source_adapter.py +30 -0
  61. databricks/labs/lakebridge/reconcile/connectors/sql_server.py +132 -0
  62. databricks/labs/lakebridge/reconcile/constants.py +37 -0
  63. databricks/labs/lakebridge/reconcile/exception.py +42 -0
  64. databricks/labs/lakebridge/reconcile/execute.py +920 -0
  65. databricks/labs/lakebridge/reconcile/query_builder/__init__.py +0 -0
  66. databricks/labs/lakebridge/reconcile/query_builder/aggregate_query.py +293 -0
  67. databricks/labs/lakebridge/reconcile/query_builder/base.py +138 -0
  68. databricks/labs/lakebridge/reconcile/query_builder/count_query.py +33 -0
  69. databricks/labs/lakebridge/reconcile/query_builder/expression_generator.py +292 -0
  70. databricks/labs/lakebridge/reconcile/query_builder/hash_query.py +91 -0
  71. databricks/labs/lakebridge/reconcile/query_builder/sampling_query.py +123 -0
  72. databricks/labs/lakebridge/reconcile/query_builder/threshold_query.py +231 -0
  73. databricks/labs/lakebridge/reconcile/recon_capture.py +635 -0
  74. databricks/labs/lakebridge/reconcile/recon_config.py +363 -0
  75. databricks/labs/lakebridge/reconcile/recon_output_config.py +85 -0
  76. databricks/labs/lakebridge/reconcile/runner.py +97 -0
  77. databricks/labs/lakebridge/reconcile/sampler.py +239 -0
  78. databricks/labs/lakebridge/reconcile/schema_compare.py +126 -0
  79. databricks/labs/lakebridge/resources/__init__.py +0 -0
  80. databricks/labs/lakebridge/resources/config/credentials.yml +33 -0
  81. databricks/labs/lakebridge/resources/reconcile/__init__.py +0 -0
  82. databricks/labs/lakebridge/resources/reconcile/dashboards/__init__.py +0 -0
  83. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/00_0_aggregate_recon_header.md +6 -0
  84. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  85. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_1_executed_by.filter.yml +5 -0
  86. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/01_2_started_at.filter.yml +5 -0
  87. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  88. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_1_source_table.filter.yml +5 -0
  89. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/02_2_target_table.filter.yml +5 -0
  90. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/04_0_aggregate_summary_table.sql +46 -0
  91. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/05_0_aggregate_recon_drilldown_header.md +2 -0
  92. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_0_recon_id.filter.yml +5 -0
  93. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_1_category.filter.yml +5 -0
  94. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/06_2_aggregate_type.filter.yml +5 -0
  95. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_0_target_table.filter.yml +4 -0
  96. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/07_1_source_table.filter.yml +4 -0
  97. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/08_0_aggregate_details_table.sql +92 -0
  98. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/09_0_aggregate_missing_mismatch_header.md +1 -0
  99. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/10_0_aggr_mismatched_records.sql +19 -0
  100. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_0_aggr_missing_in_databricks.sql +19 -0
  101. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/11_1_aggr_missing_in_source.sql +19 -0
  102. databricks/labs/lakebridge/resources/reconcile/dashboards/aggregate_reconciliation_metrics/dashboard.yml +365 -0
  103. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/00_0_recon_main.md +3 -0
  104. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_0_recon_id.filter.yml +6 -0
  105. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_1_report_type.filter.yml +5 -0
  106. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/01_2_executed_by.filter.yml +5 -0
  107. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_0_source_type.filter.yml +5 -0
  108. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_1_source_table.filter.yml +6 -0
  109. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/02_2_target_table.filter.yml +6 -0
  110. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/03_0_started_at.filter.yml +5 -0
  111. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/05_0_summary_table.sql +38 -0
  112. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/06_0_schema_comparison_header.md +3 -0
  113. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/07_0_schema_details_table.sql +42 -0
  114. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/08_0_drill_down_header.md +3 -0
  115. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_0_recon_id.filter.yml +4 -0
  116. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/09_1_category.filter.yml +4 -0
  117. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_0_target_table.filter.yml +4 -0
  118. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/10_1_source_table.filter.yml +4 -0
  119. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/11_0_recon_details_pivot.sql +40 -0
  120. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/12_0_daily_data_validation_issue_header.md +3 -0
  121. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/13_0_success_fail_.filter.yml +4 -0
  122. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/14_0_failed_recon_ids.sql +15 -0
  123. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_0_total_failed_runs.sql +10 -0
  124. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_1_failed_targets.sql +10 -0
  125. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/15_2_successful_targets.sql +10 -0
  126. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/16_0_missing_mismatch_header.md +1 -0
  127. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_0_mismatched_records.sql +14 -0
  128. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/17_1_threshold_mismatches.sql +14 -0
  129. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_0_missing_in_databricks.sql +14 -0
  130. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/18_1_missing_in_source.sql +14 -0
  131. databricks/labs/lakebridge/resources/reconcile/dashboards/reconciliation_metrics/dashboard.yml +545 -0
  132. databricks/labs/lakebridge/resources/reconcile/queries/__init__.py +0 -0
  133. databricks/labs/lakebridge/resources/reconcile/queries/installation/__init__.py +0 -0
  134. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_details.sql +7 -0
  135. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_metrics.sql +15 -0
  136. databricks/labs/lakebridge/resources/reconcile/queries/installation/aggregate_rules.sql +6 -0
  137. databricks/labs/lakebridge/resources/reconcile/queries/installation/details.sql +7 -0
  138. databricks/labs/lakebridge/resources/reconcile/queries/installation/main.sql +24 -0
  139. databricks/labs/lakebridge/resources/reconcile/queries/installation/metrics.sql +21 -0
  140. databricks/labs/lakebridge/transpiler/__init__.py +0 -0
  141. databricks/labs/lakebridge/transpiler/execute.py +423 -0
  142. databricks/labs/lakebridge/transpiler/lsp/__init__.py +0 -0
  143. databricks/labs/lakebridge/transpiler/lsp/lsp_engine.py +564 -0
  144. databricks/labs/lakebridge/transpiler/sqlglot/__init__.py +0 -0
  145. databricks/labs/lakebridge/transpiler/sqlglot/dialect_utils.py +30 -0
  146. databricks/labs/lakebridge/transpiler/sqlglot/generator/__init__.py +0 -0
  147. databricks/labs/lakebridge/transpiler/sqlglot/generator/databricks.py +771 -0
  148. databricks/labs/lakebridge/transpiler/sqlglot/lca_utils.py +138 -0
  149. databricks/labs/lakebridge/transpiler/sqlglot/local_expression.py +197 -0
  150. databricks/labs/lakebridge/transpiler/sqlglot/parsers/__init__.py +0 -0
  151. databricks/labs/lakebridge/transpiler/sqlglot/parsers/oracle.py +23 -0
  152. databricks/labs/lakebridge/transpiler/sqlglot/parsers/presto.py +202 -0
  153. databricks/labs/lakebridge/transpiler/sqlglot/parsers/snowflake.py +535 -0
  154. databricks/labs/lakebridge/transpiler/sqlglot/sqlglot_engine.py +203 -0
  155. databricks/labs/lakebridge/transpiler/transpile_engine.py +49 -0
  156. databricks/labs/lakebridge/transpiler/transpile_status.py +68 -0
  157. databricks/labs/lakebridge/uninstall.py +28 -0
  158. databricks/labs/lakebridge/upgrades/v0.4.0_add_main_table_operation_name_column.py +80 -0
  159. databricks/labs/lakebridge/upgrades/v0.6.0_alter_metrics_datatype.py +51 -0
  160. databricks_labs_lakebridge-0.10.0.dist-info/METADATA +58 -0
  161. databricks_labs_lakebridge-0.10.0.dist-info/RECORD +171 -0
  162. databricks_labs_lakebridge-0.10.0.dist-info/WHEEL +4 -0
  163. databricks_labs_lakebridge-0.10.0.dist-info/entry_points.txt +2 -0
  164. databricks_labs_lakebridge-0.10.0.dist-info/licenses/LICENSE +69 -0
  165. databricks_labs_lakebridge-0.10.0.dist-info/licenses/NOTICE +42 -0
  166. docs/lakebridge/src/components/Button.tsx +81 -0
  167. docs/lakebridge/src/css/custom.css +167 -0
  168. docs/lakebridge/src/css/table.css +20 -0
  169. docs/lakebridge/src/pages/index.tsx +57 -0
  170. docs/lakebridge/src/theme/Footer/index.tsx +24 -0
  171. docs/lakebridge/src/theme/Layout/index.tsx +18 -0
@@ -0,0 +1,164 @@
+ import functools
+ import logging
+ from itertools import chain
+
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.errors import NotFound
+ from databricks.sdk.service.catalog import (
+     CatalogInfo,
+     Privilege,
+     SchemaInfo,
+     SecurableType,
+     VolumeInfo,
+     VolumeType,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class CatalogOperations:
+     def __init__(self, ws: WorkspaceClient):
+         self._ws = ws
+
+     def get_catalog(self, name: str) -> CatalogInfo | None:
+         try:
+             return self._ws.catalogs.get(name)
+         except NotFound:
+             return None
+
+     def get_schema(self, catalog_name: str, schema_name: str) -> SchemaInfo | None:
+         try:
+             return self._ws.schemas.get(f"{catalog_name}.{schema_name}")
+         except NotFound:
+             return None
+
+     def get_volume(self, catalog: str, schema: str, name: str) -> VolumeInfo | None:
+         try:
+             return self._ws.volumes.read(f"{catalog}.{schema}.{name}")
+         except NotFound:
+             return None
+
+     def create_catalog(self, name: str) -> CatalogInfo:
+         logger.debug(f"Creating catalog `{name}`.")
+         catalog_info = self._ws.catalogs.create(name)
+         logger.info(f"Created catalog `{name}`.")
+         return catalog_info
+
+     def create_schema(self, schema_name: str, catalog_name: str) -> SchemaInfo:
+         logger.debug(f"Creating schema `{schema_name}` in catalog `{catalog_name}`.")
+         schema_info = self._ws.schemas.create(schema_name, catalog_name)
+         logger.info(f"Created schema `{schema_name}` in catalog `{catalog_name}`.")
+         return schema_info
+
+     def create_volume(
+         self,
+         catalog: str,
+         schema: str,
+         name: str,
+         volume_type: VolumeType = VolumeType.MANAGED,
+     ) -> VolumeInfo:
+         logger.debug(f"Creating volume `{name}` in catalog `{catalog}` and schema `{schema}`")
+         volume_info = self._ws.volumes.create(catalog, schema, name, volume_type)
+         logger.info(f"Created volume `{name}` in catalog `{catalog}` and schema `{schema}`")
+         return volume_info
+
+     def has_catalog_access(
+         self,
+         catalog: CatalogInfo,
+         user_name: str,
+         privilege_sets: tuple[set[Privilege], ...],
+     ) -> bool:
+         """
+         Check if a user has access to a catalog based on ownership or a set of privileges.
+         :param catalog: A catalog to check access for.
+         :param user_name: Username to check.
+         :param privilege_sets: A tuple of sets, where each set contains Privilege objects.
+                The function checks if the user has any of these sets of privileges. For example:
+                ({Privilege.ALL_PRIVILEGES}, {Privilege.USE_CATALOG, Privilege.APPLY_TAG})
+                In this case, the user would need either ALL_PRIVILEGES,
+                or both USE_CATALOG and APPLY_TAG.
+         """
+         if user_name == catalog.owner:
+             return True
+
+         return any(
+             self.has_privileges(user_name, SecurableType.CATALOG, catalog.name, privilege_set)
+             for privilege_set in privilege_sets
+         )
+
+     def has_schema_access(
+         self,
+         schema: SchemaInfo,
+         user_name: str,
+         privilege_sets: tuple[set[Privilege], ...],
+     ) -> bool:
+         """
+         Check if a user has access to a schema based on ownership or a set of privileges.
+         :param schema: A schema to check access for.
+         :param user_name: Username to check.
+         :param privilege_sets: The function checks if the user has any of these sets of privileges. For example:
+                ({Privilege.ALL_PRIVILEGES}, {Privilege.USE_SCHEMA, Privilege.CREATE_TABLE})
+                In this case, the user would need either ALL_PRIVILEGES,
+                or both USE_SCHEMA and CREATE_TABLE.
+         """
+         if user_name == schema.owner:
+             return True
+
+         return any(
+             self.has_privileges(user_name, SecurableType.SCHEMA, schema.full_name, privilege_set)
+             for privilege_set in privilege_sets
+         )
+
+     def has_volume_access(
+         self,
+         volume: VolumeInfo,
+         user_name: str,
+         privilege_sets: tuple[set[Privilege], ...],
+     ) -> bool:
+         """
+         Check if a user has access to a volume based on ownership or a set of privileges.
+         :param volume: A volume to check access for.
+         :param user_name: Username to check.
+         :param privilege_sets: The function checks if the user has any of these sets of privileges. For example:
+                ({Privilege.ALL_PRIVILEGES}, {Privilege.READ_VOLUME, Privilege.WRITE_VOLUME})
+                In this case, the user would need either ALL_PRIVILEGES,
+                or both READ_VOLUME and WRITE_VOLUME.
+         """
+         if user_name == volume.owner:
+             return True
+
+         return any(
+             self.has_privileges(user_name, SecurableType.VOLUME, volume.full_name, privilege_set)
+             for privilege_set in privilege_sets
+         )
+
+     def has_privileges(
+         self,
+         user: str | None,
+         securable_type: SecurableType,
+         full_name: str | None,
+         privileges: set[Privilege],
+     ) -> bool:
+         """
+         Check if a user has a set of privileges for a securable object.
+         """
+         assert user, "User must be provided"
+         assert full_name, "Full name must be provided"
+         user_privileges = self._get_user_privileges(user, securable_type, full_name)
+         result = privileges.issubset(user_privileges)
+         if not result:
+             logger.debug(f"User {user} doesn't have privilege set {privileges} for {securable_type} {full_name}")
+         return result
+
+     @functools.lru_cache(maxsize=1024)
+     def _get_user_privileges(self, user: str, securable_type: SecurableType, full_name: str) -> set[Privilege]:
+         permissions = self._ws.grants.get_effective(securable_type, full_name, principal=user)
+         if not permissions or not permissions.privilege_assignments:
+             return set()
+         return {
+             p.privilege
+             for p in chain.from_iterable(
+                 privilege.privileges for privilege in permissions.privilege_assignments if privilege.privileges
+             )
+             if p.privilege
+         }
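The hunk above (164 added lines, which lines up with databricks/labs/lakebridge/helpers/metastore.py in the file list) wraps Unity Catalog lookups, creation, and privilege checks. A minimal usage sketch of the privilege-set semantics described in the docstrings — the catalog name, user, and privilege combinations are illustrative, and workspace credentials are assumed to come from the standard SDK environment:

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import Privilege

from databricks.labs.lakebridge.helpers.metastore import CatalogOperations

ws = WorkspaceClient()  # assumes auth via env vars or a configured profile
ops = CatalogOperations(ws)

# Reuse the catalog if it exists, otherwise create it (name is illustrative).
catalog = ops.get_catalog("lakebridge_demo") or ops.create_catalog("lakebridge_demo")

# Each inner set is an alternative: the user needs ALL_PRIVILEGES, or both
# USE_CATALOG and APPLY_TAG, unless they already own the catalog.
allowed = ops.has_catalog_access(
    catalog,
    "someone@example.com",
    ({Privilege.ALL_PRIVILEGES}, {Privilege.USE_CATALOG, Privilege.APPLY_TAG}),
)
print(f"catalog access granted: {allowed}")
```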
@@ -0,0 +1,176 @@
+ import logging
+
+ from databricks.labs.blueprint.tui import Prompts
+ from databricks.labs.lakebridge.reconcile.constants import ReconSourceType
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.errors.platform import ResourceDoesNotExist
+
+ logger = logging.getLogger(__name__)
+
+
+ class ReconConfigPrompts:
+     def __init__(self, ws: WorkspaceClient, prompts: Prompts = Prompts()):
+         self._source = None
+         self._prompts = prompts
+         self._ws = ws
+
+     def _scope_exists(self, scope_name: str) -> bool:
+         scope_exists = scope_name in [scope.name for scope in self._ws.secrets.list_scopes()]
+
+         if not scope_exists:
+             logger.error(
+                 f"Error: Cannot find Secret Scope: `{scope_name}` in Databricks Workspace."
+                 f"\nUse `remorph configure-secrets` to setup Scope and Secrets"
+             )
+             return False
+         logger.debug(f"Found Scope: `{scope_name}` in Databricks Workspace")
+         return True
+
+     def _ensure_scope_exists(self, scope_name: str):
+         """
+         Get or Create a new Scope in Databricks Workspace
+         :param scope_name:
+         """
+         scope_exists = self._scope_exists(scope_name)
+         if not scope_exists:
+             allow_scope_creation = self._prompts.confirm("Do you want to create a new one?")
+             if not allow_scope_creation:
+                 msg = "Scope is needed to store Secrets in Databricks Workspace"
+                 raise SystemExit(msg)
+
+             try:
+                 logger.debug(f" Creating a new Scope: `{scope_name}`")
+                 self._ws.secrets.create_scope(scope_name)
+             except Exception as ex:
+                 logger.error(f"Exception while creating Scope `{scope_name}`: {ex}")
+                 raise ex
+
+             logger.info(f" Created a new Scope: `{scope_name}`")
+         logger.info(f" Using Scope: `{scope_name}`...")
+
+     def _secret_key_exists(self, scope_name: str, secret_key: str) -> bool:
+         try:
+             self._ws.secrets.get_secret(scope_name, secret_key)
+             logger.info(f"Found Secret key `{secret_key}` in Scope `{scope_name}`")
+             return True
+         except ResourceDoesNotExist:
+             logger.debug(f"Secret key `{secret_key}` not found in Scope `{scope_name}`")
+             return False
+
+     def _store_secret(self, scope_name: str, secret_key: str, secret_value: str):
+         try:
+             logger.debug(f"Storing Secret: *{secret_key}* in Scope: `{scope_name}`")
+             self._ws.secrets.put_secret(scope=scope_name, key=secret_key, string_value=secret_value)
+         except Exception as ex:
+             logger.error(f"Exception while storing Secret `{secret_key}`: {ex}")
+             raise ex
+
+     def store_connection_secrets(self, scope_name: str, conn_details: tuple[str, dict[str, str]]):
+         engine = conn_details[0]
+         secrets = conn_details[1]
+
+         logger.debug(f"Storing `{engine}` Connection Secrets in Scope: `{scope_name}`")
+
+         for key, value in secrets.items():
+             secret_key = key
+             logger.debug(f"Processing Secret: *{secret_key}*")
+             debug_op = "Storing"
+             info_op = "Stored"
+             if self._secret_key_exists(scope_name, secret_key):
+                 overwrite_secret = self._prompts.confirm(f"Do you want to overwrite `{secret_key}`?")
+                 if not overwrite_secret:
+                     continue
+                 debug_op = "Overwriting"
+                 info_op = "Overwritten"
+
+             logger.debug(f"{debug_op} Secret: *{secret_key}* in Scope: `{scope_name}`")
+             self._store_secret(scope_name, secret_key, value)
+             logger.info(f"{info_op} Secret: *{secret_key}* in Scope: `{scope_name}`")
+
+     def prompt_source(self):
+         source = self._prompts.choice(
+             "Select the source dialect", [source_type.value for source_type in ReconSourceType]
+         )
+         self._source = source
+         return source
+
+     def _prompt_snowflake_connection_details(self) -> tuple[str, dict[str, str]]:
+         """
+         Prompt for Snowflake connection details
+         :return: tuple[str, dict[str, str]]
+         """
+         logger.info(
+             f"Please answer a couple of questions to configure `{ReconSourceType.SNOWFLAKE.value}` Connection profile"
+         )
+
+         sf_url = self._prompts.question("Enter Snowflake URL")
+         account = self._prompts.question("Enter Account Name")
+         sf_user = self._prompts.question("Enter User")
+         sf_password = self._prompts.question("Enter Password")
+         sf_db = self._prompts.question("Enter Database")
+         sf_schema = self._prompts.question("Enter Schema")
+         sf_warehouse = self._prompts.question("Enter Snowflake Warehouse")
+         sf_role = self._prompts.question("Enter Role", default=" ")
+
+         sf_conn_details = {
+             "sfUrl": sf_url,
+             "account": account,
+             "sfUser": sf_user,
+             "sfPassword": sf_password,
+             "sfDatabase": sf_db,
+             "sfSchema": sf_schema,
+             "sfWarehouse": sf_warehouse,
+             "sfRole": sf_role,
+         }
+
+         sf_conn_dict = (ReconSourceType.SNOWFLAKE.value, sf_conn_details)
+         return sf_conn_dict
+
+     def _prompt_oracle_connection_details(self) -> tuple[str, dict[str, str]]:
+         """
+         Prompt for Oracle connection details
+         :return: tuple[str, dict[str, str]]
+         """
+         logger.info(
+             f"Please answer a couple of questions to configure `{ReconSourceType.ORACLE.value}` Connection profile"
+         )
+         user = self._prompts.question("Enter User")
+         password = self._prompts.question("Enter Password")
+         host = self._prompts.question("Enter host")
+         port = self._prompts.question("Enter port")
+         database = self._prompts.question("Enter database/SID")
+
+         oracle_conn_details = {"user": user, "password": password, "host": host, "port": port, "database": database}
+
+         oracle_conn_dict = (ReconSourceType.ORACLE.value, oracle_conn_details)
+         return oracle_conn_dict
+
+     def _connection_details(self):
+         """
+         Prompt for connection details based on the source
+         :return: None
+         """
+         logger.debug(f"Prompting for `{self._source}` connection details")
+         match self._source:
+             case ReconSourceType.SNOWFLAKE.value:
+                 return self._prompt_snowflake_connection_details()
+             case ReconSourceType.ORACLE.value:
+                 return self._prompt_oracle_connection_details()
+
+     def prompt_and_save_connection_details(self):
+         """
+         Prompt for connection details and save them as Secrets in Databricks Workspace
+         """
+         # prompt for connection_details only if source is other than Databricks
+         if self._source == ReconSourceType.DATABRICKS.value:
+             logger.info("*Databricks* as a source is supported only for **Hive MetaStore (HMS) setup**")
+             return
+
+         # Prompt for secret scope
+         scope_name = self._prompts.question("Enter Secret Scope name")
+         self._ensure_scope_exists(scope_name)
+
+         # Prompt for connection details
+         connection_details = self._connection_details()
+         logger.debug(f"Storing `{self._source}` connection details as Secrets in Databricks Workspace...")
+         self.store_connection_secrets(scope_name, connection_details)
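This hunk (176 added lines, which lines up with databricks/labs/lakebridge/helpers/recon_config_utils.py) drives the interactive setup that stores reconciliation source credentials as workspace secrets. A sketch of the intended call sequence, assuming an interactive terminal for the blueprint Prompts and SDK-provided workspace credentials:

```python
from databricks.sdk import WorkspaceClient

from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts

ws = WorkspaceClient()  # assumes standard SDK authentication
recon_prompts = ReconConfigPrompts(ws)

# Pick the source dialect (Snowflake, Oracle, ...), then collect its connection
# details and store them as secrets in the chosen scope.
recon_prompts.prompt_source()
recon_prompts.prompt_and_save_connection_details()
```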
@@ -0,0 +1,62 @@
+ import codecs
+
+
+ # Optionally check to see if a string begins with a Byte Order Mark
+ # such a character will cause the transpiler to fail
+ def remove_bom(input_string: str) -> str:
+     """
+     Removes the Byte Order Mark (BOM) from the given string if it exists.
+     :param input_string: String to remove BOM from
+     :return: String without BOM
+     """
+     output_string = input_string
+
+     # Check and remove UTF-16 (LE and BE) BOM
+     if input_string.startswith(codecs.BOM_UTF16_BE.decode("utf-16-be")):
+         output_string = input_string[len(codecs.BOM_UTF16_BE.decode("utf-16-be")) :]
+     elif input_string.startswith(codecs.BOM_UTF16_LE.decode("utf-16-le")):
+         output_string = input_string[len(codecs.BOM_UTF16_LE.decode("utf-16-le")) :]
+     elif input_string.startswith(codecs.BOM_UTF16.decode("utf-16")):
+         output_string = input_string[len(codecs.BOM_UTF16.decode("utf-16")) :]
+     # Check and remove UTF-32 (LE and BE) BOM
+     elif input_string.startswith(codecs.BOM_UTF32_BE.decode("utf-32-be")):
+         output_string = input_string[len(codecs.BOM_UTF32_BE.decode("utf-32-be")) :]
+     elif input_string.startswith(codecs.BOM_UTF32_LE.decode("utf-32-le")):
+         output_string = input_string[len(codecs.BOM_UTF32_LE.decode("utf-32-le")) :]
+     elif input_string.startswith(codecs.BOM_UTF32.decode("utf-32")):
+         output_string = input_string[len(codecs.BOM_UTF32.decode("utf-32")) :]
+     # Check and remove UTF-8 BOM
+     elif input_string.startswith(codecs.BOM_UTF8.decode("utf-8")):
+         output_string = input_string[len(codecs.BOM_UTF8.decode("utf-8")) :]
+
+     return output_string
+
+
+ def refactor_hexadecimal_chars(input_string: str) -> str:
+     """
+     Updates the HexaDecimal characters ( \x1b[\\d+m ) in the given string as below.
+     :param input_string: String with HexaDecimal characters. ex: ( \x1b[4mWHERE\x1b[0m )
+     :return: String with HexaDecimal characters refactored to arrows. ex: ( --> WHERE <--)
+     """
+     output_string = input_string
+     highlight = {"\x1b[4m": "--> ", "\x1b[0m": " <--"}
+     for key, value in highlight.items():
+         output_string = output_string.replace(key, value)
+     return output_string
+
+
+ def format_error_message(error_type: str, error_message: Exception, error_sql: str) -> str:
+     """
+     Formats the error message with the error SQL.
+     :param error_type: Error Type
+     :param error_message: Error message
+     :param error_sql: Error SQL
+     :return: Formatted error message
+     """
+     error_str = (
+         f"------------------------ {error_type} Start:------------------------\n"
+         f"/*\n{str(error_message)}\n*/\n\n"
+         f"/*\nOriginal Query:\n\n{str(error_sql)}\n*/\n"
+         f"------------------------- {error_type} End:-------------------------"
+     ).strip()
+     return error_str
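These string helpers (62 added lines, matching databricks/labs/lakebridge/helpers/string_utils.py) are small and self-contained, so their behavior can be checked directly; the sample strings below are made up for illustration:

```python
from databricks.labs.lakebridge.helpers.string_utils import refactor_hexadecimal_chars, remove_bom

bom_query = "\ufeffSELECT 1"  # a decoded BOM at the start of the text
assert remove_bom(bom_query) == "SELECT 1"

highlighted = "syntax error near \x1b[4mWHERE\x1b[0m"
print(refactor_hexadecimal_chars(highlighted))  # syntax error near --> WHERE <--
```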
@@ -0,0 +1,13 @@
+ from databricks.sdk.useragent import alphanum_pattern, semver_pattern
+
+
+ def make_alphanum_or_semver(value: str) -> str:
+     if alphanum_pattern.match(value) or semver_pattern.match(value):
+         return value
+     # assume it's not a semver, replace illegal alphanum chars
+     result = []
+     for char in value:
+         if not alphanum_pattern.match(char):
+             char = '_'
+         result.append(char)
+     return "".join(result)
@@ -0,0 +1,101 @@
+ import logging
+ from io import StringIO
+
+ from databricks.labs.lsql.backends import SqlBackend
+ from databricks.labs.lakebridge.config import TranspileConfig, ValidationResult
+ from databricks.sdk.errors.base import DatabricksError
+
+ logger = logging.getLogger(__name__)
+
+
+ class Validator:
+     """
+     The Validator class is used to validate SQL queries.
+     """
+
+     def __init__(self, sql_backend: SqlBackend):
+         self._sql_backend = sql_backend
+
+     def validate_format_result(self, config: TranspileConfig, sql_text: str) -> ValidationResult:
+         """
+         Validates the SQL query and formats the result.
+
+         This function validates the SQL query based on the provided configuration. If the query is valid,
+         it appends a semicolon to the end of the query. If the query is not valid, it formats the error message.
+
+         Parameters:
+         - config (MorphConfig): The configuration for the validation.
+         - sql_text (str): The SQL query to be validated.
+
+         Returns:
+         - tuple: A tuple containing the result of the validation and the exception message (if any).
+         """
+         logger.debug(f"Validating query with catalog {config.catalog_name} and schema {config.schema_name}")
+         (is_valid, exception_type, exception_msg) = self._query(
+             self._sql_backend,
+             sql_text,
+             config.catalog_name,
+             config.schema_name,
+         )
+         if is_valid:
+             result = sql_text
+             if exception_type is not None:
+                 exception_msg = f"[{exception_type.upper()}]: {exception_msg}"
+         else:
+             query = ""
+             if "[UNRESOLVED_ROUTINE]" in str(exception_msg):
+                 query = sql_text
+             buffer = StringIO()
+             buffer.write("-------------- Exception Start-------------------\n")
+             buffer.write("/* \n")
+             buffer.write(str(exception_msg))
+             buffer.write("\n */ \n")
+             buffer.write(query)
+             buffer.write("\n ---------------Exception End --------------------\n")
+
+             result = buffer.getvalue()
+
+         return ValidationResult(result, exception_msg)
+
+     def _query(
+         self, sql_backend: SqlBackend, query: str, catalog: str, schema: str
+     ) -> tuple[bool, str | None, str | None]:
+         """
+         Validate a given SQL query using the provided SQL backend
+
+         Parameters:
+         - query (str): The SQL query to be validated.
+         - sql_backend (SqlBackend): The SQL backend to be used for validation.
+
+         Returns:
+         - tuple: A tuple containing a boolean indicating whether the query is valid or not,
+         and a string containing a success message or an exception message.
+         """
+         # When variables is mentioned Explain fails we need way to replace them before explain is executed.
+         explain_query = f'EXPLAIN {query.replace("${", "`{").replace("}", "}`").replace("``", "`")}'
+         try:
+             rows = list(sql_backend.fetch(explain_query, catalog=catalog, schema=schema))
+             if not rows:
+                 return False, "error", "No results returned from explain query."
+
+             if "Error occurred during query planning" in rows[0].asDict().get("plan", ""):
+                 error_details = rows[1].asDict().get("plan", "Unknown error.") if len(rows) > 1 else "Unknown error."
+                 raise DatabricksError(error_details)
+             return True, None, None
+         except DatabricksError as dbe:
+             err_msg = str(dbe)
+             if "[PARSE_SYNTAX_ERROR]" in err_msg:
+                 logger.debug(f"Syntax Exception : NOT IGNORED. Flag as syntax error: {err_msg}")
+                 return False, "error", err_msg
+             if "[UNRESOLVED_ROUTINE]" in err_msg:
+                 logger.debug(f"Analysis Exception : NOT IGNORED: Flag as Function Missing error {err_msg}")
+                 return False, "error", err_msg
+             if "[TABLE_OR_VIEW_NOT_FOUND]" in err_msg or "[TABLE_OR_VIEW_ALREADY_EXISTS]" in err_msg:
+                 logger.debug(f"Analysis Exception : IGNORED: {err_msg}")
+                 return True, "warning", err_msg
+             if "Hive support is required to CREATE Hive TABLE (AS SELECT).;" in err_msg:
+                 logger.debug(f"Analysis Exception : IGNORED: {err_msg}")
+                 return True, "warning", err_msg
+
+             logger.debug(f"Unknown Exception: {err_msg}")
+             return False, "error", err_msg