databricks-sqlalchemy 0.0.1b1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. CHANGELOG.md +274 -0
  2. databricks/sqlalchemy/__init__.py +4 -2
  3. databricks/sqlalchemy/_ddl.py +100 -0
  4. databricks/sqlalchemy/_parse.py +385 -0
  5. databricks/sqlalchemy/_types.py +323 -0
  6. databricks/sqlalchemy/base.py +436 -0
  7. databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
  8. databricks/sqlalchemy/py.typed +0 -0
  9. databricks/sqlalchemy/pytest.ini +4 -0
  10. databricks/sqlalchemy/requirements.py +249 -0
  11. databricks/sqlalchemy/setup.cfg +4 -0
  12. databricks/sqlalchemy/test/_extra.py +70 -0
  13. databricks/sqlalchemy/test/_future.py +331 -0
  14. databricks/sqlalchemy/test/_regression.py +311 -0
  15. databricks/sqlalchemy/test/_unsupported.py +450 -0
  16. databricks/sqlalchemy/test/conftest.py +13 -0
  17. databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
  18. databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
  19. databricks/sqlalchemy/test/test_suite.py +13 -0
  20. databricks/sqlalchemy/test_local/__init__.py +5 -0
  21. databricks/sqlalchemy/test_local/conftest.py +44 -0
  22. databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  23. databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
  24. databricks/sqlalchemy/test_local/test_ddl.py +96 -0
  25. databricks/sqlalchemy/test_local/test_parsing.py +160 -0
  26. databricks/sqlalchemy/test_local/test_types.py +161 -0
  27. databricks_sqlalchemy-1.0.0.dist-info/LICENSE +201 -0
  28. databricks_sqlalchemy-1.0.0.dist-info/METADATA +225 -0
  29. databricks_sqlalchemy-1.0.0.dist-info/RECORD +31 -0
  30. {databricks_sqlalchemy-0.0.1b1.dist-info → databricks_sqlalchemy-1.0.0.dist-info}/WHEEL +1 -1
  31. databricks_sqlalchemy-1.0.0.dist-info/entry_points.txt +3 -0
  32. databricks/__init__.py +0 -7
  33. databricks_sqlalchemy-0.0.1b1.dist-info/METADATA +0 -19
  34. databricks_sqlalchemy-0.0.1b1.dist-info/RECORD +0 -5
databricks/sqlalchemy/base.py
@@ -0,0 +1,436 @@
+from typing import Any, List, Optional, Dict, Union
+
+import databricks.sqlalchemy._ddl as dialect_ddl_impl
+import databricks.sqlalchemy._types as dialect_type_impl
+from databricks import sql
+from databricks.sqlalchemy._parse import (
+    _describe_table_extended_result_to_dict_list,
+    _match_table_not_found_string,
+    build_fk_dict,
+    build_pk_dict,
+    get_fk_strings_from_dte_output,
+    get_pk_strings_from_dte_output,
+    get_comment_from_dte_output,
+    parse_column_info_from_tgetcolumnsresponse,
+)
+
+import sqlalchemy
+from sqlalchemy import DDL, event
+from sqlalchemy.engine import Connection, Engine, default, reflection
+from sqlalchemy.engine.interfaces import (
+    ReflectedForeignKeyConstraint,
+    ReflectedPrimaryKeyConstraint,
+    ReflectedColumn,
+    ReflectedTableComment,
+)
+from sqlalchemy.engine.reflection import ReflectionDefaults
+from sqlalchemy.exc import DatabaseError, SQLAlchemyError
+
+try:
+    import alembic
+except ImportError:
+    pass
+else:
+    from alembic.ddl import DefaultImpl
+
+    class DatabricksImpl(DefaultImpl):
+        __dialect__ = "databricks"
+
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class DatabricksDialect(default.DefaultDialect):
+    """This dialect implements only those methods required to pass our e2e tests"""
+
+    # See sqlalchemy.engine.interfaces for descriptions of each of these properties
+    name: str = "databricks"
+    driver: str = "databricks"
+    default_schema_name: str = "default"
+    preparer = dialect_ddl_impl.DatabricksIdentifierPreparer  # type: ignore
+    ddl_compiler = dialect_ddl_impl.DatabricksDDLCompiler
+    statement_compiler = dialect_ddl_impl.DatabricksStatementCompiler
+    supports_statement_cache: bool = True
+    supports_multivalues_insert: bool = True
+    supports_native_decimal: bool = True
+    supports_sane_rowcount: bool = False
+    non_native_boolean_check_constraint: bool = False
+    supports_identity_columns: bool = True
+    supports_schemas: bool = True
+    default_paramstyle: str = "named"
+    div_is_floordiv: bool = False
+    supports_default_values: bool = False
+    supports_server_side_cursors: bool = False
+    supports_sequences: bool = False
+    supports_native_boolean: bool = True
+
+    colspecs = {
+        sqlalchemy.types.DateTime: dialect_type_impl.TIMESTAMP_NTZ,
+        sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType,
+        sqlalchemy.types.String: dialect_type_impl.DatabricksStringType,
+    }
+
+    # SQLAlchemy requires that a table with no primary key
+    # constraint return a dictionary that looks like this.
+    EMPTY_PK: Dict[str, Any] = {"constrained_columns": [], "name": None}
+
+    # SQLAlchemy requires that a table with no foreign keys
+    # defined return an empty list. Same for indexes.
+    EMPTY_FK: List
+    EMPTY_INDEX: List
+    EMPTY_FK = EMPTY_INDEX = []
+
+    @classmethod
+    def import_dbapi(cls):
+        return sql
+
+    def _force_paramstyle_to_native_mode(self):
+        """This method can be removed after databricks-sql-connector wholly switches to the NATIVE ParamApproach.
+
+        This is a hack to trick SQLAlchemy into using a different paramstyle
+        than the one declared by this module in src/databricks/sql/__init__.py
+
+        This method is called _after_ the dialect has been initialised, which is important because otherwise
+        our users would need to include a `paramstyle` argument in their SQLAlchemy connection string.
+
+        This dialect is written to support NATIVE queries. Although the INLINE approach can technically work,
+        the same behaviour can be achieved within SQLAlchemy itself using its literal_processor methods.
+        """
+
+        self.paramstyle = self.default_paramstyle
+
+    def create_connect_args(self, url):
+        # TODO: can schema be provided after HOST?
+        # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***
+
+        kwargs = {
+            "server_hostname": url.host,
+            "access_token": url.password,
+            "http_path": url.query.get("http_path"),
+            "catalog": url.query.get("catalog"),
+            "schema": url.query.get("schema"),
+            "use_inline_params": False,
+        }
+
+        self.schema = kwargs["schema"]
+        self.catalog = kwargs["catalog"]
+
+        self._force_paramstyle_to_native_mode()
+
+        return [], kwargs
+
+    def get_columns(
+        self, connection, table_name, schema=None, **kwargs
+    ) -> List[ReflectedColumn]:
+        """Return information about columns in `table_name`."""
+
+        with self.get_connection_cursor(connection) as cur:
+            resp = cur.columns(
+                catalog_name=self.catalog,
+                schema_name=schema or self.schema,
+                table_name=table_name,
+            ).fetchall()
+
+        if not resp:
+            # TGetColumnsRequest will not raise an exception if passed a table that doesn't exist,
+            # but Databricks supports tables with no columns. So if the result is an empty list,
+            # we need to check whether the table exists (and raise an exception if not) or simply
+            # return an empty list.
+            self._describe_table_extended(
+                connection,
+                table_name,
+                self.catalog,
+                schema or self.schema,
+                expect_result=False,
+            )
+            return resp
+        columns = []
+        for col in resp:
+            row_dict = parse_column_info_from_tgetcolumnsresponse(col)
+            columns.append(row_dict)
+
+        return columns
+
+    def _describe_table_extended(
+        self,
+        connection: Connection,
+        table_name: str,
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        expect_result=True,
+    ) -> Union[List[Dict[str, str]], None]:
+        """Run DESCRIBE TABLE EXTENDED on a table and return a list of dictionaries of the result.
+
+        This method is the fastest way to check for the presence of a table in a schema.
+
+        If expect_result is False, this method returns None as the output dict isn't required.
+
+        Raises NoSuchTableError if the table is not present in the schema.
+        """
+
+        _target_catalog = catalog_name or self.catalog
+        _target_schema = schema_name or self.schema
+        _target = f"`{_target_catalog}`.`{_target_schema}`.`{table_name}`"
+
+        # sql injection risk?
+        # DESCRIBE TABLE EXTENDED in DBR doesn't support parameterised inputs :(
+        stmt = DDL(f"DESCRIBE TABLE EXTENDED {_target}")
+
+        try:
+            result = connection.execute(stmt)
+        except DatabaseError as e:
+            if _match_table_not_found_string(str(e)):
+                raise sqlalchemy.exc.NoSuchTableError(
+                    f"No such table {table_name}"
+                ) from e
+            raise e
+
+        if not expect_result:
+            return None
+
+        fmt_result = _describe_table_extended_result_to_dict_list(result)
+        return fmt_result
+
+    @reflection.cache
+    def get_pk_constraint(
+        self,
+        connection,
+        table_name: str,
+        schema: Optional[str] = None,
+        **kw: Any,
+    ) -> ReflectedPrimaryKeyConstraint:
+        """Fetch information about the primary key constraint on table_name.
+
+        Returns a dictionary with these keys:
+
+        constrained_columns
+          a list of column names that make up the primary key. This is an empty list
+          if no PRIMARY KEY is defined.
+
+        name
+          the name of the primary key constraint
+        """
+
+        result = self._describe_table_extended(
+            connection=connection,
+            table_name=table_name,
+            schema_name=schema,
+        )
+
+        # Type ignore is because mypy knows that self._describe_table_extended *can*
+        # return None (even though it never will since expect_result defaults to True)
+        raw_pk_constraints: List = get_pk_strings_from_dte_output(result)  # type: ignore
+        if not any(raw_pk_constraints):
+            return self.EMPTY_PK  # type: ignore
+
+        if len(raw_pk_constraints) > 1:
+            logger.warning(
+                "Found more than one primary key constraint in DESCRIBE TABLE EXTENDED output. "
+                "This is unexpected. Please report this as a bug. "
+                "Only the first primary key constraint will be returned."
+            )
+
+        first_pk_constraint = raw_pk_constraints[0]
+        pk_name = first_pk_constraint.get("col_name")
+        pk_constraint_string = first_pk_constraint.get("data_type")
+
+        # TODO: figure out how to return sqlalchemy.interfaces in a way that mypy respects
+        return build_pk_dict(pk_name, pk_constraint_string)  # type: ignore
+
+    def get_foreign_keys(
+        self, connection, table_name, schema=None, **kw
+    ) -> List[ReflectedForeignKeyConstraint]:
+        """Return information about foreign_keys in `table_name`."""
+
+        result = self._describe_table_extended(
+            connection=connection,
+            table_name=table_name,
+            schema_name=schema,
+        )
+
+        # Type ignore is because mypy knows that self._describe_table_extended *can*
+        # return None (even though it never will since expect_result defaults to True)
+        raw_fk_constraints: List = get_fk_strings_from_dte_output(result)  # type: ignore
+
+        if not any(raw_fk_constraints):
+            return self.EMPTY_FK
+
+        fk_constraints = []
+        for constraint_dict in raw_fk_constraints:
+            fk_name = constraint_dict.get("col_name")
+            fk_constraint_string = constraint_dict.get("data_type")
+            this_constraint_dict = build_fk_dict(
+                fk_name, fk_constraint_string, schema_name=schema
+            )
+            fk_constraints.append(this_constraint_dict)
+
+        # TODO: figure out how to return sqlalchemy.interfaces in a way that mypy respects
+        return fk_constraints  # type: ignore
+
+    def get_indexes(self, connection, table_name, schema=None, **kw):
+        """SQLAlchemy requires this method. Databricks doesn't support indexes."""
+        return self.EMPTY_INDEX
+
+    @reflection.cache
+    def get_table_names(self, connection: Connection, schema=None, **kwargs):
+        """Return a list of tables in the current schema."""
+
+        _target_catalog = self.catalog
+        _target_schema = schema or self.schema
+        _target = f"`{_target_catalog}`.`{_target_schema}`"
+
+        stmt = DDL(f"SHOW TABLES FROM {_target}")
+
+        tables_result = connection.execute(stmt).all()
+        views_result = self.get_view_names(connection=connection, schema=schema)
+
+        # In Databricks, SHOW TABLES FROM <schema> returns both tables and views.
+        # Potential optimisation: rewrite this to instead query information_schema
+        tables_minus_views = [
+            row.tableName for row in tables_result if row.tableName not in views_result
+        ]
+
+        return tables_minus_views
+
+    @reflection.cache
+    def get_view_names(
+        self,
+        connection,
+        schema=None,
+        only_materialized=False,
+        only_temp=False,
+        **kwargs,
+    ) -> List[str]:
+        """Returns a list of string view names contained in the schema, if any."""
+
+        _target_catalog = self.catalog
+        _target_schema = schema or self.schema
+        _target = f"`{_target_catalog}`.`{_target_schema}`"
+
+        stmt = DDL(f"SHOW VIEWS FROM {_target}")
+        result = connection.execute(stmt).all()
+
+        return [
+            row.viewName
+            for row in result
+            if (not only_materialized or row.isMaterialized)
+            and (not only_temp or row.isTemporary)
+        ]
+
+    @reflection.cache
+    def get_materialized_view_names(
+        self, connection: Connection, schema: Optional[str] = None, **kw: Any
+    ) -> List[str]:
+        """A wrapper around get_view_names that fetches only the names of materialized views"""
+        return self.get_view_names(connection, schema, only_materialized=True)
+
+    @reflection.cache
+    def get_temp_view_names(
+        self, connection: Connection, schema: Optional[str] = None, **kw: Any
+    ) -> List[str]:
+        """A wrapper around get_view_names that fetches only the names of temporary views"""
+        return self.get_view_names(connection, schema, only_temp=True)
+
+    def do_rollback(self, dbapi_connection):
+        # Databricks SQL does not support transactions
+        pass
+
+    @reflection.cache
+    def has_table(
+        self, connection, table_name, schema=None, catalog=None, **kwargs
+    ) -> bool:
+        """For internal dialect use, check the existence of a particular table
+        or view in the database.
+        """
+
+        try:
+            self._describe_table_extended(
+                connection=connection,
+                table_name=table_name,
+                catalog_name=catalog,
+                schema_name=schema,
+            )
+            return True
+        except sqlalchemy.exc.NoSuchTableError:
+            return False
+
+    def get_connection_cursor(self, connection):
+        """Added for backwards compatibility with 1.3.x"""
+        if hasattr(connection, "_dbapi_connection"):
+            return connection._dbapi_connection.dbapi_connection.cursor()
+        elif hasattr(connection, "raw_connection"):
+            return connection.raw_connection().cursor()
+        elif hasattr(connection, "connection"):
+            return connection.connection.cursor()
+
+        raise SQLAlchemyError(
+            "Databricks dialect can't obtain a cursor context manager from the dbapi"
+        )
+
+    @reflection.cache
+    def get_schema_names(self, connection, **kw):
+        """Return a list of all schema names available in the database."""
+        stmt = DDL("SHOW SCHEMAS")
+        result = connection.execute(stmt)
+        schema_list = [row[0] for row in result]
+        return schema_list
+
+    @reflection.cache
+    def get_table_comment(
+        self,
+        connection: Connection,
+        table_name: str,
+        schema: Optional[str] = None,
+        **kw: Any,
+    ) -> ReflectedTableComment:
+        result = self._describe_table_extended(
+            connection=connection,
+            table_name=table_name,
+            schema_name=schema,
+        )
+
+        if result is None:
+            return ReflectionDefaults.table_comment()
+
+        comment = get_comment_from_dte_output(result)
+
+        if comment:
+            return dict(text=comment)
+        else:
+            return ReflectionDefaults.table_comment()
+
+
+@event.listens_for(Engine, "do_connect")
+def receive_do_connect(dialect, conn_rec, cargs, cparams):
+    """Helpful for DS on traffic from clients using SQLAlchemy in particular"""
+
+    # Ignore connect invocations that don't use our dialect
+    if dialect.name != "databricks":
+        return
+
+    ua = cparams.get("_user_agent_entry", "")
+
+    def add_sqla_tag_if_not_present(val: str):
+        if not val:
+            output = "sqlalchemy"
+
+        elif "sqlalchemy" in val:
+            output = val
+
+        else:
+            output = f"sqlalchemy + {val}"
+
+        return output
+
+    cparams["_user_agent_entry"] = add_sqla_tag_if_not_present(ua)
+
+    if sqlalchemy.__version__.startswith("1.3"):
+        # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
+        # These should be passed in as connect_args when building the Engine
+
+        if "schema" in cparams:
+            dialect.schema = cparams["schema"]
+
+        if "catalog" in cparams:
+            dialect.catalog = cparams["catalog"]
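For orientation, the sketch below shows how an engine built on this dialect exercises the reflection methods added in base.py. It is a minimal illustration, not taken from the package's documentation: the workspace hostname, access token, `http_path`, `catalog`, and `schema` values are placeholders, and `some_table` is a hypothetical table name. The URL query parameters mirror the keys read by `create_connect_args` above.

```python
# Minimal usage sketch (placeholder credentials, hypothetical table name).
from sqlalchemy import create_engine, inspect

# create_connect_args() reads http_path, catalog, and schema from the URL query
# and passes them to the DBAPI connection with use_inline_params=False.
engine = create_engine(
    "databricks://token:<access-token>@<workspace-hostname>"
    "?http_path=<http-path>&catalog=<catalog>&schema=<schema>"
)

inspector = inspect(engine)

# get_table_names() runs SHOW TABLES and subtracts the names returned by
# get_view_names(), since SHOW TABLES in Databricks also lists views.
print(inspector.get_table_names())

# get_columns() calls the DBAPI cursor's columns() method, while
# get_pk_constraint() and get_foreign_keys() parse DESCRIBE TABLE EXTENDED output.
print(inspector.get_columns("some_table"))
print(inspector.get_pk_constraint("some_table"))
```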
databricks/sqlalchemy/dependency_test/test_dependency.py
@@ -0,0 +1,22 @@
+import importlib.util
+
+import pytest
+
+
+class DatabricksImportError(Exception):
+    pass
+
+
+class TestLibraryDependencySuite:
+    # Skip when the dependency is installed; each test asserts that the
+    # corresponding import fails when the dependency is absent.
+    @pytest.mark.skipif(importlib.util.find_spec("databricks_sql_connector_core") is not None, reason="databricks_sql_connector_core is present")
+    def test_sql_core(self):
+        with pytest.raises(DatabricksImportError, match="databricks_sql_connector_core module is not available"):
+            try:
+                import databricks
+            except ImportError:
+                raise DatabricksImportError("databricks_sql_connector_core module is not available")
+
+    @pytest.mark.skipif(importlib.util.find_spec("sqlalchemy") is not None, reason="SQLAlchemy is present")
+    def test_sqlalchemy(self):
+        with pytest.raises(DatabricksImportError, match="sqlalchemy module is not available"):
+            try:
+                import sqlalchemy
+            except ImportError:
+                raise DatabricksImportError("sqlalchemy module is not available")
databricks/sqlalchemy/py.typed
File without changes
databricks/sqlalchemy/pytest.ini
@@ -0,0 +1,4 @@
+
+[sqla_testing]
+requirement_cls=databricks.sqlalchemy.requirements:Requirements
+profile_file=profiles.txt
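The `requirement_cls` setting points SQLAlchemy's pytest plugin at the `Requirements` class shipped in `databricks/sqlalchemy/requirements.py` (+249 lines in this diff). As a rough sketch of the mechanism only — the property shown is a standard `SuiteRequirements` hook and is not copied from the package — such a class switches compliance tests on or off to match dialect capabilities:

```python
# Illustrative sketch of a requirement_cls, not the package's actual Requirements.
from sqlalchemy.testing import exclusions
from sqlalchemy.testing.requirements import SuiteRequirements


class Requirements(SuiteRequirements):
    @property
    def index_reflection(self):
        # This dialect's get_indexes() always returns an empty list because
        # Databricks has no indexes, so index reflection tests are disabled.
        return exclusions.closed()
```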