databricks-sqlalchemy 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- CHANGELOG.md +2 -2
- databricks/sqlalchemy/__init__.py +4 -1
- databricks/sqlalchemy/_ddl.py +100 -0
- databricks/sqlalchemy/_parse.py +385 -0
- databricks/sqlalchemy/_types.py +323 -0
- databricks/sqlalchemy/base.py +436 -0
- databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
- databricks/sqlalchemy/py.typed +0 -0
- databricks/sqlalchemy/pytest.ini +4 -0
- databricks/sqlalchemy/requirements.py +249 -0
- databricks/sqlalchemy/setup.cfg +4 -0
- databricks/sqlalchemy/test/_extra.py +70 -0
- databricks/sqlalchemy/test/_future.py +331 -0
- databricks/sqlalchemy/test/_regression.py +311 -0
- databricks/sqlalchemy/test/_unsupported.py +450 -0
- databricks/sqlalchemy/test/conftest.py +13 -0
- databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
- databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
- databricks/sqlalchemy/test/test_suite.py +13 -0
- databricks/sqlalchemy/test_local/__init__.py +5 -0
- databricks/sqlalchemy/test_local/conftest.py +44 -0
- databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
- databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
- databricks/sqlalchemy/test_local/test_ddl.py +96 -0
- databricks/sqlalchemy/test_local/test_parsing.py +160 -0
- databricks/sqlalchemy/test_local/test_types.py +161 -0
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/METADATA +60 -39
- databricks_sqlalchemy-2.0.0.dist-info/RECORD +31 -0
- databricks/sqlalchemy/dialect/__init__.py +0 -340
- databricks/sqlalchemy/dialect/base.py +0 -17
- databricks/sqlalchemy/dialect/compiler.py +0 -38
- databricks_sqlalchemy-1.0.1.dist-info/RECORD +0 -10
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/LICENSE +0 -0
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/WHEEL +0 -0
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/entry_points.txt +0 -0
databricks/sqlalchemy/test_local/test_parsing.py (new file):
@@ -0,0 +1,160 @@
+import pytest
+from databricks.sqlalchemy._parse import (
+    extract_identifiers_from_string,
+    extract_identifier_groups_from_string,
+    extract_three_level_identifier_from_constraint_string,
+    build_fk_dict,
+    build_pk_dict,
+    match_dte_rows_by_value,
+    get_comment_from_dte_output,
+    DatabricksSqlAlchemyParseException,
+)
+
+
+# These are outputs from DESCRIBE TABLE EXTENDED
+@pytest.mark.parametrize(
+    "input, expected",
+    [
+        ("PRIMARY KEY (`pk1`, `pk2`)", ["pk1", "pk2"]),
+        ("PRIMARY KEY (`a`, `b`, `c`)", ["a", "b", "c"]),
+        ("PRIMARY KEY (`name`, `id`, `attr`)", ["name", "id", "attr"]),
+    ],
+)
+def test_extract_identifiers(input, expected):
+    assert (
+        extract_identifiers_from_string(input) == expected
+    ), "Failed to extract identifiers from string"
+
+
+@pytest.mark.parametrize(
+    "input, expected",
+    [
+        (
+            "FOREIGN KEY (`pname`, `pid`, `pattr`) REFERENCES `main`.`pysql_sqlalchemy`.`tb1` (`name`, `id`, `attr`)",
+            [
+                "(`pname`, `pid`, `pattr`)",
+                "(`name`, `id`, `attr`)",
+            ],
+        )
+    ],
+)
+def test_extract_identifer_batches(input, expected):
+    assert (
+        extract_identifier_groups_from_string(input) == expected
+    ), "Failed to extract identifier groups from string"
+
+
+def test_extract_3l_namespace_from_constraint_string():
+    input = "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`pysql_dialect_compliance`.`users` (`user_id`)"
+    expected = {
+        "catalog": "main",
+        "schema": "pysql_dialect_compliance",
+        "table": "users",
+    }
+
+    assert (
+        extract_three_level_identifier_from_constraint_string(input) == expected
+    ), "Failed to extract 3L namespace from constraint string"
+
+
+def test_extract_3l_namespace_from_bad_constraint_string():
+    input = "FOREIGN KEY (`parent_user_id`) REFERENCES `pysql_dialect_compliance`.`users` (`user_id`)"
+
+    with pytest.raises(DatabricksSqlAlchemyParseException):
+        extract_three_level_identifier_from_constraint_string(input)
+
+
+@pytest.mark.parametrize("tschema", [None, "some_schema"])
+def test_build_fk_dict(tschema):
+    fk_constraint_string = "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`some_schema`.`users` (`user_id`)"
+
+    result = build_fk_dict("some_fk_name", fk_constraint_string, schema_name=tschema)
+
+    assert result == {
+        "name": "some_fk_name",
+        "constrained_columns": ["parent_user_id"],
+        "referred_schema": tschema,
+        "referred_table": "users",
+        "referred_columns": ["user_id"],
+    }
+
+
+def test_build_pk_dict():
+    pk_constraint_string = "PRIMARY KEY (`id`, `name`, `email_address`)"
+    pk_name = "pk1"
+
+    result = build_pk_dict(pk_name, pk_constraint_string)
+
+    assert result == {
+        "constrained_columns": ["id", "name", "email_address"],
+        "name": "pk1",
+    }
+
+
+# This is a real example of the output from DESCRIBE TABLE EXTENDED as of 15 October 2023
+RAW_SAMPLE_DTE_OUTPUT = [
+    ["id", "int"],
+    ["name", "string"],
+    ["", ""],
+    ["# Detailed Table Information", ""],
+    ["Catalog", "main"],
+    ["Database", "pysql_sqlalchemy"],
+    ["Table", "exampleexampleexample"],
+    ["Created Time", "Sun Oct 15 21:12:54 UTC 2023"],
+    ["Last Access", "UNKNOWN"],
+    ["Created By", "Spark "],
+    ["Type", "MANAGED"],
+    ["Location", "s3://us-west-2-****-/19a85dee-****/tables/ccb7***"],
+    ["Provider", "delta"],
+    ["Comment", "some comment"],
+    ["Owner", "some.user@example.com"],
+    ["Is_managed_location", "true"],
+    ["Predictive Optimization", "ENABLE (inherited from CATALOG main)"],
+    [
+        "Table Properties",
+        "[delta.checkpoint.writeStatsAsJson=false,delta.checkpoint.writeStatsAsStruct=true,delta.minReaderVersion=1,delta.minWriterVersion=2]",
+    ],
+    ["", ""],
+    ["# Constraints", ""],
+    ["exampleexampleexample_pk", "PRIMARY KEY (`id`)"],
+    [
+        "exampleexampleexample_fk",
+        "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`pysql_dialect_compliance`.`users` (`user_id`)",
+    ],
+]
+
+FMT_SAMPLE_DT_OUTPUT = [
+    {"col_name": i[0], "data_type": i[1]} for i in RAW_SAMPLE_DTE_OUTPUT
+]
+
+
+@pytest.mark.parametrize(
+    "match, output",
+    [
+        (
+            "PRIMARY KEY",
+            [
+                {
+                    "col_name": "exampleexampleexample_pk",
+                    "data_type": "PRIMARY KEY (`id`)",
+                }
+            ],
+        ),
+        (
+            "FOREIGN KEY",
+            [
+                {
+                    "col_name": "exampleexampleexample_fk",
+                    "data_type": "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`pysql_dialect_compliance`.`users` (`user_id`)",
+                }
+            ],
+        ),
+    ],
+)
+def test_filter_dict_by_value(match, output):
+    result = match_dte_rows_by_value(FMT_SAMPLE_DT_OUTPUT, match)
+    assert result == output
+
+
+def test_get_comment_from_dte_output():
+    assert get_comment_from_dte_output(FMT_SAMPLE_DT_OUTPUT) == "some comment"
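
Taken together, these tests sketch the reflection flow the dialect depends on: rows from `DESCRIBE TABLE EXTENDED` are filtered by value and the matching constraint strings are parsed into SQLAlchemy-style dicts. The snippet below is a hand-run illustration of that chain using the sample data from the test module; it is not part of the shipped package, and the expected output is taken from the assertions above.

```python
from databricks.sqlalchemy._parse import build_pk_dict, match_dte_rows_by_value

# A single row in the same shape as FMT_SAMPLE_DT_OUTPUT in the test module above.
dte_rows = [
    {"col_name": "exampleexampleexample_pk", "data_type": "PRIMARY KEY (`id`)"},
]

# Find the PRIMARY KEY row, then parse its constraint string into a dict.
pk_row = match_dte_rows_by_value(dte_rows, "PRIMARY KEY")[0]
pk_dict = build_pk_dict(pk_row["col_name"], pk_row["data_type"])

# Per test_build_pk_dict, this yields:
# {"constrained_columns": ["id"], "name": "exampleexampleexample_pk"}
print(pk_dict)
```
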
databricks/sqlalchemy/test_local/test_types.py (new file):
@@ -0,0 +1,161 @@
+import enum
+
+import pytest
+import sqlalchemy
+
+from databricks.sqlalchemy.base import DatabricksDialect
+from databricks.sqlalchemy._types import TINYINT, TIMESTAMP, TIMESTAMP_NTZ
+
+
+class DatabricksDataType(enum.Enum):
+    """https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html"""
+
+    BIGINT = enum.auto()
+    BINARY = enum.auto()
+    BOOLEAN = enum.auto()
+    DATE = enum.auto()
+    DECIMAL = enum.auto()
+    DOUBLE = enum.auto()
+    FLOAT = enum.auto()
+    INT = enum.auto()
+    INTERVAL = enum.auto()
+    VOID = enum.auto()
+    SMALLINT = enum.auto()
+    STRING = enum.auto()
+    TIMESTAMP = enum.auto()
+    TIMESTAMP_NTZ = enum.auto()
+    TINYINT = enum.auto()
+    ARRAY = enum.auto()
+    MAP = enum.auto()
+    STRUCT = enum.auto()
+
+
+# Defines the way that SQLAlchemy CamelCase types are compiled into Databricks SQL types.
+# Note: I wish I could define this within the TestCamelCaseTypesCompilation class, but pytest doesn't like that.
+camel_case_type_map = {
+    sqlalchemy.types.BigInteger: DatabricksDataType.BIGINT,
+    sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY,
+    sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN,
+    sqlalchemy.types.Date: DatabricksDataType.DATE,
+    sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP_NTZ,
+    sqlalchemy.types.Double: DatabricksDataType.DOUBLE,
+    sqlalchemy.types.Enum: DatabricksDataType.STRING,
+    sqlalchemy.types.Float: DatabricksDataType.FLOAT,
+    sqlalchemy.types.Integer: DatabricksDataType.INT,
+    sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP_NTZ,
+    sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL,
+    sqlalchemy.types.PickleType: DatabricksDataType.BINARY,
+    sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT,
+    sqlalchemy.types.String: DatabricksDataType.STRING,
+    sqlalchemy.types.Text: DatabricksDataType.STRING,
+    sqlalchemy.types.Time: DatabricksDataType.STRING,
+    sqlalchemy.types.Unicode: DatabricksDataType.STRING,
+    sqlalchemy.types.UnicodeText: DatabricksDataType.STRING,
+    sqlalchemy.types.Uuid: DatabricksDataType.STRING,
+}
+
+
+def dict_as_tuple_list(d: dict):
+    """Return a list of [(key, value), ...] from a dictionary."""
+    return [(key, value) for key, value in d.items()]
+
+
+class CompilationTestBase:
+    dialect = DatabricksDialect()
+
+    def _assert_compiled_value(
+        self, type_: sqlalchemy.types.TypeEngine, expected: DatabricksDataType
+    ):
+        """Assert that when type_ is compiled for the databricks dialect, it renders the DatabricksDataType name.
+
+        This method initialises the type_ with no arguments.
+        """
+        compiled_result = type_().compile(dialect=self.dialect)  # type: ignore
+        assert compiled_result == expected.name
+
+    def _assert_compiled_value_explicit(
+        self, type_: sqlalchemy.types.TypeEngine, expected: str
+    ):
+        """Assert that when type_ is compiled for the databricks dialect, it renders the expected string.
+
+        This method expects an initialised type_ so that we can test how a TypeEngine created with arguments
+        is compiled.
+        """
+        compiled_result = type_.compile(dialect=self.dialect)
+        assert compiled_result == expected
+
+
+class TestCamelCaseTypesCompilation(CompilationTestBase):
+    """Per the sqlalchemy documentation[^1] here, the camel case members of sqlalchemy.types are
+    are expected to work across all dialects. These tests verify that the types compile into valid
+    Databricks SQL type strings. For example, the sqlalchemy.types.Integer() should compile as "INT".
+
+    Truly custom types like STRUCT (notice the uppercase) are not expected to work across all dialects.
+    We test these separately.
+
+    Note that these tests have to do with type **name** compiliation. Which is separate from actually
+    mapping values between Python and Databricks.
+
+    Note: SchemaType and MatchType are not tested because it's not used in table definitions
+
+    [1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#generic-camelcase-types
+    """
+
+    @pytest.mark.parametrize("type_, expected", dict_as_tuple_list(camel_case_type_map))
+    def test_bare_camel_case_types_compile(self, type_, expected):
+        self._assert_compiled_value(type_, expected)
+
+    def test_numeric_renders_as_decimal_with_precision(self):
+        self._assert_compiled_value_explicit(
+            sqlalchemy.types.Numeric(10), "DECIMAL(10)"
+        )
+
+    def test_numeric_renders_as_decimal_with_precision_and_scale(self):
+        self._assert_compiled_value_explicit(
+            sqlalchemy.types.Numeric(10, 2), "DECIMAL(10, 2)"
+        )
+
+
+uppercase_type_map = {
+    sqlalchemy.types.ARRAY: DatabricksDataType.ARRAY,
+    sqlalchemy.types.BIGINT: DatabricksDataType.BIGINT,
+    sqlalchemy.types.BINARY: DatabricksDataType.BINARY,
+    sqlalchemy.types.BOOLEAN: DatabricksDataType.BOOLEAN,
+    sqlalchemy.types.DATE: DatabricksDataType.DATE,
+    sqlalchemy.types.DECIMAL: DatabricksDataType.DECIMAL,
+    sqlalchemy.types.DOUBLE: DatabricksDataType.DOUBLE,
+    sqlalchemy.types.FLOAT: DatabricksDataType.FLOAT,
+    sqlalchemy.types.INT: DatabricksDataType.INT,
+    sqlalchemy.types.SMALLINT: DatabricksDataType.SMALLINT,
+    sqlalchemy.types.TIMESTAMP: DatabricksDataType.TIMESTAMP,
+    TINYINT: DatabricksDataType.TINYINT,
+    TIMESTAMP: DatabricksDataType.TIMESTAMP,
+    TIMESTAMP_NTZ: DatabricksDataType.TIMESTAMP_NTZ,
+}
+
+
+class TestUppercaseTypesCompilation(CompilationTestBase):
+    """Per the sqlalchemy documentation[^1], uppercase types are considered to be specific to some
+    database backends. These tests verify that the types compile into valid Databricks SQL type strings.
+
+    [1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#backend-specific-uppercase-datatypes
+    """
+
+    @pytest.mark.parametrize("type_, expected", dict_as_tuple_list(uppercase_type_map))
+    def test_bare_uppercase_types_compile(self, type_, expected):
+        if isinstance(type_, type(sqlalchemy.types.ARRAY)):
+            # ARRAY cannot be initialised without passing an item definition so we test separately
+            # I preserve it in the uppercase_type_map for clarity
+            assert True
+        else:
+            self._assert_compiled_value(type_, expected)
+
+    def test_array_string_renders_as_array_of_string(self):
+        """SQLAlchemy's ARRAY type requires an item definition. And their docs indicate that they've only tested
+        it with Postgres since that's the only first-class dialect with support for ARRAY.
+
+        https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
+        """
+        self._assert_compiled_value_explicit(
+            sqlalchemy.types.ARRAY(sqlalchemy.types.String), "ARRAY<STRING>"
+        )
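
The same compilation path can be exercised directly, outside pytest. This is a minimal sketch based on the assertions above (DateTime compiles to TIMESTAMP_NTZ, Numeric(10, 2) to DECIMAL(10, 2), ARRAY(String) to ARRAY<STRING>); it is illustrative and not part of the packaged code.

```python
import sqlalchemy

from databricks.sqlalchemy.base import DatabricksDialect

dialect = DatabricksDialect()

# Compiling a type against the dialect yields the Databricks SQL type name,
# exactly as asserted in the test module above.
print(sqlalchemy.types.DateTime().compile(dialect=dialect))      # TIMESTAMP_NTZ
print(sqlalchemy.types.Numeric(10, 2).compile(dialect=dialect))  # DECIMAL(10, 2)
print(sqlalchemy.types.ARRAY(sqlalchemy.types.String).compile(dialect=dialect))  # ARRAY<STRING>
```
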
{databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/METADATA (+60 -39):
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: databricks-sqlalchemy
-Version:
+Version: 2.0.0
 Summary: Databricks SQLAlchemy plugin for Python
 License: Apache-2.0
 Author: Databricks
@@ -14,14 +14,17 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: databricks_sql_connector_core (>=4.0.0)
-Requires-Dist: sqlalchemy (>=
+Requires-Dist: sqlalchemy (>=2.0.21)
 Project-URL: Bug Tracker, https://github.com/databricks/databricks-sqlalchemy/issues
 Project-URL: Homepage, https://github.com/databricks/databricks-sqlalchemy
 Description-Content-Type: text/markdown
 
-## Databricks dialect for SQLALchemy
+## Databricks dialect for SQLALchemy 2.0
 
-The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `
+The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
+
+## Usage with SQLAlchemy <= 2.0
+A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, you can use `databricks-sql-connector^2.4.0`.
 
 
 ## Installation
@@ -29,7 +32,7 @@ The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](http
 To install the dialect and its dependencies:
 
 ```shell
-pip install databricks-sqlalchemy
+pip install databricks-sqlalchemy
 ```
 
 If you also plan to use `alembic` you can alternatively run:
@@ -62,45 +65,41 @@ access_token = os.getenv("DATABRICKS_TOKEN")
 catalog = os.getenv("DATABRICKS_CATALOG")
 schema = os.getenv("DATABRICKS_SCHEMA")
 
-
-
-
-
-    conn_string = f"databricks://token:{access_token}@{host}"
-    connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
-    all_connect_args = {**extra_connect_args, **connect_args}
-    engine = create_engine(conn_string, connect_args=all_connect_args)
-else:
-    engine = create_engine(
-        f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
-        connect_args=extra_connect_args,
-    )
-
+engine = create_engine(
+    f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
+)
 ```
 
 ## Types
 
-The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/
+The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
 
 |SQLAlchemy Type|Databricks SQL Type|
 |-|-|
-[`BigInteger`](https://docs.sqlalchemy.org/en/
-[`LargeBinary`](https://docs.sqlalchemy.org/en/
-[`Boolean`](https://docs.sqlalchemy.org/en/
-[`Date`](https://docs.sqlalchemy.org/en/
-[`DateTime`](https://docs.sqlalchemy.org/en/
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
+[`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
+[`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
+[`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
+[`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
+[`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
+[`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
+[`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
+[`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
+[`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
+[`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
+[`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
+[`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
+[`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+
+In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
+
+- [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
+- [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
+- [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)
 
 
 ### `LargeBinary()` and `PickleType()`
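
For context on the three Databricks-specific UPPERCASE types listed in the hunk above, here is a minimal sketch (not part of the packaged README) of how they might appear in a declarative model; the `Base`, table, and column names are illustrative.

```python
from sqlalchemy import Column, Integer
from sqlalchemy.orm import declarative_base

from databricks.sqlalchemy import TINYINT, TIMESTAMP, TIMESTAMP_NTZ

Base = declarative_base()


class Reading(Base):
    """Hypothetical model, purely for illustration."""

    __tablename__ = "readings"

    id = Column(Integer, primary_key=True)
    status_code = Column(TINYINT)               # 1-byte integer
    recorded_at = Column(TIMESTAMP)             # timezone-aware timestamp
    recorded_at_local = Column(TIMESTAMP_NTZ)   # timestamp without timezone
```
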
@@ -113,6 +112,24 @@ Support for `CHECK` constraints is not implemented in this dialect. Support is p
 
 SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.
 
+### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
+
+Databricks Runtime provides two datetime-like types: `TIMESTAMP` which is always timezone-aware and `TIMESTAMP_NTZ` which is timezone agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
+
+The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default. So our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly. Just use `DateTime()`.
+
+If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
+
+_Note that SQLAlchemy documentation suggests that you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
+
+```python
+from sqlalchemy import DateTime
+from databricks.sqlalchemy import TIMESTAMP
+
+class SomeModel(Base):
+    some_date_without_timezone = DateTime()
+    some_date_with_timezone = TIMESTAMP()
+```
 
 ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`
 
@@ -137,7 +154,7 @@ class SomeModel(Base):
 
 Identity and generated value support is currently limited in this dialect.
 
-When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/
+When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.
 
 Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.
 
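
As a rough sketch of the `Identity()` approach described in the hunk above (assuming a declarative `Base`; the model and column names are illustrative, not taken from the README):

```python
from sqlalchemy import BigInteger, Column, Identity, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class SomeModel(Base):
    __tablename__ = "some_model"

    # Only BIGINT columns can auto-increment on Databricks, so use BigInteger
    # with an explicit Identity() rather than relying on autoincrement.
    id = Column(BigInteger, Identity(), primary_key=True)
    name = Column(String)
```
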
@@ -151,6 +168,10 @@ class SomeModel(Base):
 
 When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).
 
+## Parameters
+
+`databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
+
 ## Usage with pandas
 
 Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
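
A minimal sketch of the "named" paramstyle mentioned in the Parameters section above, using `sqlalchemy.text()`; the connection URL placeholders are illustrative stand-ins for real workspace values.

```python
from sqlalchemy import create_engine, text

# Placeholders, not real credentials.
engine = create_engine(
    "databricks://token:<access_token>@<server_hostname>"
    "?http_path=<http_path>&catalog=<catalog>&schema=<schema>"
)

with engine.connect() as conn:
    # Named parameters (:x) are bound natively by the connector.
    result = conn.execute(text("SELECT :x AS value"), {"x": 42})
    print(result.scalar())
```
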
@@ -181,7 +202,7 @@ with engine.connect() as conn:
     df.to_sql('squares',conn)
 ```
 
-## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/
+## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)
 
 Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.
 
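
To round out the constraint discussion above, a small sketch (assumed model and table names, not taken from the README) of `primary_key=True` alongside a `ForeignKey`, keeping in mind that Databricks records but does not enforce FOREIGN KEY integrity:

```python
from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class User(Base):
    __tablename__ = "users"

    user_id = Column(Integer, primary_key=True)
    name = Column(String)


class Order(Base):
    __tablename__ = "orders"

    order_id = Column(Integer, primary_key=True)
    # Recorded as a FOREIGN KEY constraint in Unity Catalog, but not enforced at write time.
    user_id = Column(Integer, ForeignKey("users.user_id"))
```
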
databricks_sqlalchemy-2.0.0.dist-info/RECORD (new file):
@@ -0,0 +1,31 @@
+CHANGELOG.md,sha256=3y1Bi07K7cXfMbmEN4Pfpbpz_G7irzxBQrGbqzGwC5c,226
+databricks/sqlalchemy/__init__.py,sha256=Gk3XC5OCzq7LuxMVpxK3t4q0rkflXJ8uJRJh9uusMqc,185
+databricks/sqlalchemy/_ddl.py,sha256=c0_GwfmnrFVr4-Ls14fmdGUUFyUok_GW4Uo45hLABFc,3983
+databricks/sqlalchemy/_parse.py,sha256=C0Q0_87PknCibRjs3ewPL5dimwQqaW_vr4nMxMsS220,13048
+databricks/sqlalchemy/_types.py,sha256=EqC_TWWY7mDw9EM2AVZnPrw5DD6G-vBV7wiwX4tcBcM,11753
+databricks/sqlalchemy/base.py,sha256=KcjfHMH0NsceYE2NRxrePtf5T1uw9u8JHofRdbnAKS4,15619
+databricks/sqlalchemy/dependency_test/test_dependency.py,sha256=oFv2oW0e0ScpiKbmXHwpIuYf7mWpj4BiVShiLvw2b2k,938
+databricks/sqlalchemy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+databricks/sqlalchemy/pytest.ini,sha256=ImutflUjkhByVNWCQ18Todj6XTvgJAQX_v7fD-gWHhU,106
+databricks/sqlalchemy/requirements.py,sha256=OobunAEwZ9y2dvSQLOmdgJciVn9xGlY9NAFfszPCTU0,9018
+databricks/sqlalchemy/setup.cfg,sha256=ImutflUjkhByVNWCQ18Todj6XTvgJAQX_v7fD-gWHhU,106
+databricks/sqlalchemy/test/_extra.py,sha256=ZMbqkdw9_sTRrcmuOssZoaZjNsaM-L1Z8tlumOoipMg,1955
+databricks/sqlalchemy/test/_future.py,sha256=7ZKdl2-hPVgkNUtq-mVS1DWsI5Y8N4fEnwxXfFnTqCU,12658
+databricks/sqlalchemy/test/_regression.py,sha256=MI6Jlmnw-DYmyY-mHfrscNQ8l3UEDaPXC7J3R2uKI9o,5412
+databricks/sqlalchemy/test/_unsupported.py,sha256=ORi3FvzjGDx3KK62KysJFaEI4zfAw3JdbpVbT5oCCYM,16061
+databricks/sqlalchemy/test/conftest.py,sha256=wauk1PYW_epp5-CKA2HbcTk_Ke3i8XpCnHB7UJLIRoE,597
+databricks/sqlalchemy/test/overrides/_componentreflectiontest.py,sha256=OAaFx_l3sHuUn322NuyzpBq1SquvHCyXIvk5NxDXNv8,7042
+databricks/sqlalchemy/test/overrides/_ctetest.py,sha256=u4jSIMrZY2dCSvBRhk9RsiObx1GB3RoFuLRByC212VU,1026
+databricks/sqlalchemy/test/test_suite.py,sha256=kQfqmoXROaMNi6RebaPKS6MFabzSU5Rz-YPo84CImIQ,492
+databricks/sqlalchemy/test_local/__init__.py,sha256=gphvzZ0Cb4Kz7rPRHHULanKyyjKgFt7zmGGYvcuGxys,131
+databricks/sqlalchemy/test_local/conftest.py,sha256=b6LThokKLJrCfe7207A6NvF2MYnGOmajwtVILCWj1qY,951
+databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx,sha256=9zqXUDGzgS2yjPz8x0uFsJU6kQTqdVRKKfJrEBHTZuY,59837
+databricks/sqlalchemy/test_local/e2e/test_basic.py,sha256=wLP28vz2H9wz0dS52_iXbRwu0Zoh0wTEN9MOj2xJiOQ,16749
+databricks/sqlalchemy/test_local/test_ddl.py,sha256=L5V1NoW9dT-7BHcaB97FQOw9ZFvo0g2_FIPKqOzlECM,3198
+databricks/sqlalchemy/test_local/test_parsing.py,sha256=pSTAnWyA44vDTEZ-_HnfwEr3QbA2Kmzn1yU5q1GqMts,5017
+databricks/sqlalchemy/test_local/test_types.py,sha256=Uey-z4ypzD5ykClBQs7XNW9KArHPbZU2cAk3EYD9jS0,6749
+databricks_sqlalchemy-2.0.0.dist-info/LICENSE,sha256=WgVm2VpfZ3CsUfPndD2NeCrEIcFA4UB-YnnW4ejxcbE,11346
+databricks_sqlalchemy-2.0.0.dist-info/METADATA,sha256=qjj_K1tWMMn7qGvzae8IhbabCSNWeWa0TTyq919wjVc,13073
+databricks_sqlalchemy-2.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+databricks_sqlalchemy-2.0.0.dist-info/entry_points.txt,sha256=AAjpsvZbVcoMAcWLIesoAT5FNZhBEcIhxdKknVua3jw,74
+databricks_sqlalchemy-2.0.0.dist-info/RECORD,,