databricks-sqlalchemy 1.0.1__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- databricks_sqlalchemy-2.0.0/CHANGELOG.md +5 -0
- {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/PKG-INFO +60 -39
- {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/README.md +58 -37
- {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/pyproject.toml +10 -3
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/__init__.py +4 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/_ddl.py +100 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/_parse.py +385 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/_types.py +323 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/base.py +436 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/py.typed +0 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/pytest.ini +4 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/requirements.py +249 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/setup.cfg +4 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_extra.py +70 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_future.py +331 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_regression.py +311 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_unsupported.py +450 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/conftest.py +13 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/test_suite.py +13 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/__init__.py +5 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/conftest.py +44 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/test_ddl.py +96 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/test_parsing.py +160 -0
- databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/test_types.py +161 -0
- databricks_sqlalchemy-1.0.1/CHANGELOG.md +0 -5
- databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/__init__.py +0 -1
- databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/dialect/__init__.py +0 -340
- databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/dialect/base.py +0 -17
- databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/dialect/compiler.py +0 -38
- {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/LICENSE +0 -0
{databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: databricks-sqlalchemy
-Version: 1.0.1
+Version: 2.0.0
 Summary: Databricks SQLAlchemy plugin for Python
 License: Apache-2.0
 Author: Databricks
@@ -14,14 +14,17 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: databricks_sql_connector_core (>=4.0.0)
-Requires-Dist: sqlalchemy (>=
+Requires-Dist: sqlalchemy (>=2.0.21)
 Project-URL: Bug Tracker, https://github.com/databricks/databricks-sqlalchemy/issues
 Project-URL: Homepage, https://github.com/databricks/databricks-sqlalchemy
 Description-Content-Type: text/markdown
 
-## Databricks dialect for SQLALchemy
+## Databricks dialect for SQLALchemy 2.0
 
-The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `
+The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
+
+## Usage with SQLAlchemy <= 2.0
+A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, you can use `databricks-sql-connector^2.4.0`.
 
 
 ## Installation
@@ -29,7 +32,7 @@ The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](http
 To install the dialect and its dependencies:
 
 ```shell
-pip install databricks-sqlalchemy
+pip install databricks-sqlalchemy
 ```
 
 If you also plan to use `alembic` you can alternatively run:
@@ -62,45 +65,41 @@ access_token = os.getenv("DATABRICKS_TOKEN")
 catalog = os.getenv("DATABRICKS_CATALOG")
 schema = os.getenv("DATABRICKS_SCHEMA")
 
-
-
-
-
-    conn_string = f"databricks://token:{access_token}@{host}"
-    connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
-    all_connect_args = {**extra_connect_args, **connect_args}
-    engine = create_engine(conn_string, connect_args=all_connect_args)
-else:
-    engine = create_engine(
-        f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
-        connect_args=extra_connect_args,
-    )
-
+engine = create_engine(
+    f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
+)
 ```
 
 ## Types
 
-The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/
+The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
 
 |SQLAlchemy Type|Databricks SQL Type|
 |-|-|
-[`BigInteger`](https://docs.sqlalchemy.org/en/
-[`LargeBinary`](https://docs.sqlalchemy.org/en/
-[`Boolean`](https://docs.sqlalchemy.org/en/
-[`Date`](https://docs.sqlalchemy.org/en/
-[`DateTime`](https://docs.sqlalchemy.org/en/
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
+[`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
+[`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
+[`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
+[`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
+[`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
+[`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
+[`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
+[`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
+[`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
+[`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
+[`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
+[`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
+[`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+
+In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
+
+- [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
+- [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
+- [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)
 
 
 ### `LargeBinary()` and `PickleType()`
@@ -113,6 +112,24 @@ Support for `CHECK` constraints is not implemented in this dialect. Support is p
 
 SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.
 
+### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
+
+Databricks Runtime provides two datetime-like types: `TIMESTAMP` which is always timezone-aware and `TIMESTAMP_NTZ` which is timezone agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
+
+The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default. So our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly. Just use `DateTime()`.
+
+If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
+
+_Note that SQLAlchemy documentation suggests that you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
+
+```python
+from sqlalchemy import DateTime
+from databricks.sqlalchemy import TIMESTAMP
+
+class SomeModel(Base):
+    some_date_without_timezone = DateTime()
+    some_date_with_timezone = TIMESTAMP()
+```
 
 ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`
 
@@ -137,7 +154,7 @@ class SomeModel(Base):
 
 Identity and generated value support is currently limited in this dialect.
 
-When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/
+When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.
 
 Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.
 
@@ -151,6 +168,10 @@ class SomeModel(Base):
 
 When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).
 
+## Parameters
+
+`databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
+
 ## Usage with pandas
 
 Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
@@ -181,7 +202,7 @@ with engine.connect() as conn:
     df.to_sql('squares',conn)
 ```
 
-## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/
+## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)
 
 Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.
 
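
The "Parameters" section added above requires the "named" paramstyle (`:param`). The following is a minimal sketch of what that looks like when executing through SQLAlchemy, assuming `databricks-sqlalchemy` 2.0.0 is installed; the URL credentials are placeholders and the query itself is illustrative, not from the package:

```python
from sqlalchemy import create_engine, text

# Placeholder workspace values -- substitute your own before running.
engine = create_engine(
    "databricks://token:<access_token>@<server_hostname>?http_path=<http_path>"
)

with engine.connect() as conn:
    # Named paramstyle (:param) is the only style the native approach accepts.
    result = conn.execute(text("SELECT :x + 1 AS y"), {"x": 41})
    print(result.scalar())  # 42
```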
{databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/README.md

@@ -1,6 +1,9 @@
-## Databricks dialect for SQLALchemy
+## Databricks dialect for SQLALchemy 2.0
 
-The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `
+The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
+
+## Usage with SQLAlchemy <= 2.0
+A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, you can use `databricks-sql-connector^2.4.0`.
 
 
 ## Installation
@@ -8,7 +11,7 @@ The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](http
 To install the dialect and its dependencies:
 
 ```shell
-pip install databricks-sqlalchemy
+pip install databricks-sqlalchemy
 ```
 
 If you also plan to use `alembic` you can alternatively run:
@@ -41,45 +44,41 @@ access_token = os.getenv("DATABRICKS_TOKEN")
 catalog = os.getenv("DATABRICKS_CATALOG")
 schema = os.getenv("DATABRICKS_SCHEMA")
 
-
-
-
-
-    conn_string = f"databricks://token:{access_token}@{host}"
-    connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
-    all_connect_args = {**extra_connect_args, **connect_args}
-    engine = create_engine(conn_string, connect_args=all_connect_args)
-else:
-    engine = create_engine(
-        f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
-        connect_args=extra_connect_args,
-    )
-
+engine = create_engine(
+    f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
+)
 ```
 
 ## Types
 
-The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/
+The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
 
 |SQLAlchemy Type|Databricks SQL Type|
 |-|-|
-[`BigInteger`](https://docs.sqlalchemy.org/en/
-[`LargeBinary`](https://docs.sqlalchemy.org/en/
-[`Boolean`](https://docs.sqlalchemy.org/en/
-[`Date`](https://docs.sqlalchemy.org/en/
-[`DateTime`](https://docs.sqlalchemy.org/en/
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
-[`
+[`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
+[`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
+[`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
+[`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
+[`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
+[`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
+[`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
+[`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
+[`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
+[`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
+[`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
+[`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
+[`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+[`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+
+In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
+
+- [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
+- [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
+- [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)
 
 
 ### `LargeBinary()` and `PickleType()`
@@ -92,6 +91,24 @@ Support for `CHECK` constraints is not implemented in this dialect. Support is p
 
 SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.
 
+### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
+
+Databricks Runtime provides two datetime-like types: `TIMESTAMP` which is always timezone-aware and `TIMESTAMP_NTZ` which is timezone agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
+
+The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default. So our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly. Just use `DateTime()`.
+
+If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
+
+_Note that SQLAlchemy documentation suggests that you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
+
+```python
+from sqlalchemy import DateTime
+from databricks.sqlalchemy import TIMESTAMP
+
+class SomeModel(Base):
+    some_date_without_timezone = DateTime()
+    some_date_with_timezone = TIMESTAMP()
+```
 
 ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`
 
@@ -116,7 +133,7 @@ class SomeModel(Base):
 
 Identity and generated value support is currently limited in this dialect.
 
-When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/
+When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.
 
 Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.
 
@@ -130,6 +147,10 @@ class SomeModel(Base):
 
 When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).
 
+## Parameters
+
+`databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
+
 ## Usage with pandas
 
 Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
@@ -160,7 +181,7 @@ with engine.connect() as conn:
     df.to_sql('squares',conn)
 ```
 
-## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/
+## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)
 
 Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.
 
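
Pulling the pieces of the new README together (the `DateTime()` → `TIMESTAMP_NTZ` mapping, the timezone-aware `TIMESTAMP`, and the `BigInteger` plus explicit `Identity()` rule for auto-increment), a model consistent with those rules might look like the sketch below. This is illustrative only: `Record`, `records`, and the column names are hypothetical, and it assumes `databricks-sqlalchemy==2.0.0` is installed.

```python
from sqlalchemy import BigInteger, Column, DateTime, Identity, String
from sqlalchemy.orm import declarative_base

from databricks.sqlalchemy import TIMESTAMP  # timezone-aware Databricks type

Base = declarative_base()


class Record(Base):  # hypothetical model, not part of the package
    __tablename__ = "records"

    # Only BIGINT columns can auto-increment on Databricks, so use
    # BigInteger with an explicit Identity() rather than autoincrement=True.
    id = Column(BigInteger, Identity(), primary_key=True)
    name = Column(String)
    created_ntz = Column(DateTime)  # rendered as TIMESTAMP_NTZ
    created_tz = Column(TIMESTAMP)  # rendered as timezone-aware TIMESTAMP
```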
{databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "databricks-sqlalchemy"
-version = "1.0.1"
+version = "2.0.0"
 description = "Databricks SQLAlchemy plugin for Python"
 authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
 license = "Apache-2.0"
@@ -11,7 +11,7 @@ include = ["CHANGELOG.md"]
 [tool.poetry.dependencies]
 python = "^3.8.0"
 databricks_sql_connector_core = { version = ">=4.0.0"}
-sqlalchemy = { version = "
+sqlalchemy = { version = ">=2.0.21" }
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.1.2"
@@ -33,4 +33,11 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.black]
 exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'
-
+#
+[tool.pytest.ini_options]
+markers = {"reviewed" = "Test case has been reviewed by Databricks"}
+minversion = "6.0"
+log_cli = "false"
+log_cli_level = "INFO"
+testpaths = ["tests", "src/databricks/sqlalchemy/test_local"]
+env_files = ["test.env"]
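
The new `[tool.pytest.ini_options]` block registers a `reviewed` marker. A hypothetical test (not from the package) showing how such a marker is applied:

```python
import pytest


@pytest.mark.reviewed  # registered in pyproject.toml, so pytest won't warn about it
def test_example_round_trip():
    assert 1 + 1 == 2
```

Reviewed tests can then be selected with `pytest -m reviewed`.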
databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/_ddl.py

@@ -0,0 +1,100 @@
+import re
+from sqlalchemy.sql import compiler, sqltypes
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
+    """https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html"""
+
+    legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
+
+    def __init__(self, dialect):
+        super().__init__(dialect, initial_quote="`")
+
+
+class DatabricksDDLCompiler(compiler.DDLCompiler):
+    def post_create_table(self, table):
+        post = [" USING DELTA"]
+        if table.comment:
+            comment = self.sql_compiler.render_literal_value(
+                table.comment, sqltypes.String()
+            )
+            post.append("COMMENT " + comment)
+
+        post.append("TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'enabled')")
+        return "\n".join(post)
+
+    def visit_unique_constraint(self, constraint, **kw):
+        logger.warning("Databricks does not support unique constraints")
+        pass
+
+    def visit_check_constraint(self, constraint, **kw):
+        logger.warning("This dialect does not support check constraints")
+        pass
+
+    def visit_identity_column(self, identity, **kw):
+        """When configuring an Identity() with Databricks, only the always option is supported.
+        All other options are ignored.
+
+        Note: IDENTITY columns must always be defined as BIGINT. An exception will be raised if INT is used.
+
+        https://www.databricks.com/blog/2022/08/08/identity-columns-to-generate-surrogate-keys-are-now-available-in-a-lakehouse-near-you.html
+        """
+        text = "GENERATED %s AS IDENTITY" % (
+            "ALWAYS" if identity.always else "BY DEFAULT",
+        )
+        return text
+
+    def visit_set_column_comment(self, create, **kw):
+        return "ALTER TABLE %s ALTER COLUMN %s COMMENT %s" % (
+            self.preparer.format_table(create.element.table),
+            self.preparer.format_column(create.element),
+            self.sql_compiler.render_literal_value(
+                create.element.comment, sqltypes.String()
+            ),
+        )
+
+    def visit_drop_column_comment(self, create, **kw):
+        return "ALTER TABLE %s ALTER COLUMN %s COMMENT ''" % (
+            self.preparer.format_table(create.element.table),
+            self.preparer.format_column(create.element),
+        )
+
+    def get_column_specification(self, column, **kwargs):
+        """
+        Emit a log message if a user attempts to set autoincrement=True on a column.
+        See comments in test_suite.py. We may implement implicit IDENTITY using this
+        feature in the future, similar to the Microsoft SQL Server dialect.
+        """
+        if column is column.table._autoincrement_column or column.autoincrement is True:
+            logger.warning(
+                "Databricks dialect ignores SQLAlchemy's autoincrement semantics. Use explicit Identity() instead."
+            )
+
+        colspec = super().get_column_specification(column, **kwargs)
+        if column.comment is not None:
+            literal = self.sql_compiler.render_literal_value(
+                column.comment, sqltypes.STRINGTYPE
+            )
+            colspec += " COMMENT " + literal
+
+        return colspec
+
+
+class DatabricksStatementCompiler(compiler.SQLCompiler):
+    def limit_clause(self, select, **kw):
+        """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
+        since Databricks SQL doesn't support the latter.
+
+        https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-limit.html
+        """
+        text = ""
+        if select._limit_clause is not None:
+            text += "\n LIMIT " + self.process(select._limit_clause, **kw)
+        if select._offset_clause is not None:
+            if select._limit_clause is None:
+                text += "\n LIMIT ALL"
+            text += " OFFSET " + self.process(select._offset_clause, **kw)
+        return text
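
One way to see the new `DatabricksDDLCompiler` at work without a live connection: compiling a `CreateTable` construct against an engine renders DDL locally, so `post_create_table` should append `USING DELTA`, the table `COMMENT`, and the `TBLPROPERTIES` clause, while `get_column_specification` should inline column comments. A sketch under stated assumptions: `databricks-sqlalchemy` 2.0.0 is installed, the URL values are placeholders, and `demo` is a hypothetical table:

```python
from sqlalchemy import BigInteger, Column, MetaData, String, Table, create_engine
from sqlalchemy.schema import CreateTable

# create_engine() does not open a connection, so placeholder credentials suffice here.
engine = create_engine(
    "databricks://token:<access_token>@<server_hostname>?http_path=<http_path>"
)

table = Table(
    "demo",  # hypothetical table name
    MetaData(),
    Column("id", BigInteger, comment="surrogate key"),
    Column("name", String),
    comment="demo table",
)

# Table and column comments flow through post_create_table / get_column_specification.
print(CreateTable(table).compile(engine))
```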