databricks-sqlalchemy 1.0.1__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. databricks_sqlalchemy-2.0.0/CHANGELOG.md +5 -0
  2. {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/PKG-INFO +60 -39
  3. {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/README.md +58 -37
  4. {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/pyproject.toml +10 -3
  5. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/__init__.py +4 -0
  6. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/_ddl.py +100 -0
  7. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/_parse.py +385 -0
  8. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/_types.py +323 -0
  9. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/base.py +436 -0
  10. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
  11. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/py.typed +0 -0
  12. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/pytest.ini +4 -0
  13. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/requirements.py +249 -0
  14. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/setup.cfg +4 -0
  15. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_extra.py +70 -0
  16. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_future.py +331 -0
  17. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_regression.py +311 -0
  18. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/_unsupported.py +450 -0
  19. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/conftest.py +13 -0
  20. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
  21. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
  22. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test/test_suite.py +13 -0
  23. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/__init__.py +5 -0
  24. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/conftest.py +44 -0
  25. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  26. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
  27. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/test_ddl.py +96 -0
  28. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/test_parsing.py +160 -0
  29. databricks_sqlalchemy-2.0.0/src/databricks/sqlalchemy/test_local/test_types.py +161 -0
  30. databricks_sqlalchemy-1.0.1/CHANGELOG.md +0 -5
  31. databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/__init__.py +0 -1
  32. databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/dialect/__init__.py +0 -340
  33. databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/dialect/base.py +0 -17
  34. databricks_sqlalchemy-1.0.1/src/databricks/sqlalchemy/dialect/compiler.py +0 -38
  35. {databricks_sqlalchemy-1.0.1 → databricks_sqlalchemy-2.0.0}/LICENSE +0 -0
@@ -0,0 +1,5 @@
+ # Release History
+
+ # 2.0.0 (2021-10-23)
+
+ - databricks-sqlalchemy plugin for databricks-sql-python, built on SQLAlchemy v2; it provides all of the SQLAlchemy features that remain compatible with the connector up to databricks-sql-python v3.3.0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: databricks-sqlalchemy
- Version: 1.0.1
+ Version: 2.0.0
  Summary: Databricks SQLAlchemy plugin for Python
  License: Apache-2.0
  Author: Databricks
@@ -14,14 +14,17 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Requires-Dist: databricks_sql_connector_core (>=4.0.0)
- Requires-Dist: sqlalchemy (>=1.3.24,<2.0.0)
+ Requires-Dist: sqlalchemy (>=2.0.21)
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sqlalchemy/issues
  Project-URL: Homepage, https://github.com/databricks/databricks-sqlalchemy
  Description-Content-Type: text/markdown

- ## Databricks dialect for SQLALchemy 1.0
+ ## Databricks dialect for SQLAlchemy 2.0

- The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `example.py`.
+ The Databricks dialect for SQLAlchemy serves as a bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
+
+ ## Usage with SQLAlchemy < 2.0
+ A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, pin `databricks-sql-connector^2.4.0`.


  ## Installation
@@ -29,7 +32,7 @@ The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](http
  To install the dialect and its dependencies:

  ```shell
- pip install databricks-sqlalchemy~=1.0
+ pip install databricks-sqlalchemy
  ```

  If you also plan to use `alembic`, you can alternatively run:
@@ -62,45 +65,41 @@ access_token = os.getenv("DATABRICKS_TOKEN")
  catalog = os.getenv("DATABRICKS_CATALOG")
  schema = os.getenv("DATABRICKS_SCHEMA")

- if sqlalchemy.__version__.startswith("1.3"):
-     # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
-     # Pass these in as connect_args instead
-
-     conn_string = f"databricks://token:{access_token}@{host}"
-     connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
-     all_connect_args = {**extra_connect_args, **connect_args}
-     engine = create_engine(conn_string, connect_args=all_connect_args)
- else:
-     engine = create_engine(
-         f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
-         connect_args=extra_connect_args,
-     )
-
+ engine = create_engine(
+     f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
+ )
  ```
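
A quick smoke test for the engine built above (a sketch; the query is illustrative only):

```python
from sqlalchemy import text

# `engine` as constructed in the snippet above
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())  # prints 1 if the warehouse is reachable
```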

  ## Types

- The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/13/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/13/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
+ The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.

  |SQLAlchemy Type|Databricks SQL Type|
  |-|-|
- [`BigInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
- [`LargeBinary`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
- [`Boolean`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
- [`Date`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
- [`DateTime`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
- [`Enum`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
- [`Float`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
- [`Integer`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
- [`Numeric`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
- [`PickleType`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
- [`SmallInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
- [`String`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Text`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Time`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Unicode`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`UnicodeText`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Uuid`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+ [`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
+ [`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
+ [`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
+ [`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
+ [`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
+ [`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
+ [`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
+ [`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
+ [`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
+ [`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
+ [`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
+ [`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
+ [`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+
+ In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
+
+ - [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
+ - [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
+ - [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)
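
A minimal sketch of a declarative model using these Databricks-specific types (the model and column names here are hypothetical, and `DeclarativeBase` assumes SQLAlchemy 2.x):

```python
from sqlalchemy import BigInteger, Column
from sqlalchemy.orm import DeclarativeBase

from databricks.sqlalchemy import TINYINT, TIMESTAMP, TIMESTAMP_NTZ


class Base(DeclarativeBase):
    pass


class Event(Base):
    __tablename__ = "events"

    id = Column(BigInteger, primary_key=True)
    severity = Column(TINYINT)             # 1-byte integer; renders as TINYINT
    created_at = Column(TIMESTAMP)         # timezone-aware TIMESTAMP
    scheduled_for = Column(TIMESTAMP_NTZ)  # timezone-naive TIMESTAMP_NTZ
```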


  ### `LargeBinary()` and `PickleType()`
@@ -113,6 +112,24 @@ Support for `CHECK` constraints is not implemented in this dialect. Support is p

  SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.

+ ### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
+
+ Databricks Runtime provides two datetime-like types: `TIMESTAMP`, which is always timezone-aware, and `TIMESTAMP_NTZ`, which is timezone-agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
+
+ The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default, so our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly. Just use `DateTime()`.
+
+ If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
+
+ _Note that the SQLAlchemy documentation suggests that you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
+
+ ```python
+ from sqlalchemy import Column, DateTime
+ from databricks.sqlalchemy import TIMESTAMP
+
+ class SomeModel(Base):
+     some_date_without_timezone = Column(DateTime)
+     some_date_with_timezone = Column(TIMESTAMP)
+ ```

  ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`

@@ -137,7 +154,7 @@ class SomeModel(Base):

  Identity and generated value support is currently limited in this dialect.

- When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/13/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/13/core/defaults.html#identity-ddl) instead.
+ When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model, you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.

  Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.
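
A minimal sketch of this pattern, assuming a declarative `Base` (the model name is hypothetical):

```python
from sqlalchemy import BigInteger, Column, Identity

class SomeModel(Base):
    # Auto-increment requires BIGINT plus an explicit Identity();
    # the autoincrement argument on its own is ignored by this dialect.
    id = Column(BigInteger, Identity(always=True), primary_key=True)
```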
@@ -151,6 +168,10 @@ class SomeModel(Base):

  When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).

+ ## Parameters
+
+ `databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
+
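
A short sketch of the "named" paramstyle with SQLAlchemy's `text()` construct (the table and parameter names are illustrative; `engine` is assumed to be configured as shown earlier):

```python
from sqlalchemy import text

with engine.connect() as conn:
    rows = conn.execute(
        text("SELECT * FROM trips WHERE pickup_zip = :zip"),  # ":zip" is a named parameter
        {"zip": 10001},
    ).fetchall()
```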

  ## Usage with pandas

  Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
@@ -181,7 +202,7 @@ with engine.connect() as conn:
      df.to_sql('squares',conn)
  ```

- ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#defining-foreign-keys)
+ ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)

  Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.

@@ -1,6 +1,9 @@
- ## Databricks dialect for SQLALchemy 1.0
+ ## Databricks dialect for SQLAlchemy 2.0

- The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `example.py`.
+ The Databricks dialect for SQLAlchemy serves as a bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
+
+ ## Usage with SQLAlchemy < 2.0
+ A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, pin `databricks-sql-connector^2.4.0`.


  ## Installation
@@ -8,7 +11,7 @@ The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](http
  To install the dialect and its dependencies:

  ```shell
- pip install databricks-sqlalchemy~=1.0
+ pip install databricks-sqlalchemy
  ```

  If you also plan to use `alembic`, you can alternatively run:
@@ -41,45 +44,41 @@ access_token = os.getenv("DATABRICKS_TOKEN")
  catalog = os.getenv("DATABRICKS_CATALOG")
  schema = os.getenv("DATABRICKS_SCHEMA")

- if sqlalchemy.__version__.startswith("1.3"):
-     # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
-     # Pass these in as connect_args instead
-
-     conn_string = f"databricks://token:{access_token}@{host}"
-     connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
-     all_connect_args = {**extra_connect_args, **connect_args}
-     engine = create_engine(conn_string, connect_args=all_connect_args)
- else:
-     engine = create_engine(
-         f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
-         connect_args=extra_connect_args,
-     )
-
+ engine = create_engine(
+     f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
+ )
  ```

  ## Types

- The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/13/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/13/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
+ The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.

  |SQLAlchemy Type|Databricks SQL Type|
  |-|-|
- [`BigInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
- [`LargeBinary`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
- [`Boolean`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
- [`Date`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
- [`DateTime`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
- [`Enum`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
- [`Float`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
- [`Integer`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
- [`Numeric`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
- [`PickleType`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
- [`SmallInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
- [`String`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Text`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Time`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Unicode`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`UnicodeText`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Uuid`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+ [`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
+ [`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
+ [`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
+ [`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
+ [`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
+ [`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
+ [`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
+ [`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
+ [`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
+ [`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
+ [`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
+ [`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
+ [`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+
+ In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
+
+ - [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
+ - [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
+ - [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)


  ### `LargeBinary()` and `PickleType()`
@@ -92,6 +91,24 @@ Support for `CHECK` constraints is not implemented in this dialect. Support is p

  SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.

+ ### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
+
+ Databricks Runtime provides two datetime-like types: `TIMESTAMP`, which is always timezone-aware, and `TIMESTAMP_NTZ`, which is timezone-agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
+
+ The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default, so our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly. Just use `DateTime()`.
+
+ If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
+
+ _Note that the SQLAlchemy documentation suggests that you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
+
+ ```python
+ from sqlalchemy import Column, DateTime
+ from databricks.sqlalchemy import TIMESTAMP
+
+ class SomeModel(Base):
+     some_date_without_timezone = Column(DateTime)
+     some_date_with_timezone = Column(TIMESTAMP)
+ ```

  ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`

@@ -116,7 +133,7 @@ class SomeModel(Base):

  Identity and generated value support is currently limited in this dialect.

- When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/13/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/13/core/defaults.html#identity-ddl) instead.
+ When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model, you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.

  Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.

@@ -130,6 +147,10 @@ class SomeModel(Base):

  When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).

+ ## Parameters
+
+ `databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
+

  ## Usage with pandas

  Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
@@ -160,7 +181,7 @@ with engine.connect() as conn:
      df.to_sql('squares',conn)
  ```

- ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#defining-foreign-keys)
+ ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)

  Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "databricks-sqlalchemy"
- version = "1.0.1"
+ version = "2.0.0"
  description = "Databricks SQLAlchemy plugin for Python"
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
  license = "Apache-2.0"
@@ -11,7 +11,7 @@ include = ["CHANGELOG.md"]
  [tool.poetry.dependencies]
  python = "^3.8.0"
  databricks_sql_connector_core = { version = ">=4.0.0"}
- sqlalchemy = { version = "^1.3.24" }
+ sqlalchemy = { version = ">=2.0.21" }

  [tool.poetry.dev-dependencies]
  pytest = "^7.1.2"
@@ -33,4 +33,11 @@ build-backend = "poetry.core.masonry.api"

  [tool.black]
  exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'
-
+ #
+ [tool.pytest.ini_options]
+ markers = {"reviewed" = "Test case has been reviewed by Databricks"}
+ minversion = "6.0"
+ log_cli = "false"
+ log_cli_level = "INFO"
+ testpaths = ["tests", "src/databricks/sqlalchemy/test_local"]
+ env_files = ["test.env"]
@@ -0,0 +1,4 @@
+ from databricks.sqlalchemy.base import DatabricksDialect
+ from databricks.sqlalchemy._types import TINYINT, TIMESTAMP, TIMESTAMP_NTZ
+
+ __all__ = ["TINYINT", "TIMESTAMP", "TIMESTAMP_NTZ"]
@@ -0,0 +1,100 @@
+ import re
+ from sqlalchemy.sql import compiler, sqltypes
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+
+ class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
+     """https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html"""
+
+     legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
+
+     def __init__(self, dialect):
+         super().__init__(dialect, initial_quote="`")
+
+
+ class DatabricksDDLCompiler(compiler.DDLCompiler):
+     def post_create_table(self, table):
+         post = [" USING DELTA"]
+         if table.comment:
+             comment = self.sql_compiler.render_literal_value(
+                 table.comment, sqltypes.String()
+             )
+             post.append("COMMENT " + comment)
+
+         post.append("TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'enabled')")
+         return "\n".join(post)
+
+     def visit_unique_constraint(self, constraint, **kw):
+         logger.warning("Databricks does not support unique constraints")
+         pass
+
+     def visit_check_constraint(self, constraint, **kw):
+         logger.warning("This dialect does not support check constraints")
+         pass
+
+     def visit_identity_column(self, identity, **kw):
+         """When configuring an Identity() with Databricks, only the always option is supported.
+         All other options are ignored.
+
+         Note: IDENTITY columns must always be defined as BIGINT. An exception will be raised if INT is used.
+
+         https://www.databricks.com/blog/2022/08/08/identity-columns-to-generate-surrogate-keys-are-now-available-in-a-lakehouse-near-you.html
+         """
+         text = "GENERATED %s AS IDENTITY" % (
+             "ALWAYS" if identity.always else "BY DEFAULT",
+         )
+         return text
+
+     def visit_set_column_comment(self, create, **kw):
+         return "ALTER TABLE %s ALTER COLUMN %s COMMENT %s" % (
+             self.preparer.format_table(create.element.table),
+             self.preparer.format_column(create.element),
+             self.sql_compiler.render_literal_value(
+                 create.element.comment, sqltypes.String()
+             ),
+         )
+
+     def visit_drop_column_comment(self, create, **kw):
+         return "ALTER TABLE %s ALTER COLUMN %s COMMENT ''" % (
+             self.preparer.format_table(create.element.table),
+             self.preparer.format_column(create.element),
+         )
+
+     def get_column_specification(self, column, **kwargs):
+         """
+         Emit a log message if a user attempts to set autoincrement=True on a column.
+         See comments in test_suite.py. We may implement implicit IDENTITY using this
+         feature in the future, similar to the Microsoft SQL Server dialect.
+         """
+         if column is column.table._autoincrement_column or column.autoincrement is True:
+             logger.warning(
+                 "Databricks dialect ignores SQLAlchemy's autoincrement semantics. Use explicit Identity() instead."
+             )
+
+         colspec = super().get_column_specification(column, **kwargs)
+         if column.comment is not None:
+             literal = self.sql_compiler.render_literal_value(
+                 column.comment, sqltypes.STRINGTYPE
+             )
+             colspec += " COMMENT " + literal
+
+         return colspec
+
+
+ class DatabricksStatementCompiler(compiler.SQLCompiler):
+     def limit_clause(self, select, **kw):
+         """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
+         since Databricks SQL doesn't support the latter.
+
+         https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-limit.html
+         """
+         text = ""
+         if select._limit_clause is not None:
+             text += "\n LIMIT " + self.process(select._limit_clause, **kw)
+         if select._offset_clause is not None:
+             if select._limit_clause is None:
+                 text += "\n LIMIT ALL"
+             text += " OFFSET " + self.process(select._offset_clause, **kw)
+         return text
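
A sketch of what these compiler hooks emit, assuming the dialect and its dependencies are importable (the printed SQL is paraphrased, not captured from a live run):

```python
from sqlalchemy import Column, Integer, MetaData, Table, select

from databricks.sqlalchemy.base import DatabricksDialect

metadata = MetaData()
items = Table("items", metadata, Column("id", Integer))

# OFFSET with no LIMIT: limit_clause() renders LIMIT ALL rather than LIMIT -1.
stmt = select(items.c.id).offset(10)
print(stmt.compile(dialect=DatabricksDialect(), compile_kwargs={"literal_binds": True}))
# Roughly:
#   SELECT items.id
#   FROM items
#    LIMIT ALL OFFSET 10
```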