databricks-sqlalchemy 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. CHANGELOG.md +2 -271
  2. databricks/sqlalchemy/__init__.py +1 -4
  3. databricks/sqlalchemy/dialect/__init__.py +340 -0
  4. databricks/sqlalchemy/dialect/base.py +17 -0
  5. databricks/sqlalchemy/dialect/compiler.py +38 -0
  6. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/METADATA +39 -61
  7. databricks_sqlalchemy-1.0.2.dist-info/RECORD +10 -0
  8. databricks/sqlalchemy/_ddl.py +0 -100
  9. databricks/sqlalchemy/_parse.py +0 -385
  10. databricks/sqlalchemy/_types.py +0 -323
  11. databricks/sqlalchemy/base.py +0 -436
  12. databricks/sqlalchemy/dependency_test/test_dependency.py +0 -22
  13. databricks/sqlalchemy/py.typed +0 -0
  14. databricks/sqlalchemy/pytest.ini +0 -4
  15. databricks/sqlalchemy/requirements.py +0 -249
  16. databricks/sqlalchemy/setup.cfg +0 -4
  17. databricks/sqlalchemy/test/_extra.py +0 -70
  18. databricks/sqlalchemy/test/_future.py +0 -331
  19. databricks/sqlalchemy/test/_regression.py +0 -311
  20. databricks/sqlalchemy/test/_unsupported.py +0 -450
  21. databricks/sqlalchemy/test/conftest.py +0 -13
  22. databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -189
  23. databricks/sqlalchemy/test/overrides/_ctetest.py +0 -33
  24. databricks/sqlalchemy/test/test_suite.py +0 -13
  25. databricks/sqlalchemy/test_local/__init__.py +0 -5
  26. databricks/sqlalchemy/test_local/conftest.py +0 -44
  27. databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  28. databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -543
  29. databricks/sqlalchemy/test_local/test_ddl.py +0 -96
  30. databricks/sqlalchemy/test_local/test_parsing.py +0 -160
  31. databricks/sqlalchemy/test_local/test_types.py +0 -161
  32. databricks_sqlalchemy-1.0.0.dist-info/RECORD +0 -31
  33. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/LICENSE +0 -0
  34. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/WHEEL +0 -0
  35. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/entry_points.txt +0 -0
CHANGELOG.md CHANGED
@@ -1,274 +1,5 @@
  # Release History

- # 3.3.0 (2024-07-18)
+ # 1.0.1

- - Don't retry requests that fail with HTTP code 401 (databricks/databricks-sql-python#408 by @Hodnebo)
- - Remove username/password (aka "basic") auth option (databricks/databricks-sql-python#409 by @jackyhu-db)
- - Refactor CloudFetch handler to fix numerous issues with it (databricks/databricks-sql-python#405 by @kravets-levko)
- - Add option to disable SSL verification for CloudFetch links (databricks/databricks-sql-python#414 by @kravets-levko)
-
- Databricks-managed passwords reached end of life on July 10, 2024. Therefore, Basic auth support was removed from
- the library. See https://docs.databricks.com/en/security/auth-authz/password-deprecation.html
-
- The existing option `_tls_no_verify=True` of `sql.connect(...)` will now also disable SSL cert verification
- (but not the SSL itself) for CloudFetch links. This option should be used as a workaround only, when other ways
- to fix SSL certificate errors didn't work.
-
- # 3.2.0 (2024-06-06)
-
- - Update proxy authentication (databricks/databricks-sql-python#354 by @amir-haroun)
- - Relax `pyarrow` pin (databricks/databricks-sql-python#389 by @dhirschfeld)
- - Fix error logging in OAuth manager (databricks/databricks-sql-python#269 by @susodapop)
- - SQLAlchemy: enable delta.feature.allowColumnDefaults for all tables (databricks/databricks-sql-python#343 by @dhirschfeld)
- - Update `thrift` dependency (databricks/databricks-sql-python#397 by @m1n0)
-
- # 3.1.2 (2024-04-18)
-
- - Remove broken cookie code (#379)
- - Small typing fixes (#382, #384 thanks @wyattscarpenter)
-
- # 3.1.1 (2024-03-19)
-
- - Don't retry requests that fail with code 403 (#373)
- - Assume a default retry-after for 429/503 (#371)
- - Fix boolean literals (#357)
-
- # 3.1.0 (2024-02-16)
-
- - Revert retry-after behavior to be exponential backoff (#349)
- - Support Databricks OAuth on Azure (#351)
- - Support Databricks OAuth on GCP (#338)
-
- # 3.0.3 (2024-02-02)
-
- - Revised docstrings and examples for OAuth (#339)
- - Redact the URL query parameters from the urllib3.connectionpool logs (#341)
-
- # 3.0.2 (2024-01-25)
-
- - SQLAlchemy dialect now supports table and column comments (thanks @cbornet!)
- - Fix: SQLAlchemy dialect now correctly reflects TINYINT types (thanks @TimTheinAtTabs!)
- - Fix: `server_hostname` URIs that included `https://` would raise an exception
- - Other: pinned to `pandas<=2.1` and `urllib3>=1.26` to avoid runtime errors in dbt-databricks (#330)
-
- ## 3.0.1 (2023-12-01)
-
- - Other: updated docstring comment about default parameterization approach (#287)
- - Other: added tests for reading complex types and revised docstrings and type hints (#293)
- - Fix: SQLAlchemy dialect raised DeprecationWarning due to `dbapi` classmethod (#294)
- - Fix: SQLAlchemy dialect could not reflect TIMESTAMP_NTZ columns (#296)
-
- ## 3.0.0 (2023-11-17)
-
- - Remove support for Python 3.7
- - Add support for native parameterized SQL queries. Requires DBR 14.2 and above. See docs/parameters.md for more info.
- - Completely rewritten SQLAlchemy dialect
-   - Adds support for SQLAlchemy >= 2.0 and drops support for SQLAlchemy 1.x
-   - Full e2e test coverage of all supported features
-   - Detailed usage notes in `README.sqlalchemy.md`
-   - Adds support for:
-     - New types: `TIME`, `TIMESTAMP`, `TIMESTAMP_NTZ`, `TINYINT`
-     - `Numeric` type scale and precision, like `Numeric(10,2)`
-     - Reading and writing `PrimaryKeyConstraint` and `ForeignKeyConstraint`
-     - Reading and writing composite keys
-     - Reading and writing from views
-     - Writing `Identity` to tables (i.e. autoincrementing primary keys)
-     - `LIMIT` and `OFFSET` for paging through results
-     - Caching metadata calls
- - Enable cloud fetch by default. To disable, set `use_cloud_fetch=False` when building `databricks.sql.client`.
- - Add integration tests for Databricks UC Volumes ingestion queries
- - Retries:
-   - Add `_retry_max_redirects` config
-   - Set `_enable_v3_retries=True` and warn if users override it
- - Security: bump minimum pyarrow version to 14.0.1 (CVE-2023-47248)
-
- ## 2.9.3 (2023-08-24)
-
- - Fix: Connections failed when urllib3~=1.0.0 is installed (#206)
-
- ## 2.9.2 (2023-08-17)
-
- **Note: this release was yanked from Pypi on 13 September 2023 due to compatibility issues with environments where `urllib3<=2.0.0` were installed. The log changes are incorporated into version 2.9.3 and greater.**
-
- - Other: Add `examples/v3_retries_query_execute.py` (#199)
- - Other: suppress log message when `_enable_v3_retries` is not `True` (#199)
- - Other: make this connector backwards compatible with `urllib3>=1.0.0` (#197)
-
- ## 2.9.1 (2023-08-11)
-
- **Note: this release was yanked from Pypi on 13 September 2023 due to compatibility issues with environments where `urllib3<=2.0.0` were installed.**
-
- - Other: Explicitly pin urllib3 to ^2.0.0 (#191)
-
- ## 2.9.0 (2023-08-10)
-
- - Replace retry handling with DatabricksRetryPolicy. This is disabled by default. To enable, set `_enable_v3_retries=True` when creating `databricks.sql.client` (#182)
- - Other: Fix typo in README quick start example (#186)
- - Other: Add autospec to Client mocks and tidy up `make_request` (#188)
-
- ## 2.8.0 (2023-07-21)
-
- - Add support for Cloud Fetch. Disabled by default. Set `use_cloud_fetch=True` when building `databricks.sql.client` to enable it (#146, #151, #154)
- - SQLAlchemy has_table function now honours schema= argument and adds catalog= argument (#174)
- - SQLAlchemy set non_native_boolean_check_constraint False as it's not supported by Databricks (#120)
- - Fix: Revised SQLAlchemy dialect and examples for compatibility with SQLAlchemy==1.3.x (#173)
- - Fix: oauth would fail if expired credentials appeared in ~/.netrc (#122)
- - Fix: Python HTTP proxies were broken after switch to urllib3 (#158)
- - Other: remove unused import in SQLAlchemy dialect
- - Other: Relax pandas dependency constraint to allow ^2.0.0 (#164)
- - Other: Connector now logs operation handle guids as hexadecimal instead of bytes (#170)
- - Other: test_socket_timeout_user_defined e2e test was broken (#144)
-
- ## 2.7.0 (2023-06-26)
-
- - Fix: connector raised exception when calling close() on a closed Thrift session
- - Improve e2e test development ergonomics
- - Redact logged thrift responses by default
- - Add support for OAuth on Databricks Azure
-
- ## 2.6.2 (2023-06-14)
-
- - Fix: Retry GetOperationStatus requests for http errors
-
- ## 2.6.1 (2023-06-08)
-
- - Fix: http.client would raise a BadStatusLine exception in some cases
-
- ## 2.6.0 (2023-06-07)
-
- - Add support for HTTP 1.1 connections (connection pools)
- - Add a default socket timeout for thrift RPCs
-
- ## 2.5.2 (2023-05-08)
-
- - Fix: SQLAlchemy adapter could not reflect TIMESTAMP or DATETIME columns
- - Other: Relax pandas and alembic dependency specifications
-
- ## 2.5.1 (2023-04-28)
-
- - Other: Relax sqlalchemy required version as it was unecessarily strict.
-
- ## 2.5.0 (2023-04-14)
-
- - Add support for External Auth providers
- - Fix: Python HTTP proxies were broken
- - Other: All Thrift requests that timeout during connection will be automatically retried
-
- ## 2.4.1 (2023-03-21)
-
- - Less strict numpy and pyarrow dependencies
- - Update examples in README to use security best practices
- - Update docstring for client.execute() for clarity
-
- ## 2.4.0 (2023-02-21)
-
- - Improve compatibility when installed alongside other Databricks namespace Python packages
- - Add SQLAlchemy dialect
-
- ## 2.3.0 (2023-01-10)
-
- - Support staging ingestion commands for DBR 12+
-
- ## 2.2.2 (2023-01-03)
-
- - Support custom oauth client id and redirect port
- - Fix: Add none check on \_oauth_persistence in DatabricksOAuthProvider
-
- ## 2.2.1 (2022-11-29)
-
- - Add support for Python 3.11
-
- ## 2.2.0 (2022-11-15)
-
- - Bump thrift version to address https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13949
- - Add support for lz4 compression
-
- ## 2.1.0 (2022-09-30)
-
- - Introduce experimental OAuth support while Bring Your Own IDP is in Public Preview on AWS
- - Add functional examples
-
- ## 2.0.5 (2022-08-23)
-
- - Fix: closing a connection now closes any open cursors from that connection at the server
- - Other: Add project links to pyproject.toml (helpful for visitors from PyPi)
-
- ## 2.0.4 (2022-08-17)
-
- - Add support for Python 3.10
- - Add unit test matrix for supported Python versions
-
- Huge thanks to @dbaxa for contributing this change!
-
- ## 2.0.3 (2022-08-05)
-
- - Add retry logic for `GetOperationStatus` requests that fail with an `OSError`
- - Reorganised code to use Poetry for dependency management.
-
- ## 2.0.2 (2022-05-04)
-
- - Better exception handling in automatic connection close
-
- ## 2.0.1 (2022-04-21)
-
- - Fixed Pandas dependency in setup.cfg to be >= 1.2.0
-
- ## 2.0.0 (2022-04-19)
-
- - Initial stable release of V2
- - Added better support for complex types, so that in Databricks runtime 10.3+, Arrays, Maps and Structs will get
-   deserialized as lists, lists of tuples and dicts, respectively.
- - Changed the name of the metadata arg to http_headers
-
- ## 2.0.b2 (2022-04-04)
-
- - Change import of collections.Iterable to collections.abc.Iterable to make the library compatible with Python 3.10
- - Fixed bug with .tables method so that .tables works as expected with Unity-Catalog enabled endpoints
-
- ## 2.0.0b1 (2022-03-04)
-
- - Fix packaging issue (dependencies were not being installed properly)
- - Fetching timestamp results will now return aware instead of naive timestamps
- - The client will now default to using simplified error messages
-
- ## 2.0.0b (2022-02-08)
-
- - Initial beta release of V2. V2 is an internal re-write of large parts of the connector to use Databricks edge features. All public APIs from V1 remain.
- - Added Unity Catalog support (pass catalog and / or schema key word args to the .connect method to select initial schema and catalog)
-
- ---
-
- **Note**: The code for versions prior to `v2.0.0b` is not contained in this repository. The below entries are included for reference only.
-
- ---
-
- ## 1.0.0 (2022-01-20)
-
- - Add operations for retrieving metadata
- - Add the ability to access columns by name on result rows
- - Add the ability to provide configuration settings on connect
-
- ## 0.9.4 (2022-01-10)
-
- - Improved logging and error messages.
-
- ## 0.9.3 (2021-12-08)
-
- - Add retries for 429 and 503 HTTP responses.
-
- ## 0.9.2 (2021-12-02)
-
- - (Bug fix) Increased Thrift requirement from 0.10.0 to 0.13.0 as 0.10.0 was in fact incompatible
- - (Bug fix) Fixed error message after query execution failed -SQLSTATE and Error message were misplaced
-
- ## 0.9.1 (2021-09-01)
-
- - Public Preview release, Experimental tag removed
- - minor updates in internal build/packaging
- - no functional changes
-
- ## 0.9.0 (2021-08-04)
-
- - initial (Experimental) release of pyhive-forked connector
- - Python DBAPI 2.0 (PEP-0249), thrift based
- - see docs for more info: https://docs.databricks.com/dev-tools/python-sql-connector.html
+ - This is databricks-sqlalchemy plugin based on sqlalchemy v1 and has all the databricks-sql-python v2.9.6 needed sqlalchemy features
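The new changelog entry above describes a SQLAlchemy v1-based dialect. As a rough usage sketch (not part of the diff): assuming the wheel's `entry_points.txt` registers the dialect, an engine can be built from the URI format documented in `dialect/__init__.py` below. Hostname, token, and `http_path` values here are placeholders.

```python
# Rough usage sketch (not from the diff). All credentials and paths are
# placeholders; the URI shape follows the comment in create_connect_args()
# in databricks/sqlalchemy/dialect/__init__.py.
from sqlalchemy import create_engine

engine = create_engine(
    "databricks+thrift://token:dapi-my-token@example.cloud.databricks.com"
    "?http_path=/sql/1.0/warehouses/abc123&catalog=main&schema=default"
)

with engine.connect() as conn:
    rows = conn.execute("SELECT 1").fetchall()  # raw SQL strings work on SQLAlchemy 1.x
```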
databricks/sqlalchemy/__init__.py CHANGED
@@ -1,4 +1 @@
- from databricks.sqlalchemy.base import DatabricksDialect
- from databricks.sqlalchemy._types import TINYINT, TIMESTAMP, TIMESTAMP_NTZ
-
- __all__ = ["TINYINT", "TIMESTAMP", "TIMESTAMP_NTZ"]
+ from databricks.sqlalchemy.dialect import DatabricksDialect
databricks/sqlalchemy/dialect/__init__.py ADDED
@@ -0,0 +1,340 @@
+ """This module's layout loosely follows example of SQLAlchemy's postgres dialect
+ """
+
+ import decimal, re, datetime
+ from dateutil.parser import parse
+
+ import sqlalchemy
+ from sqlalchemy import types, event
+ from sqlalchemy.engine import default, Engine
+ from sqlalchemy.exc import DatabaseError, SQLAlchemyError
+ from sqlalchemy.engine import reflection
+
+ from databricks import sql
+
+
+ from databricks.sqlalchemy.dialect.base import (
+     DatabricksDDLCompiler,
+     DatabricksIdentifierPreparer,
+ )
+ from databricks.sqlalchemy.dialect.compiler import DatabricksTypeCompiler
+
+ try:
+     import alembic
+ except ImportError:
+     pass
+ else:
+     from alembic.ddl import DefaultImpl
+
+     class DatabricksImpl(DefaultImpl):
+         __dialect__ = "databricks"
+
+
+ class DatabricksDecimal(types.TypeDecorator):
+     """Translates strings to decimals"""
+
+     impl = types.DECIMAL
+
+     def process_result_value(self, value, dialect):
+         if value is not None:
+             return decimal.Decimal(value)
+         else:
+             return None
+
+
+ class DatabricksTimestamp(types.TypeDecorator):
+     """Translates timestamp strings to datetime objects"""
+
+     impl = types.TIMESTAMP
+
+     def process_result_value(self, value, dialect):
+         return value
+
+     def adapt(self, impltype, **kwargs):
+         return self.impl
+
+
+ class DatabricksDate(types.TypeDecorator):
+     """Translates date strings to date objects"""
+
+     impl = types.DATE
+
+     def process_result_value(self, value, dialect):
+         return value
+
+     def adapt(self, impltype, **kwargs):
+         return self.impl
+
+
+ class DatabricksDialect(default.DefaultDialect):
+     """This dialect implements only those methods required to pass our e2e tests"""
+
+     # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect
+     name: str = "databricks"
+     driver: str = "databricks-sql-python"
+     default_schema_name: str = "default"
+
+     preparer = DatabricksIdentifierPreparer  # type: ignore
+     type_compiler = DatabricksTypeCompiler
+     ddl_compiler = DatabricksDDLCompiler
+     supports_statement_cache: bool = True
+     supports_multivalues_insert: bool = True
+     supports_native_decimal: bool = True
+     supports_sane_rowcount: bool = False
+     non_native_boolean_check_constraint: bool = False
+
+     @classmethod
+     def dbapi(cls):
+         return sql
+
+     def create_connect_args(self, url):
+         # TODO: can schema be provided after HOST?
+         # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***
+
+         kwargs = {
+             "server_hostname": url.host,
+             "access_token": url.password,
+             "http_path": url.query.get("http_path"),
+             "catalog": url.query.get("catalog"),
+             "schema": url.query.get("schema"),
+         }
+
+         self.schema = kwargs["schema"]
+         self.catalog = kwargs["catalog"]
+
+         return [], kwargs
+
+     def get_columns(self, connection, table_name, schema=None, **kwargs):
+         """Return information about columns in `table_name`.
+
+         Given a :class:`_engine.Connection`, a string
+         `table_name`, and an optional string `schema`, return column
+         information as a list of dictionaries with these keys:
+
+         name
+           the column's name
+
+         type
+           [sqlalchemy.types#TypeEngine]
+
+         nullable
+           boolean
+
+         default
+           the column's default value
+
+         autoincrement
+           boolean
+
+         sequence
+           a dictionary of the form
+               {'name' : str, 'start' :int, 'increment': int, 'minvalue': int,
+                'maxvalue': int, 'nominvalue': bool, 'nomaxvalue': bool,
+                'cycle': bool, 'cache': int, 'order': bool}
+
+         Additional column attributes may be present.
+         """
+
+         _type_map = {
+             "boolean": types.Boolean,
+             "smallint": types.SmallInteger,
+             "int": types.Integer,
+             "bigint": types.BigInteger,
+             "float": types.Float,
+             "double": types.Float,
+             "string": types.String,
+             "varchar": types.String,
+             "char": types.String,
+             "binary": types.String,
+             "array": types.String,
+             "map": types.String,
+             "struct": types.String,
+             "uniontype": types.String,
+             "decimal": DatabricksDecimal,
+             "timestamp": DatabricksTimestamp,
+             "date": DatabricksDate,
+         }
+
+         with self.get_connection_cursor(connection) as cur:
+             resp = cur.columns(
+                 catalog_name=self.catalog,
+                 schema_name=schema or self.schema,
+                 table_name=table_name,
+             ).fetchall()
+
+         columns = []
+
+         for col in resp:
+
+             # Taken from PyHive. This removes added type info from decimals and maps
+             _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0)
+             this_column = {
+                 "name": col.COLUMN_NAME,
+                 "type": _type_map[_col_type.lower()],
+                 "nullable": bool(col.NULLABLE),
+                 "default": col.COLUMN_DEF,
+                 "autoincrement": False if col.IS_AUTO_INCREMENT == "NO" else True,
+             }
+             columns.append(this_column)
+
+         return columns
+
+     def get_pk_constraint(self, connection, table_name, schema=None, **kw):
+         """Return information about the primary key constraint on
+         table_name`.
+
+         Given a :class:`_engine.Connection`, a string
+         `table_name`, and an optional string `schema`, return primary
+         key information as a dictionary with these keys:
+
+         constrained_columns
+           a list of column names that make up the primary key
+
+         name
+           optional name of the primary key constraint.
+
+         """
+         # TODO: implement this behaviour
+         return {"constrained_columns": []}
+
+     def get_foreign_keys(self, connection, table_name, schema=None, **kw):
+         """Return information about foreign_keys in `table_name`.
+
+         Given a :class:`_engine.Connection`, a string
+         `table_name`, and an optional string `schema`, return foreign
+         key information as a list of dicts with these keys:
+
+         name
+           the constraint's name
+
+         constrained_columns
+           a list of column names that make up the foreign key
+
+         referred_schema
+           the name of the referred schema
+
+         referred_table
+           the name of the referred table
+
+         referred_columns
+           a list of column names in the referred table that correspond to
+           constrained_columns
+         """
+         # TODO: Implement this behaviour
+         return []
+
+     def get_indexes(self, connection, table_name, schema=None, **kw):
+         """Return information about indexes in `table_name`.
+
+         Given a :class:`_engine.Connection`, a string
+         `table_name` and an optional string `schema`, return index
+         information as a list of dictionaries with these keys:
+
+         name
+           the index's name
+
+         column_names
+           list of column names in order
+
+         unique
+           boolean
+         """
+         # TODO: Implement this behaviour
+         return []
+
+     def get_table_names(self, connection, schema=None, **kwargs):
+         TABLE_NAME = 1
+         with self.get_connection_cursor(connection) as cur:
+             sql_str = "SHOW TABLES FROM {}".format(
+                 ".".join([self.catalog, schema or self.schema])
+             )
+             data = cur.execute(sql_str).fetchall()
+             _tables = [i[TABLE_NAME] for i in data]
+
+         return _tables
+
+     def get_view_names(self, connection, schema=None, **kwargs):
+         VIEW_NAME = 1
+         with self.get_connection_cursor(connection) as cur:
+             sql_str = "SHOW VIEWS FROM {}".format(
+                 ".".join([self.catalog, schema or self.schema])
+             )
+             data = cur.execute(sql_str).fetchall()
+             _tables = [i[VIEW_NAME] for i in data]
+
+         return _tables
+
+     def do_rollback(self, dbapi_connection):
+         # Databricks SQL Does not support transactions
+         pass
+
+     def has_table(
+         self, connection, table_name, schema=None, catalog=None, **kwargs
+     ) -> bool:
+         """SQLAlchemy docstrings say dialect providers must implement this method"""
+
+         _schema = schema or self.schema
+         _catalog = catalog or self.catalog
+
+         # DBR >12.x uses underscores in error messages
+         DBR_LTE_12_NOT_FOUND_STRING = "Table or view not found"
+         DBR_GT_12_NOT_FOUND_STRING = "TABLE_OR_VIEW_NOT_FOUND"
+
+         try:
+             res = connection.execute(
+                 f"DESCRIBE TABLE {_catalog}.{_schema}.{table_name}"
+             )
+             return True
+         except DatabaseError as e:
+             if DBR_GT_12_NOT_FOUND_STRING in str(
+                 e
+             ) or DBR_LTE_12_NOT_FOUND_STRING in str(e):
+                 return False
+             else:
+                 raise e
+
+     def get_connection_cursor(self, connection):
+         """Added for backwards compatibility with 1.3.x"""
+         if hasattr(connection, "_dbapi_connection"):
+             return connection._dbapi_connection.dbapi_connection.cursor()
+         elif hasattr(connection, "raw_connection"):
+             return connection.raw_connection().cursor()
+         elif hasattr(connection, "connection"):
+             return connection.connection.cursor()
+
+         raise SQLAlchemyError(
+             "Databricks dialect can't obtain a cursor context manager from the dbapi"
+         )
+
+     @reflection.cache
+     def get_schema_names(self, connection, **kw):
+         # Equivalent to SHOW DATABASES
+
+         # TODO: replace with call to cursor.schemas() once its performance matches raw SQL
+         return [row[0] for row in connection.execute("SHOW SCHEMAS")]
+
+
+ @event.listens_for(Engine, "do_connect")
+ def receive_do_connect(dialect, conn_rec, cargs, cparams):
+     """Helpful for DS on traffic from clients using SQLAlchemy in particular"""
+
+     # Ignore connect invocations that don't use our dialect
+     if not dialect.name == "databricks":
+         return
+
+     if "_user_agent_entry" in cparams:
+         new_user_agent = f"sqlalchemy + {cparams['_user_agent_entry']}"
+     else:
+         new_user_agent = "sqlalchemy"
+
+     cparams["_user_agent_entry"] = new_user_agent
+
+     if sqlalchemy.__version__.startswith("1.3"):
+         # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
+         # These should be passed in as connect_args when building the Engine
+
+         if "schema" in cparams:
+             dialect.schema = cparams["schema"]
+
+         if "catalog" in cparams:
+             dialect.catalog = cparams["catalog"]
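The `receive_do_connect` listener above notes that SQLAlchemy 1.3.x cannot parse `http_path`, `catalog`, and `schema` out of the connection string and that they should be supplied via `connect_args`. A minimal sketch of that workaround follows; hostname, token, and path values are placeholders.

```python
# Sketch of the SQLAlchemy 1.3.x workaround described in the comments above:
# supply http_path, catalog, and schema via connect_args rather than the URI
# query string. All values below are placeholders.
from sqlalchemy import create_engine

engine = create_engine(
    "databricks+thrift://token:dapi-my-token@example.cloud.databricks.com",
    connect_args={
        "http_path": "/sql/1.0/warehouses/abc123",
        "catalog": "main",
        "schema": "default",
    },
)
```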
databricks/sqlalchemy/dialect/base.py ADDED
@@ -0,0 +1,17 @@
+ import re
+ from sqlalchemy.sql import compiler
+
+
+ class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
+     # SparkSQL identifier specification:
+     # ref: https://spark.apache.org/docs/latest/sql-ref-identifier.html
+
+     legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
+
+     def __init__(self, dialect):
+         super().__init__(dialect, initial_quote="`")
+
+
+ class DatabricksDDLCompiler(compiler.DDLCompiler):
+     def post_create_table(self, table):
+         return " USING DELTA"
databricks/sqlalchemy/dialect/compiler.py ADDED
@@ -0,0 +1,38 @@
+ from sqlalchemy.sql import compiler
+
+
+ class DatabricksTypeCompiler(compiler.GenericTypeCompiler):
+     """Originally forked from pyhive"""
+
+     def visit_INTEGER(self, type_):
+         return "INT"
+
+     def visit_NUMERIC(self, type_):
+         return "DECIMAL"
+
+     def visit_CHAR(self, type_):
+         return "STRING"
+
+     def visit_VARCHAR(self, type_):
+         return "STRING"
+
+     def visit_NCHAR(self, type_):
+         return "STRING"
+
+     def visit_TEXT(self, type_):
+         return "STRING"
+
+     def visit_CLOB(self, type_):
+         return "STRING"
+
+     def visit_BLOB(self, type_):
+         return "BINARY"
+
+     def visit_TIME(self, type_):
+         return "TIMESTAMP"
+
+     def visit_DATE(self, type_):
+         return "DATE"
+
+     def visit_DATETIME(self, type_):
+         return "TIMESTAMP"