databricks-sqlalchemy 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CHANGELOG.md +2 -2
- databricks/sqlalchemy/__init__.py +4 -1
- databricks/sqlalchemy/_ddl.py +100 -0
- databricks/sqlalchemy/_parse.py +385 -0
- databricks/sqlalchemy/_types.py +323 -0
- databricks/sqlalchemy/base.py +436 -0
- databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
- databricks/sqlalchemy/py.typed +0 -0
- databricks/sqlalchemy/pytest.ini +4 -0
- databricks/sqlalchemy/requirements.py +249 -0
- databricks/sqlalchemy/setup.cfg +4 -0
- databricks/sqlalchemy/test/_extra.py +70 -0
- databricks/sqlalchemy/test/_future.py +331 -0
- databricks/sqlalchemy/test/_regression.py +311 -0
- databricks/sqlalchemy/test/_unsupported.py +450 -0
- databricks/sqlalchemy/test/conftest.py +13 -0
- databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
- databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
- databricks/sqlalchemy/test/test_suite.py +13 -0
- databricks/sqlalchemy/test_local/__init__.py +5 -0
- databricks/sqlalchemy/test_local/conftest.py +44 -0
- databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
- databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
- databricks/sqlalchemy/test_local/test_ddl.py +96 -0
- databricks/sqlalchemy/test_local/test_parsing.py +160 -0
- databricks/sqlalchemy/test_local/test_types.py +161 -0
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/METADATA +60 -39
- databricks_sqlalchemy-2.0.0.dist-info/RECORD +31 -0
- databricks/sqlalchemy/dialect/__init__.py +0 -340
- databricks/sqlalchemy/dialect/base.py +0 -17
- databricks/sqlalchemy/dialect/compiler.py +0 -38
- databricks_sqlalchemy-1.0.1.dist-info/RECORD +0 -10
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/LICENSE +0 -0
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/WHEEL +0 -0
- {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/entry_points.txt +0 -0
@@ -1,340 +0,0 @@
|
|
1
|
-
"""This module's layout loosely follows the example of SQLAlchemy's PostgreSQL dialect
|
2
|
-
"""
|
3
|
-
|
4
|
-
import decimal, re, datetime
|
5
|
-
from dateutil.parser import parse
|
6
|
-
|
7
|
-
import sqlalchemy
|
8
|
-
from sqlalchemy import types, event
|
9
|
-
from sqlalchemy.engine import default, Engine
|
10
|
-
from sqlalchemy.exc import DatabaseError, SQLAlchemyError
|
11
|
-
from sqlalchemy.engine import reflection
|
12
|
-
|
13
|
-
from databricks import sql
|
14
|
-
|
15
|
-
|
16
|
-
from databricks.sqlalchemy.dialect.base import (
|
17
|
-
DatabricksDDLCompiler,
|
18
|
-
DatabricksIdentifierPreparer,
|
19
|
-
)
|
20
|
-
from databricks.sqlalchemy.dialect.compiler import DatabricksTypeCompiler
|
21
|
-
|
22
|
-
# Alembic is an optional dependency. The implementation class below is only
# defined when ``import alembic`` succeeds; otherwise this section is a no-op.
try:
    import alembic
except ImportError:
    pass
else:
    from alembic.ddl import DefaultImpl

    class DatabricksImpl(DefaultImpl):
        """Alembic ``DefaultImpl`` subclass tagged with the "databricks" dialect name.

        NOTE(review): presumably Alembic uses ``__dialect__`` to select this
        implementation for databricks engines -- confirm against Alembic docs.
        """

        __dialect__ = "databricks"
|
31
|
-
|
32
|
-
|
33
|
-
class DatabricksDecimal(types.TypeDecorator):
    """DECIMAL column type whose result values are coerced to ``decimal.Decimal``."""

    impl = types.DECIMAL

    def process_result_value(self, value, dialect):
        """Wrap a non-NULL result value in ``decimal.Decimal``; pass ``None`` through.

        ``dialect`` is accepted as part of the hook signature and is not used.
        """
        return None if value is None else decimal.Decimal(value)
|
43
|
-
|
44
|
-
|
45
|
-
class DatabricksTimestamp(types.TypeDecorator):
    """TIMESTAMP column type that hands result values back unchanged.

    No string-to-datetime conversion happens in this class; whatever the
    driver returns is what the caller receives.
    """

    impl = types.TIMESTAMP

    def process_result_value(self, value, dialect):
        """Return ``value`` exactly as received (``dialect`` is unused)."""
        return value

    def adapt(self, impltype, **kwargs):
        """Return this decorator's ``impl``, ignoring ``impltype`` and ``kwargs``."""
        return self.impl
|
55
|
-
|
56
|
-
|
57
|
-
class DatabricksDate(types.TypeDecorator):
    """DATE column type that hands result values back unchanged.

    No string-to-date conversion happens in this class; whatever the driver
    returns is what the caller receives.
    """

    impl = types.DATE

    def process_result_value(self, value, dialect):
        """Return ``value`` exactly as received (``dialect`` is unused)."""
        return value

    def adapt(self, impltype, **kwargs):
        """Return this decorator's ``impl``, ignoring ``impltype`` and ``kwargs``."""
        return self.impl
|
67
|
-
|
68
|
-
|
69
|
-
class DatabricksDialect(default.DefaultDialect):
    """This dialect implements only those methods required to pass our e2e tests"""

    # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect
    name: str = "databricks"
    driver: str = "databricks-sql-python"
    default_schema_name: str = "default"

    preparer = DatabricksIdentifierPreparer  # type: ignore
    type_compiler = DatabricksTypeCompiler
    ddl_compiler = DatabricksDDLCompiler
    supports_statement_cache: bool = True
    supports_multivalues_insert: bool = True
    supports_native_decimal: bool = True
    supports_sane_rowcount: bool = False
    non_native_boolean_check_constraint: bool = False

    # Class-level defaults so the reflection methods do not raise
    # AttributeError when neither create_connect_args() nor the do_connect
    # listener has populated these yet.
    catalog = None
    schema = None

    # Maps the leading word of a reported TYPE_NAME (lower-cased) to the
    # SQLAlchemy type used for reflected columns.
    _type_map = {
        "boolean": types.Boolean,
        "tinyint": types.SmallInteger,
        "smallint": types.SmallInteger,
        "int": types.Integer,
        "bigint": types.BigInteger,
        "float": types.Float,
        "double": types.Float,
        "string": types.String,
        "varchar": types.String,
        "char": types.String,
        "binary": types.String,
        "array": types.String,
        "map": types.String,
        "struct": types.String,
        "uniontype": types.String,
        "decimal": DatabricksDecimal,
        "timestamp": DatabricksTimestamp,
        "date": DatabricksDate,
    }

    @classmethod
    def dbapi(cls):
        """Return the DBAPI module used by this dialect (``databricks.sql``)."""
        return sql

    def create_connect_args(self, url):
        """Translate a SQLAlchemy URL into ``([], kwargs)`` for the DBAPI connect call.

        The ``schema`` and ``catalog`` query parameters are also remembered on
        the dialect instance so the reflection methods can use them as defaults.
        """
        # TODO: can schema be provided after HOST?
        # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***

        kwargs = {
            "server_hostname": url.host,
            "access_token": url.password,
            "http_path": url.query.get("http_path"),
            "catalog": url.query.get("catalog"),
            "schema": url.query.get("schema"),
        }

        self.schema = kwargs["schema"]
        self.catalog = kwargs["catalog"]

        return [], kwargs

    def _resolve_column_type(self, type_name, column_name):
        """Map a reported TYPE_NAME to a SQLAlchemy type, or ``NullType`` if unknown."""
        import warnings

        # Taken from PyHive. This removes added type info from decimals and maps
        match = re.search(r"^\w+", type_name or "")
        base_name = match.group(0).lower() if match else None
        try:
            return self._type_map[base_name]
        except KeyError:
            # An unrecognised type should not abort reflection of the whole table.
            warnings.warn(
                f"Did not recognize type '{type_name}' of column '{column_name}'",
                stacklevel=3,
            )
            return types.NullType

    def _qualified_name(self, *names, schema=None, catalog=None):
        """Join catalog, schema and ``names`` with dots, skipping unset parts.

        The catalog is only included when a schema is available; a bare
        ``catalog.table`` would otherwise be read as ``schema.table``.
        Returns an empty string when there is nothing to join.
        """
        _schema = schema or self.schema
        _catalog = catalog or self.catalog

        parts = []
        if _schema:
            if _catalog:
                parts.append(_catalog)
            parts.append(_schema)
        parts.extend(names)
        return ".".join(parts)

    def _show_object_names(self, connection, object_kind, schema):
        """Run ``SHOW <object_kind>`` and return the second column of every row."""
        NAME_COLUMN = 1

        sql_str = f"SHOW {object_kind}"
        namespace = self._qualified_name(schema=schema)
        if namespace:
            sql_str = f"{sql_str} FROM {namespace}"

        with self.get_connection_cursor(connection) as cur:
            data = cur.execute(sql_str).fetchall()

        return [row[NAME_COLUMN] for row in data]

    def get_columns(self, connection, table_name, schema=None, **kwargs):
        """Return information about columns in `table_name`.

        Given a :class:`_engine.Connection`, a string
        `table_name`, and an optional string `schema`, return column
        information as a list of dictionaries with these keys:

        name
          the column's name

        type
          [sqlalchemy.types#TypeEngine]

        nullable
          boolean

        default
          the column's default value

        autoincrement
          boolean

        sequence
          a dictionary of the form
              {'name' : str, 'start' :int, 'increment': int, 'minvalue': int,
               'maxvalue': int, 'nominvalue': bool, 'nomaxvalue': bool,
               'cycle': bool, 'cache': int, 'order': bool}

        Additional column attributes may be present.

        Column types that are not present in the type map are reported as
        ``NullType`` and a warning is emitted.
        """
        with self.get_connection_cursor(connection) as cur:
            resp = cur.columns(
                catalog_name=self.catalog,
                schema_name=schema or self.schema,
                table_name=table_name,
            ).fetchall()

        return [
            {
                "name": col.COLUMN_NAME,
                "type": self._resolve_column_type(col.TYPE_NAME, col.COLUMN_NAME),
                "nullable": bool(col.NULLABLE),
                "default": col.COLUMN_DEF,
                "autoincrement": col.IS_AUTO_INCREMENT != "NO",
            }
            for col in resp
        ]

    def get_pk_constraint(self, connection, table_name, schema=None, **kw):
        """Return information about the primary key constraint on
        `table_name`.

        Given a :class:`_engine.Connection`, a string
        `table_name`, and an optional string `schema`, return primary
        key information as a dictionary with these keys:

        constrained_columns
          a list of column names that make up the primary key

        name
          optional name of the primary key constraint.

        """
        # TODO: implement this behaviour
        return {"constrained_columns": []}

    def get_foreign_keys(self, connection, table_name, schema=None, **kw):
        """Return information about foreign_keys in `table_name`.

        Given a :class:`_engine.Connection`, a string
        `table_name`, and an optional string `schema`, return foreign
        key information as a list of dicts with these keys:

        name
          the constraint's name

        constrained_columns
          a list of column names that make up the foreign key

        referred_schema
          the name of the referred schema

        referred_table
          the name of the referred table

        referred_columns
          a list of column names in the referred table that correspond to
          constrained_columns
        """
        # TODO: Implement this behaviour
        return []

    def get_indexes(self, connection, table_name, schema=None, **kw):
        """Return information about indexes in `table_name`.

        Given a :class:`_engine.Connection`, a string
        `table_name` and an optional string `schema`, return index
        information as a list of dictionaries with these keys:

        name
          the index's name

        column_names
          list of column names in order

        unique
          boolean
        """
        # TODO: Implement this behaviour
        return []

    def get_table_names(self, connection, schema=None, **kwargs):
        """Return the names reported by ``SHOW TABLES`` for ``schema``.

        Falls back to the schema/catalog stored on the dialect; when no schema
        is known at all, an unqualified ``SHOW TABLES`` is issued.
        """
        return self._show_object_names(connection, "TABLES", schema)

    def get_view_names(self, connection, schema=None, **kwargs):
        """Return the names reported by ``SHOW VIEWS`` for ``schema``.

        Falls back to the schema/catalog stored on the dialect; when no schema
        is known at all, an unqualified ``SHOW VIEWS`` is issued.
        """
        return self._show_object_names(connection, "VIEWS", schema)

    def do_rollback(self, dbapi_connection):
        # Databricks SQL Does not support transactions
        pass

    def has_table(
        self, connection, table_name, schema=None, catalog=None, **kwargs
    ) -> bool:
        """SQLAlchemy docstrings say dialect providers must implement this method

        Issues ``DESCRIBE TABLE`` and returns ``False`` when the resulting
        ``DatabaseError`` carries a "not found" marker; any other
        ``DatabaseError`` is re-raised.
        """
        # DBR >12.x uses underscores in error messages
        DBR_LTE_12_NOT_FOUND_STRING = "Table or view not found"
        DBR_GT_12_NOT_FOUND_STRING = "TABLE_OR_VIEW_NOT_FOUND"

        target = self._qualified_name(table_name, schema=schema, catalog=catalog)

        try:
            connection.execute(f"DESCRIBE TABLE {target}")
        except DatabaseError as e:
            message = str(e)
            if (
                DBR_GT_12_NOT_FOUND_STRING in message
                or DBR_LTE_12_NOT_FOUND_STRING in message
            ):
                return False
            raise
        return True

    def get_connection_cursor(self, connection):
        """Added for backwards compatibility with 1.3.x

        Probes ``connection`` for the attribute that exposes the underlying
        DBAPI connection and returns a new cursor from it.

        Raises:
            SQLAlchemyError: if none of the known attributes is present.
        """
        if hasattr(connection, "_dbapi_connection"):
            return connection._dbapi_connection.dbapi_connection.cursor()
        elif hasattr(connection, "raw_connection"):
            return connection.raw_connection().cursor()
        elif hasattr(connection, "connection"):
            return connection.connection.cursor()

        raise SQLAlchemyError(
            "Databricks dialect can't obtain a cursor context manager from the dbapi"
        )

    @reflection.cache
    def get_schema_names(self, connection, **kw):
        """Return the first column of every row produced by ``SHOW SCHEMAS``."""
        # Equivalent to SHOW DATABASES

        # TODO: replace with call to cursor.schemas() once its performance matches raw SQL
        return [row[0] for row in connection.execute("SHOW SCHEMAS")]
|
315
|
-
|
316
|
-
|
317
|
-
@event.listens_for(Engine, "do_connect")
def receive_do_connect(dialect, conn_rec, cargs, cparams):
    """Tag databricks connections with a "sqlalchemy" user-agent entry.

    Runs for every engine's ``do_connect`` event and returns immediately for
    dialects not named "databricks". Otherwise ``cparams["_user_agent_entry"]``
    is set to "sqlalchemy", prefixed onto any entry the caller already supplied.

    On SQLAlchemy 1.3.x, ``schema`` and ``catalog`` found in ``cparams`` are
    also copied onto the dialect.
    """
    # Ignore connect invocations that don't use our dialect
    if dialect.name != "databricks":
        return

    cparams["_user_agent_entry"] = (
        f"sqlalchemy + {cparams['_user_agent_entry']}"
        if "_user_agent_entry" in cparams
        else "sqlalchemy"
    )

    if sqlalchemy.__version__.startswith("1.3"):
        # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
        # These should be passed in as connect_args when building the Engine
        for key in ("schema", "catalog"):
            if key in cparams:
                setattr(dialect, key, cparams[key])
|
@@ -1,17 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from sqlalchemy.sql import compiler
|
3
|
-
|
4
|
-
|
5
|
-
class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
    """Identifier preparer configured with a backtick as the initial quote.

    SparkSQL identifier specification:
    https://spark.apache.org/docs/latest/sql-ref-identifier.html
    """

    # Letters, digits and underscore only, matched case-insensitively.
    legal_characters = re.compile(r"^[A-Z0-9_]+$", re.IGNORECASE)

    def __init__(self, dialect):
        """Initialise the base preparer for ``dialect`` with "`" as ``initial_quote``."""
        super().__init__(dialect, initial_quote="`")
|
13
|
-
|
14
|
-
|
15
|
-
class DatabricksDDLCompiler(compiler.DDLCompiler):
    """DDL compiler that contributes a ``USING DELTA`` suffix for tables."""

    def post_create_table(self, table):
        """Return ``" USING DELTA"`` regardless of ``table``.

        NOTE(review): presumably SQLAlchemy appends this after the column
        list of CREATE TABLE -- confirm against the DDLCompiler hook docs.
        """
        table_suffix = " USING DELTA"
        return table_suffix
|
@@ -1,38 +0,0 @@
|
|
1
|
-
from sqlalchemy.sql import compiler
|
2
|
-
|
3
|
-
|
4
|
-
class DatabricksTypeCompiler(compiler.GenericTypeCompiler):
    """Originally forked from pyhive

    Every ``visit_*`` override accepts ``**kw`` like the base-class methods
    it replaces, so keyword arguments forwarded by the compiler (for example
    ``type_expression``) do not raise ``TypeError``.
    """

    def visit_INTEGER(self, type_, **kw):
        return "INT"

    def visit_NUMERIC(self, type_, **kw):
        # Delegate so that precision/scale, when set, are rendered as
        # DECIMAL(p) / DECIMAL(p, s) instead of being silently dropped.
        # Without precision this still renders plain "DECIMAL".
        return self.visit_DECIMAL(type_, **kw)

    def visit_CHAR(self, type_, **kw):
        return "STRING"

    def visit_VARCHAR(self, type_, **kw):
        return "STRING"

    def visit_NCHAR(self, type_, **kw):
        return "STRING"

    def visit_TEXT(self, type_, **kw):
        return "STRING"

    def visit_CLOB(self, type_, **kw):
        return "STRING"

    def visit_BLOB(self, type_, **kw):
        return "BINARY"

    def visit_TIME(self, type_, **kw):
        return "TIMESTAMP"

    def visit_DATE(self, type_, **kw):
        return "DATE"

    def visit_DATETIME(self, type_, **kw):
        return "TIMESTAMP"
|
@@ -1,10 +0,0 @@
|
|
1
|
-
CHANGELOG.md,sha256=JU6ETCTYFt7p3CJ6XtKbu-fBBgvyfNn6MQnRfnjG7oY,163
|
2
|
-
databricks/sqlalchemy/__init__.py,sha256=vZg5CR1laCr50IFcOkzmp9-ysH83iTg81ygQcsPFTk8,60
|
3
|
-
databricks/sqlalchemy/dialect/__init__.py,sha256=hPPl180-V_xexLWhtwknNnhHboOYd2wXHelvmtk0E7c,10745
|
4
|
-
databricks/sqlalchemy/dialect/base.py,sha256=FBibGU9FV_UGlIpF8wyARhV0ImahIqsPELqvrxm_8Rk,494
|
5
|
-
databricks/sqlalchemy/dialect/compiler.py,sha256=P__ihEonyOJYotsVpirjbHf-lYBqBLLK-cM5LZdOSUo,792
|
6
|
-
databricks_sqlalchemy-1.0.1.dist-info/LICENSE,sha256=WgVm2VpfZ3CsUfPndD2NeCrEIcFA4UB-YnnW4ejxcbE,11346
|
7
|
-
databricks_sqlalchemy-1.0.1.dist-info/METADATA,sha256=o9xFiC352kC85oSTx5-5hjYMjXscGRvY2sK7_bkzqAY,11074
|
8
|
-
databricks_sqlalchemy-1.0.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
9
|
-
databricks_sqlalchemy-1.0.1.dist-info/entry_points.txt,sha256=AAjpsvZbVcoMAcWLIesoAT5FNZhBEcIhxdKknVua3jw,74
|
10
|
-
databricks_sqlalchemy-1.0.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/entry_points.txt
RENAMED
File without changes
|