databricks-sqlalchemy 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
Files changed (35)
  1. CHANGELOG.md +2 -2
  2. databricks/sqlalchemy/__init__.py +4 -1
  3. databricks/sqlalchemy/_ddl.py +100 -0
  4. databricks/sqlalchemy/_parse.py +385 -0
  5. databricks/sqlalchemy/_types.py +323 -0
  6. databricks/sqlalchemy/base.py +436 -0
  7. databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
  8. databricks/sqlalchemy/py.typed +0 -0
  9. databricks/sqlalchemy/pytest.ini +4 -0
  10. databricks/sqlalchemy/requirements.py +249 -0
  11. databricks/sqlalchemy/setup.cfg +4 -0
  12. databricks/sqlalchemy/test/_extra.py +70 -0
  13. databricks/sqlalchemy/test/_future.py +331 -0
  14. databricks/sqlalchemy/test/_regression.py +311 -0
  15. databricks/sqlalchemy/test/_unsupported.py +450 -0
  16. databricks/sqlalchemy/test/conftest.py +13 -0
  17. databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
  18. databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
  19. databricks/sqlalchemy/test/test_suite.py +13 -0
  20. databricks/sqlalchemy/test_local/__init__.py +5 -0
  21. databricks/sqlalchemy/test_local/conftest.py +44 -0
  22. databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  23. databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
  24. databricks/sqlalchemy/test_local/test_ddl.py +96 -0
  25. databricks/sqlalchemy/test_local/test_parsing.py +160 -0
  26. databricks/sqlalchemy/test_local/test_types.py +161 -0
  27. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/METADATA +60 -39
  28. databricks_sqlalchemy-2.0.0.dist-info/RECORD +31 -0
  29. databricks/sqlalchemy/dialect/__init__.py +0 -340
  30. databricks/sqlalchemy/dialect/base.py +0 -17
  31. databricks/sqlalchemy/dialect/compiler.py +0 -38
  32. databricks_sqlalchemy-1.0.1.dist-info/RECORD +0 -10
  33. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/LICENSE +0 -0
  34. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/WHEEL +0 -0
  35. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/entry_points.txt +0 -0
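
The four hunks below delete the entire 1.x dialect package (databricks/sqlalchemy/dialect/) and its RECORD; the rewritten 2.x dialect lands in the new _ddl.py, _parse.py, _types.py, and base.py modules listed above. For orientation, the deleted module documents its expected connection URI in create_connect_args; a minimal sketch of building an engine against the 1.x dialect, assuming the URI scheme from that comment resolves to this dialect (the *** values are the placeholders from the comment, not working credentials):

    import sqlalchemy

    # URI shape taken from the create_connect_args comment in the deleted
    # module. Replace the *** placeholders with a real workspace host, token,
    # and http_path; catalog and schema are the optional query parameters the
    # dialect reads, shown here as <catalog>/<schema> placeholders.
    engine = sqlalchemy.create_engine(
        "databricks+thrift://token:dapi***@***.cloud.databricks.com"
        "?http_path=/sql/***&catalog=<catalog>&schema=<schema>"
    )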
databricks/sqlalchemy/dialect/__init__.py
@@ -1,340 +0,0 @@
- """This module's layout loosely follows the example of SQLAlchemy's postgres dialect
- """
-
- import decimal, re, datetime
- from dateutil.parser import parse
-
- import sqlalchemy
- from sqlalchemy import types, event
- from sqlalchemy.engine import default, Engine
- from sqlalchemy.exc import DatabaseError, SQLAlchemyError
- from sqlalchemy.engine import reflection
-
- from databricks import sql
-
-
- from databricks.sqlalchemy.dialect.base import (
-     DatabricksDDLCompiler,
-     DatabricksIdentifierPreparer,
- )
- from databricks.sqlalchemy.dialect.compiler import DatabricksTypeCompiler
-
- try:
-     import alembic
- except ImportError:
-     pass
- else:
-     from alembic.ddl import DefaultImpl
-
-     class DatabricksImpl(DefaultImpl):
-         __dialect__ = "databricks"
-
-
- class DatabricksDecimal(types.TypeDecorator):
-     """Translates strings to decimals"""
-
-     impl = types.DECIMAL
-
-     def process_result_value(self, value, dialect):
-         if value is not None:
-             return decimal.Decimal(value)
-         else:
-             return None
-
-
- class DatabricksTimestamp(types.TypeDecorator):
-     """Translates timestamp strings to datetime objects"""
-
-     impl = types.TIMESTAMP
-
-     def process_result_value(self, value, dialect):
-         return value
-
-     def adapt(self, impltype, **kwargs):
-         return self.impl
-
-
- class DatabricksDate(types.TypeDecorator):
-     """Translates date strings to date objects"""
-
-     impl = types.DATE
-
-     def process_result_value(self, value, dialect):
-         return value
-
-     def adapt(self, impltype, **kwargs):
-         return self.impl
-
-
- class DatabricksDialect(default.DefaultDialect):
-     """This dialect implements only those methods required to pass our e2e tests"""
-
-     # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect
-     name: str = "databricks"
-     driver: str = "databricks-sql-python"
-     default_schema_name: str = "default"
-
-     preparer = DatabricksIdentifierPreparer  # type: ignore
-     type_compiler = DatabricksTypeCompiler
-     ddl_compiler = DatabricksDDLCompiler
-     supports_statement_cache: bool = True
-     supports_multivalues_insert: bool = True
-     supports_native_decimal: bool = True
-     supports_sane_rowcount: bool = False
-     non_native_boolean_check_constraint: bool = False
-
-     @classmethod
-     def dbapi(cls):
-         return sql
-
-     def create_connect_args(self, url):
-         # TODO: can schema be provided after HOST?
-         # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***
-
-         kwargs = {
-             "server_hostname": url.host,
-             "access_token": url.password,
-             "http_path": url.query.get("http_path"),
-             "catalog": url.query.get("catalog"),
-             "schema": url.query.get("schema"),
-         }
-
-         self.schema = kwargs["schema"]
-         self.catalog = kwargs["catalog"]
-
-         return [], kwargs
-
-     def get_columns(self, connection, table_name, schema=None, **kwargs):
-         """Return information about columns in `table_name`.
-
-         Given a :class:`_engine.Connection`, a string
-         `table_name`, and an optional string `schema`, return column
-         information as a list of dictionaries with these keys:
-
-         name
-           the column's name
-
-         type
-           [sqlalchemy.types#TypeEngine]
-
-         nullable
-           boolean
-
-         default
-           the column's default value
-
-         autoincrement
-           boolean
-
-         sequence
-           a dictionary of the form
-           {'name': str, 'start': int, 'increment': int, 'minvalue': int,
-           'maxvalue': int, 'nominvalue': bool, 'nomaxvalue': bool,
-           'cycle': bool, 'cache': int, 'order': bool}
-
-         Additional column attributes may be present.
-         """
-
-         _type_map = {
-             "boolean": types.Boolean,
-             "smallint": types.SmallInteger,
-             "int": types.Integer,
-             "bigint": types.BigInteger,
-             "float": types.Float,
-             "double": types.Float,
-             "string": types.String,
-             "varchar": types.String,
-             "char": types.String,
-             "binary": types.String,
-             "array": types.String,
-             "map": types.String,
-             "struct": types.String,
-             "uniontype": types.String,
-             "decimal": DatabricksDecimal,
-             "timestamp": DatabricksTimestamp,
-             "date": DatabricksDate,
-         }
-
-         with self.get_connection_cursor(connection) as cur:
-             resp = cur.columns(
-                 catalog_name=self.catalog,
-                 schema_name=schema or self.schema,
-                 table_name=table_name,
-             ).fetchall()
-
-         columns = []
-
-         for col in resp:
-
-             # Taken from PyHive. This removes added type info from decimals and maps
-             _col_type = re.search(r"^\w+", col.TYPE_NAME).group(0)
-             this_column = {
-                 "name": col.COLUMN_NAME,
-                 "type": _type_map[_col_type.lower()],
-                 "nullable": bool(col.NULLABLE),
-                 "default": col.COLUMN_DEF,
-                 "autoincrement": False if col.IS_AUTO_INCREMENT == "NO" else True,
-             }
-             columns.append(this_column)
-
-         return columns
-
-     def get_pk_constraint(self, connection, table_name, schema=None, **kw):
-         """Return information about the primary key constraint on
-         `table_name`.
-
-         Given a :class:`_engine.Connection`, a string
-         `table_name`, and an optional string `schema`, return primary
-         key information as a dictionary with these keys:
-
-         constrained_columns
-           a list of column names that make up the primary key
-
-         name
-           optional name of the primary key constraint.
-
-         """
-         # TODO: implement this behaviour
-         return {"constrained_columns": []}
-
-     def get_foreign_keys(self, connection, table_name, schema=None, **kw):
-         """Return information about foreign_keys in `table_name`.
-
-         Given a :class:`_engine.Connection`, a string
-         `table_name`, and an optional string `schema`, return foreign
-         key information as a list of dicts with these keys:
-
-         name
-           the constraint's name
-
-         constrained_columns
-           a list of column names that make up the foreign key
-
-         referred_schema
-           the name of the referred schema
-
-         referred_table
-           the name of the referred table
-
-         referred_columns
-           a list of column names in the referred table that correspond to
-           constrained_columns
-         """
-         # TODO: Implement this behaviour
-         return []
-
-     def get_indexes(self, connection, table_name, schema=None, **kw):
-         """Return information about indexes in `table_name`.
-
-         Given a :class:`_engine.Connection`, a string
-         `table_name` and an optional string `schema`, return index
-         information as a list of dictionaries with these keys:
-
-         name
-           the index's name
-
-         column_names
-           list of column names in order
-
-         unique
-           boolean
-         """
-         # TODO: Implement this behaviour
-         return []
-
-     def get_table_names(self, connection, schema=None, **kwargs):
-         TABLE_NAME = 1
-         with self.get_connection_cursor(connection) as cur:
-             sql_str = "SHOW TABLES FROM {}".format(
-                 ".".join([self.catalog, schema or self.schema])
-             )
-             data = cur.execute(sql_str).fetchall()
-             _tables = [i[TABLE_NAME] for i in data]
-
-         return _tables
-
-     def get_view_names(self, connection, schema=None, **kwargs):
-         VIEW_NAME = 1
-         with self.get_connection_cursor(connection) as cur:
-             sql_str = "SHOW VIEWS FROM {}".format(
-                 ".".join([self.catalog, schema or self.schema])
-             )
-             data = cur.execute(sql_str).fetchall()
-             _tables = [i[VIEW_NAME] for i in data]
-
-         return _tables
-
-     def do_rollback(self, dbapi_connection):
-         # Databricks SQL does not support transactions
-         pass
-
-     def has_table(
-         self, connection, table_name, schema=None, catalog=None, **kwargs
-     ) -> bool:
-         """SQLAlchemy docstrings say dialect providers must implement this method"""
-
-         _schema = schema or self.schema
-         _catalog = catalog or self.catalog
-
-         # DBR >12.x uses underscores in error messages
-         DBR_LTE_12_NOT_FOUND_STRING = "Table or view not found"
-         DBR_GT_12_NOT_FOUND_STRING = "TABLE_OR_VIEW_NOT_FOUND"
-
-         try:
-             res = connection.execute(
-                 f"DESCRIBE TABLE {_catalog}.{_schema}.{table_name}"
-             )
-             return True
-         except DatabaseError as e:
-             if DBR_GT_12_NOT_FOUND_STRING in str(
-                 e
-             ) or DBR_LTE_12_NOT_FOUND_STRING in str(e):
-                 return False
-             else:
-                 raise e
-
-     def get_connection_cursor(self, connection):
-         """Added for backwards compatibility with 1.3.x"""
-         if hasattr(connection, "_dbapi_connection"):
-             return connection._dbapi_connection.dbapi_connection.cursor()
-         elif hasattr(connection, "raw_connection"):
-             return connection.raw_connection().cursor()
-         elif hasattr(connection, "connection"):
-             return connection.connection.cursor()
-
-         raise SQLAlchemyError(
-             "Databricks dialect can't obtain a cursor context manager from the dbapi"
-         )
-
-     @reflection.cache
-     def get_schema_names(self, connection, **kw):
-         # Equivalent to SHOW DATABASES
-
-         # TODO: replace with call to cursor.schemas() once its performance matches raw SQL
-         return [row[0] for row in connection.execute("SHOW SCHEMAS")]
-
-
- @event.listens_for(Engine, "do_connect")
- def receive_do_connect(dialect, conn_rec, cargs, cparams):
-     """Helpful for DS on traffic from clients using SQLAlchemy in particular"""
-
-     # Ignore connect invocations that don't use our dialect
-     if not dialect.name == "databricks":
-         return
-
-     if "_user_agent_entry" in cparams:
-         new_user_agent = f"sqlalchemy + {cparams['_user_agent_entry']}"
-     else:
-         new_user_agent = "sqlalchemy"
-
-     cparams["_user_agent_entry"] = new_user_agent
-
-     if sqlalchemy.__version__.startswith("1.3"):
-         # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
-         # These should be passed in as connect_args when building the Engine
-
-         if "schema" in cparams:
-             dialect.schema = cparams["schema"]
-
-         if "catalog" in cparams:
-             dialect.catalog = cparams["catalog"]
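
The receive_do_connect listener above tags every connection with a "sqlalchemy" user agent, folding in rather than overwriting any client-supplied _user_agent_entry. A sketch of the caller side (connect_args is standard SQLAlchemy; "my-app/1.0" is an arbitrary example value, and the URI placeholders are as above):

    engine = sqlalchemy.create_engine(
        "databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***",
        connect_args={"_user_agent_entry": "my-app/1.0"},
    )
    # The listener rewrites the entry to "sqlalchemy + my-app/1.0" before it
    # reaches databricks.sql.connect().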
databricks/sqlalchemy/dialect/base.py
@@ -1,17 +0,0 @@
- import re
- from sqlalchemy.sql import compiler
-
-
- class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
-     # SparkSQL identifier specification:
-     # ref: https://spark.apache.org/docs/latest/sql-ref-identifier.html
-
-     legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
-
-     def __init__(self, dialect):
-         super().__init__(dialect, initial_quote="`")
-
-
- class DatabricksDDLCompiler(compiler.DDLCompiler):
-     def post_create_table(self, table):
-         return " USING DELTA"
databricks/sqlalchemy/dialect/compiler.py
@@ -1,38 +0,0 @@
- from sqlalchemy.sql import compiler
-
-
- class DatabricksTypeCompiler(compiler.GenericTypeCompiler):
-     """Originally forked from pyhive"""
-
-     def visit_INTEGER(self, type_):
-         return "INT"
-
-     def visit_NUMERIC(self, type_):
-         return "DECIMAL"
-
-     def visit_CHAR(self, type_):
-         return "STRING"
-
-     def visit_VARCHAR(self, type_):
-         return "STRING"
-
-     def visit_NCHAR(self, type_):
-         return "STRING"
-
-     def visit_TEXT(self, type_):
-         return "STRING"
-
-     def visit_CLOB(self, type_):
-         return "STRING"
-
-     def visit_BLOB(self, type_):
-         return "BINARY"
-
-     def visit_TIME(self, type_):
-         return "TIMESTAMP"
-
-     def visit_DATE(self, type_):
-         return "DATE"
-
-     def visit_DATETIME(self, type_):
-         return "TIMESTAMP"
databricks_sqlalchemy-1.0.1.dist-info/RECORD
@@ -1,10 +0,0 @@
- CHANGELOG.md,sha256=JU6ETCTYFt7p3CJ6XtKbu-fBBgvyfNn6MQnRfnjG7oY,163
- databricks/sqlalchemy/__init__.py,sha256=vZg5CR1laCr50IFcOkzmp9-ysH83iTg81ygQcsPFTk8,60
- databricks/sqlalchemy/dialect/__init__.py,sha256=hPPl180-V_xexLWhtwknNnhHboOYd2wXHelvmtk0E7c,10745
- databricks/sqlalchemy/dialect/base.py,sha256=FBibGU9FV_UGlIpF8wyARhV0ImahIqsPELqvrxm_8Rk,494
- databricks/sqlalchemy/dialect/compiler.py,sha256=P__ihEonyOJYotsVpirjbHf-lYBqBLLK-cM5LZdOSUo,792
- databricks_sqlalchemy-1.0.1.dist-info/LICENSE,sha256=WgVm2VpfZ3CsUfPndD2NeCrEIcFA4UB-YnnW4ejxcbE,11346
- databricks_sqlalchemy-1.0.1.dist-info/METADATA,sha256=o9xFiC352kC85oSTx5-5hjYMjXscGRvY2sK7_bkzqAY,11074
- databricks_sqlalchemy-1.0.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- databricks_sqlalchemy-1.0.1.dist-info/entry_points.txt,sha256=AAjpsvZbVcoMAcWLIesoAT5FNZhBEcIhxdKknVua3jw,74
- databricks_sqlalchemy-1.0.1.dist-info/RECORD,,