databricks-sqlalchemy 0.0.1b1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. CHANGELOG.md +274 -0
  2. databricks/sqlalchemy/__init__.py +4 -2
  3. databricks/sqlalchemy/_ddl.py +100 -0
  4. databricks/sqlalchemy/_parse.py +385 -0
  5. databricks/sqlalchemy/_types.py +323 -0
  6. databricks/sqlalchemy/base.py +436 -0
  7. databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
  8. databricks/sqlalchemy/py.typed +0 -0
  9. databricks/sqlalchemy/pytest.ini +4 -0
  10. databricks/sqlalchemy/requirements.py +249 -0
  11. databricks/sqlalchemy/setup.cfg +4 -0
  12. databricks/sqlalchemy/test/_extra.py +70 -0
  13. databricks/sqlalchemy/test/_future.py +331 -0
  14. databricks/sqlalchemy/test/_regression.py +311 -0
  15. databricks/sqlalchemy/test/_unsupported.py +450 -0
  16. databricks/sqlalchemy/test/conftest.py +13 -0
  17. databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
  18. databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
  19. databricks/sqlalchemy/test/test_suite.py +13 -0
  20. databricks/sqlalchemy/test_local/__init__.py +5 -0
  21. databricks/sqlalchemy/test_local/conftest.py +44 -0
  22. databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  23. databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
  24. databricks/sqlalchemy/test_local/test_ddl.py +96 -0
  25. databricks/sqlalchemy/test_local/test_parsing.py +160 -0
  26. databricks/sqlalchemy/test_local/test_types.py +161 -0
  27. databricks_sqlalchemy-1.0.0.dist-info/LICENSE +201 -0
  28. databricks_sqlalchemy-1.0.0.dist-info/METADATA +225 -0
  29. databricks_sqlalchemy-1.0.0.dist-info/RECORD +31 -0
  30. {databricks_sqlalchemy-0.0.1b1.dist-info → databricks_sqlalchemy-1.0.0.dist-info}/WHEEL +1 -1
  31. databricks_sqlalchemy-1.0.0.dist-info/entry_points.txt +3 -0
  32. databricks/__init__.py +0 -7
  33. databricks_sqlalchemy-0.0.1b1.dist-info/METADATA +0 -19
  34. databricks_sqlalchemy-0.0.1b1.dist-info/RECORD +0 -5
databricks/sqlalchemy/base.py
@@ -0,0 +1,436 @@
+from typing import Any, List, Optional, Dict, Union
+
+import databricks.sqlalchemy._ddl as dialect_ddl_impl
+import databricks.sqlalchemy._types as dialect_type_impl
+from databricks import sql
+from databricks.sqlalchemy._parse import (
+    _describe_table_extended_result_to_dict_list,
+    _match_table_not_found_string,
+    build_fk_dict,
+    build_pk_dict,
+    get_fk_strings_from_dte_output,
+    get_pk_strings_from_dte_output,
+    get_comment_from_dte_output,
+    parse_column_info_from_tgetcolumnsresponse,
+)
+
+import sqlalchemy
+from sqlalchemy import DDL, event
+from sqlalchemy.engine import Connection, Engine, default, reflection
+from sqlalchemy.engine.interfaces import (
+    ReflectedForeignKeyConstraint,
+    ReflectedPrimaryKeyConstraint,
+    ReflectedColumn,
+    ReflectedTableComment,
+)
+from sqlalchemy.engine.reflection import ReflectionDefaults
+from sqlalchemy.exc import DatabaseError, SQLAlchemyError
+
+try:
+    import alembic
+except ImportError:
+    pass
+else:
+    from alembic.ddl import DefaultImpl
+
+    class DatabricksImpl(DefaultImpl):
+        __dialect__ = "databricks"
+
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class DatabricksDialect(default.DefaultDialect):
+    """This dialect implements only those methods required to pass our e2e tests"""
+
+    # See sqlalchemy.engine.interfaces for descriptions of each of these properties
+    name: str = "databricks"
+    driver: str = "databricks"
+    default_schema_name: str = "default"
+    preparer = dialect_ddl_impl.DatabricksIdentifierPreparer  # type: ignore
+    ddl_compiler = dialect_ddl_impl.DatabricksDDLCompiler
+    statement_compiler = dialect_ddl_impl.DatabricksStatementCompiler
+    supports_statement_cache: bool = True
+    supports_multivalues_insert: bool = True
+    supports_native_decimal: bool = True
+    supports_sane_rowcount: bool = False
+    non_native_boolean_check_constraint: bool = False
+    supports_identity_columns: bool = True
+    supports_schemas: bool = True
+    default_paramstyle: str = "named"
+    div_is_floordiv: bool = False
+    supports_default_values: bool = False
+    supports_server_side_cursors: bool = False
+    supports_sequences: bool = False
+    supports_native_boolean: bool = True
+
+    colspecs = {
+        sqlalchemy.types.DateTime: dialect_type_impl.TIMESTAMP_NTZ,
+        sqlalchemy.types.Time: dialect_type_impl.DatabricksTimeType,
+        sqlalchemy.types.String: dialect_type_impl.DatabricksStringType,
+    }
+
+    # SQLAlchemy requires that a table with no primary key
+    # constraint return a dictionary that looks like this.
+    EMPTY_PK: Dict[str, Any] = {"constrained_columns": [], "name": None}
+
+    # SQLAlchemy requires that a table with no foreign keys
+    # defined return an empty list. Same for indexes.
+    EMPTY_FK: List
+    EMPTY_INDEX: List
+    EMPTY_FK = EMPTY_INDEX = []
+
+    @classmethod
+    def import_dbapi(cls):
+        return sql
+
+    def _force_paramstyle_to_native_mode(self):
+        """This method can be removed after databricks-sql-connector wholly switches to the NATIVE ParamApproach.
+
+        This is a hack to trick SQLAlchemy into using a different paramstyle
+        than the one declared by this module in src/databricks/sql/__init__.py
+
+        This method is called _after_ the dialect has been initialised, which is important because otherwise
+        our users would need to include a `paramstyle` argument in their SQLAlchemy connection string.
+
+        This dialect is written to support NATIVE queries. Although the INLINE approach can technically work,
+        the same behaviour can be achieved within SQLAlchemy itself using its literal_processor methods.
+        """
+
+        self.paramstyle = self.default_paramstyle
+
+    def create_connect_args(self, url):
+        # TODO: can schema be provided after HOST?
+        # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/***
+
+        kwargs = {
+            "server_hostname": url.host,
+            "access_token": url.password,
+            "http_path": url.query.get("http_path"),
+            "catalog": url.query.get("catalog"),
+            "schema": url.query.get("schema"),
+            "use_inline_params": False,
+        }
+
+        self.schema = kwargs["schema"]
+        self.catalog = kwargs["catalog"]
+
+        self._force_paramstyle_to_native_mode()
+
+        return [], kwargs
+
+    def get_columns(
+        self, connection, table_name, schema=None, **kwargs
+    ) -> List[ReflectedColumn]:
+        """Return information about columns in `table_name`."""
+
+        with self.get_connection_cursor(connection) as cur:
+            resp = cur.columns(
+                catalog_name=self.catalog,
+                schema_name=schema or self.schema,
+                table_name=table_name,
+            ).fetchall()
+
+        if not resp:
+            # TGetColumnsRequest will not raise an exception if passed a table that doesn't exist,
+            # but Databricks supports tables with no columns. So if the result is an empty list,
+            # we need to check whether the table exists (and raise an exception if not) or simply
+            # return an empty list.
+            self._describe_table_extended(
+                connection,
+                table_name,
+                self.catalog,
+                schema or self.schema,
+                expect_result=False,
+            )
+            return resp
+        columns = []
+        for col in resp:
+            row_dict = parse_column_info_from_tgetcolumnsresponse(col)
+            columns.append(row_dict)
+
+        return columns
+
+    def _describe_table_extended(
+        self,
+        connection: Connection,
+        table_name: str,
+        catalog_name: Optional[str] = None,
+        schema_name: Optional[str] = None,
+        expect_result=True,
+    ) -> Union[List[Dict[str, str]], None]:
+        """Run DESCRIBE TABLE EXTENDED on a table and return a list of dictionaries of the result.
+
+        This method is the fastest way to check for the presence of a table in a schema.
+
+        If expect_result is False, this method returns None as the output dict isn't required.
+
+        Raises NoSuchTableError if the table is not present in the schema.
+        """
+
+        _target_catalog = catalog_name or self.catalog
+        _target_schema = schema_name or self.schema
+        _target = f"`{_target_catalog}`.`{_target_schema}`.`{table_name}`"
+
+        # sql injection risk?
+        # DESCRIBE TABLE EXTENDED in DBR doesn't support parameterised inputs :(
+        stmt = DDL(f"DESCRIBE TABLE EXTENDED {_target}")
+
+        try:
+            result = connection.execute(stmt)
+        except DatabaseError as e:
+            if _match_table_not_found_string(str(e)):
+                raise sqlalchemy.exc.NoSuchTableError(
+                    f"No such table {table_name}"
+                ) from e
+            raise e
+
+        if not expect_result:
+            return None
+
+        fmt_result = _describe_table_extended_result_to_dict_list(result)
+        return fmt_result
+
+    @reflection.cache
+    def get_pk_constraint(
+        self,
+        connection,
+        table_name: str,
+        schema: Optional[str] = None,
+        **kw: Any,
+    ) -> ReflectedPrimaryKeyConstraint:
+        """Fetch information about the primary key constraint on table_name.
+
+        Returns a dictionary with these keys:
+
+        constrained_columns
+          a list of column names that make up the primary key. This is an empty list
+          if no PRIMARY KEY is defined.
+
+        name
+          the name of the primary key constraint
+        """
+
+        result = self._describe_table_extended(
+            connection=connection,
+            table_name=table_name,
+            schema_name=schema,
+        )
+
+        # Type ignore is because mypy knows that self._describe_table_extended *can*
+        # return None (even though it never will since expect_result defaults to True)
+        raw_pk_constraints: List = get_pk_strings_from_dte_output(result)  # type: ignore
+        if not any(raw_pk_constraints):
+            return self.EMPTY_PK  # type: ignore
+
+        if len(raw_pk_constraints) > 1:
+            logger.warning(
+                "Found more than one primary key constraint in DESCRIBE TABLE EXTENDED output. "
+                "This is unexpected. Please report this as a bug. "
+                "Only the first primary key constraint will be returned."
+            )
+
+        first_pk_constraint = raw_pk_constraints[0]
+        pk_name = first_pk_constraint.get("col_name")
+        pk_constraint_string = first_pk_constraint.get("data_type")
+
+        # TODO: figure out how to return sqlalchemy.interfaces in a way that mypy respects
+        return build_pk_dict(pk_name, pk_constraint_string)  # type: ignore
+
+    def get_foreign_keys(
+        self, connection, table_name, schema=None, **kw
+    ) -> List[ReflectedForeignKeyConstraint]:
+        """Return information about foreign_keys in `table_name`."""
+
+        result = self._describe_table_extended(
+            connection=connection,
+            table_name=table_name,
+            schema_name=schema,
+        )
+
+        # Type ignore is because mypy knows that self._describe_table_extended *can*
+        # return None (even though it never will since expect_result defaults to True)
+        raw_fk_constraints: List = get_fk_strings_from_dte_output(result)  # type: ignore
+
+        if not any(raw_fk_constraints):
+            return self.EMPTY_FK
+
+        fk_constraints = []
+        for constraint_dict in raw_fk_constraints:
+            fk_name = constraint_dict.get("col_name")
+            fk_constraint_string = constraint_dict.get("data_type")
+            this_constraint_dict = build_fk_dict(
+                fk_name, fk_constraint_string, schema_name=schema
+            )
+            fk_constraints.append(this_constraint_dict)
+
+        # TODO: figure out how to return sqlalchemy.interfaces in a way that mypy respects
+        return fk_constraints  # type: ignore
+
+    def get_indexes(self, connection, table_name, schema=None, **kw):
+        """SQLAlchemy requires this method. Databricks doesn't support indexes."""
+        return self.EMPTY_INDEX
+
+    @reflection.cache
+    def get_table_names(self, connection: Connection, schema=None, **kwargs):
+        """Return a list of tables in the current schema."""
+
+        _target_catalog = self.catalog
+        _target_schema = schema or self.schema
+        _target = f"`{_target_catalog}`.`{_target_schema}`"
+
+        stmt = DDL(f"SHOW TABLES FROM {_target}")
+
+        tables_result = connection.execute(stmt).all()
+        views_result = self.get_view_names(connection=connection, schema=schema)
+
+        # In Databricks, SHOW TABLES FROM <schema> returns both tables and views.
+        # Potential optimisation: rewrite this to instead query information_schema
+        tables_minus_views = [
+            row.tableName for row in tables_result if row.tableName not in views_result
+        ]
+
+        return tables_minus_views
+
+    @reflection.cache
+    def get_view_names(
+        self,
+        connection,
+        schema=None,
+        only_materialized=False,
+        only_temp=False,
+        **kwargs,
+    ) -> List[str]:
+        """Returns a list of string view names contained in the schema, if any."""
+
+        _target_catalog = self.catalog
+        _target_schema = schema or self.schema
+        _target = f"`{_target_catalog}`.`{_target_schema}`"
+
+        stmt = DDL(f"SHOW VIEWS FROM {_target}")
+        result = connection.execute(stmt).all()
+
+        return [
+            row.viewName
+            for row in result
+            if (not only_materialized or row.isMaterialized)
+            and (not only_temp or row.isTemporary)
+        ]
+
+    @reflection.cache
+    def get_materialized_view_names(
+        self, connection: Connection, schema: Optional[str] = None, **kw: Any
+    ) -> List[str]:
+        """A wrapper around get_view_names that fetches only the names of materialized views"""
+        return self.get_view_names(connection, schema, only_materialized=True)
+
+    @reflection.cache
+    def get_temp_view_names(
+        self, connection: Connection, schema: Optional[str] = None, **kw: Any
+    ) -> List[str]:
+        """A wrapper around get_view_names that fetches only the names of temporary views"""
+        return self.get_view_names(connection, schema, only_temp=True)
+
+    def do_rollback(self, dbapi_connection):
+        # Databricks SQL does not support transactions
+        pass
+
+    @reflection.cache
+    def has_table(
+        self, connection, table_name, schema=None, catalog=None, **kwargs
+    ) -> bool:
+        """For internal dialect use, check the existence of a particular table
+        or view in the database.
+        """
+
+        try:
+            self._describe_table_extended(
+                connection=connection,
+                table_name=table_name,
+                catalog_name=catalog,
+                schema_name=schema,
+            )
+            return True
+        except sqlalchemy.exc.NoSuchTableError:
+            return False
+
+    def get_connection_cursor(self, connection):
+        """Added for backwards compatibility with 1.3.x"""
+        if hasattr(connection, "_dbapi_connection"):
+            return connection._dbapi_connection.dbapi_connection.cursor()
+        elif hasattr(connection, "raw_connection"):
+            return connection.raw_connection().cursor()
+        elif hasattr(connection, "connection"):
+            return connection.connection.cursor()
+
+        raise SQLAlchemyError(
+            "Databricks dialect can't obtain a cursor context manager from the dbapi"
+        )
+
+    @reflection.cache
+    def get_schema_names(self, connection, **kw):
+        """Return a list of all schema names available in the database."""
+        stmt = DDL("SHOW SCHEMAS")
+        result = connection.execute(stmt)
+        schema_list = [row[0] for row in result]
+        return schema_list
+
+    @reflection.cache
+    def get_table_comment(
+        self,
+        connection: Connection,
+        table_name: str,
+        schema: Optional[str] = None,
+        **kw: Any,
+    ) -> ReflectedTableComment:
+        result = self._describe_table_extended(
+            connection=connection,
+            table_name=table_name,
+            schema_name=schema,
+        )
+
+        if result is None:
+            return ReflectionDefaults.table_comment()
+
+        comment = get_comment_from_dte_output(result)
+
+        if comment:
+            return dict(text=comment)
+        else:
+            return ReflectionDefaults.table_comment()
+
+
+@event.listens_for(Engine, "do_connect")
+def receive_do_connect(dialect, conn_rec, cargs, cparams):
+    """Helpful for DS on traffic from clients using SQLAlchemy in particular"""
+
+    # Ignore connect invocations that don't use our dialect
+    if dialect.name != "databricks":
+        return
+
+    ua = cparams.get("_user_agent_entry", "")
+
+    def add_sqla_tag_if_not_present(val: str):
+        if not val:
+            output = "sqlalchemy"
+
+        elif "sqlalchemy" in val:
+            output = val
+
+        else:
+            output = f"sqlalchemy + {val}"
+
+        return output
+
+    cparams["_user_agent_entry"] = add_sqla_tag_if_not_present(ua)
+
+    if sqlalchemy.__version__.startswith("1.3"):
+        # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
+        # These should be passed in as connect_args when building the Engine
+
+        if "schema" in cparams:
+            dialect.schema = cparams["schema"]
+
+        if "catalog" in cparams:
+            dialect.catalog = cparams["catalog"]
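For orientation, the sketch below shows how an engine built on this dialect exercises the reflection methods added in base.py. It is a minimal illustration, not taken from the package's documentation: the workspace hostname, access token, `http_path`, `catalog`, and `schema` values are placeholders, and `some_table` is a hypothetical table name. The URL query parameters mirror the keys read by `create_connect_args` above.

```python
# Minimal usage sketch (placeholder credentials, hypothetical table name).
from sqlalchemy import create_engine, inspect

# create_connect_args() reads http_path, catalog, and schema from the URL query
# and passes them to the DBAPI connection with use_inline_params=False.
engine = create_engine(
    "databricks://token:<access-token>@<workspace-hostname>"
    "?http_path=<http-path>&catalog=<catalog>&schema=<schema>"
)

inspector = inspect(engine)

# get_table_names() runs SHOW TABLES and subtracts the names returned by
# get_view_names(), since SHOW TABLES in Databricks also lists views.
print(inspector.get_table_names())

# get_columns() calls the DBAPI cursor's columns() method, while
# get_pk_constraint() and get_foreign_keys() parse DESCRIBE TABLE EXTENDED output.
print(inspector.get_columns("some_table"))
print(inspector.get_pk_constraint("some_table"))
```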
databricks/sqlalchemy/dependency_test/test_dependency.py
@@ -0,0 +1,22 @@
+import importlib.util
+
+import pytest
+
+
+class DatabricksImportError(Exception):
+    pass
+
+
+class TestLibraryDependencySuite:
+    # Skip when the dependency is installed; each test asserts that the
+    # corresponding import fails when the dependency is absent.
+    @pytest.mark.skipif(importlib.util.find_spec("databricks_sql_connector_core") is not None, reason="databricks_sql_connector_core is present")
+    def test_sql_core(self):
+        with pytest.raises(DatabricksImportError, match="databricks_sql_connector_core module is not available"):
+            try:
+                import databricks
+            except ImportError:
+                raise DatabricksImportError("databricks_sql_connector_core module is not available")
+
+    @pytest.mark.skipif(importlib.util.find_spec("sqlalchemy") is not None, reason="SQLAlchemy is present")
+    def test_sqlalchemy(self):
+        with pytest.raises(DatabricksImportError, match="sqlalchemy module is not available"):
+            try:
+                import sqlalchemy
+            except ImportError:
+                raise DatabricksImportError("sqlalchemy module is not available")
databricks/sqlalchemy/py.typed
File without changes
databricks/sqlalchemy/pytest.ini
@@ -0,0 +1,4 @@
+
+[sqla_testing]
+requirement_cls=databricks.sqlalchemy.requirements:Requirements
+profile_file=profiles.txt
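The `requirement_cls` setting points SQLAlchemy's pytest plugin at the `Requirements` class shipped in `databricks/sqlalchemy/requirements.py` (+249 lines in this diff). As a rough sketch of the mechanism only — the property shown is a standard `SuiteRequirements` hook and is not copied from the package — such a class switches compliance tests on or off to match dialect capabilities:

```python
# Illustrative sketch of a requirement_cls, not the package's actual Requirements.
from sqlalchemy.testing import exclusions
from sqlalchemy.testing.requirements import SuiteRequirements


class Requirements(SuiteRequirements):
    @property
    def index_reflection(self):
        # This dialect's get_indexes() always returns an empty list because
        # Databricks has no indexes, so index reflection tests are disabled.
        return exclusions.closed()
```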