databricks-sqlalchemy 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. CHANGELOG.md +2 -271
  2. databricks/sqlalchemy/__init__.py +1 -4
  3. databricks/sqlalchemy/dialect/__init__.py +340 -0
  4. databricks/sqlalchemy/dialect/base.py +17 -0
  5. databricks/sqlalchemy/dialect/compiler.py +38 -0
  6. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/METADATA +39 -61
  7. databricks_sqlalchemy-1.0.2.dist-info/RECORD +10 -0
  8. databricks/sqlalchemy/_ddl.py +0 -100
  9. databricks/sqlalchemy/_parse.py +0 -385
  10. databricks/sqlalchemy/_types.py +0 -323
  11. databricks/sqlalchemy/base.py +0 -436
  12. databricks/sqlalchemy/dependency_test/test_dependency.py +0 -22
  13. databricks/sqlalchemy/py.typed +0 -0
  14. databricks/sqlalchemy/pytest.ini +0 -4
  15. databricks/sqlalchemy/requirements.py +0 -249
  16. databricks/sqlalchemy/setup.cfg +0 -4
  17. databricks/sqlalchemy/test/_extra.py +0 -70
  18. databricks/sqlalchemy/test/_future.py +0 -331
  19. databricks/sqlalchemy/test/_regression.py +0 -311
  20. databricks/sqlalchemy/test/_unsupported.py +0 -450
  21. databricks/sqlalchemy/test/conftest.py +0 -13
  22. databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -189
  23. databricks/sqlalchemy/test/overrides/_ctetest.py +0 -33
  24. databricks/sqlalchemy/test/test_suite.py +0 -13
  25. databricks/sqlalchemy/test_local/__init__.py +0 -5
  26. databricks/sqlalchemy/test_local/conftest.py +0 -44
  27. databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  28. databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -543
  29. databricks/sqlalchemy/test_local/test_ddl.py +0 -96
  30. databricks/sqlalchemy/test_local/test_parsing.py +0 -160
  31. databricks/sqlalchemy/test_local/test_types.py +0 -161
  32. databricks_sqlalchemy-1.0.0.dist-info/RECORD +0 -31
  33. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/LICENSE +0 -0
  34. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/WHEEL +0 -0
  35. {databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/entry_points.txt +0 -0
{databricks_sqlalchemy-1.0.0.dist-info → databricks_sqlalchemy-1.0.2.dist-info}/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: databricks-sqlalchemy
- Version: 1.0.0
+ Version: 1.0.2
  Summary: Databricks SQLAlchemy plugin for Python
  License: Apache-2.0
  Author: Databricks
@@ -13,18 +13,14 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
- Requires-Dist: databricks_sql_connector_core (>=4.0.0)
- Requires-Dist: sqlalchemy (>=2.0.21)
+ Requires-Dist: sqlalchemy (>=1.3.24,<2.0.0)
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sqlalchemy/issues
  Project-URL: Homepage, https://github.com/databricks/databricks-sqlalchemy
  Description-Content-Type: text/markdown
 
- ## Databricks dialect for SQLAlchemy 2.0
+ ## Databricks dialect for SQLAlchemy 1.0
 
- The Databricks dialect for SQLAlchemy serves as a bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
-
- ## Usage with SQLAlchemy <= 2.0
- A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, you can use `databricks-sql-connector^2.4.0`.
+ The Databricks dialect for SQLAlchemy serves as a bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `example.py`.
 
 
  ## Installation
@@ -32,7 +28,7 @@ A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4
  To install the dialect and its dependencies:
 
  ```shell
- pip install databricks-sqlalchemy
+ pip install databricks-sqlalchemy~=1.0
  ```
 
  If you also plan to use `alembic` you can alternatively run:
@@ -65,41 +61,45 @@ access_token = os.getenv("DATABRICKS_TOKEN")
  catalog = os.getenv("DATABRICKS_CATALOG")
  schema = os.getenv("DATABRICKS_SCHEMA")
 
- engine = create_engine(
-     f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
- )
+ if sqlalchemy.__version__.startswith("1.3"):
+     # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
+     # Pass these in as connect_args instead
+
+     conn_string = f"databricks://token:{access_token}@{host}"
+     connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
+     all_connect_args = {**extra_connect_args, **connect_args}
+     engine = create_engine(conn_string, connect_args=all_connect_args)
+ else:
+     engine = create_engine(
+         f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
+         connect_args=extra_connect_args,
+     )
+
  ```
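The added branch above relies on an `extra_connect_args` mapping defined earlier in the README, outside this hunk. A rough, self-contained sketch of the same version-conditional logic, with `extra_connect_args` stubbed out as an empty dict rather than the README's actual value:

```python
import os

import sqlalchemy
from sqlalchemy import create_engine

host = os.getenv("DATABRICKS_SERVER_HOSTNAME")
http_path = os.getenv("DATABRICKS_HTTP_PATH")
access_token = os.getenv("DATABRICKS_TOKEN")
catalog = os.getenv("DATABRICKS_CATALOG")
schema = os.getenv("DATABRICKS_SCHEMA")

# Stand-in for the README's `extra_connect_args`, which is defined outside this hunk.
extra_connect_args = {}

if sqlalchemy.__version__.startswith("1.3"):
    # SQLAlchemy 1.3.x cannot parse http_path, catalog, and schema out of the
    # URL query string, so they are handed to the DBAPI via connect_args.
    engine = create_engine(
        f"databricks://token:{access_token}@{host}",
        connect_args={
            **extra_connect_args,
            "catalog": catalog,
            "schema": schema,
            "http_path": http_path,
        },
    )
else:
    # SQLAlchemy 1.4+ parses all three parameters from the URL itself.
    engine = create_engine(
        f"databricks://token:{access_token}@{host}"
        f"?http_path={http_path}&catalog={catalog}&schema={schema}",
        connect_args=extra_connect_args,
    )
```

Branching on `sqlalchemy.__version__` lets one code path serve both major versions admitted by the new `sqlalchemy (>=1.3.24,<2.0.0)` requirement.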
 
  ## Types
 
- The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
+ The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/13/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/13/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
 
  |SQLAlchemy Type|Databricks SQL Type|
  |-|-|
- [`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
- [`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
- [`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
- [`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
- [`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
- [`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
- [`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
- [`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
- [`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
- [`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
- [`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
- [`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
- [`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
-
- In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
-
- - [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
- - [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
- - [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)
+ [`BigInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
+ [`LargeBinary`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
+ [`Boolean`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
+ [`Date`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
+ [`DateTime`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
+ [`Enum`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
+ [`Float`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
+ [`Integer`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
+ [`Numeric`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
+ [`PickleType`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
+ [`SmallInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
+ [`String`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Text`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Time`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Unicode`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`UnicodeText`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Uuid`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
 
 
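As an illustration of the mapping above, a hypothetical table declared with CamelCase types only (a sketch, not taken from the README):

```python
from sqlalchemy import Column, MetaData, Table
from sqlalchemy.types import BigInteger, Boolean, Date, DateTime, Numeric, String

metadata = MetaData()

# Hypothetical table: per the mapping above, these CamelCase types should
# render as BIGINT, BOOLEAN, DATE, TIMESTAMP_NTZ, DECIMAL(10,2), and STRING.
users = Table(
    "users",
    metadata,
    Column("id", BigInteger),
    Column("active", Boolean),
    Column("signup_date", Date),
    Column("last_seen", DateTime),
    Column("balance", Numeric(10, 2)),
    Column("name", String),
)
```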
  ### `LargeBinary()` and `PickleType()`
@@ -112,24 +112,6 @@ Support for `CHECK` constraints is not implemented in this dialect. Support is p
 
  SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.
 
- ### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
-
- Databricks Runtime provides two datetime-like types: `TIMESTAMP` which is always timezone-aware and `TIMESTAMP_NTZ` which is timezone agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
-
- The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default. So our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly. Just use `DateTime()`.
-
- If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
-
- _Note that SQLAlchemy documentation suggests that you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
-
- ```python
- from sqlalchemy import DateTime
- from databricks.sqlalchemy import TIMESTAMP
-
- class SomeModel(Base):
-     some_date_without_timezone = DateTime()
-     some_date_with_timezone = TIMESTAMP()
- ```
 
  ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`
 
@@ -154,7 +136,7 @@ class SomeModel(Base):
 
  Identity and generated value support is currently limited in this dialect.
 
- When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.
+ When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/13/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/13/core/defaults.html#identity-ddl) instead.
 
  Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.
 
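A hypothetical model following that guidance (a sketch assuming SQLAlchemy 1.4, where `Identity()` is available; the README's own `SomeModel` example sits between this hunk and the next):

```python
from sqlalchemy import BigInteger, Column, Identity, String
from sqlalchemy.orm import declarative_base  # SQLAlchemy 1.4 import path

Base = declarative_base()


class Widget(Base):
    __tablename__ = "widgets"

    # Auto-increment requires an explicit Identity() on a BigInteger column;
    # the autoincrement= flag alone is ignored by this dialect.
    id = Column(BigInteger, Identity(always=True), primary_key=True)
    name = Column(String)
```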
@@ -168,10 +150,6 @@ class SomeModel(Base):
 
  When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).
 
- ## Parameters
-
- `databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
-
  ## Usage with pandas
 
  Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
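A round-trip sketch of that pattern, with hypothetical data and placeholder credentials (the README's own `squares` example follows in the next hunk):

```python
import pandas as pd
from sqlalchemy import create_engine

# Placeholder URL; substitute real credentials as in the connection example above.
engine = create_engine(
    "databricks://token:<access_token>@<host>?http_path=<http_path>"
    "&catalog=<catalog>&schema=<schema>"
)

squares = pd.DataFrame({"x": range(10), "x_squared": [i**2 for i in range(10)]})

with engine.connect() as conn:
    # Write the frame to a table, then read it back through the same connection.
    squares.to_sql("squares", conn, if_exists="replace", index=False)
    round_trip = pd.read_sql("SELECT * FROM squares", conn)
```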
@@ -202,7 +180,7 @@ with engine.connect() as conn:
      df.to_sql('squares',conn)
  ```
 
- ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)
+ ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#defining-foreign-keys)
 
  Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.
 
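A sketch of both constraints on hypothetical tables (table and column names are illustrative, not from the README):

```python
from sqlalchemy import BigInteger, Column, ForeignKey, MetaData, String, Table

metadata = MetaData()

# primary_key=True emits a PRIMARY KEY constraint in the generated DDL.
users = Table(
    "users",
    metadata,
    Column("user_id", BigInteger, primary_key=True),
    Column("name", String),
)

# ForeignKey emits a FOREIGN KEY constraint, which Databricks records in the
# catalog but does not enforce.
orders = Table(
    "orders",
    metadata,
    Column("order_id", BigInteger, primary_key=True),
    Column("user_id", BigInteger, ForeignKey("users.user_id")),
)
```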
databricks_sqlalchemy-1.0.2.dist-info/RECORD

@@ -0,0 +1,10 @@
+ CHANGELOG.md,sha256=JU6ETCTYFt7p3CJ6XtKbu-fBBgvyfNn6MQnRfnjG7oY,163
+ databricks/sqlalchemy/__init__.py,sha256=vZg5CR1laCr50IFcOkzmp9-ysH83iTg81ygQcsPFTk8,60
+ databricks/sqlalchemy/dialect/__init__.py,sha256=hPPl180-V_xexLWhtwknNnhHboOYd2wXHelvmtk0E7c,10745
+ databricks/sqlalchemy/dialect/base.py,sha256=FBibGU9FV_UGlIpF8wyARhV0ImahIqsPELqvrxm_8Rk,494
+ databricks/sqlalchemy/dialect/compiler.py,sha256=P__ihEonyOJYotsVpirjbHf-lYBqBLLK-cM5LZdOSUo,792
+ databricks_sqlalchemy-1.0.2.dist-info/LICENSE,sha256=WgVm2VpfZ3CsUfPndD2NeCrEIcFA4UB-YnnW4ejxcbE,11346
+ databricks_sqlalchemy-1.0.2.dist-info/METADATA,sha256=DBMXEaNLK2GgEJ_J8oy97nMmMqe1uyyjPJu00qZLvWI,11019
+ databricks_sqlalchemy-1.0.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ databricks_sqlalchemy-1.0.2.dist-info/entry_points.txt,sha256=AAjpsvZbVcoMAcWLIesoAT5FNZhBEcIhxdKknVua3jw,74
+ databricks_sqlalchemy-1.0.2.dist-info/RECORD,,
databricks/sqlalchemy/_ddl.py

@@ -1,100 +0,0 @@
- import re
- from sqlalchemy.sql import compiler, sqltypes
- import logging
-
- logger = logging.getLogger(__name__)
-
-
- class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
-     """https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html"""
-
-     legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
-
-     def __init__(self, dialect):
-         super().__init__(dialect, initial_quote="`")
-
-
- class DatabricksDDLCompiler(compiler.DDLCompiler):
-     def post_create_table(self, table):
-         post = [" USING DELTA"]
-         if table.comment:
-             comment = self.sql_compiler.render_literal_value(
-                 table.comment, sqltypes.String()
-             )
-             post.append("COMMENT " + comment)
-
-         post.append("TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'enabled')")
-         return "\n".join(post)
-
-     def visit_unique_constraint(self, constraint, **kw):
-         logger.warning("Databricks does not support unique constraints")
-         pass
-
-     def visit_check_constraint(self, constraint, **kw):
-         logger.warning("This dialect does not support check constraints")
-         pass
-
-     def visit_identity_column(self, identity, **kw):
-         """When configuring an Identity() with Databricks, only the always option is supported.
-         All other options are ignored.
-
-         Note: IDENTITY columns must always be defined as BIGINT. An exception will be raised if INT is used.
-
-         https://www.databricks.com/blog/2022/08/08/identity-columns-to-generate-surrogate-keys-are-now-available-in-a-lakehouse-near-you.html
-         """
-         text = "GENERATED %s AS IDENTITY" % (
-             "ALWAYS" if identity.always else "BY DEFAULT",
-         )
-         return text
-
-     def visit_set_column_comment(self, create, **kw):
-         return "ALTER TABLE %s ALTER COLUMN %s COMMENT %s" % (
-             self.preparer.format_table(create.element.table),
-             self.preparer.format_column(create.element),
-             self.sql_compiler.render_literal_value(
-                 create.element.comment, sqltypes.String()
-             ),
-         )
-
-     def visit_drop_column_comment(self, create, **kw):
-         return "ALTER TABLE %s ALTER COLUMN %s COMMENT ''" % (
-             self.preparer.format_table(create.element.table),
-             self.preparer.format_column(create.element),
-         )
-
-     def get_column_specification(self, column, **kwargs):
-         """
-         Emit a log message if a user attempts to set autoincrement=True on a column.
-         See comments in test_suite.py. We may implement implicit IDENTITY using this
-         feature in the future, similar to the Microsoft SQL Server dialect.
-         """
-         if column is column.table._autoincrement_column or column.autoincrement is True:
-             logger.warning(
-                 "Databricks dialect ignores SQLAlchemy's autoincrement semantics. Use explicit Identity() instead."
-             )
-
-         colspec = super().get_column_specification(column, **kwargs)
-         if column.comment is not None:
-             literal = self.sql_compiler.render_literal_value(
-                 column.comment, sqltypes.STRINGTYPE
-             )
-             colspec += " COMMENT " + literal
-
-         return colspec
-
-
- class DatabricksStatementCompiler(compiler.SQLCompiler):
-     def limit_clause(self, select, **kw):
-         """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
-         since Databricks SQL doesn't support the latter.
-
-         https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-limit.html
-         """
-         text = ""
-         if select._limit_clause is not None:
-             text += "\n LIMIT " + self.process(select._limit_clause, **kw)
-         if select._offset_clause is not None:
-             if select._limit_clause is None:
-                 text += "\n LIMIT ALL"
-             text += " OFFSET " + self.process(select._offset_clause, **kw)
-         return text
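The most subtle piece of the removed module is `limit_clause`, which substitutes `LIMIT ALL` where SQLAlchemy's default compiler would emit the unsupported `LIMIT -1`. A standalone restatement of that rule, for illustration only (the real method operates on compiler clause objects, not plain integers):

```python
from typing import Optional


def render_limit_offset(limit: Optional[int], offset: Optional[int]) -> str:
    """Mirror the removed limit_clause: Databricks SQL has no LIMIT -1,
    so an OFFSET without a LIMIT must be preceded by LIMIT ALL."""
    text = ""
    if limit is not None:
        text += f"\n LIMIT {limit}"
    if offset is not None:
        if limit is None:
            text += "\n LIMIT ALL"
        text += f" OFFSET {offset}"
    return text


assert render_limit_offset(5, 10) == "\n LIMIT 5 OFFSET 10"
assert render_limit_offset(None, 10) == "\n LIMIT ALL OFFSET 10"
```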
databricks/sqlalchemy/_parse.py

@@ -1,385 +0,0 @@
- from typing import List, Optional, Dict
- import re
-
- import sqlalchemy
- from sqlalchemy.engine import CursorResult
- from sqlalchemy.engine.interfaces import ReflectedColumn
-
- from databricks.sqlalchemy import _types as type_overrides
-
- """
- This module contains helper functions that can parse the contents
- of metadata and exceptions received from DBR. These are mostly just
- wrappers around regexes.
- """
-
-
- class DatabricksSqlAlchemyParseException(Exception):
-     pass
-
-
- def _match_table_not_found_string(message: str) -> bool:
-     """Return True if the message contains a substring indicating that a table was not found"""
-
-     DBR_LTE_12_NOT_FOUND_STRING = "Table or view not found"
-     DBR_GT_12_NOT_FOUND_STRING = "TABLE_OR_VIEW_NOT_FOUND"
-     return any(
-         [
-             DBR_LTE_12_NOT_FOUND_STRING in message,
-             DBR_GT_12_NOT_FOUND_STRING in message,
-         ]
-     )
-
-
- def _describe_table_extended_result_to_dict_list(
-     result: CursorResult,
- ) -> List[Dict[str, str]]:
-     """Transform the CursorResult of DESCRIBE TABLE EXTENDED into a list of Dictionaries"""
-
-     rows_to_return = []
-     for row in result.all():
-         this_row = {"col_name": row.col_name, "data_type": row.data_type}
-         rows_to_return.append(this_row)
-
-     return rows_to_return
-
-
- def extract_identifiers_from_string(input_str: str) -> List[str]:
-     """For a string input resembling (`a`, `b`, `c`) return a list of identifiers ['a', 'b', 'c']"""
-
-     # This matches the valid character list contained in DatabricksIdentifierPreparer
-     pattern = re.compile(r"`([A-Za-z0-9_]+)`")
-     matches = pattern.findall(input_str)
-     return [i for i in matches]
-
-
- def extract_identifier_groups_from_string(input_str: str) -> List[str]:
-     """For a string input resembling:
-
-     FOREIGN KEY (`pname`, `pid`, `pattr`) REFERENCES `main`.`pysql_sqlalchemy`.`tb1` (`name`, `id`, `attr`)
-
-     Return ['(`pname`, `pid`, `pattr`)', '(`name`, `id`, `attr`)']
-     """
-     pattern = re.compile(r"\([`A-Za-z0-9_,\s]*\)")
-     matches = pattern.findall(input_str)
-     return [i for i in matches]
-
-
- def extract_three_level_identifier_from_constraint_string(input_str: str) -> dict:
-     """For a string input resembling:
-     FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`pysql_dialect_compliance`.`users` (`user_id`)
-
-     Return a dict like
-         {
-             "catalog": "main",
-             "schema": "pysql_dialect_compliance",
-             "table": "users"
-         }
-
-     Raise a DatabricksSqlAlchemyParseException if a 3L namespace isn't found
-     """
-     pat = re.compile(r"REFERENCES\s+(.*?)\s*\(")
-     matches = pat.findall(input_str)
-
-     if not matches:
-         raise DatabricksSqlAlchemyParseException(
-             "3L namespace not found in constraint string"
-         )
-
-     first_match = matches[0]
-     parts = first_match.split(".")
-
-     def strip_backticks(input: str):
-         return input.replace("`", "")
-
-     try:
-         return {
-             "catalog": strip_backticks(parts[0]),
-             "schema": strip_backticks(parts[1]),
-             "table": strip_backticks(parts[2]),
-         }
-     except IndexError:
-         raise DatabricksSqlAlchemyParseException(
-             "Incomplete 3L namespace found in constraint string: " + ".".join(parts)
-         )
-
-
- def _parse_fk_from_constraint_string(constraint_str: str) -> dict:
-     """Build a dictionary of foreign key constraint information from a constraint string.
-
-     For example:
-
-     ```
-     FOREIGN KEY (`pname`, `pid`, `pattr`) REFERENCES `main`.`pysql_dialect_compliance`.`tb1` (`name`, `id`, `attr`)
-     ```
-
-     Return a dictionary like:
-
-     ```
-     {
-         "constrained_columns": ["pname", "pid", "pattr"],
-         "referred_table": "tb1",
-         "referred_schema": "pysql_dialect_compliance",
-         "referred_columns": ["name", "id", "attr"]
-     }
-     ```
-
-     Note that the constraint name doesn't appear in the constraint string so it will not
-     be present in the output of this function.
-     """
-
-     referred_table_dict = extract_three_level_identifier_from_constraint_string(
-         constraint_str
-     )
-     referred_table = referred_table_dict["table"]
-     referred_schema = referred_table_dict["schema"]
-
-     # _extracted is a tuple of two lists of identifiers
-     # we assume the first immediately follows "FOREIGN KEY" and the second
-     # immediately follows REFERENCES $tableName
-     _extracted = extract_identifier_groups_from_string(constraint_str)
-     constrained_columns_str, referred_columns_str = (
-         _extracted[0],
-         _extracted[1],
-     )
-
-     constrained_columns = extract_identifiers_from_string(constrained_columns_str)
-     referred_columns = extract_identifiers_from_string(referred_columns_str)
-
-     return {
-         "constrained_columns": constrained_columns,
-         "referred_table": referred_table,
-         "referred_columns": referred_columns,
-         "referred_schema": referred_schema,
-     }
-
-
- def build_fk_dict(
-     fk_name: str, fk_constraint_string: str, schema_name: Optional[str]
- ) -> dict:
-     """
-     Given a foreign key name and a foreign key constraint string, return a dictionary
-     with the following keys:
-
-     name
-         the name of the foreign key constraint
-     constrained_columns
-         a list of column names that make up the foreign key
-     referred_table
-         the name of the table that the foreign key references
-     referred_columns
-         a list of column names that are referenced by the foreign key
-     referred_schema
-         the name of the schema that the foreign key references.
-
-     referred_schema will be None if the schema_name argument is None.
-     This is required by SQLAlchemy's ComponentReflectionTest::test_get_foreign_keys
-     """
-
-     # The foreign key name is not contained in the constraint string so we
-     # need to add it manually
-     base_fk_dict = _parse_fk_from_constraint_string(fk_constraint_string)
-
-     if not schema_name:
-         schema_override_dict = dict(referred_schema=None)
-     else:
-         schema_override_dict = {}
-
-     # mypy doesn't like this method of conditionally adding a key to a dictionary
-     # while keeping everything immutable
-     complete_foreign_key_dict = {
-         "name": fk_name,
-         **base_fk_dict,
-         **schema_override_dict,  # type: ignore
-     }
-
-     return complete_foreign_key_dict
-
-
- def _parse_pk_columns_from_constraint_string(constraint_str: str) -> List[str]:
-     """Build a list of constrained columns from a constraint string returned by DESCRIBE TABLE EXTENDED
-
-     For example:
-
-     PRIMARY KEY (`id`, `name`, `email_address`)
-
-     Returns a list like
-
-     ["id", "name", "email_address"]
-     """
-
-     _extracted = extract_identifiers_from_string(constraint_str)
-
-     return _extracted
-
-
- def build_pk_dict(pk_name: str, pk_constraint_string: str) -> dict:
-     """Given a primary key name and a primary key constraint string, return a dictionary
-     with the following keys:
-
-     constrained_columns
-         A list of string column names that make up the primary key
-
-     name
-         The name of the primary key constraint
-     """
-
-     constrained_columns = _parse_pk_columns_from_constraint_string(pk_constraint_string)
-
-     return {"constrained_columns": constrained_columns, "name": pk_name}
-
-
- def match_dte_rows_by_value(dte_output: List[Dict[str, str]], match: str) -> List[dict]:
-     """Return a list of dictionaries containing only the col_name:data_type pairs where the `data_type`
-     value contains the match argument.
-
-     Today, DESCRIBE TABLE EXTENDED doesn't give a deterministic name to the fields
-     in which a constraint will be found in its output. So we cycle through its output looking
-     for a match. This is brittle. We could optionally make two roundtrips: the first
-     would query information_schema for the name of the constraint on this table, and
-     a second to DESCRIBE TABLE EXTENDED, at which point we would know the name of the
-     constraint. But for now we instead assume that a Python list comprehension is faster
-     than a network roundtrip.
-     """
-
-     output_rows = []
-
-     for row_dict in dte_output:
-         if match in row_dict["data_type"]:
-             output_rows.append(row_dict)
-
-     return output_rows
-
-
- def match_dte_rows_by_key(dte_output: List[Dict[str, str]], match: str) -> List[dict]:
-     """Return a list of dictionaries containing only the col_name:data_type pairs where the `col_name`
-     value contains the match argument.
-     """
-
-     output_rows = []
-
-     for row_dict in dte_output:
-         if match in row_dict["col_name"]:
-             output_rows.append(row_dict)
-
-     return output_rows
-
-
- def get_fk_strings_from_dte_output(dte_output: List[Dict[str, str]]) -> List[dict]:
-     """If the DESCRIBE TABLE EXTENDED output contains foreign key constraints, return a list of dictionaries,
-     one dictionary per defined constraint
-     """
-
-     output = match_dte_rows_by_value(dte_output, "FOREIGN KEY")
-
-     return output
-
-
- def get_pk_strings_from_dte_output(
-     dte_output: List[Dict[str, str]]
- ) -> Optional[List[dict]]:
-     """If the DESCRIBE TABLE EXTENDED output contains primary key constraints, return a list of dictionaries,
-     one dictionary per defined constraint.
-
-     Returns None if no primary key constraints are found.
-     """
-
-     output = match_dte_rows_by_value(dte_output, "PRIMARY KEY")
-
-     return output
-
-
- def get_comment_from_dte_output(dte_output: List[Dict[str, str]]) -> Optional[str]:
-     """Returns the value of the first "Comment" col_name data in dte_output"""
-     output = match_dte_rows_by_key(dte_output, "Comment")
-     if not output:
-         return None
-     else:
-         return output[0]["data_type"]
-
-
- # The keys of this dictionary are the values we expect to see in a
- # TGetColumnsRequest's .TYPE_NAME attribute.
- # These are enumerated in ttypes.py as class TTypeId.
- # TODO: confirm that all types in TTypeId are included here.
- GET_COLUMNS_TYPE_MAP = {
-     "boolean": sqlalchemy.types.Boolean,
-     "smallint": sqlalchemy.types.SmallInteger,
-     "tinyint": type_overrides.TINYINT,
-     "int": sqlalchemy.types.Integer,
-     "bigint": sqlalchemy.types.BigInteger,
-     "float": sqlalchemy.types.Float,
-     "double": sqlalchemy.types.Float,
-     "string": sqlalchemy.types.String,
-     "varchar": sqlalchemy.types.String,
-     "char": sqlalchemy.types.String,
-     "binary": sqlalchemy.types.String,
-     "array": sqlalchemy.types.String,
-     "map": sqlalchemy.types.String,
-     "struct": sqlalchemy.types.String,
-     "uniontype": sqlalchemy.types.String,
-     "decimal": sqlalchemy.types.Numeric,
-     "timestamp": type_overrides.TIMESTAMP,
-     "timestamp_ntz": type_overrides.TIMESTAMP_NTZ,
-     "date": sqlalchemy.types.Date,
- }
-
-
- def parse_numeric_type_precision_and_scale(type_name_str):
-     """Return an instantiated sqlalchemy Numeric() type that preserves the precision and scale indicated
-     in the output from TGetColumnsRequest.
-
-     type_name_str
-         The value of TGetColumnsReq.TYPE_NAME.
-
-     If type_name_str is "DECIMAL(18,5)" returns sqlalchemy.types.Numeric(18,5)
-     """
-
-     pattern = re.compile(r"DECIMAL\((\d+,\d+)\)")
-     match = re.search(pattern, type_name_str)
-     precision_and_scale = match.group(1)
-     precision, scale = tuple(precision_and_scale.split(","))
-
-     return sqlalchemy.types.Numeric(int(precision), int(scale))
-
-
- def parse_column_info_from_tgetcolumnsresponse(thrift_resp_row) -> ReflectedColumn:
-     """Returns a dictionary of the ReflectedColumn schema parsed from
-     a single row of the result of a TGetColumnsRequest thrift RPC
-     """
-
-     pat = re.compile(r"^\w+")
-
-     # This method assumes a valid TYPE_NAME field in the response.
-     # TODO: add error handling in case TGetColumnsResponse format changes
-
-     _raw_col_type = re.search(pat, thrift_resp_row.TYPE_NAME).group(0).lower()  # type: ignore
-     _col_type = GET_COLUMNS_TYPE_MAP[_raw_col_type]
-
-     if _raw_col_type == "decimal":
-         final_col_type = parse_numeric_type_precision_and_scale(
-             thrift_resp_row.TYPE_NAME
-         )
-     else:
-         final_col_type = _col_type
-
-     # See comments about autoincrement in test_suite.py
-     # Since Databricks SQL doesn't currently support inline AUTOINCREMENT declarations,
-     # the autoincrement must be manually declared with an Identity() construct in SQLAlchemy.
-     # Other dialects can perform this extra Identity() step automatically. But that is not
-     # implemented in the Databricks dialect right now. So autoincrement is currently always False.
-     # It's not clear what IS_AUTO_INCREMENT in the thrift response actually reflects or whether
-     # it ever returns a `YES`.
-
-     # Per the guidance in SQLAlchemy's docstrings, we prefer to not even include an autoincrement
-     # key in this dictionary.
-     this_column = {
-         "name": thrift_resp_row.COLUMN_NAME,
-         "type": final_col_type,
-         "nullable": bool(thrift_resp_row.NULLABLE),
-         "default": thrift_resp_row.COLUMN_DEF,
-         "comment": thrift_resp_row.REMARKS or None,
-     }
-
-     # TODO: figure out how to return sqlalchemy.interfaces in a way that mypy respects
-     return this_column  # type: ignore
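The removed parsing helpers are plain regex wrappers, so their behavior can be reproduced standalone. A sketch exercising the same three patterns against the constraint string from the docstrings above:

```python
import re

constraint = (
    "FOREIGN KEY (`pname`, `pid`, `pattr`) REFERENCES "
    "`main`.`pysql_dialect_compliance`.`tb1` (`name`, `id`, `attr`)"
)

# The same regexes used by the removed helpers.
identifier = re.compile(r"`([A-Za-z0-9_]+)`")
group = re.compile(r"\([`A-Za-z0-9_,\s]*\)")
reference = re.compile(r"REFERENCES\s+(.*?)\s*\(")

constrained_str, referred_str = group.findall(constraint)
catalog, schema, table = (
    part.replace("`", "") for part in reference.findall(constraint)[0].split(".")
)

fk = {
    "constrained_columns": identifier.findall(constrained_str),
    "referred_table": table,
    "referred_schema": schema,
    "referred_columns": identifier.findall(referred_str),
}
assert catalog == "main"
assert fk["constrained_columns"] == ["pname", "pid", "pattr"]
assert fk["referred_table"] == "tb1"
assert fk["referred_schema"] == "pysql_dialect_compliance"
```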