databricks-sqlalchemy 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. CHANGELOG.md +2 -2
  2. databricks/sqlalchemy/__init__.py +4 -1
  3. databricks/sqlalchemy/_ddl.py +100 -0
  4. databricks/sqlalchemy/_parse.py +385 -0
  5. databricks/sqlalchemy/_types.py +323 -0
  6. databricks/sqlalchemy/base.py +436 -0
  7. databricks/sqlalchemy/dependency_test/test_dependency.py +22 -0
  8. databricks/sqlalchemy/py.typed +0 -0
  9. databricks/sqlalchemy/pytest.ini +4 -0
  10. databricks/sqlalchemy/requirements.py +249 -0
  11. databricks/sqlalchemy/setup.cfg +4 -0
  12. databricks/sqlalchemy/test/_extra.py +70 -0
  13. databricks/sqlalchemy/test/_future.py +331 -0
  14. databricks/sqlalchemy/test/_regression.py +311 -0
  15. databricks/sqlalchemy/test/_unsupported.py +450 -0
  16. databricks/sqlalchemy/test/conftest.py +13 -0
  17. databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +189 -0
  18. databricks/sqlalchemy/test/overrides/_ctetest.py +33 -0
  19. databricks/sqlalchemy/test/test_suite.py +13 -0
  20. databricks/sqlalchemy/test_local/__init__.py +5 -0
  21. databricks/sqlalchemy/test_local/conftest.py +44 -0
  22. databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  23. databricks/sqlalchemy/test_local/e2e/test_basic.py +543 -0
  24. databricks/sqlalchemy/test_local/test_ddl.py +96 -0
  25. databricks/sqlalchemy/test_local/test_parsing.py +160 -0
  26. databricks/sqlalchemy/test_local/test_types.py +161 -0
  27. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/METADATA +60 -39
  28. databricks_sqlalchemy-2.0.0.dist-info/RECORD +31 -0
  29. databricks/sqlalchemy/dialect/__init__.py +0 -340
  30. databricks/sqlalchemy/dialect/base.py +0 -17
  31. databricks/sqlalchemy/dialect/compiler.py +0 -38
  32. databricks_sqlalchemy-1.0.1.dist-info/RECORD +0 -10
  33. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/LICENSE +0 -0
  34. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/WHEEL +0 -0
  35. {databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/entry_points.txt +0 -0
databricks/sqlalchemy/test_local/test_parsing.py
@@ -0,0 +1,160 @@
+ import pytest
+ from databricks.sqlalchemy._parse import (
+     extract_identifiers_from_string,
+     extract_identifier_groups_from_string,
+     extract_three_level_identifier_from_constraint_string,
+     build_fk_dict,
+     build_pk_dict,
+     match_dte_rows_by_value,
+     get_comment_from_dte_output,
+     DatabricksSqlAlchemyParseException,
+ )
+
+
+ # These are outputs from DESCRIBE TABLE EXTENDED
+ @pytest.mark.parametrize(
+     "input, expected",
+     [
+         ("PRIMARY KEY (`pk1`, `pk2`)", ["pk1", "pk2"]),
+         ("PRIMARY KEY (`a`, `b`, `c`)", ["a", "b", "c"]),
+         ("PRIMARY KEY (`name`, `id`, `attr`)", ["name", "id", "attr"]),
+     ],
+ )
+ def test_extract_identifiers(input, expected):
+     assert (
+         extract_identifiers_from_string(input) == expected
+     ), "Failed to extract identifiers from string"
+
+
+ @pytest.mark.parametrize(
+     "input, expected",
+     [
+         (
+             "FOREIGN KEY (`pname`, `pid`, `pattr`) REFERENCES `main`.`pysql_sqlalchemy`.`tb1` (`name`, `id`, `attr`)",
+             [
+                 "(`pname`, `pid`, `pattr`)",
+                 "(`name`, `id`, `attr`)",
+             ],
+         )
+     ],
+ )
+ def test_extract_identifier_batches(input, expected):
+     assert (
+         extract_identifier_groups_from_string(input) == expected
+     ), "Failed to extract identifier groups from string"
+
+
+ def test_extract_3l_namespace_from_constraint_string():
+     input = "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`pysql_dialect_compliance`.`users` (`user_id`)"
+     expected = {
+         "catalog": "main",
+         "schema": "pysql_dialect_compliance",
+         "table": "users",
+     }
+
+     assert (
+         extract_three_level_identifier_from_constraint_string(input) == expected
+     ), "Failed to extract 3L namespace from constraint string"
+
+
+ def test_extract_3l_namespace_from_bad_constraint_string():
+     input = "FOREIGN KEY (`parent_user_id`) REFERENCES `pysql_dialect_compliance`.`users` (`user_id`)"
+
+     with pytest.raises(DatabricksSqlAlchemyParseException):
+         extract_three_level_identifier_from_constraint_string(input)
+
+
+ @pytest.mark.parametrize("tschema", [None, "some_schema"])
+ def test_build_fk_dict(tschema):
+     fk_constraint_string = "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`some_schema`.`users` (`user_id`)"
+
+     result = build_fk_dict("some_fk_name", fk_constraint_string, schema_name=tschema)
+
+     assert result == {
+         "name": "some_fk_name",
+         "constrained_columns": ["parent_user_id"],
+         "referred_schema": tschema,
+         "referred_table": "users",
+         "referred_columns": ["user_id"],
+     }
+
+
+ def test_build_pk_dict():
+     pk_constraint_string = "PRIMARY KEY (`id`, `name`, `email_address`)"
+     pk_name = "pk1"
+
+     result = build_pk_dict(pk_name, pk_constraint_string)
+
+     assert result == {
+         "constrained_columns": ["id", "name", "email_address"],
+         "name": "pk1",
+     }
+
+
+ # This is a real example of the output from DESCRIBE TABLE EXTENDED as of 15 October 2023
+ RAW_SAMPLE_DTE_OUTPUT = [
+     ["id", "int"],
+     ["name", "string"],
+     ["", ""],
+     ["# Detailed Table Information", ""],
+     ["Catalog", "main"],
+     ["Database", "pysql_sqlalchemy"],
+     ["Table", "exampleexampleexample"],
+     ["Created Time", "Sun Oct 15 21:12:54 UTC 2023"],
+     ["Last Access", "UNKNOWN"],
+     ["Created By", "Spark "],
+     ["Type", "MANAGED"],
+     ["Location", "s3://us-west-2-****-/19a85dee-****/tables/ccb7***"],
+     ["Provider", "delta"],
+     ["Comment", "some comment"],
+     ["Owner", "some.user@example.com"],
+     ["Is_managed_location", "true"],
+     ["Predictive Optimization", "ENABLE (inherited from CATALOG main)"],
+     [
+         "Table Properties",
+         "[delta.checkpoint.writeStatsAsJson=false,delta.checkpoint.writeStatsAsStruct=true,delta.minReaderVersion=1,delta.minWriterVersion=2]",
+     ],
+     ["", ""],
+     ["# Constraints", ""],
+     ["exampleexampleexample_pk", "PRIMARY KEY (`id`)"],
+     [
+         "exampleexampleexample_fk",
+         "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`pysql_dialect_compliance`.`users` (`user_id`)",
+     ],
+ ]
+
+ FMT_SAMPLE_DT_OUTPUT = [
+     {"col_name": i[0], "data_type": i[1]} for i in RAW_SAMPLE_DTE_OUTPUT
+ ]
+
+
+ @pytest.mark.parametrize(
+     "match, output",
+     [
+         (
+             "PRIMARY KEY",
+             [
+                 {
+                     "col_name": "exampleexampleexample_pk",
+                     "data_type": "PRIMARY KEY (`id`)",
+                 }
+             ],
+         ),
+         (
+             "FOREIGN KEY",
+             [
+                 {
+                     "col_name": "exampleexampleexample_fk",
+                     "data_type": "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`pysql_dialect_compliance`.`users` (`user_id`)",
+                 }
+             ],
+         ),
+     ],
+ )
+ def test_filter_dict_by_value(match, output):
+     result = match_dte_rows_by_value(FMT_SAMPLE_DT_OUTPUT, match)
+     assert result == output
+
+
+ def test_get_comment_from_dte_output():
+     assert get_comment_from_dte_output(FMT_SAMPLE_DT_OUTPUT) == "some comment"
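The helpers exercised above live in `databricks/sqlalchemy/_parse.py`, which is added in this release but not expanded in this diff. As a rough sketch of the behaviour the first test asserts, assuming a simple backtick-quoted-identifier regex (the shipped implementation may differ):

```python
import re


def extract_identifiers_from_string(input_str: str) -> list:
    """Pull backtick-quoted identifiers out of a DESCRIBE TABLE EXTENDED constraint string.

    A hypothetical re-implementation for illustration only.
    """
    # "PRIMARY KEY (`pk1`, `pk2`)" -> ["pk1", "pk2"]
    return re.findall(r"`([^`]+)`", input_str)


assert extract_identifiers_from_string("PRIMARY KEY (`pk1`, `pk2`)") == ["pk1", "pk2"]
```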
databricks/sqlalchemy/test_local/test_types.py
@@ -0,0 +1,161 @@
+ import enum
+
+ import pytest
+ import sqlalchemy
+
+ from databricks.sqlalchemy.base import DatabricksDialect
+ from databricks.sqlalchemy._types import TINYINT, TIMESTAMP, TIMESTAMP_NTZ
+
+
+ class DatabricksDataType(enum.Enum):
+     """https://docs.databricks.com/en/sql/language-manual/sql-ref-datatypes.html"""
+
+     BIGINT = enum.auto()
+     BINARY = enum.auto()
+     BOOLEAN = enum.auto()
+     DATE = enum.auto()
+     DECIMAL = enum.auto()
+     DOUBLE = enum.auto()
+     FLOAT = enum.auto()
+     INT = enum.auto()
+     INTERVAL = enum.auto()
+     VOID = enum.auto()
+     SMALLINT = enum.auto()
+     STRING = enum.auto()
+     TIMESTAMP = enum.auto()
+     TIMESTAMP_NTZ = enum.auto()
+     TINYINT = enum.auto()
+     ARRAY = enum.auto()
+     MAP = enum.auto()
+     STRUCT = enum.auto()
+
+
+ # Defines the way that SQLAlchemy CamelCase types are compiled into Databricks SQL types.
+ # Note: I wish I could define this within the TestCamelCaseTypesCompilation class, but pytest doesn't like that.
+ camel_case_type_map = {
+     sqlalchemy.types.BigInteger: DatabricksDataType.BIGINT,
+     sqlalchemy.types.LargeBinary: DatabricksDataType.BINARY,
+     sqlalchemy.types.Boolean: DatabricksDataType.BOOLEAN,
+     sqlalchemy.types.Date: DatabricksDataType.DATE,
+     sqlalchemy.types.DateTime: DatabricksDataType.TIMESTAMP_NTZ,
+     sqlalchemy.types.Double: DatabricksDataType.DOUBLE,
+     sqlalchemy.types.Enum: DatabricksDataType.STRING,
+     sqlalchemy.types.Float: DatabricksDataType.FLOAT,
+     sqlalchemy.types.Integer: DatabricksDataType.INT,
+     sqlalchemy.types.Interval: DatabricksDataType.TIMESTAMP_NTZ,
+     sqlalchemy.types.Numeric: DatabricksDataType.DECIMAL,
+     sqlalchemy.types.PickleType: DatabricksDataType.BINARY,
+     sqlalchemy.types.SmallInteger: DatabricksDataType.SMALLINT,
+     sqlalchemy.types.String: DatabricksDataType.STRING,
+     sqlalchemy.types.Text: DatabricksDataType.STRING,
+     sqlalchemy.types.Time: DatabricksDataType.STRING,
+     sqlalchemy.types.Unicode: DatabricksDataType.STRING,
+     sqlalchemy.types.UnicodeText: DatabricksDataType.STRING,
+     sqlalchemy.types.Uuid: DatabricksDataType.STRING,
+ }
+
+
+ def dict_as_tuple_list(d: dict):
+     """Return a list of [(key, value), ...] from a dictionary."""
+     return [(key, value) for key, value in d.items()]
+
+
+ class CompilationTestBase:
+     dialect = DatabricksDialect()
+
+     def _assert_compiled_value(
+         self, type_: sqlalchemy.types.TypeEngine, expected: DatabricksDataType
+     ):
+         """Assert that when type_ is compiled for the databricks dialect, it renders the DatabricksDataType name.
+
+         This method initialises the type_ with no arguments.
+         """
+         compiled_result = type_().compile(dialect=self.dialect)  # type: ignore
+         assert compiled_result == expected.name
+
+     def _assert_compiled_value_explicit(
+         self, type_: sqlalchemy.types.TypeEngine, expected: str
+     ):
+         """Assert that when type_ is compiled for the databricks dialect, it renders the expected string.
+
+         This method expects an initialised type_ so that we can test how a TypeEngine created with arguments
+         is compiled.
+         """
+         compiled_result = type_.compile(dialect=self.dialect)
+         assert compiled_result == expected
+
+
+ class TestCamelCaseTypesCompilation(CompilationTestBase):
+     """Per the sqlalchemy documentation[^1], the camel case members of sqlalchemy.types are
+     expected to work across all dialects. These tests verify that the types compile into valid
+     Databricks SQL type strings. For example, sqlalchemy.types.Integer() should compile as "INT".
+
+     Truly custom types like STRUCT (notice the uppercase) are not expected to work across all dialects.
+     We test these separately.
+
+     Note that these tests have to do with type **name** compilation, which is separate from actually
+     mapping values between Python and Databricks.
+
+     Note: SchemaType and MatchType are not tested because they are not used in table definitions.
+
+     [^1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#generic-camelcase-types
+     """
+
+     @pytest.mark.parametrize("type_, expected", dict_as_tuple_list(camel_case_type_map))
+     def test_bare_camel_case_types_compile(self, type_, expected):
+         self._assert_compiled_value(type_, expected)
+
+     def test_numeric_renders_as_decimal_with_precision(self):
+         self._assert_compiled_value_explicit(sqlalchemy.types.Numeric(10), "DECIMAL(10)")
+
+     def test_numeric_renders_as_decimal_with_precision_and_scale(self):
+         self._assert_compiled_value_explicit(
+             sqlalchemy.types.Numeric(10, 2), "DECIMAL(10, 2)"
+         )
+
+
+ uppercase_type_map = {
+     sqlalchemy.types.ARRAY: DatabricksDataType.ARRAY,
+     sqlalchemy.types.BIGINT: DatabricksDataType.BIGINT,
+     sqlalchemy.types.BINARY: DatabricksDataType.BINARY,
+     sqlalchemy.types.BOOLEAN: DatabricksDataType.BOOLEAN,
+     sqlalchemy.types.DATE: DatabricksDataType.DATE,
+     sqlalchemy.types.DECIMAL: DatabricksDataType.DECIMAL,
+     sqlalchemy.types.DOUBLE: DatabricksDataType.DOUBLE,
+     sqlalchemy.types.FLOAT: DatabricksDataType.FLOAT,
+     sqlalchemy.types.INT: DatabricksDataType.INT,
+     sqlalchemy.types.SMALLINT: DatabricksDataType.SMALLINT,
+     sqlalchemy.types.TIMESTAMP: DatabricksDataType.TIMESTAMP,
+     TINYINT: DatabricksDataType.TINYINT,
+     TIMESTAMP: DatabricksDataType.TIMESTAMP,
+     TIMESTAMP_NTZ: DatabricksDataType.TIMESTAMP_NTZ,
+ }
+
+
+ class TestUppercaseTypesCompilation(CompilationTestBase):
+     """Per the sqlalchemy documentation[^1], uppercase types are considered to be specific to some
+     database backends. These tests verify that the types compile into valid Databricks SQL type strings.
+
+     [^1]: https://docs.sqlalchemy.org/en/20/core/type_basics.html#backend-specific-uppercase-datatypes
+     """
+
+     @pytest.mark.parametrize("type_, expected", dict_as_tuple_list(uppercase_type_map))
+     def test_bare_uppercase_types_compile(self, type_, expected):
+         if type_ is sqlalchemy.types.ARRAY:
+             # ARRAY cannot be initialised without passing an item definition so we test it separately.
+             # It is preserved in uppercase_type_map for clarity.
+             assert True
+         else:
+             self._assert_compiled_value(type_, expected)
+
+     def test_array_string_renders_as_array_of_string(self):
+         """SQLAlchemy's ARRAY type requires an item definition. And their docs indicate that they've only tested
+         it with Postgres since that's the only first-class dialect with support for ARRAY.
+
+         https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.ARRAY
+         """
+         self._assert_compiled_value_explicit(
+             sqlalchemy.types.ARRAY(sqlalchemy.types.String), "ARRAY<STRING>"
+         )
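The compilation behaviour asserted above can be reproduced directly against the dialect; a minimal sketch mirroring the assertions in these tests:

```python
import sqlalchemy

from databricks.sqlalchemy.base import DatabricksDialect

dialect = DatabricksDialect()

# CamelCase generic types compile to Databricks SQL type names
assert sqlalchemy.types.Integer().compile(dialect=dialect) == "INT"
assert sqlalchemy.types.Numeric(10, 2).compile(dialect=dialect) == "DECIMAL(10, 2)"

# ARRAY requires an item type and renders with Databricks' angle-bracket syntax
assert (
    sqlalchemy.types.ARRAY(sqlalchemy.types.String).compile(dialect=dialect)
    == "ARRAY<STRING>"
)
```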
{databricks_sqlalchemy-1.0.1.dist-info → databricks_sqlalchemy-2.0.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: databricks-sqlalchemy
- Version: 1.0.1
+ Version: 2.0.0
  Summary: Databricks SQLAlchemy plugin for Python
  License: Apache-2.0
  Author: Databricks
@@ -14,14 +14,17 @@ Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Requires-Dist: databricks_sql_connector_core (>=4.0.0)
- Requires-Dist: sqlalchemy (>=1.3.24,<2.0.0)
+ Requires-Dist: sqlalchemy (>=2.0.21)
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sqlalchemy/issues
  Project-URL: Homepage, https://github.com/databricks/databricks-sqlalchemy
  Description-Content-Type: text/markdown
 
- ## Databricks dialect for SQLALchemy 1.0
+ ## Databricks dialect for SQLAlchemy 2.0
 
- The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `example.py`.
+ The Databricks dialect for SQLAlchemy serves as a bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
+
+ ## Usage with SQLAlchemy < 2.0
+ A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, pin `databricks-sql-connector^2.4.0`.
 
 
  ## Installation
@@ -29,7 +32,7 @@ The Databricks dialect for SQLAlchemy serves as bridge between [SQLAlchemy](http
  To install the dialect and its dependencies:
 
  ```shell
- pip install databricks-sqlalchemy~=1.0
+ pip install databricks-sqlalchemy
  ```
 
  If you also plan to use `alembic` you can alternatively run:
@@ -62,45 +65,41 @@ access_token = os.getenv("DATABRICKS_TOKEN")
  catalog = os.getenv("DATABRICKS_CATALOG")
  schema = os.getenv("DATABRICKS_SCHEMA")
 
- if sqlalchemy.__version__.startswith("1.3"):
-     # SQLAlchemy 1.3.x fails to parse the http_path, catalog, and schema from our connection string
-     # Pass these in as connect_args instead
-
-     conn_string = f"databricks://token:{access_token}@{host}"
-     connect_args = dict(catalog=catalog, schema=schema, http_path=http_path)
-     all_connect_args = {**extra_connect_args, **connect_args}
-     engine = create_engine(conn_string, connect_args=all_connect_args)
- else:
-     engine = create_engine(
-         f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}",
-         connect_args=extra_connect_args,
-     )
-
+ engine = create_engine(
+     f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
+ )
  ```
 
  ## Types
 
- The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/13/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/13/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
+ The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
 
  |SQLAlchemy Type|Databricks SQL Type|
  |-|-|
- [`BigInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
- [`LargeBinary`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
- [`Boolean`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
- [`Date`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
- [`DateTime`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
- [`Enum`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
- [`Float`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
- [`Integer`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
- [`Numeric`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
- [`PickleType`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
- [`SmallInteger`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
- [`String`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Text`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Time`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Unicode`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`UnicodeText`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
- [`Uuid`](https://docs.sqlalchemy.org/en/13/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+ [`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
+ [`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
+ [`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
+ [`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
+ [`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
+ [`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
+ [`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
+ [`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
+ [`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
+ [`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
+ [`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
+ [`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
+ [`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
+ [`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
+
+ In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
+
+ - [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
+ - [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
+ - [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)
 
 
  ### `LargeBinary()` and `PickleType()`
@@ -113,6 +112,24 @@ Support for `CHECK` constraints is not implemented in this dialect. Support is p
 
  SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.
 
+ ### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
+
+ Databricks Runtime provides two datetime-like types: `TIMESTAMP`, which is always timezone-aware, and `TIMESTAMP_NTZ`, which is timezone-agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
+
+ The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default, so our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly; just use `DateTime()`.
+
+ If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
+
+ _Note that the SQLAlchemy documentation suggests you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
+
+ ```python
+ from sqlalchemy import DateTime
+ from databricks.sqlalchemy import TIMESTAMP
+
+ class SomeModel(Base):
+     some_date_without_timezone = DateTime()
+     some_date_with_timezone = TIMESTAMP()
+ ```
 
  ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`
 
@@ -137,7 +154,7 @@ class SomeModel(Base):
 
  Identity and generated value support is currently limited in this dialect.
 
- When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/13/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/13/core/defaults.html#identity-ddl) instead.
+ When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.
 
  Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.
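For illustration, a minimal model sketch (the table and column names are hypothetical) pairing `BigInteger()` with an explicit `Identity()` as described above:

```python
from sqlalchemy import BigInteger, Identity, String
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class Widget(Base):
    __tablename__ = "widgets"

    # Identity() is rendered as GENERATED ALWAYS AS IDENTITY in the emitted DDL;
    # the autoincrement argument alone is ignored by this dialect
    id: Mapped[int] = mapped_column(BigInteger, Identity(), primary_key=True)
    name: Mapped[str] = mapped_column(String)
```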
@@ -151,6 +168,10 @@ class SomeModel(Base):
 
  When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).
 
+ ## Parameters
+
+ `databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
+
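A short sketch of the "named" paramstyle in practice, assuming `engine` was created as shown earlier in this README (the query itself is illustrative):

```python
from sqlalchemy import text

# Assumes `engine` is the Databricks engine created earlier in this README
with engine.connect() as conn:
    # Native parameterization expects the "named" paramstyle: :x is bound from the dict
    result = conn.execute(text("SELECT :x * :x AS square"), {"x": 7})
    print(result.scalar())  # 49
```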
 
  ## Usage with pandas
  Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
@@ -181,7 +202,7 @@ with engine.connect() as conn:
      df.to_sql('squares', conn)
  ```
 
- ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/13/core/constraints.html#defining-foreign-keys)
+ ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)
 
  Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.
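A hedged sketch of declaring both constraints (the model and column names are hypothetical):

```python
from sqlalchemy import BigInteger, ForeignKey
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    pass


class User(Base):
    __tablename__ = "users"

    # primary_key=True emits a PRIMARY KEY constraint on Unity Catalog
    user_id: Mapped[int] = mapped_column(BigInteger, primary_key=True)


class Order(Base):
    __tablename__ = "orders"

    order_id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # The FOREIGN KEY constraint is emitted in DDL but, per the note above,
    # Databricks Runtime does not enforce its integrity
    user_id: Mapped[int] = mapped_column(BigInteger, ForeignKey("users.user_id"))
```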
databricks_sqlalchemy-2.0.0.dist-info/RECORD
@@ -0,0 +1,31 @@
+ CHANGELOG.md,sha256=3y1Bi07K7cXfMbmEN4Pfpbpz_G7irzxBQrGbqzGwC5c,226
+ databricks/sqlalchemy/__init__.py,sha256=Gk3XC5OCzq7LuxMVpxK3t4q0rkflXJ8uJRJh9uusMqc,185
+ databricks/sqlalchemy/_ddl.py,sha256=c0_GwfmnrFVr4-Ls14fmdGUUFyUok_GW4Uo45hLABFc,3983
+ databricks/sqlalchemy/_parse.py,sha256=C0Q0_87PknCibRjs3ewPL5dimwQqaW_vr4nMxMsS220,13048
+ databricks/sqlalchemy/_types.py,sha256=EqC_TWWY7mDw9EM2AVZnPrw5DD6G-vBV7wiwX4tcBcM,11753
+ databricks/sqlalchemy/base.py,sha256=KcjfHMH0NsceYE2NRxrePtf5T1uw9u8JHofRdbnAKS4,15619
+ databricks/sqlalchemy/dependency_test/test_dependency.py,sha256=oFv2oW0e0ScpiKbmXHwpIuYf7mWpj4BiVShiLvw2b2k,938
+ databricks/sqlalchemy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ databricks/sqlalchemy/pytest.ini,sha256=ImutflUjkhByVNWCQ18Todj6XTvgJAQX_v7fD-gWHhU,106
+ databricks/sqlalchemy/requirements.py,sha256=OobunAEwZ9y2dvSQLOmdgJciVn9xGlY9NAFfszPCTU0,9018
+ databricks/sqlalchemy/setup.cfg,sha256=ImutflUjkhByVNWCQ18Todj6XTvgJAQX_v7fD-gWHhU,106
+ databricks/sqlalchemy/test/_extra.py,sha256=ZMbqkdw9_sTRrcmuOssZoaZjNsaM-L1Z8tlumOoipMg,1955
+ databricks/sqlalchemy/test/_future.py,sha256=7ZKdl2-hPVgkNUtq-mVS1DWsI5Y8N4fEnwxXfFnTqCU,12658
+ databricks/sqlalchemy/test/_regression.py,sha256=MI6Jlmnw-DYmyY-mHfrscNQ8l3UEDaPXC7J3R2uKI9o,5412
+ databricks/sqlalchemy/test/_unsupported.py,sha256=ORi3FvzjGDx3KK62KysJFaEI4zfAw3JdbpVbT5oCCYM,16061
+ databricks/sqlalchemy/test/conftest.py,sha256=wauk1PYW_epp5-CKA2HbcTk_Ke3i8XpCnHB7UJLIRoE,597
+ databricks/sqlalchemy/test/overrides/_componentreflectiontest.py,sha256=OAaFx_l3sHuUn322NuyzpBq1SquvHCyXIvk5NxDXNv8,7042
+ databricks/sqlalchemy/test/overrides/_ctetest.py,sha256=u4jSIMrZY2dCSvBRhk9RsiObx1GB3RoFuLRByC212VU,1026
+ databricks/sqlalchemy/test/test_suite.py,sha256=kQfqmoXROaMNi6RebaPKS6MFabzSU5Rz-YPo84CImIQ,492
+ databricks/sqlalchemy/test_local/__init__.py,sha256=gphvzZ0Cb4Kz7rPRHHULanKyyjKgFt7zmGGYvcuGxys,131
+ databricks/sqlalchemy/test_local/conftest.py,sha256=b6LThokKLJrCfe7207A6NvF2MYnGOmajwtVILCWj1qY,951
+ databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx,sha256=9zqXUDGzgS2yjPz8x0uFsJU6kQTqdVRKKfJrEBHTZuY,59837
+ databricks/sqlalchemy/test_local/e2e/test_basic.py,sha256=wLP28vz2H9wz0dS52_iXbRwu0Zoh0wTEN9MOj2xJiOQ,16749
+ databricks/sqlalchemy/test_local/test_ddl.py,sha256=L5V1NoW9dT-7BHcaB97FQOw9ZFvo0g2_FIPKqOzlECM,3198
+ databricks/sqlalchemy/test_local/test_parsing.py,sha256=pSTAnWyA44vDTEZ-_HnfwEr3QbA2Kmzn1yU5q1GqMts,5017
+ databricks/sqlalchemy/test_local/test_types.py,sha256=Uey-z4ypzD5ykClBQs7XNW9KArHPbZU2cAk3EYD9jS0,6749
+ databricks_sqlalchemy-2.0.0.dist-info/LICENSE,sha256=WgVm2VpfZ3CsUfPndD2NeCrEIcFA4UB-YnnW4ejxcbE,11346
+ databricks_sqlalchemy-2.0.0.dist-info/METADATA,sha256=qjj_K1tWMMn7qGvzae8IhbabCSNWeWa0TTyq919wjVc,13073
+ databricks_sqlalchemy-2.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ databricks_sqlalchemy-2.0.0.dist-info/entry_points.txt,sha256=AAjpsvZbVcoMAcWLIesoAT5FNZhBEcIhxdKknVua3jw,74
+ databricks_sqlalchemy-2.0.0.dist-info/RECORD,,