databricks-sql-connector 3.7.0__tar.gz → 4.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/CHANGELOG.md +9 -0
  2. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/PKG-INFO +25 -11
  3. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/README.md +20 -3
  4. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/pyproject.toml +7 -14
  5. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/__init__.py +1 -1
  6. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/client.py +7 -0
  7. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/thrift_backend.py +1 -1
  8. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/README.sqlalchemy.md +0 -203
  9. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/README.tests.md +0 -44
  10. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/__init__.py +0 -4
  11. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/_ddl.py +0 -100
  12. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/_parse.py +0 -385
  13. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/_types.py +0 -323
  14. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/base.py +0 -436
  15. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/py.typed +0 -0
  16. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/requirements.py +0 -249
  17. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/setup.cfg +0 -4
  18. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/_extra.py +0 -70
  19. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/_future.py +0 -331
  20. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/_regression.py +0 -311
  21. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/_unsupported.py +0 -450
  22. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/conftest.py +0 -13
  23. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -189
  24. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/overrides/_ctetest.py +0 -33
  25. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test/test_suite.py +0 -13
  26. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test_local/__init__.py +0 -5
  27. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test_local/conftest.py +0 -44
  28. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  29. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -543
  30. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test_local/test_ddl.py +0 -96
  31. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test_local/test_parsing.py +0 -160
  32. databricks_sql_connector-3.7.0/src/databricks/sqlalchemy/test_local/test_types.py +0 -161
  33. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/LICENSE +0 -0
  34. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/__init__.py +0 -0
  35. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/__init__.py +0 -0
  36. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/auth.py +0 -0
  37. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/authenticators.py +0 -0
  38. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/endpoint.py +0 -0
  39. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/oauth.py +0 -0
  40. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  41. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/retry.py +0 -0
  42. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  43. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  44. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  45. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/exc.py +0 -0
  46. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/experimental/__init__.py +0 -0
  47. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  48. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/parameters/__init__.py +0 -0
  49. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/parameters/native.py +0 -0
  50. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/parameters/py.typed +0 -0
  51. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/py.typed +0 -0
  52. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  53. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  54. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  55. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  56. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  57. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/thrift_api/__init__.py +0 -0
  58. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/types.py +0 -0
  59. {databricks_sql_connector-3.7.0 → databricks_sql_connector-4.0.0}/src/databricks/sql/utils.py +0 -0
@@ -1,5 +1,14 @@
1
1
  # Release History
2
2
 
3
+ # 4.0.0 (2025-01-19)
4
+
5
+ - Split the connector into two separate packages: `databricks-sql-connector` and `databricks-sqlalchemy`. The `databricks-sql-connector` package contains the core functionality of the connector, while the `databricks-sqlalchemy` package contains the SQLAlchemy dialect for the connector.
6
+ - The PyArrow dependency is now optional in `databricks-sql-connector`. Users who need Arrow support must install `pyarrow` explicitly.
7
+
8
+ # 3.7.1 (2025-01-07)
9
+
10
+ - Relaxed the number of Http retry attempts (databricks/databricks-sql-python#486 by @jprakash-db)
11
+
3
12
  # 3.7.0 (2024-12-23)
4
13
 
5
14
  - Fix: Incorrect number of rows fetched in inline results when fetching results with FETCH_NEXT orientation (databricks/databricks-sql-python#479 by @jprakash-db)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 3.7.0
3
+ Version: 4.0.0
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -13,18 +13,15 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
- Provides-Extra: alembic
17
- Provides-Extra: sqlalchemy
18
- Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
16
+ Provides-Extra: pyarrow
19
17
  Requires-Dist: lz4 (>=4.0.2,<5.0.0)
20
- Requires-Dist: numpy (>=1.16.6) ; python_version >= "3.8" and python_version < "3.11"
21
- Requires-Dist: numpy (>=1.23.4) ; python_version >= "3.11"
18
+ Requires-Dist: numpy (>=1.16.6,<2.0.0) ; python_version >= "3.8" and python_version < "3.11"
19
+ Requires-Dist: numpy (>=1.23.4,<2.0.0) ; python_version >= "3.11"
22
20
  Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
23
21
  Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
24
22
  Requires-Dist: pandas (>=1.2.5,<2.3.0) ; python_version >= "3.8"
25
- Requires-Dist: pyarrow (>=14.0.1)
23
+ Requires-Dist: pyarrow (>=14.0.1) ; extra == "pyarrow"
26
24
  Requires-Dist: requests (>=2.18.1,<3.0.0)
27
- Requires-Dist: sqlalchemy (>=2.0.21) ; extra == "sqlalchemy" or extra == "alembic"
28
25
  Requires-Dist: thrift (>=0.16.0,<0.21.0)
29
26
  Requires-Dist: urllib3 (>=1.26)
30
27
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sql-python/issues
@@ -36,9 +33,9 @@ Description-Content-Type: text/markdown
36
33
  [![PyPI](https://img.shields.io/pypi/v/databricks-sql-connector?style=flat-square)](https://pypi.org/project/databricks-sql-connector/)
37
34
  [![Downloads](https://pepy.tech/badge/databricks-sql-connector)](https://pepy.tech/project/databricks-sql-connector)
38
35
 
39
- The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/) and exposes a [SQLAlchemy](https://www.sqlalchemy.org/) dialect for use with tools like `pandas` and `alembic` which use SQLAlchemy to execute DDL. Use `pip install databricks-sql-connector[sqlalchemy]` to install with SQLAlchemy's dependencies. `pip install databricks-sql-connector[alembic]` will install alembic's dependencies.
36
+ The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/).
40
37
 
41
- This connector uses Arrow as the data-exchange format, and supports APIs to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time.
38
+ This connector uses Arrow as the data-exchange format, and supports APIs (e.g. `fetchmany_arrow`) to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time. [PyArrow](https://arrow.apache.org/docs/python/index.html) is required to enable this and use these APIs; you can install it via `pip install pyarrow` or `pip install databricks-sql-connector[pyarrow]`.
42
39
 
43
40
  You are welcome to file an issue here for general use cases. You can also contact Databricks Support [here](https://help.databricks.com).
44
41
 
@@ -55,7 +52,12 @@ For the latest documentation, see
55
52
 
56
53
  ## Quickstart
57
54
 
58
- Install the library with `pip install databricks-sql-connector`
55
+ ### Installing the core library
56
+ Install using `pip install databricks-sql-connector`
57
+
58
+ ### Installing the core library with PyArrow
59
+ Install using `pip install databricks-sql-connector[pyarrow]`
60
+
59
61
 
60
62
  ```bash
61
63
  export DATABRICKS_HOST=********.databricks.com
@@ -93,6 +95,18 @@ or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/123456789
93
95
  > to authenticate the target Databricks user account and needs to open the browser for authentication. So it
94
96
  > can only run on the user's machine.
95
97
 
98
+ ## SQLAlchemy
99
+ Starting from `databricks-sql-connector` version 4.0.0, SQLAlchemy support has been extracted to a new library, `databricks-sqlalchemy`.
100
+
101
+ - Github repository [databricks-sqlalchemy github](https://github.com/databricks/databricks-sqlalchemy)
102
+ - PyPI [databricks-sqlalchemy pypi](https://pypi.org/project/databricks-sqlalchemy/)
103
+
104
+ ### Quick SQLAlchemy guide
105
+ Users can now choose between using the SQLAlchemy v1 or SQLAlchemy v2 dialects with the connector core
106
+
107
+ - Install the latest SQLAlchemy v1 using `pip install databricks-sqlalchemy~=1.0`
108
+ - Install SQLAlchemy v2 using `pip install databricks-sqlalchemy`
109
+
96
110
 
97
111
  ## Contributing
98
112
 
@@ -3,9 +3,9 @@
3
3
  [![PyPI](https://img.shields.io/pypi/v/databricks-sql-connector?style=flat-square)](https://pypi.org/project/databricks-sql-connector/)
4
4
  [![Downloads](https://pepy.tech/badge/databricks-sql-connector)](https://pepy.tech/project/databricks-sql-connector)
5
5
 
6
- The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/) and exposes a [SQLAlchemy](https://www.sqlalchemy.org/) dialect for use with tools like `pandas` and `alembic` which use SQLAlchemy to execute DDL. Use `pip install databricks-sql-connector[sqlalchemy]` to install with SQLAlchemy's dependencies. `pip install databricks-sql-connector[alembic]` will install alembic's dependencies.
6
+ The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/).
7
7
 
8
- This connector uses Arrow as the data-exchange format, and supports APIs to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time.
8
+ This connector uses Arrow as the data-exchange format, and supports APIs (e.g. `fetchmany_arrow`) to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time. [PyArrow](https://arrow.apache.org/docs/python/index.html) is required to enable this and use these APIs; you can install it via `pip install pyarrow` or `pip install databricks-sql-connector[pyarrow]`.
9
9
 
10
10
  You are welcome to file an issue here for general use cases. You can also contact Databricks Support [here](https://help.databricks.com).
11
11
 
@@ -22,7 +22,12 @@ For the latest documentation, see
22
22
 
23
23
  ## Quickstart
24
24
 
25
- Install the library with `pip install databricks-sql-connector`
25
+ ### Installing the core library
26
+ Install using `pip install databricks-sql-connector`
27
+
28
+ ### Installing the core library with PyArrow
29
+ Install using `pip install databricks-sql-connector[pyarrow]`
30
+
26
31
 
27
32
  ```bash
28
33
  export DATABRICKS_HOST=********.databricks.com
@@ -60,6 +65,18 @@ or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/123456789
60
65
  > to authenticate the target Databricks user account and needs to open the browser for authentication. So it
61
66
  > can only run on the user's machine.
62
67
 
68
+ ## SQLAlchemy
69
+ Starting from `databricks-sql-connector` version 4.0.0, SQLAlchemy support has been extracted to a new library, `databricks-sqlalchemy`.
70
+
71
+ - Github repository [databricks-sqlalchemy github](https://github.com/databricks/databricks-sqlalchemy)
72
+ - PyPI [databricks-sqlalchemy pypi](https://pypi.org/project/databricks-sqlalchemy/)
73
+
74
+ ### Quick SQLAlchemy guide
75
+ Users can now choose between using the SQLAlchemy v1 or SQLAlchemy v2 dialects with the connector core
76
+
77
+ - Install the latest SQLAlchemy v1 using `pip install databricks-sqlalchemy~=1.0`
78
+ - Install SQLAlchemy v2 using `pip install databricks-sqlalchemy`
79
+
63
80
 
64
81
  ## Contributing
65
82
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "3.7.0"
3
+ version = "4.0.0"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -14,23 +14,19 @@ thrift = ">=0.16.0,<0.21.0"
14
14
  pandas = [
15
15
  { version = ">=1.2.5,<2.3.0", python = ">=3.8" }
16
16
  ]
17
- pyarrow = ">=14.0.1"
18
-
19
17
  lz4 = "^4.0.2"
20
18
  requests = "^2.18.1"
21
19
  oauthlib = "^3.1.0"
22
20
  numpy = [
23
- { version = ">=1.16.6", python = ">=3.8,<3.11" },
24
- { version = ">=1.23.4", python = ">=3.11" },
21
+ { version = "^1.16.6", python = ">=3.8,<3.11" },
22
+ { version = "^1.23.4", python = ">=3.11" },
25
23
  ]
26
- sqlalchemy = { version = ">=2.0.21", optional = true }
27
24
  openpyxl = "^3.0.10"
28
- alembic = { version = "^1.0.11", optional = true }
29
25
  urllib3 = ">=1.26"
26
+ pyarrow = { version = ">=14.0.1", optional=true }
30
27
 
31
28
  [tool.poetry.extras]
32
- sqlalchemy = ["sqlalchemy"]
33
- alembic = ["sqlalchemy", "alembic"]
29
+ pyarrow = ["pyarrow"]
34
30
 
35
31
  [tool.poetry.dev-dependencies]
36
32
  pytest = "^7.1.2"
@@ -43,9 +39,6 @@ pytest-dotenv = "^0.5.2"
43
39
  "Homepage" = "https://github.com/databricks/databricks-sql-python"
44
40
  "Bug Tracker" = "https://github.com/databricks/databricks-sql-python/issues"
45
41
 
46
- [tool.poetry.plugins."sqlalchemy.dialects"]
47
- "databricks" = "databricks.sqlalchemy:DatabricksDialect"
48
-
49
42
  [build-system]
50
43
  requires = ["poetry-core>=1.0.0"]
51
44
  build-backend = "poetry.core.masonry.api"
@@ -62,5 +55,5 @@ markers = {"reviewed" = "Test case has been reviewed by Databricks"}
62
55
  minversion = "6.0"
63
56
  log_cli = "false"
64
57
  log_cli_level = "INFO"
65
- testpaths = ["tests", "src/databricks/sqlalchemy/test_local"]
66
- env_files = ["test.env"]
58
+ testpaths = ["tests"]
59
+ env_files = ["test.env"]
@@ -68,7 +68,7 @@ DATETIME = DBAPITypeObject("timestamp")
68
68
  DATE = DBAPITypeObject("date")
69
69
  ROWID = DBAPITypeObject()
70
70
 
71
- __version__ = "3.7.0"
71
+ __version__ = "4.0.0"
72
72
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
73
73
 
74
74
  # These two functions are pyhive legacy
@@ -54,6 +54,13 @@ from databricks.sql.thrift_api.TCLIService.ttypes import (
54
54
 
55
55
  logger = logging.getLogger(__name__)
56
56
 
57
+ if pyarrow is None:
58
+ logger.warning(
59
+ "[WARN] pyarrow is not installed by default since databricks-sql-connector 4.0.0,"
60
+ "any arrow specific api (e.g. fetchmany_arrow) and cloud fetch will be disabled."
61
+ "If you need these features, please run pip install pyarrow or pip install databricks-sql-connector[pyarrow] to install"
62
+ )
63
+
57
64
  DEFAULT_RESULT_BUFFER_SIZE_BYTES = 104857600
58
65
  DEFAULT_ARRAY_SIZE = 100000
59
66
 
@@ -67,7 +67,7 @@ DEFAULT_SOCKET_TIMEOUT = float(900)
67
67
  _retry_policy = { # (type, default, min, max)
68
68
  "_retry_delay_min": (float, 1, 0.1, 60),
69
69
  "_retry_delay_max": (float, 30, 5, 3600),
70
- "_retry_stop_after_attempts_count": (int, 5, 1, 60),
70
+ "_retry_stop_after_attempts_count": (int, 30, 1, 60),
71
71
  "_retry_stop_after_attempts_duration": (float, 900, 1, 86400),
72
72
  "_retry_delay_default": (float, 5, 1, 60),
73
73
  }
@@ -1,203 +0,0 @@
1
- ## Databricks dialect for SQLAlchemy 2.0
2
-
3
- The Databricks dialect for SQLAlchemy serves as a bridge between [SQLAlchemy](https://www.sqlalchemy.org/) and the Databricks SQL Python driver. The dialect is included with `databricks-sql-connector==3.0.0` and above. A working example demonstrating usage can be found in `examples/sqlalchemy.py`.
4
-
5
- ## Usage with SQLAlchemy <= 2.0
6
- A SQLAlchemy 1.4 compatible dialect was first released in connector [version 2.4](https://github.com/databricks/databricks-sql-python/releases/tag/v2.4.0). Support for SQLAlchemy 1.4 was dropped from the dialect as part of `databricks-sql-connector==3.0.0`. To continue using the dialect with SQLAlchemy 1.x, you can use `databricks-sql-connector^2.4.0`.
7
-
8
-
9
- ## Installation
10
-
11
- To install the dialect and its dependencies:
12
-
13
- ```shell
14
- pip install databricks-sql-connector[sqlalchemy]
15
- ```
16
-
17
- If you also plan to use `alembic` you can alternatively run:
18
-
19
- ```shell
20
- pip install databricks-sql-connector[alembic]
21
- ```
22
-
23
- ## Connection String
24
-
25
- Every SQLAlchemy application that connects to a database needs to use an [Engine](https://docs.sqlalchemy.org/en/20/tutorial/engine.html#tutorial-engine), which you can create by passing a connection string to `create_engine`. The connection string must include these components:
26
-
27
- 1. Host
28
- 2. HTTP Path for a compute resource
29
- 3. API access token
30
- 4. Initial catalog for the connection
31
- 5. Initial schema for the connection
32
-
33
- **Note: Our dialect is built and tested on workspaces with Unity Catalog enabled. Support for the `hive_metastore` catalog is untested.**
34
-
35
- For example:
36
-
37
- ```python
38
- import os
39
- from sqlalchemy import create_engine
40
-
41
- host = os.getenv("DATABRICKS_SERVER_HOSTNAME")
42
- http_path = os.getenv("DATABRICKS_HTTP_PATH")
43
- access_token = os.getenv("DATABRICKS_TOKEN")
44
- catalog = os.getenv("DATABRICKS_CATALOG")
45
- schema = os.getenv("DATABRICKS_SCHEMA")
46
-
47
- engine = create_engine(
48
- f"databricks://token:{access_token}@{host}?http_path={http_path}&catalog={catalog}&schema={schema}"
49
- )
50
- ```
51
-
52
- ## Types
53
-
54
- The [SQLAlchemy type hierarchy](https://docs.sqlalchemy.org/en/20/core/type_basics.html) contains backend-agnostic type implementations (represented in CamelCase) and backend-specific types (represented in UPPERCASE). The majority of SQLAlchemy's [CamelCase](https://docs.sqlalchemy.org/en/20/core/type_basics.html#the-camelcase-datatypes) types are supported. This means that a SQLAlchemy application using these types should "just work" with Databricks.
55
-
56
- |SQLAlchemy Type|Databricks SQL Type|
57
- |-|-|
58
- [`BigInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.BigInteger)| [`BIGINT`](https://docs.databricks.com/en/sql/language-manual/data-types/bigint-type.html)
59
- [`LargeBinary`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.LargeBinary)| (not supported)|
60
- [`Boolean`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Boolean)| [`BOOLEAN`](https://docs.databricks.com/en/sql/language-manual/data-types/boolean-type.html)
61
- [`Date`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Date)| [`DATE`](https://docs.databricks.com/en/sql/language-manual/data-types/date-type.html)
62
- [`DateTime`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.DateTime)| [`TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)|
63
- [`Double`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Double)| [`DOUBLE`](https://docs.databricks.com/en/sql/language-manual/data-types/double-type.html)
64
- [`Enum`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Enum)| (not supported)|
65
- [`Float`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Float)| [`FLOAT`](https://docs.databricks.com/en/sql/language-manual/data-types/float-type.html)
66
- [`Integer`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Integer)| [`INT`](https://docs.databricks.com/en/sql/language-manual/data-types/int-type.html)
67
- [`Numeric`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Numeric)| [`DECIMAL`](https://docs.databricks.com/en/sql/language-manual/data-types/decimal-type.html)|
68
- [`PickleType`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.PickleType)| (not supported)|
69
- [`SmallInteger`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.SmallInteger)| [`SMALLINT`](https://docs.databricks.com/en/sql/language-manual/data-types/smallint-type.html)
70
- [`String`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.String)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
71
- [`Text`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Text)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
72
- [`Time`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Time)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
73
- [`Unicode`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Unicode)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
74
- [`UnicodeText`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.UnicodeText)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)|
75
- [`Uuid`](https://docs.sqlalchemy.org/en/20/core/type_basics.html#sqlalchemy.types.Uuid)| [`STRING`](https://docs.databricks.com/en/sql/language-manual/data-types/string-type.html)
76
-
77
- In addition, the dialect exposes three UPPERCASE SQLAlchemy types which are specific to Databricks:
78
-
79
- - [`databricks.sqlalchemy.TINYINT`](https://docs.databricks.com/en/sql/language-manual/data-types/tinyint-type.html)
80
- - [`databricks.sqlalchemy.TIMESTAMP`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-type.html)
81
- - [`databricks.sqlalchemy.TIMESTAMP_NTZ`](https://docs.databricks.com/en/sql/language-manual/data-types/timestamp-ntz-type.html)
82
-
83
-
84
- ### `LargeBinary()` and `PickleType()`
85
-
86
- Databricks Runtime doesn't currently support binding of binary values in SQL queries, which is a pre-requisite for this functionality in SQLAlchemy.
87
-
88
- ## `Enum()` and `CHECK` constraints
89
-
90
- Support for `CHECK` constraints is not implemented in this dialect. Support is planned for a future release.
91
-
92
- SQLAlchemy's `Enum()` type depends on `CHECK` constraints and is therefore not yet supported.
93
-
94
- ### `DateTime()`, `TIMESTAMP_NTZ()`, and `TIMESTAMP()`
95
-
96
- Databricks Runtime provides two datetime-like types: `TIMESTAMP` which is always timezone-aware and `TIMESTAMP_NTZ` which is timezone agnostic. Both types can be imported from `databricks.sqlalchemy` and used in your models.
97
-
98
- The SQLAlchemy documentation indicates that `DateTime()` is not timezone-aware by default. So our dialect maps this type to `TIMESTAMP_NTZ()`. In practice, you should never need to use `TIMESTAMP_NTZ()` directly. Just use `DateTime()`.
99
-
100
- If you need your field to be timezone-aware, you can import `TIMESTAMP()` and use it instead.
101
-
102
- _Note that SQLAlchemy documentation suggests that you can declare a `DateTime()` with `timezone=True` on supported backends. However, if you do this with the Databricks dialect, the `timezone` argument will be ignored._
103
-
104
- ```python
105
- from sqlalchemy import DateTime
106
- from databricks.sqlalchemy import TIMESTAMP
107
-
108
- class SomeModel(Base):
109
- some_date_without_timezone = DateTime()
110
- some_date_with_timezone = TIMESTAMP()
111
- ```
112
-
113
- ### `String()`, `Text()`, `Unicode()`, and `UnicodeText()`
114
-
115
- Databricks Runtime doesn't support length limitations for `STRING` fields. Therefore `String()` or `String(1)` or `String(255)` will all produce identical DDL. Since `Text()`, `Unicode()`, `UnicodeText()` all use the same underlying type in Databricks SQL, they will generate equivalent DDL.
116
-
117
- ### `Time()`
118
-
119
- Databricks Runtime doesn't have a native time-like data type. To implement this type in SQLAlchemy, our dialect stores SQLAlchemy `Time()` values in a `STRING` field. Unlike `DateTime` above, this type can optionally support timezone awareness (since the dialect is in complete control of the strings that we write to the Delta table).
120
-
121
- ```python
122
- from sqlalchemy import Time
123
-
124
- class SomeModel(Base):
125
- time_tz = Time(timezone=True)
126
- time_ntz = Time()
127
- ```
128
-
129
-
130
- # Usage Notes
131
-
132
- ## `Identity()` and `autoincrement`
133
-
134
- Identity and generated value support is currently limited in this dialect.
135
-
136
- When defining models, SQLAlchemy types can accept an [`autoincrement`](https://docs.sqlalchemy.org/en/20/core/metadata.html#sqlalchemy.schema.Column.params.autoincrement) argument. In our dialect, this argument is currently ignored. To create an auto-incrementing field in your model you can pass in an explicit [`Identity()`](https://docs.sqlalchemy.org/en/20/core/defaults.html#identity-ddl) instead.
137
-
138
- Furthermore, in Databricks Runtime, only `BIGINT` fields can be configured to auto-increment. So in SQLAlchemy, you must use the `BigInteger()` type.
139
-
140
- ```python
141
- from sqlalchemy import Identity, String
142
-
143
- class SomeModel(Base):
144
- id = BigInteger(Identity())
145
- value = String()
146
- ```
147
-
148
- When calling `Base.metadata.create_all()`, the executed DDL will include `GENERATED ALWAYS AS IDENTITY` for the `id` column. This is useful when using SQLAlchemy to generate tables. However, as of this writing, `Identity()` constructs are not captured when SQLAlchemy reflects a table's metadata (support for this is planned).
149
-
150
- ## Parameters
151
-
152
- `databricks-sql-connector` supports two approaches to parameterizing SQL queries: native and inline. Our SQLAlchemy 2.0 dialect always uses the native approach and is therefore limited to DBR 14.2 and above. If you are writing parameterized queries to be executed by SQLAlchemy, you must use the "named" paramstyle (`:param`). Read more about parameterization in `docs/parameters.md`.
153
-
154
- ## Usage with pandas
155
-
156
- Use [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html) and [`pandas.read_sql`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_sql.html#pandas.read_sql) to write and read from Databricks SQL. These methods both accept a SQLAlchemy connection to interact with Databricks.
157
-
158
- ### Read from Databricks SQL into pandas
159
- ```python
160
- from sqlalchemy import create_engine
161
- import pandas as pd
162
-
163
- engine = create_engine("databricks://token:dapi***@***.cloud.databricks.com?http_path=***&catalog=main&schema=test")
164
- with engine.connect() as conn:
165
- # This will read the contents of `main.test.some_table`
166
- df = pd.read_sql("some_table", conn)
167
- ```
168
-
169
- ### Write to Databricks SQL from pandas
170
-
171
- ```python
172
- from sqlalchemy import create_engine
173
- import pandas as pd
174
-
175
- engine = create_engine("databricks://token:dapi***@***.cloud.databricks.com?http_path=***&catalog=main&schema=test")
176
- squares = [(i, i * i) for i in range(100)]
177
- df = pd.DataFrame(data=squares,columns=['x','x_squared'])
178
-
179
- with engine.connect() as conn:
180
- # This will write the contents of `df` to `main.test.squares`
181
- df.to_sql('squares',conn)
182
- ```
183
-
184
- ## [`PrimaryKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#sqlalchemy.schema.PrimaryKeyConstraint) and [`ForeignKey()`](https://docs.sqlalchemy.org/en/20/core/constraints.html#defining-foreign-keys)
185
-
186
- Unity Catalog workspaces in Databricks support PRIMARY KEY and FOREIGN KEY constraints. _Note that Databricks Runtime does not enforce the integrity of FOREIGN KEY constraints_. You can establish a primary key by setting `primary_key=True` when defining a column.
187
-
188
- When building `ForeignKey` or `ForeignKeyConstraint` objects, you must specify a `name` for the constraint.
189
-
190
- If your model definition requires a self-referential FOREIGN KEY constraint, you must include `use_alter=True` when defining the relationship.
191
-
192
- ```python
193
- from sqlalchemy import Table, Column, ForeignKey, BigInteger, String
194
-
195
- users = Table(
196
- "users",
197
- metadata_obj,
198
- Column("id", BigInteger, primary_key=True),
199
- Column("name", String(), nullable=False),
200
- Column("email", String()),
201
- Column("manager_id", ForeignKey("users.id", name="fk_users_manager_id_x_users_id", use_alter=True))
202
- )
203
- ```
@@ -1,44 +0,0 @@
1
- ## SQLAlchemy Dialect Compliance Test Suite with Databricks
2
-
3
- The contents of the `test/` directory follow the SQLAlchemy developers' [guidance] for running the reusable dialect compliance test suite. Since not every test in the suite is applicable to every dialect, two options are provided to skip tests:
4
-
5
- - Any test can be skipped by subclassing its parent class, re-declaring the test-case and adding a `pytest.mark.skip` directive.
6
- - Any test that is decorated with a `@requires` decorator can be skipped by marking the indicated requirement as `.closed()` in `requirements.py`
7
-
8
- We prefer to skip test cases directly with the first method wherever possible. We only mark requirements as `closed()` if there is no easier option to avoid a test failure. This principally occurs in test cases where the same test in the suite is parametrized, and some parameter combinations are conditionally skipped depending on `requirements.py`. If we skip the entire test method, then we skip _all_ permutations, not just the combinations we don't support.
9
-
10
- ## Regression, Unsupported, and Future test cases
11
-
12
- We maintain three files of test cases that we import from the SQLAlchemy source code:
13
-
14
- * **`_regression.py`** contains all the test cases with tests that we expect to pass for our dialect. Each one is marked with `pytest.mark.reviewed` to indicate that we've evaluated it for relevance. This file only contains base class declarations.
15
- * **`_unsupported.py`** contains test cases that fail because of missing features in Databricks. We mark them as skipped with a `SkipReason` enumeration. If Databricks comes to support these features, those tests or entire classes can be moved to `_regression.py`.
16
- * **`_future.py`** contains test cases that fail because of missing features in the dialect itself, but which _are_ supported by Databricks generally. We mark them as skipped with a `FutureFeature` enumeration. These are features that have not been prioritised or that do not violate our acceptance criteria. All of these test cases will eventually move to `_regression.py`.
17
-
18
- In some cases, only certain tests in a class should be skipped with a `SkipReason` or `FutureFeature` justification. In those cases, we import the class into `_regression.py`, then import it from there into one or both of `_future.py` and `_unsupported.py`. If a class needs to be "touched" by regression, unsupported, and future, the class will be imported in that order. If an entire class should be skipped, then we do not import it into `_regression.py` at all.
19
-
20
- We maintain `_extra.py` with test cases that depend on SQLAlchemy's reusable dialect test fixtures but which are specific to Databricks (e.g. TinyIntegerTest).
21
-
22
- ## Running the reusable dialect tests
23
-
24
- ```
25
- poetry shell
26
- cd src/databricks/sqlalchemy/test
27
- python -m pytest test_suite.py --dburi \
28
- "databricks://token:$access_token@$host?http_path=$http_path&catalog=$catalog&schema=$schema"
29
- ```
30
-
31
- Whatever schema you pass in the `dburi` argument should be empty. Some tests also require the presence of an empty schema named `test_schema`. Note that we plan to implement our own `provision.py` which SQLAlchemy can automatically use to create an empty schema for testing. But for now this is a manual process.
32
-
33
- You can run only reviewed tests by appending `-m "reviewed"` to the test runner invocation.
34
-
35
- You can run only the unreviewed tests by appending `-m "not reviewed"` instead.
36
-
37
- Note that because these tests depend on SQLAlchemy's custom pytest plugin, they are not discoverable by IDE-based test runners like VSCode or PyCharm and must be invoked from a CLI.
38
-
39
- ## Running local unit and e2e tests
40
-
41
- Apart from the SQLAlchemy reusable suite, we maintain our own unit and e2e tests under the `test_local/` directory. These can be invoked from VSCode or PyCharm since they don't depend on a custom pytest plugin. Due to pytest's lookup order, the `pytest.ini` that is required for running the reusable dialect tests also conflicts with VSCode and PyCharm's default pytest implementation and overrides the settings in `pyproject.toml`. So to run these tests, you can delete or rename `pytest.ini`.
42
-
43
-
44
- [guidance]: https://github.com/sqlalchemy/sqlalchemy/blob/rel_2_0_22/README.dialects.rst
@@ -1,4 +0,0 @@
1
- from databricks.sqlalchemy.base import DatabricksDialect
2
- from databricks.sqlalchemy._types import TINYINT, TIMESTAMP, TIMESTAMP_NTZ
3
-
4
- __all__ = ["TINYINT", "TIMESTAMP", "TIMESTAMP_NTZ"]
@@ -1,100 +0,0 @@
1
- import re
2
- from sqlalchemy.sql import compiler, sqltypes
3
- import logging
4
-
5
- logger = logging.getLogger(__name__)
6
-
7
-
8
- class DatabricksIdentifierPreparer(compiler.IdentifierPreparer):
9
- """https://docs.databricks.com/en/sql/language-manual/sql-ref-identifiers.html"""
10
-
11
- legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I)
12
-
13
- def __init__(self, dialect):
14
- super().__init__(dialect, initial_quote="`")
15
-
16
-
17
- class DatabricksDDLCompiler(compiler.DDLCompiler):
18
- def post_create_table(self, table):
19
- post = [" USING DELTA"]
20
- if table.comment:
21
- comment = self.sql_compiler.render_literal_value(
22
- table.comment, sqltypes.String()
23
- )
24
- post.append("COMMENT " + comment)
25
-
26
- post.append("TBLPROPERTIES('delta.feature.allowColumnDefaults' = 'enabled')")
27
- return "\n".join(post)
28
-
29
- def visit_unique_constraint(self, constraint, **kw):
30
- logger.warning("Databricks does not support unique constraints")
31
- pass
32
-
33
- def visit_check_constraint(self, constraint, **kw):
34
- logger.warning("This dialect does not support check constraints")
35
- pass
36
-
37
- def visit_identity_column(self, identity, **kw):
38
- """When configuring an Identity() with Databricks, only the always option is supported.
39
- All other options are ignored.
40
-
41
- Note: IDENTITY columns must always be defined as BIGINT. An exception will be raised if INT is used.
42
-
43
- https://www.databricks.com/blog/2022/08/08/identity-columns-to-generate-surrogate-keys-are-now-available-in-a-lakehouse-near-you.html
44
- """
45
- text = "GENERATED %s AS IDENTITY" % (
46
- "ALWAYS" if identity.always else "BY DEFAULT",
47
- )
48
- return text
49
-
50
- def visit_set_column_comment(self, create, **kw):
51
- return "ALTER TABLE %s ALTER COLUMN %s COMMENT %s" % (
52
- self.preparer.format_table(create.element.table),
53
- self.preparer.format_column(create.element),
54
- self.sql_compiler.render_literal_value(
55
- create.element.comment, sqltypes.String()
56
- ),
57
- )
58
-
59
- def visit_drop_column_comment(self, create, **kw):
60
- return "ALTER TABLE %s ALTER COLUMN %s COMMENT ''" % (
61
- self.preparer.format_table(create.element.table),
62
- self.preparer.format_column(create.element),
63
- )
64
-
65
- def get_column_specification(self, column, **kwargs):
66
- """
67
- Emit a log message if a user attempts to set autoincrement=True on a column.
68
- See comments in test_suite.py. We may implement implicit IDENTITY using this
69
- feature in the future, similar to the Microsoft SQL Server dialect.
70
- """
71
- if column is column.table._autoincrement_column or column.autoincrement is True:
72
- logger.warning(
73
- "Databricks dialect ignores SQLAlchemy's autoincrement semantics. Use explicit Identity() instead."
74
- )
75
-
76
- colspec = super().get_column_specification(column, **kwargs)
77
- if column.comment is not None:
78
- literal = self.sql_compiler.render_literal_value(
79
- column.comment, sqltypes.STRINGTYPE
80
- )
81
- colspec += " COMMENT " + literal
82
-
83
- return colspec
84
-
85
-
86
- class DatabricksStatementCompiler(compiler.SQLCompiler):
87
- def limit_clause(self, select, **kw):
88
- """Identical to the default implementation of SQLCompiler.limit_clause except it writes LIMIT ALL instead of LIMIT -1,
89
- since Databricks SQL doesn't support the latter.
90
-
91
- https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-limit.html
92
- """
93
- text = ""
94
- if select._limit_clause is not None:
95
- text += "\n LIMIT " + self.process(select._limit_clause, **kw)
96
- if select._offset_clause is not None:
97
- if select._limit_clause is None:
98
- text += "\n LIMIT ALL"
99
- text += " OFFSET " + self.process(select._offset_clause, **kw)
100
- return text