databricks-sql-connector 4.0.0b3__tar.gz → 4.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/CHANGELOG.md +34 -0
  2. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/PKG-INFO +26 -12
  3. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/README.md +20 -3
  4. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/pyproject.toml +14 -18
  5. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/__init__.py +1 -1
  6. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/retry.py +32 -18
  7. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/thrift_http_client.py +6 -0
  8. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/client.py +136 -6
  9. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/thrift_backend.py +90 -10
  10. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/utils.py +2 -11
  11. databricks_sql_connector-4.0.0b3/src/databricks/sqlalchemy/__init__.py +0 -6
  12. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/LICENSE +0 -0
  13. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/__init__.py +0 -0
  14. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/__init__.py +0 -0
  15. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/auth.py +0 -0
  16. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/authenticators.py +0 -0
  17. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/endpoint.py +0 -0
  18. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/oauth.py +0 -0
  19. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  20. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  21. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  22. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/exc.py +0 -0
  23. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/experimental/__init__.py +0 -0
  24. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  25. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/parameters/__init__.py +0 -0
  26. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/parameters/native.py +0 -0
  27. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/parameters/py.typed +0 -0
  28. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/py.typed +0 -0
  29. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  30. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  31. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  32. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  33. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  34. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/thrift_api/__init__.py +0 -0
  35. {databricks_sql_connector-4.0.0b3 → databricks_sql_connector-4.0.1}/src/databricks/sql/types.py +0 -0
@@ -1,5 +1,39 @@
1
1
  # Release History
2
2
 
3
+ # 4.0.1 (2025-03-19)
4
+
5
+ - Support for multiple timestamp formats parsing (databricks/databricks-sql-python#533 by @jprakash-db)
6
+ - Rename `_user_agent_entry` in connect call to `user_agent_entry` to expose it as a public parameter. (databricks/databricks-sql-python#530 by @shivam2680)
7
+ - Fix: compatibility with urllib3 versions less than 2.x. (databricks/databricks-sql-python#526 by @shivam2680)
8
+ - Support for Python 3.13 and updated dependencies (databricks/databricks-sql-python#510 by @dhirschfeld and @dbaxa)
9
+
10
+ # 4.0.0 (2025-01-19)
11
+
12
+ - Split the connector into two separate packages: `databricks-sql-connector` and `databricks-sqlalchemy`. The `databricks-sql-connector` package contains the core functionality of the connector, while the `databricks-sqlalchemy` package contains the SQLAlchemy dialect for the connector.
13
+ - The PyArrow dependency is now optional in `databricks-sql-connector`. Users who need Arrow support must install `pyarrow` explicitly.
14
+
15
+ # 3.7.3 (2025-03-28)
16
+
17
+ - Fix: Unable to poll small results in execute_async function (databricks/databricks-sql-python#515 by @jprakash-db)
18
+ - Updated log messages to show the status code and error messages of requests (databricks/databricks-sql-python#511 by @jprakash-db)
19
+ - Fix: Incorrect metadata was fetched in case of queries with the same alias (databricks/databricks-sql-python#505 by @jprakash-db)
20
+
21
+ # 3.7.2 (2025-01-31)
22
+
23
+ - Updated the retry_delay_max and retry_timeout (databricks/databricks-sql-python#497 by @jprakash-db)
24
+
25
+ # 3.7.1 (2025-01-07)
26
+
27
+ - Relaxed the number of Http retry attempts (databricks/databricks-sql-python#486 by @jprakash-db)
28
+
29
+ # 3.7.0 (2024-12-23)
30
+
31
+ - Fix: Incorrect number of rows fetched in inline results when fetching results with FETCH_NEXT orientation (databricks/databricks-sql-python#479 by @jprakash-db)
32
+ - Updated the doc to specify native parameters are not supported in PUT operation (databricks/databricks-sql-python#477 by @jprakash-db)
33
+ - Relax `pyarrow` and `numpy` pin (databricks/databricks-sql-python#452 by @arredond)
34
+ - Feature: Support for async execute has been added (databricks/databricks-sql-python#463 by @jprakash-db)
35
+ - Updated the HTTP retry logic to be similar to the other Databricks drivers (databricks/databricks-sql-python#467 by @jprakash-db)
36
+
3
37
  # 3.6.0 (2024-10-25)
4
38
 
5
39
  - Support encryption headers in the cloud fetch request (https://github.com/databricks/databricks-sql-python/pull/460 by @jackyhu-db)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 4.0.0b3
3
+ Version: 4.0.1
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -13,18 +13,15 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
- Provides-Extra: alembic
17
- Provides-Extra: databricks-sqlalchemy
18
16
  Provides-Extra: pyarrow
19
- Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
20
- Requires-Dist: databricks-sqlalchemy (>=2.0.0) ; extra == "databricks-sqlalchemy" or extra == "alembic"
21
17
  Requires-Dist: lz4 (>=4.0.2,<5.0.0)
22
- Requires-Dist: numpy (>=1.16.6,<2.0.0) ; python_version >= "3.8" and python_version < "3.11"
23
- Requires-Dist: numpy (>=1.23.4,<2.0.0) ; python_version >= "3.11"
24
18
  Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
25
19
  Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
26
- Requires-Dist: pandas (>=1.2.5,<2.3.0) ; python_version >= "3.8"
27
- Requires-Dist: pyarrow (>=14.0.1,<17) ; extra == "pyarrow"
20
+ Requires-Dist: pandas (>=1.2.5,<2.3.0) ; python_version >= "3.8" and python_version < "3.13"
21
+ Requires-Dist: pandas (>=2.2.3,<2.3.0) ; python_version >= "3.13"
22
+ Requires-Dist: pyarrow (>=14.0.1) ; (python_version >= "3.8" and python_version < "3.13") and (extra == "pyarrow")
23
+ Requires-Dist: pyarrow (>=18.0.0) ; (python_version >= "3.13") and (extra == "pyarrow")
24
+ Requires-Dist: python-dateutil (>=2.9.0,<3.0.0)
28
25
  Requires-Dist: requests (>=2.18.1,<3.0.0)
29
26
  Requires-Dist: thrift (>=0.16.0,<0.21.0)
30
27
  Requires-Dist: urllib3 (>=1.26)
@@ -37,9 +34,9 @@ Description-Content-Type: text/markdown
37
34
  [![PyPI](https://img.shields.io/pypi/v/databricks-sql-connector?style=flat-square)](https://pypi.org/project/databricks-sql-connector/)
38
35
  [![Downloads](https://pepy.tech/badge/databricks-sql-connector)](https://pepy.tech/project/databricks-sql-connector)
39
36
 
40
- The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/) and exposes a [SQLAlchemy](https://www.sqlalchemy.org/) dialect for use with tools like `pandas` and `alembic` which use SQLAlchemy to execute DDL. Use `pip install databricks-sql-connector[sqlalchemy]` to install with SQLAlchemy's dependencies. `pip install databricks-sql-connector[alembic]` will install alembic's dependencies.
37
+ The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/).
41
38
 
42
- This connector uses Arrow as the data-exchange format, and supports APIs to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time.
39
+ This connector uses Arrow as the data-exchange format, and supports APIs (e.g. `fetchmany_arrow`) to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time. [PyArrow](https://arrow.apache.org/docs/python/index.html) is required to enable this and use these APIs; you can install it via `pip install pyarrow` or `pip install databricks-sql-connector[pyarrow]`.
43
40
 
44
41
  You are welcome to file an issue here for general use cases. You can also contact Databricks Support [here](https://help.databricks.com).
45
42
 
@@ -56,7 +53,12 @@ For the latest documentation, see
56
53
 
57
54
  ## Quickstart
58
55
 
59
- Install the library with `pip install databricks-sql-connector`
56
+ ### Installing the core library
57
+ Install using `pip install databricks-sql-connector`
58
+
59
+ ### Installing the core library with PyArrow
60
+ Install using `pip install databricks-sql-connector[pyarrow]`
61
+
60
62
 
61
63
  ```bash
62
64
  export DATABRICKS_HOST=********.databricks.com
@@ -94,6 +96,18 @@ or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/123456789
94
96
  > to authenticate the target Databricks user account and needs to open the browser for authentication. So it
95
97
  > can only run on the user's machine.
96
98
 
99
+ ## SQLAlchemy
100
+ Starting from `databricks-sql-connector` version 4.0.0 SQLAlchemy support has been extracted to a new library `databricks-sqlalchemy`.
101
+
102
+ - Github repository [databricks-sqlalchemy github](https://github.com/databricks/databricks-sqlalchemy)
103
+ - PyPI [databricks-sqlalchemy pypi](https://pypi.org/project/databricks-sqlalchemy/)
104
+
105
+ ### Quick SQLAlchemy guide
106
+ Users can now choose between using the SQLAlchemy v1 or SQLAlchemy v2 dialects with the connector core
107
+
108
+ - Install the latest SQLAlchemy v1 using `pip install databricks-sqlalchemy~=1.0`
109
+ - Install SQLAlchemy v2 using `pip install databricks-sqlalchemy`
110
+
97
111
 
98
112
  ## Contributing
99
113
 
@@ -3,9 +3,9 @@
3
3
  [![PyPI](https://img.shields.io/pypi/v/databricks-sql-connector?style=flat-square)](https://pypi.org/project/databricks-sql-connector/)
4
4
  [![Downloads](https://pepy.tech/badge/databricks-sql-connector)](https://pepy.tech/project/databricks-sql-connector)
5
5
 
6
- The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/) and exposes a [SQLAlchemy](https://www.sqlalchemy.org/) dialect for use with tools like `pandas` and `alembic` which use SQLAlchemy to execute DDL. Use `pip install databricks-sql-connector[sqlalchemy]` to install with SQLAlchemy's dependencies. `pip install databricks-sql-connector[alembic]` will install alembic's dependencies.
6
+ The Databricks SQL Connector for Python allows you to develop Python applications that connect to Databricks clusters and SQL warehouses. It is a Thrift-based client with no dependencies on ODBC or JDBC. It conforms to the [Python DB API 2.0 specification](https://www.python.org/dev/peps/pep-0249/).
7
7
 
8
- This connector uses Arrow as the data-exchange format, and supports APIs to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time.
8
+ This connector uses Arrow as the data-exchange format, and supports APIs (e.g. `fetchmany_arrow`) to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time. [PyArrow](https://arrow.apache.org/docs/python/index.html) is required to enable this and use these APIs; you can install it via `pip install pyarrow` or `pip install databricks-sql-connector[pyarrow]`.
9
9
 
10
10
  You are welcome to file an issue here for general use cases. You can also contact Databricks Support [here](https://help.databricks.com).
11
11
 
@@ -22,7 +22,12 @@ For the latest documentation, see
22
22
 
23
23
  ## Quickstart
24
24
 
25
- Install the library with `pip install databricks-sql-connector`
25
+ ### Installing the core library
26
+ Install using `pip install databricks-sql-connector`
27
+
28
+ ### Installing the core library with PyArrow
29
+ Install using `pip install databricks-sql-connector[pyarrow]`
30
+
26
31
 
27
32
  ```bash
28
33
  export DATABRICKS_HOST=********.databricks.com
@@ -60,6 +65,18 @@ or to a Databricks Runtime interactive cluster (e.g. /sql/protocolv1/o/123456789
60
65
  > to authenticate the target Databricks user account and needs to open the browser for authentication. So it
61
66
  > can only run on the user's machine.
62
67
 
68
+ ## SQLAlchemy
69
+ Starting from `databricks-sql-connector` version 4.0.0 SQLAlchemy support has been extracted to a new library `databricks-sqlalchemy`.
70
+
71
+ - Github repository [databricks-sqlalchemy github](https://github.com/databricks/databricks-sqlalchemy)
72
+ - PyPI [databricks-sqlalchemy pypi](https://pypi.org/project/databricks-sqlalchemy/)
73
+
74
+ ### Quick SQLAlchemy guide
75
+ Users can now choose between using the SQLAlchemy v1 or SQLAlchemy v2 dialects with the connector core
76
+
77
+ - Install the latest SQLAlchemy v1 using `pip install databricks-sqlalchemy~=1.0`
78
+ - Install SQLAlchemy v2 using `pip install databricks-sqlalchemy`
79
+
63
80
 
64
81
  ## Contributing
65
82
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "4.0.0.b3"
3
+ version = "4.0.1"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -12,26 +12,21 @@ include = ["CHANGELOG.md"]
12
12
  python = "^3.8.0"
13
13
  thrift = ">=0.16.0,<0.21.0"
14
14
  pandas = [
15
- { version = ">=1.2.5,<2.3.0", python = ">=3.8" }
15
+ { version = ">=1.2.5,<2.3.0", python = ">=3.8,<3.13" },
16
+ { version = ">=2.2.3,<2.3.0", python = ">=3.13" }
16
17
  ]
17
18
  lz4 = "^4.0.2"
18
19
  requests = "^2.18.1"
19
20
  oauthlib = "^3.1.0"
20
- numpy = [
21
- { version = "^1.16.6", python = ">=3.8,<3.11" },
22
- { version = "^1.23.4", python = ">=3.11" },
23
- ]
24
21
  openpyxl = "^3.0.10"
25
22
  urllib3 = ">=1.26"
26
-
27
- databricks-sqlalchemy = { version = ">=2.0.0", optional = true }
28
- pyarrow = { version = ">=14.0.1,<17", optional=true }
29
- alembic = { version = "^1.0.11", optional = true }
30
-
23
+ pyarrow = [
24
+ { version = ">=14.0.1", python = ">=3.8,<3.13", optional=true },
25
+ { version = ">=18.0.0", python = ">=3.13", optional=true }
26
+ ]
27
+ python-dateutil = "^2.9.0"
31
28
 
32
29
  [tool.poetry.extras]
33
- databricks-sqlalchemy = ["databricks-sqlalchemy"]
34
- alembic = ["databricks-sqlalchemy", "alembic"]
35
30
  pyarrow = ["pyarrow"]
36
31
 
37
32
  [tool.poetry.dev-dependencies]
@@ -40,14 +35,15 @@ mypy = "^1.10.1"
40
35
  pylint = ">=2.12.0"
41
36
  black = "^22.3.0"
42
37
  pytest-dotenv = "^0.5.2"
38
+ numpy = [
39
+ { version = ">=1.16.6", python = ">=3.8,<3.11" },
40
+ { version = ">=1.23.4", python = ">=3.11" },
41
+ ]
43
42
 
44
43
  [tool.poetry.urls]
45
44
  "Homepage" = "https://github.com/databricks/databricks-sql-python"
46
45
  "Bug Tracker" = "https://github.com/databricks/databricks-sql-python/issues"
47
46
 
48
- [tool.poetry.plugins."sqlalchemy.dialects"]
49
- "databricks" = "databricks.sqlalchemy:DatabricksDialect"
50
-
51
47
  [build-system]
52
48
  requires = ["poetry-core>=1.0.0"]
53
49
  build-backend = "poetry.core.masonry.api"
@@ -64,5 +60,5 @@ markers = {"reviewed" = "Test case has been reviewed by Databricks"}
64
60
  minversion = "6.0"
65
61
  log_cli = "false"
66
62
  log_cli_level = "INFO"
67
- testpaths = ["tests", "src/databricks/sqlalchemy/test_local"]
68
- env_files = ["test.env"]
63
+ testpaths = ["tests"]
64
+ env_files = ["test.env"]
@@ -68,7 +68,7 @@ DATETIME = DBAPITypeObject("timestamp")
68
68
  DATE = DBAPITypeObject("date")
69
69
  ROWID = DBAPITypeObject()
70
70
 
71
- __version__ = "3.6.0"
71
+ __version__ = "4.0.1"
72
72
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
73
73
 
74
74
  # These two functions are pyhive legacy
@@ -1,9 +1,12 @@
1
1
  import logging
2
+ import random
2
3
  import time
3
4
  import typing
4
5
  from enum import Enum
5
6
  from typing import List, Optional, Tuple, Union
6
7
 
8
+ import urllib3
9
+
7
10
  # We only use this import for type hinting
8
11
  try:
9
12
  # If urllib3~=2.0 is installed
@@ -13,6 +16,8 @@ except ImportError:
13
16
  from urllib3 import HTTPResponse as BaseHTTPResponse
14
17
  from urllib3 import Retry
15
18
  from urllib3.util.retry import RequestHistory
19
+ from packaging import version
20
+
16
21
 
17
22
  from databricks.sql.exc import (
18
23
  CursorAlreadyClosedError,
@@ -285,25 +290,32 @@ class DatabricksRetryPolicy(Retry):
285
290
  """
286
291
  retry_after = self.get_retry_after(response)
287
292
  if retry_after:
288
- backoff = self.get_backoff_time()
289
- proposed_wait = max(backoff, retry_after)
290
- self.check_proposed_wait(proposed_wait)
291
- time.sleep(proposed_wait)
292
- return True
293
+ proposed_wait = retry_after
294
+ else:
295
+ proposed_wait = self.get_backoff_time()
293
296
 
294
- return False
297
+ proposed_wait = max(proposed_wait, self.delay_max)
298
+ self.check_proposed_wait(proposed_wait)
299
+ logger.debug(f"Retrying after {proposed_wait} seconds")
300
+ time.sleep(proposed_wait)
301
+ return True
295
302
 
296
303
  def get_backoff_time(self) -> float:
297
- """Calls urllib3's built-in get_backoff_time.
304
+ """
305
+ This method implements the exponential backoff algorithm to calculate the delay between retries.
298
306
 
299
307
  Never returns a value larger than self.delay_max
300
308
  A MaxRetryDurationError will be raised if the calculated backoff would exceed self.max_attempts_duration
301
309
 
302
- Note: within urllib3, a backoff is only calculated in cases where a Retry-After header is not present
303
- in the previous unsuccessful request and `self.respect_retry_after_header` is True (which is always true)
310
+ :return:
304
311
  """
305
312
 
306
- proposed_backoff = super().get_backoff_time()
313
+ current_attempt = self.stop_after_attempts_count - int(self.total or 0)
314
+ proposed_backoff = (2**current_attempt) * self.delay_min
315
+ if version.parse(urllib3.__version__) >= version.parse("2.0.0"):
316
+ if self.backoff_jitter != 0.0:
317
+ proposed_backoff += random.random() * self.backoff_jitter
318
+
307
319
  proposed_backoff = min(proposed_backoff, self.delay_max)
308
320
  self.check_proposed_wait(proposed_backoff)
309
321
 
@@ -338,23 +350,24 @@ class DatabricksRetryPolicy(Retry):
338
350
  if a retry would violate the configured policy.
339
351
  """
340
352
 
353
+ logger.info(f"Received status code {status_code} for {method} request")
354
+
341
355
  # Request succeeded. Don't retry.
342
356
  if status_code == 200:
343
357
  return False, "200 codes are not retried"
344
358
 
345
359
  if status_code == 401:
346
- raise NonRecoverableNetworkError(
347
- "Received 401 - UNAUTHORIZED. Confirm your authentication credentials."
360
+ return (
361
+ False,
362
+ "Received 401 - UNAUTHORIZED. Confirm your authentication credentials.",
348
363
  )
349
364
 
350
365
  if status_code == 403:
351
- raise NonRecoverableNetworkError(
352
- "Received 403 - FORBIDDEN. Confirm your authentication credentials."
353
- )
366
+ return False, "403 codes are not retried"
354
367
 
355
368
  # Request failed and server said NotImplemented. This isn't recoverable. Don't retry.
356
369
  if status_code == 501:
357
- raise NonRecoverableNetworkError("Received code 501 from server.")
370
+ return False, "Received code 501 from server."
358
371
 
359
372
  # Request failed and this method is not retryable. We only retry POST requests.
360
373
  if not self._is_method_retryable(method):
@@ -393,8 +406,9 @@ class DatabricksRetryPolicy(Retry):
393
406
  and status_code not in self.status_forcelist
394
407
  and status_code not in self.force_dangerous_codes
395
408
  ):
396
- raise UnsafeToRetryError(
397
- "ExecuteStatement command can only be retried for codes 429 and 503"
409
+ return (
410
+ False,
411
+ "ExecuteStatement command can only be retried for codes 429 and 503",
398
412
  )
399
413
 
400
414
  # Request failed with a dangerous code, was an ExecuteStatement, but user forced retries for this
@@ -198,6 +198,12 @@ class THttpClient(thrift.transport.THttpClient.THttpClient):
198
198
  self.message = self.__resp.reason
199
199
  self.headers = self.__resp.headers
200
200
 
201
+ logger.info(
202
+ "HTTP Response with status code {}, message: {}".format(
203
+ self.code, self.message
204
+ )
205
+ )
206
+
201
207
  @staticmethod
202
208
  def basic_proxy_auth_headers(proxy):
203
209
  if proxy is None or not proxy.username:
@@ -1,3 +1,4 @@
1
+ import time
1
2
  from typing import Dict, Tuple, List, Optional, Any, Union, Sequence
2
3
 
3
4
  import pandas
@@ -47,11 +48,19 @@ from databricks.sql.experimental.oauth_persistence import OAuthPersistence
47
48
 
48
49
  from databricks.sql.thrift_api.TCLIService.ttypes import (
49
50
  TSparkParameter,
51
+ TOperationState,
50
52
  )
51
53
 
52
54
 
53
55
  logger = logging.getLogger(__name__)
54
56
 
57
+ if pyarrow is None:
58
+ logger.warning(
59
+ "[WARN] pyarrow is not installed by default since databricks-sql-connector 4.0.0,"
60
+ "any arrow specific api (e.g. fetchmany_arrow) and cloud fetch will be disabled."
61
+ "If you need these features, please run pip install pyarrow or pip install databricks-sql-connector[pyarrow] to install"
62
+ )
63
+
55
64
  DEFAULT_RESULT_BUFFER_SIZE_BYTES = 104857600
56
65
  DEFAULT_ARRAY_SIZE = 100000
57
66
 
@@ -113,6 +122,9 @@ class Connection:
113
122
  port of the oauth redirect uri (localhost). This is required when custom oauth client_id
114
123
  `oauth_client_id` is set
115
124
 
125
+ user_agent_entry: `str`, optional
126
+ A custom tag to append to the User-Agent header. This is typically used by partners to identify their applications. If not specified, it will use the default user agent PyDatabricksSqlConnector.
127
+
116
128
  experimental_oauth_persistence: configures preferred storage for persisting oauth tokens.
117
129
  This has to be a class implementing `OAuthPersistence`.
118
130
  When `auth_type` is set to `databricks-oauth` or `azure-oauth` without persisting the oauth token in a
@@ -167,8 +179,6 @@ class Connection:
167
179
  """
168
180
 
169
181
  # Internal arguments in **kwargs:
170
- # _user_agent_entry
171
- # Tag to add to User-Agent header. For use by partners.
172
182
  # _use_cert_as_auth
173
183
  # Use a TLS cert instead of a token
174
184
  # _enable_ssl
@@ -218,12 +228,21 @@ class Connection:
218
228
  server_hostname, **kwargs
219
229
  )
220
230
 
221
- if not kwargs.get("_user_agent_entry"):
222
- useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__)
223
- else:
231
+ user_agent_entry = kwargs.get("user_agent_entry")
232
+ if user_agent_entry is None:
233
+ user_agent_entry = kwargs.get("_user_agent_entry")
234
+ if user_agent_entry is not None:
235
+ logger.warning(
236
+ "[WARN] Parameter '_user_agent_entry' is deprecated; use 'user_agent_entry' instead. "
237
+ "This parameter will be removed in the upcoming releases."
238
+ )
239
+
240
+ if user_agent_entry:
224
241
  useragent_header = "{}/{} ({})".format(
225
- USER_AGENT_NAME, __version__, kwargs.get("_user_agent_entry")
242
+ USER_AGENT_NAME, __version__, user_agent_entry
226
243
  )
244
+ else:
245
+ useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__)
227
246
 
228
247
  base_headers = [("User-Agent", useragent_header)]
229
248
 
@@ -430,6 +449,8 @@ class Cursor:
430
449
  self.escaper = ParamEscaper()
431
450
  self.lastrowid = None
432
451
 
452
+ self.ASYNC_DEFAULT_POLLING_INTERVAL = 2
453
+
433
454
  # The ideal return type for this method is perhaps Self, but that was not added until 3.11, and we support pre-3.11 pythons, currently.
434
455
  def __enter__(self) -> "Cursor":
435
456
  return self
@@ -733,6 +754,7 @@ class Cursor:
733
754
  self,
734
755
  operation: str,
735
756
  parameters: Optional[TParameterCollection] = None,
757
+ enforce_embedded_schema_correctness=False,
736
758
  ) -> "Cursor":
737
759
  """
738
760
  Execute a query and wait for execution to complete.
@@ -796,6 +818,8 @@ class Cursor:
796
818
  cursor=self,
797
819
  use_cloud_fetch=self.connection.use_cloud_fetch,
798
820
  parameters=prepared_params,
821
+ async_op=False,
822
+ enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
799
823
  )
800
824
  self.active_result_set = ResultSet(
801
825
  self.connection,
@@ -803,6 +827,7 @@ class Cursor:
803
827
  self.thrift_backend,
804
828
  self.buffer_size_bytes,
805
829
  self.arraysize,
830
+ self.connection.use_cloud_fetch,
806
831
  )
807
832
 
808
833
  if execute_response.is_staging_operation:
@@ -812,6 +837,108 @@ class Cursor:
812
837
 
813
838
  return self
814
839
 
840
+ def execute_async(
841
+ self,
842
+ operation: str,
843
+ parameters: Optional[TParameterCollection] = None,
844
+ enforce_embedded_schema_correctness=False,
845
+ ) -> "Cursor":
846
+ """
847
+
848
+ Execute a query and do not wait for it to complete and just move ahead
849
+
850
+ :param operation:
851
+ :param parameters:
852
+ :return:
853
+ """
854
+ param_approach = self._determine_parameter_approach(parameters)
855
+ if param_approach == ParameterApproach.NONE:
856
+ prepared_params = NO_NATIVE_PARAMS
857
+ prepared_operation = operation
858
+
859
+ elif param_approach == ParameterApproach.INLINE:
860
+ prepared_operation, prepared_params = self._prepare_inline_parameters(
861
+ operation, parameters
862
+ )
863
+ elif param_approach == ParameterApproach.NATIVE:
864
+ normalized_parameters = self._normalize_tparametercollection(parameters)
865
+ param_structure = self._determine_parameter_structure(normalized_parameters)
866
+ transformed_operation = transform_paramstyle(
867
+ operation, normalized_parameters, param_structure
868
+ )
869
+ prepared_operation, prepared_params = self._prepare_native_parameters(
870
+ transformed_operation, normalized_parameters, param_structure
871
+ )
872
+
873
+ self._check_not_closed()
874
+ self._close_and_clear_active_result_set()
875
+ self.thrift_backend.execute_command(
876
+ operation=prepared_operation,
877
+ session_handle=self.connection._session_handle,
878
+ max_rows=self.arraysize,
879
+ max_bytes=self.buffer_size_bytes,
880
+ lz4_compression=self.connection.lz4_compression,
881
+ cursor=self,
882
+ use_cloud_fetch=self.connection.use_cloud_fetch,
883
+ parameters=prepared_params,
884
+ async_op=True,
885
+ enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
886
+ )
887
+
888
+ return self
889
+
890
+ def get_query_state(self) -> "TOperationState":
891
+ """
892
+ Get the state of the async executing query or basically poll the status of the query
893
+
894
+ :return:
895
+ """
896
+ self._check_not_closed()
897
+ return self.thrift_backend.get_query_state(self.active_op_handle)
898
+
899
+ def get_async_execution_result(self):
900
+ """
901
+
902
+ Checks for the status of the async executing query and fetches the result if the query is finished
903
+ Otherwise it will keep polling the status of the query till there is a Not pending state
904
+ :return:
905
+ """
906
+ self._check_not_closed()
907
+
908
+ def is_executing(operation_state) -> "bool":
909
+ return not operation_state or operation_state in [
910
+ ttypes.TOperationState.RUNNING_STATE,
911
+ ttypes.TOperationState.PENDING_STATE,
912
+ ]
913
+
914
+ while is_executing(self.get_query_state()):
915
+ # Poll after some default time
916
+ time.sleep(self.ASYNC_DEFAULT_POLLING_INTERVAL)
917
+
918
+ operation_state = self.get_query_state()
919
+ if operation_state == ttypes.TOperationState.FINISHED_STATE:
920
+ execute_response = self.thrift_backend.get_execution_result(
921
+ self.active_op_handle, self
922
+ )
923
+ self.active_result_set = ResultSet(
924
+ self.connection,
925
+ execute_response,
926
+ self.thrift_backend,
927
+ self.buffer_size_bytes,
928
+ self.arraysize,
929
+ )
930
+
931
+ if execute_response.is_staging_operation:
932
+ self._handle_staging_operation(
933
+ staging_allowed_local_path=self.thrift_backend.staging_allowed_local_path
934
+ )
935
+
936
+ return self
937
+ else:
938
+ raise Error(
939
+ f"get_execution_result failed with Operation status {operation_state}"
940
+ )
941
+
815
942
  def executemany(self, operation, seq_of_parameters):
816
943
  """
817
944
  Execute the operation once for every set of passed in parameters.
@@ -1097,6 +1224,7 @@ class ResultSet:
1097
1224
  thrift_backend: ThriftBackend,
1098
1225
  result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
1099
1226
  arraysize: int = 10000,
1227
+ use_cloud_fetch: bool = True,
1100
1228
  ):
1101
1229
  """
1102
1230
  A ResultSet manages the results of a single command.
@@ -1118,6 +1246,7 @@ class ResultSet:
1118
1246
  self.description = execute_response.description
1119
1247
  self._arrow_schema_bytes = execute_response.arrow_schema_bytes
1120
1248
  self._next_row_index = 0
1249
+ self._use_cloud_fetch = use_cloud_fetch
1121
1250
 
1122
1251
  if execute_response.arrow_queue:
1123
1252
  # In this case the server has taken the fast path and returned an initial batch of
@@ -1145,6 +1274,7 @@ class ResultSet:
1145
1274
  lz4_compressed=self.lz4_compressed,
1146
1275
  arrow_schema_bytes=self._arrow_schema_bytes,
1147
1276
  description=self.description,
1277
+ use_cloud_fetch=self._use_cloud_fetch,
1148
1278
  )
1149
1279
  self.results = results
1150
1280
  self.has_more_rows = has_more_rows
@@ -7,6 +7,8 @@ import uuid
7
7
  import threading
8
8
  from typing import List, Union
9
9
 
10
+ from databricks.sql.thrift_api.TCLIService.ttypes import TOperationState
11
+
10
12
  try:
11
13
  import pyarrow
12
14
  except ImportError:
@@ -93,8 +95,6 @@ class ThriftBackend:
93
95
  **kwargs,
94
96
  ):
95
97
  # Internal arguments in **kwargs:
96
- # _user_agent_entry
97
- # Tag to add to User-Agent header. For use by partners.
98
98
  # _username, _password
99
99
  # Username and password Basic authentication (no official support)
100
100
  # _connection_uri
@@ -319,7 +319,7 @@ class ThriftBackend:
319
319
 
320
320
  # FUTURE: Consider moving to https://github.com/litl/backoff or
321
321
  # https://github.com/jd/tenacity for retry logic.
322
- def make_request(self, method, request):
322
+ def make_request(self, method, request, retryable=True):
323
323
  """Execute given request, attempting retries when
324
324
  1. Receiving HTTP 429/503 from server
325
325
  2. OSError is raised during a GetOperationStatus
@@ -458,7 +458,7 @@ class ThriftBackend:
458
458
  # return on success
459
459
  # if available: bounded delay and retry
460
460
  # if not: raise error
461
- max_attempts = self._retry_stop_after_attempts_count
461
+ max_attempts = self._retry_stop_after_attempts_count if retryable else 1
462
462
 
463
463
  # use index-1 counting for logging/human consistency
464
464
  for attempt in range(1, max_attempts + 1):
@@ -769,6 +769,63 @@ class ThriftBackend:
769
769
  arrow_schema_bytes=schema_bytes,
770
770
  )
771
771
 
772
def get_execution_result(self, op_handle, cursor):
    """Fetch the first batch of results for an already-submitted operation.

    Sends a ``FetchResults`` request (``FETCH_NEXT`` orientation, result-set
    metadata included) for ``op_handle``, sized by ``cursor.arraysize`` and
    ``cursor.buffer_size_bytes``, and wraps the reply in an
    ``ExecuteResponse``.

    Args:
        op_handle: Handle of the operation whose results are wanted; must
            not be ``None``.
        cursor: Object supplying ``arraysize`` and ``buffer_size_bytes``.

    Returns:
        An ``ExecuteResponse`` whose ``arrow_queue`` is built from the
        fetched row set and whose ``command_handle`` is ``op_handle``.
    """
    assert op_handle is not None

    # The request carries a copy of the handle with its third positional
    # field forced to False (presumably ``hasResultSet`` -- TODO confirm
    # against the generated TOperationHandle signature).
    fetch_handle = ttypes.TOperationHandle(
        op_handle.operationId,
        op_handle.operationType,
        False,
        op_handle.modifiedRowCount,
    )
    fetch_req = ttypes.TFetchResultsReq(
        operationHandle=fetch_handle,
        maxRows=cursor.arraysize,
        maxBytes=cursor.buffer_size_bytes,
        orientation=ttypes.TFetchOrientation.FETCH_NEXT,
        includeResultSetMetadata=True,
    )

    fetch_resp = self.make_request(self._client.FetchResults, fetch_req)
    metadata = fetch_resp.resultSetMetadata

    is_lz4 = metadata.lz4Compressed
    is_staging = metadata.isStagingOperation
    more_rows = fetch_resp.hasMoreRows
    column_description = self._hive_schema_to_description(metadata.schema)

    # Use the Arrow schema sent by the server when it is present (truthy);
    # otherwise derive one from the Hive schema and serialize it.
    arrow_schema = metadata.arrowSchema
    if not arrow_schema:
        derived_schema = self._hive_schema_to_arrow_schema(metadata.schema)
        arrow_schema = derived_schema.serialize().to_pybytes()

    result_queue = ResultSetQueueFactory.build_queue(
        row_set_type=metadata.resultFormat,
        t_row_set=fetch_resp.results,
        arrow_schema_bytes=arrow_schema,
        max_download_threads=self.max_download_threads,
        lz4_compressed=is_lz4,
        description=column_description,
        ssl_options=self._ssl_options,
    )

    return ExecuteResponse(
        arrow_queue=result_queue,
        status=fetch_resp.status,
        has_been_closed_server_side=False,
        has_more_rows=more_rows,
        lz4_compressed=is_lz4,
        is_staging_operation=is_staging,
        command_handle=op_handle,
        description=column_description,
        arrow_schema_bytes=arrow_schema,
    )
828
+
772
829
  def _wait_until_command_done(self, op_handle, initial_operation_status_resp):
773
830
  if initial_operation_status_resp:
774
831
  self._check_command_not_in_error_or_closed_state(
@@ -787,6 +844,12 @@ class ThriftBackend:
787
844
  self._check_command_not_in_error_or_closed_state(op_handle, poll_resp)
788
845
  return operation_state
789
846
 
847
def get_query_state(self, op_handle) -> "TOperationState":
    """Poll the server once and return the operation's current state.

    The status response obtained from ``_poll_for_status`` is passed to
    ``_check_command_not_in_error_or_closed_state`` before the state is
    returned, so anything that check raises propagates to the caller.
    """
    status_resp = self._poll_for_status(op_handle)
    # Read the state before validating, mirroring the polling loop used
    # while waiting for a command to finish.
    current_state = status_resp.operationState
    self._check_command_not_in_error_or_closed_state(op_handle, status_resp)
    return current_state
852
+
790
853
  @staticmethod
791
854
  def _check_direct_results_for_error(t_spark_direct_results):
792
855
  if t_spark_direct_results:
@@ -817,6 +880,8 @@ class ThriftBackend:
817
880
  cursor,
818
881
  use_cloud_fetch=True,
819
882
  parameters=[],
883
+ async_op=False,
884
+ enforce_embedded_schema_correctness=False,
820
885
  ):
821
886
  assert session_handle is not None
822
887
 
@@ -832,8 +897,12 @@ class ThriftBackend:
832
897
  sessionHandle=session_handle,
833
898
  statement=operation,
834
899
  runAsync=True,
835
- getDirectResults=ttypes.TSparkGetDirectResults(
836
- maxRows=max_rows, maxBytes=max_bytes
900
+ # For async operation we don't want the direct results
901
+ getDirectResults=None
902
+ if async_op
903
+ else ttypes.TSparkGetDirectResults(
904
+ maxRows=max_rows,
905
+ maxBytes=max_bytes,
837
906
  ),
838
907
  canReadArrowResult=True if pyarrow else False,
839
908
  canDecompressLZ4Result=lz4_compression,
@@ -844,9 +913,14 @@ class ThriftBackend:
844
913
  },
845
914
  useArrowNativeTypes=spark_arrow_types,
846
915
  parameters=parameters,
916
+ enforceEmbeddedSchemaCorrectness=enforce_embedded_schema_correctness,
847
917
  )
848
918
  resp = self.make_request(self._client.ExecuteStatement, req)
849
- return self._handle_execute_response(resp, cursor)
919
+
920
+ if async_op:
921
+ self._handle_execute_response_async(resp, cursor)
922
+ else:
923
+ return self._handle_execute_response(resp, cursor)
850
924
 
851
925
  def get_catalogs(self, session_handle, max_rows, max_bytes, cursor):
852
926
  assert session_handle is not None
@@ -945,6 +1019,10 @@ class ThriftBackend:
945
1019
 
946
1020
  return self._results_message_to_execute_response(resp, final_operation_state)
947
1021
 
1022
def _handle_execute_response_async(self, resp, cursor):
    """Handle an ExecuteStatement response for an asynchronous submission.

    Stores the response's operation handle on ``cursor.active_op_handle``
    and then runs ``_check_direct_results_for_error`` on the response's
    direct results. Returns ``None``.
    """
    # The handle is recorded first, so the cursor still holds it if the
    # check below raises.
    submitted_handle = resp.operationHandle
    cursor.active_op_handle = submitted_handle

    direct_results = resp.directResults
    self._check_direct_results_for_error(direct_results)
1025
+
948
1026
  def fetch_results(
949
1027
  self,
950
1028
  op_handle,
@@ -954,6 +1032,7 @@ class ThriftBackend:
954
1032
  lz4_compressed,
955
1033
  arrow_schema_bytes,
956
1034
  description,
1035
+ use_cloud_fetch=True,
957
1036
  ):
958
1037
  assert op_handle is not None
959
1038
 
@@ -970,10 +1049,11 @@ class ThriftBackend:
970
1049
  includeResultSetMetadata=True,
971
1050
  )
972
1051
 
973
- resp = self.make_request(self._client.FetchResults, req)
1052
+ # Fetch results in Inline mode with FETCH_NEXT orientation are not idempotent and hence not retried
1053
+ resp = self.make_request(self._client.FetchResults, req, use_cloud_fetch)
974
1054
  if resp.results.startRowOffset > expected_row_start_offset:
975
- logger.warning(
976
- "Expected results to start from {} but they instead start at {}".format(
1055
+ raise DataError(
1056
+ "fetch_results failed due to inconsistency in the state between the client and the server. Expected results to start from {} but they instead start at {}, some result batches must have been skipped".format(
977
1057
  expected_row_start_offset, resp.results.startRowOffset
978
1058
  )
979
1059
  )
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- import pytz
3
+ from dateutil import parser
4
4
  import datetime
5
5
  import decimal
6
6
  from abc import ABC, abstractmethod
@@ -642,16 +642,7 @@ def convert_to_assigned_datatypes_in_column_table(column_table, description):
642
642
  )
643
643
  elif description[i][1] == "timestamp":
644
644
  converted_column_table.append(
645
- tuple(
646
- (
647
- v
648
- if v is None
649
- else datetime.datetime.strptime(
650
- v, "%Y-%m-%d %H:%M:%S.%f"
651
- ).replace(tzinfo=pytz.UTC)
652
- )
653
- for v in col
654
- )
645
+ tuple((v if v is None else parser.parse(v)) for v in col)
655
646
  )
656
647
  else:
657
648
  converted_column_table.append(col)
@@ -1,6 +0,0 @@
1
# Compatibility shim: re-export everything from the separately distributed
# ``databricks_sqlalchemy`` plugin so it stays importable from this package.
try:
    from databricks_sqlalchemy import *
except ImportError:
    # Only a missing plugin is downgraded to a warning. Any other failure
    # raised while importing it (a bug inside the plugin, KeyboardInterrupt,
    # SystemExit, ...) propagates instead of being misreported as
    # "not installed".
    import warnings

    warnings.warn("Install databricks-sqlalchemy plugin before using this")