databricks-sql-connector 4.2.5__tar.gz → 4.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/CHANGELOG.md +19 -0
  2. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/PKG-INFO +4 -4
  3. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/pyproject.toml +18 -4
  4. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/__init__.py +1 -1
  5. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/common.py +2 -0
  6. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/retry.py +16 -2
  7. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/databricks_client.py +2 -0
  8. databricks_sql_connector-4.2.7/src/databricks/sql/backend/kernel/__init__.py +25 -0
  9. databricks_sql_connector-4.2.7/src/databricks/sql/backend/kernel/_errors.py +134 -0
  10. databricks_sql_connector-4.2.7/src/databricks/sql/backend/kernel/auth_bridge.py +268 -0
  11. databricks_sql_connector-4.2.7/src/databricks/sql/backend/kernel/client.py +915 -0
  12. databricks_sql_connector-4.2.7/src/databricks/sql/backend/kernel/result_set.py +274 -0
  13. databricks_sql_connector-4.2.7/src/databricks/sql/backend/kernel/type_mapping.py +248 -0
  14. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/backend.py +5 -2
  15. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/models/requests.py +8 -0
  16. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/utils/http_client.py +4 -0
  17. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/thrift_backend.py +21 -5
  18. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/client.py +55 -3
  19. databricks_sql_connector-4.2.7/src/databricks/sql/common/agent.py +52 -0
  20. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/common/feature_flag.py +1 -0
  21. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/common/unified_http_client.py +1 -0
  22. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/parameters/native.py +1 -1
  23. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/result_set.py +35 -5
  24. databricks_sql_connector-4.2.7/src/databricks/sql/session.py +383 -0
  25. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/telemetry_client.py +11 -0
  26. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/utils.py +56 -5
  27. databricks_sql_connector-4.2.5/src/databricks/sql/session.py +0 -216
  28. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/LICENSE +0 -0
  29. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/README.md +0 -0
  30. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/__init__.py +0 -0
  31. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/__init__.py +0 -0
  32. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/auth.py +0 -0
  33. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/auth_utils.py +0 -0
  34. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/authenticators.py +0 -0
  35. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/endpoint.py +0 -0
  36. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/oauth.py +0 -0
  37. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  38. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  39. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/auth/token_federation.py +0 -0
  40. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/models/__init__.py +0 -0
  41. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/models/base.py +0 -0
  42. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/models/responses.py +0 -0
  43. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/queue.py +0 -0
  44. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/result_set.py +0 -0
  45. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/utils/constants.py +0 -0
  46. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/utils/conversion.py +0 -0
  47. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/utils/filters.py +0 -0
  48. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/sea/utils/normalize.py +0 -0
  49. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/types.py +0 -0
  50. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/utils/__init__.py +0 -0
  51. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/backend/utils/guid_utils.py +0 -0
  52. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  53. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  54. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/common/http.py +0 -0
  55. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/common/http_utils.py +0 -0
  56. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/common/url_utils.py +0 -0
  57. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/exc.py +0 -0
  58. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/experimental/__init__.py +0 -0
  59. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  60. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/parameters/__init__.py +0 -0
  61. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/parameters/py.typed +0 -0
  62. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/py.typed +0 -0
  63. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/circuit_breaker_manager.py +0 -0
  64. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/latency_logger.py +0 -0
  65. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/models/endpoint_models.py +0 -0
  66. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/models/enums.py +0 -0
  67. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/models/event.py +0 -0
  68. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/models/frontend_logs.py +0 -0
  69. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/telemetry_push_client.py +0 -0
  70. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/telemetry/utils.py +0 -0
  71. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  72. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  73. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  74. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  75. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  76. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/thrift_api/__init__.py +0 -0
  77. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.7}/src/databricks/sql/types.py +0 -0
@@ -1,5 +1,24 @@
1
1
  # Release History
2
2
 
3
+ # 4.2.7 (2026-06-02)
4
+ - Extract SPOG org-id from cluster http_path for non-Thrift requests (databricks/databricks-sql-python#817 by @msrathore-db)
5
+ - Remove empty chunks in CloudFetch concatenation (databricks/databricks-sql-python#814 by @jprakash-db)
6
+ - Add `_retry_server_directed_only` mode for Retry-After header compliance (databricks/databricks-sql-python#756 by @sd-db)
7
+ - Bump thrift to 0.23.0 (databricks/databricks-sql-python#796 by @leoromanovsky)
8
+ - Allow pandas 3.x in dependency constraints (databricks/databricks-sql-python#768 by @moomindani)
9
+ - Telemetry: unwrap TokenFederationProvider to report inner auth mechanism/flow (databricks/databricks-sql-python#781 by @samikshya-db)
10
+
11
+ # 4.2.6 (2026-04-22)
12
+ - Add SPOG routing support for account-level vanity URLs (databricks/databricks-sql-python#767 by @msrathore-db)
13
+ - Fix dependency_manager: handle PEP 440 ~= compatible release syntax (databricks/databricks-sql-python#776 by @vikrantpuppala)
14
+ - Bump thrift to fix deprecation warning (databricks/databricks-sql-python#733 by @Korijn)
15
+ - Add AI coding agent detection to User-Agent header (databricks/databricks-sql-python#740 by @vikrantpuppala)
16
+ - Add statement-level query_tags support for SEA backend (databricks/databricks-sql-python#754 by @sreekanth-db)
17
+ - Update PyArrow concatenation of tables to use promote_options as default (databricks/databricks-sql-python#751 by @jprakash-db)
18
+ - Fix float inference to use DoubleParameter (64-bit) instead of FloatParameter (databricks/databricks-sql-python#742 by @Shubhambhusate)
19
+ - Allow specifying query_tags as a dict upon connection creation (databricks/databricks-sql-python#749 by @jiabin-hu)
20
+ - Add query_tags parameter support for execute methods (databricks/databricks-sql-python#736 by @jiabin-hu)
21
+
3
22
  # 4.2.5 (2026-02-09)
4
23
  - Fix feature-flag endpoint retries in gov region (databricks/databricks-sql-python#735 by @samikshya-db)
5
24
  - Improve telemetry lifecycle management (databricks/databricks-sql-python#734 by @msrathore-db)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: databricks-sql-connector
3
- Version: 4.2.5
3
+ Version: 4.2.7
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -21,8 +21,8 @@ Requires-Dist: lz4 (>=4.0.2,<5.0.0) ; python_version >= "3.8" and python_version
21
21
  Requires-Dist: lz4 (>=4.4.5,<5.0.0) ; python_version >= "3.14"
22
22
  Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
23
23
  Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
24
- Requires-Dist: pandas (>=1.2.5,<2.4.0) ; python_version >= "3.8" and python_version < "3.13"
25
- Requires-Dist: pandas (>=2.2.3,<2.4.0) ; python_version >= "3.13"
24
+ Requires-Dist: pandas (>=1.2.5,<4.0.0) ; python_version >= "3.8" and python_version < "3.13"
25
+ Requires-Dist: pandas (>=2.2.3,<4.0.0) ; python_version >= "3.13"
26
26
  Requires-Dist: pyarrow (>=14.0.1) ; (python_version >= "3.8" and python_version < "3.13") and (extra == "pyarrow")
27
27
  Requires-Dist: pyarrow (>=18.0.0) ; (python_version == "3.13") and (extra == "pyarrow")
28
28
  Requires-Dist: pyarrow (>=22.0.0) ; (python_version >= "3.14") and (extra == "pyarrow")
@@ -30,7 +30,7 @@ Requires-Dist: pybreaker (>=1.0.0,<2.0.0)
30
30
  Requires-Dist: pyjwt (>=2.0.0,<3.0.0)
31
31
  Requires-Dist: python-dateutil (>=2.8.0,<3.0.0)
32
32
  Requires-Dist: requests (>=2.18.1,<3.0.0)
33
- Requires-Dist: thrift (>=0.16.0,<0.21.0)
33
+ Requires-Dist: thrift (>=0.22.0,<0.24.0)
34
34
  Requires-Dist: urllib3 (>=1.26)
35
35
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sql-python/issues
36
36
  Project-URL: Homepage, https://github.com/databricks/databricks-sql-python
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "4.2.5"
3
+ version = "4.2.7"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -10,10 +10,10 @@ include = ["CHANGELOG.md"]
10
10
 
11
11
  [tool.poetry.dependencies]
12
12
  python = "^3.8.0"
13
- thrift = ">=0.16.0,<0.21.0"
13
+ thrift = ">=0.22.0,<0.24.0"
14
14
  pandas = [
15
- { version = ">=1.2.5,<2.4.0", python = ">=3.8,<3.13" },
16
- { version = ">=2.2.3,<2.4.0", python = ">=3.13" }
15
+ { version = ">=1.2.5,<4.0.0", python = ">=3.8,<3.13" },
16
+ { version = ">=2.2.3,<4.0.0", python = ">=3.13" }
17
17
  ]
18
18
  lz4 = [
19
19
  { version = "^4.0.2", python = ">=3.8,<3.14" },
@@ -36,6 +36,20 @@ requests-kerberos = {version = "^0.15.0", optional = true}
36
36
 
37
37
  [tool.poetry.extras]
38
38
  pyarrow = ["pyarrow"]
39
+ # `[kernel]` extra is intentionally not declared here yet.
40
+ # `databricks-sql-kernel` is built from the databricks-sql-kernel
41
+ # repo and not yet published to PyPI; declaring it as a poetry dep
42
+ # breaks `poetry lock` for every CI job. Once the wheel is on PyPI
43
+ # the extra will be added back here:
44
+ #
45
+ # databricks-sql-kernel = {version = "^0.1.0", optional = true}
46
+ # [tool.poetry.extras]
47
+ # kernel = ["databricks-sql-kernel"]
48
+ #
49
+ # Until then, the wheel is not on PyPI and the only supported
50
+ # install path is local dev:
51
+ # cd databricks-sql-kernel/pyo3 && maturin develop --release
52
+ # (into the same venv as databricks-sql-connector).
39
53
 
40
54
  [tool.poetry.group.dev.dependencies]
41
55
  pytest = "^7.1.2"
@@ -71,7 +71,7 @@ DATETIME = DBAPITypeObject("timestamp")
71
71
  DATE = DBAPITypeObject("date")
72
72
  ROWID = DBAPITypeObject()
73
73
 
74
- __version__ = "4.2.5"
74
+ __version__ = "4.2.7"
75
75
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
76
76
 
77
77
  # These two functions are pyhive legacy
@@ -47,6 +47,7 @@ class ClientContext:
47
47
  retry_stop_after_attempts_duration: Optional[float] = None,
48
48
  retry_delay_default: Optional[float] = None,
49
49
  retry_dangerous_codes: Optional[List[int]] = None,
50
+ respect_server_retry_after_header: Optional[bool] = None,
50
51
  proxy_auth_method: Optional[str] = None,
51
52
  pool_connections: Optional[int] = None,
52
53
  pool_maxsize: Optional[int] = None,
@@ -80,6 +81,7 @@ class ClientContext:
80
81
  )
81
82
  self.retry_delay_default = retry_delay_default or 5.0
82
83
  self.retry_dangerous_codes = retry_dangerous_codes or []
84
+ self.respect_server_retry_after_header = bool(respect_server_retry_after_header)
83
85
  self.proxy_auth_method = proxy_auth_method
84
86
  self.pool_connections = pool_connections or 10
85
87
  self.pool_maxsize = pool_maxsize or 20
@@ -94,6 +94,7 @@ class DatabricksRetryPolicy(Retry):
94
94
  stop_after_attempts_duration: float,
95
95
  delay_default: float,
96
96
  force_dangerous_codes: List[int],
97
+ respect_server_retry_after_header: bool = False,
97
98
  urllib3_kwargs: dict = {},
98
99
  ):
99
100
  # These values do not change from one command to the next
@@ -103,6 +104,7 @@ class DatabricksRetryPolicy(Retry):
103
104
  self.stop_after_attempts_duration = stop_after_attempts_duration
104
105
  self._delay_default = delay_default
105
106
  self.force_dangerous_codes = force_dangerous_codes
107
+ self.respect_server_retry_after_header = respect_server_retry_after_header
106
108
 
107
109
  # the urllib3 kwargs are a mix of configuration (some of which we override)
108
110
  # and counters like `total` or `connect` which may change between successive retries
@@ -202,6 +204,7 @@ class DatabricksRetryPolicy(Retry):
202
204
  stop_after_attempts_duration=self.stop_after_attempts_duration,
203
205
  delay_default=self.delay_default,
204
206
  force_dangerous_codes=self.force_dangerous_codes,
207
+ respect_server_retry_after_header=self.respect_server_retry_after_header,
205
208
  urllib3_kwargs={},
206
209
  )
207
210
 
@@ -323,7 +326,9 @@ class DatabricksRetryPolicy(Retry):
323
326
 
324
327
  return proposed_backoff
325
328
 
326
- def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]:
329
+ def should_retry(
330
+ self, method: str, status_code: int, has_retry_after: bool = False
331
+ ) -> Tuple[bool, str]:
327
332
  """This method encapsulates the connector's approach to retries.
328
333
 
329
334
  We always retry a request unless one of these conditions is met:
@@ -388,6 +393,15 @@ class DatabricksRetryPolicy(Retry):
388
393
  if not self._is_method_retryable(method):
389
394
  return False, "Only POST requests are retried"
390
395
 
396
+ # When respect_server_retry_after_header is enabled, only retry when the
397
+ # server explicitly signals it's safe via a Retry-After header. This prevents
398
+ # duplicate side effects for non-idempotent operations.
399
+ if self.respect_server_retry_after_header and not has_retry_after:
400
+ return (
401
+ False,
402
+ "respect_server_retry_after_header mode: no Retry-After header present",
403
+ )
404
+
391
405
  # Request failed, was an ExecuteStatement and the command may have reached the server
392
406
  if (
393
407
  self.command_type == CommandType.EXECUTE_STATEMENT
@@ -430,7 +444,7 @@ class DatabricksRetryPolicy(Retry):
430
444
  Logs a debug message if the request will be retried
431
445
  """
432
446
 
433
- should_retry, msg = self.should_retry(method, status_code)
447
+ should_retry, msg = self.should_retry(method, status_code, has_retry_after)
434
448
 
435
449
  if should_retry:
436
450
  logger.debug(msg)
@@ -83,6 +83,7 @@ class DatabricksClient(ABC):
83
83
  async_op: bool,
84
84
  enforce_embedded_schema_correctness: bool,
85
85
  row_limit: Optional[int] = None,
86
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
86
87
  ) -> Union[ResultSet, None]:
87
88
  """
88
89
  Executes a SQL command or query within the specified session.
@@ -102,6 +103,7 @@ class DatabricksClient(ABC):
102
103
  async_op: Whether to execute the command asynchronously
103
104
  enforce_embedded_schema_correctness: Whether to enforce schema correctness
104
105
  row_limit: Maximum number of rows in the response.
106
+ query_tags: Optional dictionary of query tags to apply for this query only.
105
107
 
106
108
  Returns:
107
109
  If async_op is False, returns a ResultSet object containing the
@@ -0,0 +1,25 @@
1
+ """Backend that delegates to the Databricks SQL Kernel (Rust) via PyO3.
2
+
3
+ Routed when ``use_kernel=True`` is passed to ``databricks.sql.connect``.
4
+ The module's identity is "delegates to the kernel" — not the wire
5
+ protocol the kernel happens to use today (SEA REST). The kernel may
6
+ switch its default transport (SEA REST → SEA gRPC → …) without
7
+ renaming this module.
8
+
9
+ This ``__init__`` deliberately does **not** re-export
10
+ ``KernelDatabricksClient`` from ``.client``. Importing ``.client``
11
+ loads the ``databricks_sql_kernel`` PyO3 extension at module-import
12
+ time; doing that eagerly here would make ``import
13
+ databricks.sql.backend.kernel.type_mapping`` (used by tests / by
14
+ ``KernelResultSet`` consumers) require the kernel wheel even when
15
+ the caller never plans to open a kernel-backed session. Callers
16
+ that need the client import it directly:
17
+
18
+ from databricks.sql.backend.kernel.client import KernelDatabricksClient
19
+
20
+ ``session.py::_create_backend`` already does this lazy import under
21
+ the ``use_kernel=True`` branch.
22
+
23
+ See ``docs/designs/pysql-kernel-integration.md`` in
24
+ ``databricks-sql-kernel`` for the full integration design.
25
+ """
@@ -0,0 +1,134 @@
1
+ """Shared error-mapping primitives for the kernel backend.
2
+
3
+ The PyO3 boundary can produce two flavours of exception:
4
+
5
+ - ``databricks_sql_kernel.KernelError`` — the kernel's own
6
+ structured error type. Carries ``code`` / ``message`` /
7
+ ``sql_state`` / ``query_id`` / ``http_status`` / ``retryable`` /
8
+ ``vendor_code`` / ``error_code`` as attributes; mapped to a PEP
9
+ 249 exception class via ``_CODE_TO_EXCEPTION`` with the
10
+ attributes forwarded onto the re-raised exception so callers can
11
+ branch on ``err.code`` / ``err.sql_state`` without reaching
12
+ through ``__cause__``.
13
+ - Anything else — ``TypeError`` / ``OverflowError`` /
14
+ ``ValueError`` from PyO3 argument conversion, or arbitrary
15
+ extension-internal Python errors. These would otherwise propagate
16
+ raw to connector callers, breaking the DB-API contract that says
17
+ "only PEP 249 exception types cross the boundary". Wrapped in
18
+ ``OperationalError`` here.
19
+
20
+ These primitives live in their own module so both ``client.py``
21
+ (which orchestrates PyO3 calls) and ``result_set.py`` (which calls
22
+ ``fetch_next_batch`` on the same kernel handles) can share them
23
+ without ``result_set.py`` importing from ``client.py``.
24
+
25
+ Usage at every PyO3 call site is a plain try/except:
26
+
27
+ try:
28
+ stmt.execute()
29
+ except Exception as exc:
30
+ raise wrap_kernel_exception("execute_command", exc) from exc
31
+
32
+ The helper returns the mapped exception; callers raise it. Plain
33
+ ``try/except`` is preferred over a context manager: the control
34
+ flow is visible at the call site, the helper is a pure function
35
+ (trivial to test), and tracebacks don't carry an extra
36
+ ``__exit__`` frame.
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ from databricks.sql.exc import (
42
+ DatabaseError,
43
+ Error,
44
+ OperationalError,
45
+ ProgrammingError,
46
+ ServerOperationError,
47
+ )
48
+
49
+ try:
50
+ import databricks_sql_kernel as _kernel # type: ignore[import-not-found]
51
+ except ImportError as exc: # pragma: no cover - same hint as client.py
52
+ raise ImportError(
53
+ "use_kernel=True requires the databricks-sql-kernel extension, which "
54
+ "is not yet published on PyPI. Build and install it locally from the "
55
+ "databricks-sql-kernel repo:\n"
56
+ " cd databricks-sql-kernel/pyo3 && maturin develop --release\n"
57
+ "(into the same venv as databricks-sql-connector)."
58
+ ) from exc
59
+
60
+
61
+ # Map a kernel `code` slug to the PEP 249 exception class that best
62
+ # captures it. The match isn't a perfect 1:1 — PEP 249 has a
63
+ # narrower taxonomy than the kernel — so several kernel codes
64
+ # collapse onto the same Python exception. This table is the only
65
+ # place that mapping lives.
66
+ _CODE_TO_EXCEPTION = {
67
+ "InvalidArgument": ProgrammingError,
68
+ "Unauthenticated": OperationalError,
69
+ "PermissionDenied": OperationalError,
70
+ "NotFound": ProgrammingError,
71
+ "ResourceExhausted": OperationalError,
72
+ "Unavailable": OperationalError,
73
+ "Timeout": OperationalError,
74
+ "Cancelled": OperationalError,
75
+ "DataLoss": DatabaseError,
76
+ "Internal": DatabaseError,
77
+ "InvalidStatementHandle": ProgrammingError,
78
+ "NetworkError": OperationalError,
79
+ # `SqlError` is a server-side query failure (syntax error, missing
80
+ # object, etc.) — exactly what the Thrift backend surfaces as
81
+ # `ServerOperationError`. Match Thrift's contract so user code that
82
+ # catches `ServerOperationError` (a subclass of `DatabaseError`)
83
+ # works equivalently with `use_kernel=True`.
84
+ "SqlError": ServerOperationError,
85
+ "Unknown": DatabaseError,
86
+ }
87
+
88
+
89
def reraise_kernel_error(exc: "_kernel.KernelError") -> "Error":
    """Build the PEP 249 exception that corresponds to a kernel error.

    The exception class is chosen by looking up the kernel error's
    ``code`` attribute in ``_CODE_TO_EXCEPTION``; a missing attribute is
    treated as ``"Unknown"`` and an unmapped code falls back to
    ``DatabaseError``. The new exception's message is the kernel error's
    ``message`` attribute, or ``str(exc)`` when that attribute is absent.

    The kernel's structured attributes are then copied onto the new
    exception (``None`` for any attribute the kernel error lacks), so
    callers can inspect e.g. ``err.code`` or ``err.sql_state`` directly.

    The exception is returned, not raised, and ``__cause__`` is left
    untouched: the caller's ``raise ... from exc`` is what sets it.
    """
    forwarded_attrs = (
        "code",
        "sql_state",
        "error_code",
        "vendor_code",
        "http_status",
        "retryable",
        "query_id",
    )

    kernel_code = getattr(exc, "code", "Unknown")
    exception_type = _CODE_TO_EXCEPTION.get(kernel_code, DatabaseError)
    mapped = exception_type(getattr(exc, "message", str(exc)))

    # Mirror every structured field, present or not, so the attribute set
    # on the mapped exception is always the same.
    for name in forwarded_attrs:
        setattr(mapped, name, getattr(exc, name, None))
    return mapped
112
+
113
+
114
def wrap_kernel_exception(what: str, exc: BaseException) -> "Error":
    """Translate an exception caught at a PyO3 call site into a PEP 249 one.

    Three cases, checked in this order:

    1. ``exc`` is a ``KernelError``: it is converted by
       ``reraise_kernel_error`` (mapped class, structured attributes
       forwarded).
    2. ``exc`` is already a PEP 249 ``Error`` (for instance because an
       inner call site mapped it): the very same object is handed back.
    3. Anything else (``TypeError`` / ``ValueError`` / ... from PyO3
       argument conversion, or extension-internal errors): a new
       ``OperationalError`` whose message names ``what`` and embeds
       ``repr(exc)``.

    ``what`` is a short tag (typically the calling method's name) and is
    only used in the ``OperationalError`` message of case 3.

    The result is returned rather than raised; the caller chooses between
    ``raise`` and ``raise ... from exc``.
    """
    if isinstance(exc, _kernel.KernelError):
        mapped = reraise_kernel_error(exc)
    elif isinstance(exc, Error):
        # Already a DB-API exception: do not wrap it a second time.
        mapped = exc
    else:
        mapped = OperationalError(
            f"Unexpected error from databricks_sql_kernel during {what}: {exc!r}"
        )
    return mapped
@@ -0,0 +1,268 @@
1
+ """Translate the connector's auth configuration into
2
+ ``databricks_sql_kernel`` ``Session`` auth kwargs.
3
+
4
+ Three auth shapes are supported on the kernel path:
5
+
6
+ - **PAT** — extracted from the built ``AuthProvider`` (works for
7
+ ``AccessTokenAuthProvider``, including the ``TokenFederationProvider``
8
+ wrapper that ``get_python_sql_connector_auth_provider`` always
9
+ applies). Maps to the kernel's ``auth_type='pat'``.
10
+ - **OAuth M2M** — when the caller passes ``oauth_client_id`` +
11
+ ``oauth_client_secret``, the *raw* credentials are forwarded to the
12
+ kernel's ``auth_type='oauth-m2m'`` and the kernel owns the full
13
+ token lifecycle (acquire + refresh via workspace OIDC
14
+ client-credentials). We forward the raw pair rather than reusing the
15
+ connector's own OAuth provider because the kernel re-mints tokens
16
+ itself and the client secret is not recoverable from a built
17
+ provider.
18
+ - **OAuth U2M** — for ``auth_type`` ``databricks-oauth`` /
19
+ ``azure-oauth`` (the browser authorization-code flow), the optional
20
+ ``oauth_client_id`` / ``oauth_redirect_port`` are forwarded to the
21
+ kernel's ``auth_type='oauth-u2m'`` and the kernel runs the browser
22
+ flow itself.
23
+
24
+ A user-supplied custom ``credentials_provider`` is **rejected** on the
25
+ kernel path with ``NotSupportedError``: it's an opaque token source
26
+ with no extractable raw credentials, so the kernel can't own the
27
+ lifecycle. Such callers should pass ``oauth_client_id`` /
28
+ ``oauth_client_secret`` (M2M) instead. Anything else non-PAT also
29
+ raises ``NotSupportedError`` so the failure surfaces at session-open
30
+ with a clear message rather than deep inside the kernel.
31
+
32
+ The M2M / U2M decisions are driven by the *raw* connect() kwargs
33
+ (``auth_options``), not a built ``AuthProvider``. On the kernel path
34
+ the connector deliberately does **not** build its own OAuth provider
35
+ (that would eagerly run the U2M browser flow / M2M token exchange at
36
+ connect() time, before the kernel is consulted), so ``auth_provider``
37
+ is either a minimal PAT provider or ``None`` and the OAuth credentials
38
+ are available only from the raw kwargs.
39
+ """
40
+
41
+ from __future__ import annotations
42
+
43
+ import logging
44
+ import re
45
+ from typing import Any, Dict, Optional
46
+
47
+ from databricks.sql.auth.authenticators import AccessTokenAuthProvider, AuthProvider
48
+ from databricks.sql.auth.token_federation import TokenFederationProvider
49
+ from databricks.sql.exc import NotSupportedError, ProgrammingError
50
+
51
+ logger = logging.getLogger(__name__)
52
+
53
+
54
+ # RFC 6750 §2.1 defines the Authorization scheme as case-insensitive.
55
+ # The connector's auth providers all emit ``Bearer `` exactly today,
56
+ # but we match leniently in case a federation proxy or future provider
57
+ # normalises the casing differently — failing closed here would surface
58
+ # as a confusing ``ProgrammingError`` from the bridge.
59
+ _BEARER_PREFIX_LEN = len("Bearer ")
60
+
61
+ # Defense-in-depth: reject tokens containing ASCII control characters
62
+ # or whitespace. CR/LF/NUL in a token would let a misbehaving HTTP
63
+ # stack split or terminate the Authorization header line, opening a
64
+ # header-injection sink. Space (0x20) is included so leading-/
65
+ # embedded-whitespace tokens (e.g. ``"Bearer  doubled-space-token"``,
66
+ # tab-prefixed token) get rejected too — RFC 6750 §2.1 forbids
67
+ # whitespace within the credential token itself.
68
+ _TOKEN_REJECT_RE = re.compile(r"[\x00-\x20\x7f]")
69
+
70
+
71
def _is_pat(auth_provider: Optional[AuthProvider]) -> bool:
    """Tell whether ``auth_provider`` is backed by an access token (PAT).

    A provider counts as PAT when it is an ``AccessTokenAuthProvider``
    itself, or when it is a ``TokenFederationProvider`` whose
    ``external_provider`` is an ``AccessTokenAuthProvider``. The second
    form matters because the provider factory
    (``get_python_sql_connector_auth_provider``) hands back the base
    provider wrapped in a ``TokenFederationProvider``, so testing the
    outer object alone would miss it.

    ``None`` and every other provider type yield ``False``.
    """
    # Short-circuits exactly like the two-step check: the wrapper's
    # ``external_provider`` is only read when the outer object is not
    # already an AccessTokenAuthProvider.
    return isinstance(auth_provider, AccessTokenAuthProvider) or (
        isinstance(auth_provider, TokenFederationProvider)
        and isinstance(auth_provider.external_provider, AccessTokenAuthProvider)
    )
88
+
89
+
90
def _extract_bearer_token(auth_provider: Optional[AuthProvider]) -> Optional[str]:
    """Return the bearer token an ``AuthProvider`` would send, if any.

    The provider is asked to populate a fresh header dict through its
    public ``add_headers`` method, and the token is read back from the
    resulting ``Authorization`` value. Using that public surface avoids
    depending on provider-specific internals.

    Returns:
        The text following the ``Bearer `` prefix, or ``None`` when
        ``auth_provider`` is ``None``, when no (or an empty)
        ``Authorization`` header was written, or when the header does not
        start with a case-insensitive ``bearer `` prefix.

    Raises:
        ProgrammingError: the token matches ``_TOKEN_REJECT_RE`` (ASCII
            control characters or whitespace).
    """
    if auth_provider is None:
        return None

    collected: Dict[str, str] = {}
    auth_provider.add_headers(collected)

    header_value = collected.get("Authorization")
    if not header_value:
        return None

    # Compare the scheme prefix case-insensitively; anything that is not
    # a Bearer credential cannot be handed to the kernel as a PAT.
    scheme = header_value[:_BEARER_PREFIX_LEN]
    if scheme.lower() != "bearer ":
        return None

    credential = header_value[_BEARER_PREFIX_LEN:]
    if _TOKEN_REJECT_RE.search(credential) is not None:
        raise ProgrammingError(
            "Bearer token contains ASCII control characters or whitespace; "
            "refusing to forward it to the kernel auth bridge."
        )
    return credential
118
+
119
+
120
def kernel_auth_kwargs(
    auth_provider: Optional[AuthProvider],
    auth_options: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Translate connector auth inputs into kernel session auth kwargs.

    ``auth_provider`` is the already-built connector provider (may be
    ``None``). ``auth_options`` holds the raw auth-related ``connect()``
    kwargs; the keys read here are ``auth_type``, ``oauth_client_id``,
    ``oauth_client_secret``, ``oauth_redirect_port``, ``oauth_scopes``
    and ``credentials_provider``. A missing or empty ``auth_options``
    is treated as an empty dict.

    Decisions are taken in this fixed order; the first match wins:

    0. **Ambiguity guards** (only when ``oauth_client_secret`` is set):
       a custom ``credentials_provider`` alongside the secret, or a U2M
       ``auth_type`` (``databricks-oauth`` / ``azure-oauth``) alongside
       the secret, raise ``NotSupportedError`` before any flow is
       picked.
    1. **OAuth M2M** — both ``oauth_client_id`` and
       ``oauth_client_secret`` are truthy → ``auth_type="oauth-m2m"``
       with ``client_id`` / ``client_secret`` (plus ``oauth_scopes``
       when normalisation yields a value).
    2. **PAT** — ``_is_pat(auth_provider)`` → ``auth_type="pat"`` with
       the extracted ``access_token``.
    3. **OAuth U2M** — ``auth_type`` is ``databricks-oauth`` /
       ``azure-oauth`` → ``auth_type="oauth-u2m"`` with optional
       ``client_id``, ``redirect_port`` (coerced with ``int``) and
       ``oauth_scopes``.
    4. **Custom credentials_provider** → ``NotSupportedError``.
    5. Anything else, including no credentials → ``NotSupportedError``.

    Because M2M is tested before PAT, a caller supplying both an access
    token and the M2M pair gets the M2M kwargs.

    Raises ``NotSupportedError`` for the guard and unsupported cases
    above, and ``ProgrammingError`` when a PAT provider yields no
    bearer token. Errors raised by ``_extract_bearer_token``,
    ``_normalize_scopes`` or ``int(...)`` propagate unchanged.
    """
    opts = auth_options or {}
    u2m_auth_types = ("databricks-oauth", "azure-oauth")

    client_id = opts.get("oauth_client_id")
    client_secret = opts.get("oauth_client_secret")
    auth_type = opts.get("auth_type")
    custom_provider = opts.get("credentials_provider")
    wants_u2m = auth_type in u2m_auth_types

    # 0. Ambiguity guards. Both hinge on a client secret being present,
    #    so they share one outer check; their relative order is kept.
    if client_secret:
        if custom_provider is not None:
            raise NotSupportedError(
                "Ambiguous auth on use_kernel=True: both a custom "
                "credentials_provider and oauth_client_secret were provided. "
                "Pass exactly one — oauth_client_id + oauth_client_secret for "
                "kernel-managed M2M, or use the Thrift backend (default) for "
                "credentials_provider."
            )
        if wants_u2m:
            raise NotSupportedError(
                f"Ambiguous auth on use_kernel=True: auth_type={auth_type!r} selects "
                "the U2M browser flow, but oauth_client_secret was also provided "
                "(machine-to-machine). Drop oauth_client_secret for U2M, or drop "
                "auth_type for M2M."
            )

    # 1. OAuth M2M: needs the complete id + secret pair.
    if client_id and client_secret:
        m2m_kwargs: Dict[str, Any] = {
            "auth_type": "oauth-m2m",
            "client_id": client_id,
            "client_secret": client_secret,
        }
        m2m_scopes = _normalize_scopes(opts.get("oauth_scopes"))
        if m2m_scopes is not None:
            m2m_kwargs["oauth_scopes"] = m2m_scopes
        return m2m_kwargs

    # 2. PAT, whether bare or wrapped by the federation provider.
    if _is_pat(auth_provider):
        token = _extract_bearer_token(auth_provider)
        if token:
            return {"auth_type": "pat", "access_token": token}
        raise ProgrammingError(
            "PAT auth provider did not produce a Bearer Authorization "
            "header; cannot route through the kernel's PAT path"
        )

    # 3. OAuth U2M: only the optional knobs that were supplied are sent.
    if wants_u2m:
        u2m_kwargs: Dict[str, Any] = {"auth_type": "oauth-u2m"}
        if client_id:
            u2m_kwargs["client_id"] = client_id
        redirect_port = opts.get("oauth_redirect_port")
        if redirect_port is not None:
            u2m_kwargs["redirect_port"] = int(redirect_port)
        u2m_scopes = _normalize_scopes(opts.get("oauth_scopes"))
        if u2m_scopes is not None:
            u2m_kwargs["oauth_scopes"] = u2m_scopes
        return u2m_kwargs

    # 4. A custom credentials_provider is rejected here; the message
    #    redirects the caller to the raw M2M kwargs or the Thrift backend.
    if custom_provider is not None:
        raise NotSupportedError(
            "use_kernel=True does not support a custom credentials_provider. "
            "For OAuth machine-to-machine auth, pass oauth_client_id and "
            "oauth_client_secret so the kernel can manage the token lifecycle "
            "directly; or use the Thrift backend (default) with "
            "credentials_provider."
        )

    # 5. Nothing matched; this also covers auth_provider being None.
    if auth_provider is not None:
        provider_desc = type(auth_provider).__name__
    else:
        provider_desc = "no credentials"
    raise NotSupportedError(
        f"use_kernel=True requires PAT (access_token), OAuth M2M "
        f"(oauth_client_id + oauth_client_secret), or OAuth U2M "
        f"(auth_type='databricks-oauth' / 'azure-oauth'), but got "
        f"{provider_desc} with auth_type={auth_type!r}. Use the Thrift "
        "backend (default) for other auth flows."
    )
246
+
247
+
248
+ def _normalize_scopes(scopes: Any) -> Optional[list]:
249
+ """Normalise an ``oauth_scopes`` value to a list of strings, or
250
+ ``None`` to let the kernel apply its defaults.
251
+
252
+ Accepts a list/tuple of strings or a single space-delimited string
253
+ (the shape ``DatabricksOAuthProvider`` stores internally)."""
254
+ if scopes is None:
255
+ return None
256
+ if isinstance(scopes, str):
257
+ parts = scopes.split()
258
+ return parts or None
259
+ if isinstance(scopes, (list, tuple)):
260
+ parts = [str(s) for s in scopes if s]
261
+ return parts or None
262
+ # Anything else (int, dict, bool, …) is a caller error. Fail loudly
263
+ # rather than silently dropping the scopes to None and surprising
264
+ # the user with default scopes.
265
+ raise ProgrammingError(
266
+ f"oauth_scopes must be a list/tuple of strings or a space-delimited "
267
+ f"string, got {type(scopes).__name__}."
268
+ )