databricks-sql-connector 4.2.4__tar.gz → 4.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/CHANGELOG.md +15 -0
  2. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/PKG-INFO +2 -2
  3. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/pyproject.toml +3 -3
  4. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/__init__.py +1 -1
  5. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/retry.py +7 -27
  6. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/databricks_client.py +2 -0
  7. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/backend.py +5 -2
  8. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/requests.py +8 -0
  9. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/thrift_backend.py +17 -5
  10. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/client.py +39 -2
  11. databricks_sql_connector-4.2.6/src/databricks/sql/common/agent.py +52 -0
  12. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/feature_flag.py +1 -0
  13. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/unified_http_client.py +10 -2
  14. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/parameters/native.py +1 -1
  15. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/session.py +51 -0
  16. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/event.py +2 -0
  17. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/telemetry_client.py +44 -2
  18. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/utils.py +65 -1
  19. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/LICENSE +0 -0
  20. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/README.md +0 -0
  21. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/__init__.py +0 -0
  22. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/__init__.py +0 -0
  23. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/auth.py +0 -0
  24. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/auth_utils.py +0 -0
  25. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/authenticators.py +0 -0
  26. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/common.py +0 -0
  27. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/endpoint.py +0 -0
  28. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/oauth.py +0 -0
  29. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  30. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  31. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/token_federation.py +0 -0
  32. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/__init__.py +0 -0
  33. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/base.py +0 -0
  34. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/responses.py +0 -0
  35. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/queue.py +0 -0
  36. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/result_set.py +0 -0
  37. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/constants.py +0 -0
  38. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/conversion.py +0 -0
  39. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/filters.py +0 -0
  40. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/http_client.py +0 -0
  41. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/normalize.py +0 -0
  42. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/types.py +0 -0
  43. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/utils/__init__.py +0 -0
  44. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/utils/guid_utils.py +0 -0
  45. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  46. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  47. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/http.py +0 -0
  48. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/http_utils.py +0 -0
  49. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/url_utils.py +0 -0
  50. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/exc.py +0 -0
  51. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/experimental/__init__.py +0 -0
  52. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  53. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/parameters/__init__.py +0 -0
  54. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/parameters/py.typed +0 -0
  55. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/py.typed +0 -0
  56. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/result_set.py +0 -0
  57. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/circuit_breaker_manager.py +0 -0
  58. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/latency_logger.py +0 -0
  59. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/endpoint_models.py +0 -0
  60. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/enums.py +0 -0
  61. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/frontend_logs.py +0 -0
  62. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/telemetry_push_client.py +0 -0
  63. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/utils.py +0 -0
  64. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  65. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  66. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  67. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  68. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  69. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/__init__.py +0 -0
  70. {databricks_sql_connector-4.2.4 → databricks_sql_connector-4.2.6}/src/databricks/sql/types.py +0 -0
@@ -1,5 +1,20 @@
1
1
  # Release History
2
2
 
3
+ # 4.2.6 (2026-04-22)
4
+ - Add SPOG routing support for account-level vanity URLs (databricks/databricks-sql-python#767 by @msrathore-db)
5
+ - Fix dependency_manager: handle PEP 440 ~= compatible release syntax (databricks/databricks-sql-python#776 by @vikrantpuppala)
6
+ - Bump thrift to fix deprecation warning (databricks/databricks-sql-python#733 by @Korijn)
7
+ - Add AI coding agent detection to User-Agent header (databricks/databricks-sql-python#740 by @vikrantpuppala)
8
+ - Add statement-level query_tags support for SEA backend (databricks/databricks-sql-python#754 by @sreekanth-db)
9
+ - Update PyArrow concatenation of tables to use promote_options as default (databricks/databricks-sql-python#751 by @jprakash-db)
10
+ - Fix float inference to use DoubleParameter (64-bit) instead of FloatParameter (databricks/databricks-sql-python#742 by @Shubhambhusate)
11
+ - Allow specifying query_tags as a dict upon connection creation (databricks/databricks-sql-python#749 by @jiabin-hu)
12
+ - Add query_tags parameter support for execute methods (databricks/databricks-sql-python#736 by @jiabin-hu)
13
+
14
+ # 4.2.5 (2026-02-09)
15
+ - Fix feature-flag endpoint retries in gov region (databricks/databricks-sql-python#735 by @samikshya-db)
16
+ - Improve telemetry lifecycle management (databricks/databricks-sql-python#734 by @msrathore-db)
17
+
3
18
  # 4.2.4 (2026-01-07)
4
19
  - Fixed the exception handler close() on _TelemetryClientHolder (databricks/databricks-sql-python#723 by @msrathore-db)
5
20
  - Created util method to normalise http protocol in http path (databricks/databricks-sql-python#724 by @nikhilsuri-db)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: databricks-sql-connector
3
- Version: 4.2.4
3
+ Version: 4.2.6
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -30,7 +30,7 @@ Requires-Dist: pybreaker (>=1.0.0,<2.0.0)
30
30
  Requires-Dist: pyjwt (>=2.0.0,<3.0.0)
31
31
  Requires-Dist: python-dateutil (>=2.8.0,<3.0.0)
32
32
  Requires-Dist: requests (>=2.18.1,<3.0.0)
33
- Requires-Dist: thrift (>=0.16.0,<0.21.0)
33
+ Requires-Dist: thrift (>=0.22.0,<0.23.0)
34
34
  Requires-Dist: urllib3 (>=1.26)
35
35
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sql-python/issues
36
36
  Project-URL: Homepage, https://github.com/databricks/databricks-sql-python
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "4.2.4"
3
+ version = "4.2.6"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -10,7 +10,7 @@ include = ["CHANGELOG.md"]
10
10
 
11
11
  [tool.poetry.dependencies]
12
12
  python = "^3.8.0"
13
- thrift = ">=0.16.0,<0.21.0"
13
+ thrift = "~=0.22.0"
14
14
  pandas = [
15
15
  { version = ">=1.2.5,<2.4.0", python = ">=3.8,<3.13" },
16
16
  { version = ">=2.2.3,<2.4.0", python = ">=3.13" }
@@ -92,4 +92,4 @@ show_missing = true
92
92
  skip_covered = false
93
93
 
94
94
  [tool.coverage.xml]
95
- output = "coverage.xml"
95
+ output = "coverage.xml"
@@ -71,7 +71,7 @@ DATETIME = DBAPITypeObject("timestamp")
71
71
  DATE = DBAPITypeObject("date")
72
72
  ROWID = DBAPITypeObject()
73
73
 
74
- __version__ = "4.2.4"
74
+ __version__ = "4.2.6"
75
75
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
76
76
 
77
77
  # These two functions are pyhive legacy
@@ -373,6 +373,13 @@ class DatabricksRetryPolicy(Retry):
373
373
  if status_code == 403:
374
374
  return False, "403 codes are not retried"
375
375
 
376
+ # Request failed with 404. Don't retry for any command type.
377
+ if status_code == 404:
378
+ return (
379
+ False,
380
+ "Received 404 - NOT_FOUND. The requested resource does not exist.",
381
+ )
382
+
376
383
  # Request failed and server said NotImplemented. This isn't recoverable. Don't retry.
377
384
  if status_code == 501:
378
385
  return False, "Received code 501 from server."
@@ -381,33 +388,6 @@ class DatabricksRetryPolicy(Retry):
381
388
  if not self._is_method_retryable(method):
382
389
  return False, "Only POST requests are retried"
383
390
 
384
- # Request failed with 404 and was a GetOperationStatus. This is not recoverable. Don't retry.
385
- if status_code == 404 and self.command_type == CommandType.GET_OPERATION_STATUS:
386
- return (
387
- False,
388
- "GetOperationStatus received 404 code from Databricks. Operation was canceled.",
389
- )
390
-
391
- # Request failed with 404 because CloseSession returns 404 if you repeat the request.
392
- if (
393
- status_code == 404
394
- and self.command_type == CommandType.CLOSE_SESSION
395
- and len(self.history) > 0
396
- ):
397
- raise SessionAlreadyClosedError(
398
- "CloseSession received 404 code from Databricks. Session is already closed."
399
- )
400
-
401
- # Request failed with 404 because CloseOperation returns 404 if you repeat the request.
402
- if (
403
- status_code == 404
404
- and self.command_type == CommandType.CLOSE_OPERATION
405
- and len(self.history) > 0
406
- ):
407
- raise CursorAlreadyClosedError(
408
- "CloseOperation received 404 code from Databricks. Cursor is already closed."
409
- )
410
-
411
391
  # Request failed, was an ExecuteStatement and the command may have reached the server
412
392
  if (
413
393
  self.command_type == CommandType.EXECUTE_STATEMENT
@@ -83,6 +83,7 @@ class DatabricksClient(ABC):
83
83
  async_op: bool,
84
84
  enforce_embedded_schema_correctness: bool,
85
85
  row_limit: Optional[int] = None,
86
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
86
87
  ) -> Union[ResultSet, None]:
87
88
  """
88
89
  Executes a SQL command or query within the specified session.
@@ -102,6 +103,7 @@ class DatabricksClient(ABC):
102
103
  async_op: Whether to execute the command asynchronously
103
104
  enforce_embedded_schema_correctness: Whether to enforce schema correctness
104
105
  row_limit: Maximum number of rows in the response.
106
+ query_tags: Optional dictionary of query tags to apply for this query only.
105
107
 
106
108
  Returns:
107
109
  If async_op is False, returns a ResultSet object containing the
@@ -188,8 +188,9 @@ class SeaDatabricksClient(DatabricksClient):
188
188
  ValueError: If the warehouse ID cannot be extracted from the path
189
189
  """
190
190
 
191
- warehouse_pattern = re.compile(r".*/warehouses/(.+)")
192
- endpoint_pattern = re.compile(r".*/endpoints/(.+)")
191
+ # [^?&]+ stops at query params (e.g. ?o= for SPOG routing)
192
+ warehouse_pattern = re.compile(r".*/warehouses/([^?&]+)")
193
+ endpoint_pattern = re.compile(r".*/endpoints/([^?&]+)")
193
194
 
194
195
  for pattern in [warehouse_pattern, endpoint_pattern]:
195
196
  match = pattern.match(http_path)
@@ -463,6 +464,7 @@ class SeaDatabricksClient(DatabricksClient):
463
464
  async_op: bool,
464
465
  enforce_embedded_schema_correctness: bool,
465
466
  row_limit: Optional[int] = None,
467
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
466
468
  ) -> Union[SeaResultSet, None]:
467
469
  """
468
470
  Execute a SQL command using the SEA backend.
@@ -529,6 +531,7 @@ class SeaDatabricksClient(DatabricksClient):
529
531
  row_limit=row_limit,
530
532
  parameters=sea_parameters if sea_parameters else None,
531
533
  result_compression=result_compression,
534
+ query_tags=query_tags,
532
535
  )
533
536
 
534
537
  response_data = self._http_client._make_request(
@@ -31,6 +31,7 @@ class ExecuteStatementRequest:
31
31
  wait_timeout: str = "10s"
32
32
  on_wait_timeout: str = "CONTINUE"
33
33
  row_limit: Optional[int] = None
34
+ query_tags: Optional[Dict[str, Optional[str]]] = None
34
35
 
35
36
  def to_dict(self) -> Dict[str, Any]:
36
37
  """Convert the request to a dictionary for JSON serialization."""
@@ -60,6 +61,13 @@ class ExecuteStatementRequest:
60
61
  for param in self.parameters
61
62
  ]
62
63
 
64
+ # SEA API expects query_tags as an array of {key, value} objects.
65
+ # None/empty values are left to the server to handle as key-only tags.
66
+ if self.query_tags:
67
+ result["query_tags"] = [
68
+ {"key": k, "value": v} for k, v in self.query_tags.items()
69
+ ]
70
+
63
71
  return result
64
72
 
65
73
 
@@ -5,7 +5,7 @@ import logging
5
5
  import math
6
6
  import time
7
7
  import threading
8
- from typing import List, Optional, Union, Any, TYPE_CHECKING
8
+ from typing import Dict, List, Optional, Union, Any, TYPE_CHECKING
9
9
  from uuid import UUID
10
10
 
11
11
  from databricks.sql.common.unified_http_client import UnifiedHttpClient
@@ -53,6 +53,7 @@ from databricks.sql.utils import (
53
53
  convert_arrow_based_set_to_arrow_table,
54
54
  convert_decimals_in_arrow_table,
55
55
  convert_column_based_set_to_arrow_table,
56
+ serialize_query_tags,
56
57
  )
57
58
  from databricks.sql.types import SSLOptions
58
59
  from databricks.sql.backend.databricks_client import DatabricksClient
@@ -1003,6 +1004,7 @@ class ThriftDatabricksClient(DatabricksClient):
1003
1004
  async_op=False,
1004
1005
  enforce_embedded_schema_correctness=False,
1005
1006
  row_limit: Optional[int] = None,
1007
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1006
1008
  ) -> Union["ResultSet", None]:
1007
1009
  thrift_handle = session_id.to_thrift_handle()
1008
1010
  if not thrift_handle:
@@ -1022,6 +1024,19 @@ class ThriftDatabricksClient(DatabricksClient):
1022
1024
  # DBR should be changed to use month_day_nano_interval
1023
1025
  intervalTypesAsArrow=False,
1024
1026
  )
1027
+
1028
+ # Build confOverlay with default configs and query_tags
1029
+ merged_conf_overlay = {
1030
+ # We want to receive proper Timestamp arrow types.
1031
+ "spark.thriftserver.arrowBasedRowSet.timestampAsString": "false"
1032
+ }
1033
+
1034
+ # Serialize and add query_tags to confOverlay if provided
1035
+ if query_tags:
1036
+ serialized_tags = serialize_query_tags(query_tags)
1037
+ if serialized_tags:
1038
+ merged_conf_overlay["query_tags"] = serialized_tags
1039
+
1025
1040
  req = ttypes.TExecuteStatementReq(
1026
1041
  sessionHandle=thrift_handle,
1027
1042
  statement=operation,
@@ -1036,10 +1051,7 @@ class ThriftDatabricksClient(DatabricksClient):
1036
1051
  canReadArrowResult=True if pyarrow else False,
1037
1052
  canDecompressLZ4Result=lz4_compression,
1038
1053
  canDownloadResult=use_cloud_fetch,
1039
- confOverlay={
1040
- # We want to receive proper Timestamp arrow types.
1041
- "spark.thriftserver.arrowBasedRowSet.timestampAsString": "false"
1042
- },
1054
+ confOverlay=merged_conf_overlay,
1043
1055
  useArrowNativeTypes=spark_arrow_types,
1044
1056
  parameters=parameters,
1045
1057
  enforceEmbeddedSchemaCorrectness=enforce_embedded_schema_correctness,
@@ -35,6 +35,8 @@ from databricks.sql.utils import (
35
35
  ColumnTable,
36
36
  ColumnQueue,
37
37
  build_client_context,
38
+ get_session_config_value,
39
+ serialize_query_tags,
38
40
  )
39
41
  from databricks.sql.parameters.native import (
40
42
  DbsqlParameterBase,
@@ -105,6 +107,7 @@ class Connection:
105
107
  schema: Optional[str] = None,
106
108
  _use_arrow_native_complex_types: Optional[bool] = True,
107
109
  ignore_transactions: bool = True,
110
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
108
111
  **kwargs,
109
112
  ) -> None:
110
113
  """
@@ -280,6 +283,15 @@ class Connection:
280
283
  "spark.sql.thriftserver.metadata.metricview.enabled"
281
284
  ] = "true"
282
285
 
286
+ if query_tags is not None:
287
+ if session_configuration is None:
288
+ session_configuration = {}
289
+ serialized = serialize_query_tags(query_tags)
290
+ if serialized:
291
+ session_configuration["QUERY_TAGS"] = serialized
292
+ else:
293
+ session_configuration.pop("QUERY_TAGS", None)
294
+
283
295
  self.disable_pandas = kwargs.get("_disable_pandas", False)
284
296
  self.lz4_compression = kwargs.get("enable_query_result_lz4_compression", True)
285
297
  self.use_cloud_fetch = kwargs.get("use_cloud_fetch", True)
@@ -305,6 +317,8 @@ class Connection:
305
317
  )
306
318
  self.session.open()
307
319
  except Exception as e:
320
+ # Respect user's telemetry preference even during connection failure
321
+ enable_telemetry = kwargs.get("enable_telemetry", True)
308
322
  TelemetryClientFactory.connection_failure_log(
309
323
  error_name="Exception",
310
324
  error_message=str(e),
@@ -315,6 +329,7 @@ class Connection:
315
329
  user_agent=self.session.useragent_header
316
330
  if hasattr(self, "session")
317
331
  else None,
332
+ enable_telemetry=enable_telemetry,
318
333
  )
319
334
  raise e
320
335
 
@@ -338,6 +353,7 @@ class Connection:
338
353
  host_url=self.session.host,
339
354
  batch_size=self.telemetry_batch_size,
340
355
  client_context=client_context,
356
+ extra_headers=self.session.get_spog_headers(),
341
357
  )
342
358
 
343
359
  self._telemetry_client = TelemetryClientFactory.get_telemetry_client(
@@ -386,6 +402,7 @@ class Connection:
386
402
  support_many_parameters=True, # Native parameters supported
387
403
  enable_complex_datatype_support=_use_arrow_native_complex_types,
388
404
  allowed_volume_ingestion_paths=self.staging_allowed_local_path,
405
+ query_tags=get_session_config_value(session_configuration, "query_tags"),
389
406
  )
390
407
 
391
408
  self._telemetry_client.export_initial_telemetry_log(
@@ -1258,6 +1275,7 @@ class Cursor:
1258
1275
  parameters: Optional[TParameterCollection] = None,
1259
1276
  enforce_embedded_schema_correctness=False,
1260
1277
  input_stream: Optional[BinaryIO] = None,
1278
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1261
1279
  ) -> "Cursor":
1262
1280
  """
1263
1281
  Execute a query and wait for execution to complete.
@@ -1288,6 +1306,10 @@ class Cursor:
1288
1306
  Both will result in the query equivalent to "SELECT * FROM table WHERE field = 'foo'
1289
1307
  being sent to the server
1290
1308
 
1309
+ :param query_tags: Optional dictionary of query tags to apply for this query only.
1310
+ Tags are key-value pairs that can be used to identify and categorize queries.
1311
+ Example: {"team": "data-eng", "application": "etl"}
1312
+
1291
1313
  :returns self
1292
1314
  """
1293
1315
 
@@ -1328,6 +1350,7 @@ class Cursor:
1328
1350
  async_op=False,
1329
1351
  enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
1330
1352
  row_limit=self.row_limit,
1353
+ query_tags=query_tags,
1331
1354
  )
1332
1355
 
1333
1356
  if self.active_result_set and self.active_result_set.is_staging_operation:
@@ -1344,6 +1367,7 @@ class Cursor:
1344
1367
  operation: str,
1345
1368
  parameters: Optional[TParameterCollection] = None,
1346
1369
  enforce_embedded_schema_correctness=False,
1370
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1347
1371
  ) -> "Cursor":
1348
1372
  """
1349
1373
 
@@ -1351,6 +1375,9 @@ class Cursor:
1351
1375
 
1352
1376
  :param operation:
1353
1377
  :param parameters:
1378
+ :param query_tags: Optional dictionary of query tags to apply for this query only.
1379
+ Tags are key-value pairs that can be used to identify and categorize queries.
1380
+ Example: {"team": "data-eng", "application": "etl"}
1354
1381
  :return:
1355
1382
  """
1356
1383
 
@@ -1387,6 +1414,7 @@ class Cursor:
1387
1414
  async_op=True,
1388
1415
  enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
1389
1416
  row_limit=self.row_limit,
1417
+ query_tags=query_tags,
1390
1418
  )
1391
1419
 
1392
1420
  return self
@@ -1443,7 +1471,12 @@ class Cursor:
1443
1471
  session_id_hex=self.connection.get_session_id_hex(),
1444
1472
  )
1445
1473
 
1446
- def executemany(self, operation, seq_of_parameters):
1474
+ def executemany(
1475
+ self,
1476
+ operation,
1477
+ seq_of_parameters,
1478
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1479
+ ):
1447
1480
  """
1448
1481
  Execute the operation once for every set of passed in parameters.
1449
1482
 
@@ -1452,10 +1485,14 @@ class Cursor:
1452
1485
 
1453
1486
  Only the final result set is retained.
1454
1487
 
1488
+ :param query_tags: Optional dictionary of query tags to apply for all queries in this batch.
1489
+ Tags are key-value pairs that can be used to identify and categorize queries.
1490
+ Example: {"team": "data-eng", "application": "etl"}
1491
+
1455
1492
  :returns self
1456
1493
  """
1457
1494
  for parameters in seq_of_parameters:
1458
- self.execute(operation, parameters)
1495
+ self.execute(operation, parameters, query_tags=query_tags)
1459
1496
  return self
1460
1497
 
1461
1498
  @log_latency(StatementType.METADATA)
@@ -0,0 +1,52 @@
1
+ """
2
+ Detects whether the Python SQL connector is being invoked by an AI coding agent
3
+ by checking for well-known environment variables that agents set in their spawned
4
+ shell processes.
5
+
6
+ Detection only succeeds when exactly one agent environment variable is present,
7
+ to avoid ambiguous attribution when multiple agent environments overlap.
8
+
9
+ Adding a new agent requires only a new entry in KNOWN_AGENTS.
10
+
11
+ References for each environment variable:
12
+ - ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable.
13
+ - CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1)
14
+ - CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0)
15
+ - CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs)
16
+ - CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist.
17
+ - GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets GEMINI_CLI=1)
18
+ - OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1)
19
+ """
20
+
21
+ import os
22
+
23
+ KNOWN_AGENTS = [
24
+ ("ANTIGRAVITY_AGENT", "antigravity"),
25
+ ("CLAUDECODE", "claude-code"),
26
+ ("CLINE_ACTIVE", "cline"),
27
+ ("CODEX_CI", "codex"),
28
+ ("CURSOR_AGENT", "cursor"),
29
+ ("GEMINI_CLI", "gemini-cli"),
30
+ ("OPENCODE", "opencode"),
31
+ ]
32
+
33
+
34
+ def detect(env=None):
35
+ """Detect which AI coding agent (if any) is driving the current process.
36
+
37
+ Args:
38
+ env: Optional dict-like object for environment variable lookup.
39
+ Defaults to os.environ. Exists for testability.
40
+
41
+ Returns:
42
+ The agent product string if exactly one agent is detected,
43
+ or an empty string otherwise.
44
+ """
45
+ if env is None:
46
+ env = os.environ
47
+
48
+ detected = [product for var, product in KNOWN_AGENTS if env.get(var)]
49
+
50
+ if len(detected) == 1:
51
+ return detected[0]
52
+ return ""
@@ -113,6 +113,7 @@ class FeatureFlagsContext:
113
113
  # Authenticate the request
114
114
  self._connection.session.auth_provider.add_headers(headers)
115
115
  headers["User-Agent"] = self._connection.session.useragent_header
116
+ headers.update(self._connection.session.get_spog_headers())
116
117
 
117
118
  response = self._http_client.request(
118
119
  HttpMethod.GET, self._feature_flag_endpoint, headers=headers, timeout=30
@@ -217,7 +217,7 @@ class UnifiedHttpClient:
217
217
  logger.debug("Error checking proxy bypass for host %s: %s", target_host, e)
218
218
  return True
219
219
 
220
- def _get_pool_manager_for_url(self, url: str) -> urllib3.PoolManager:
220
+ def _get_pool_manager_for_url(self, url: str) -> Optional[urllib3.PoolManager]:
221
221
  """
222
222
  Get the appropriate pool manager for the given URL.
223
223
 
@@ -225,7 +225,7 @@ class UnifiedHttpClient:
225
225
  url: The target URL
226
226
 
227
227
  Returns:
228
- PoolManager instance (either direct or proxy)
228
+ PoolManager instance (either direct or proxy), or None if client is closed
229
229
  """
230
230
  parsed_url = urllib.parse.urlparse(url)
231
231
  target_host = parsed_url.hostname
@@ -291,6 +291,14 @@ class UnifiedHttpClient:
291
291
  # Select appropriate pool manager based on target URL
292
292
  pool_manager = self._get_pool_manager_for_url(url)
293
293
 
294
+ # DEFENSIVE: Check if pool_manager is None (client closing/closed)
295
+ # This prevents AttributeError race condition when telemetry cleanup happens
296
+ if pool_manager is None:
297
+ logger.debug(
298
+ "HTTP client closing or closed, cannot make request to %s", url
299
+ )
300
+ raise RequestError("HTTP client is closing or has been closed")
301
+
294
302
  response = None
295
303
 
296
304
  try:
@@ -659,7 +659,7 @@ def dbsql_parameter_from_primitive(
659
659
  elif isinstance(value, str):
660
660
  return StringParameter(value=value, name=name)
661
661
  elif isinstance(value, float):
662
- return FloatParameter(value=value, name=name)
662
+ return DoubleParameter(value=value, name=name)
663
663
  elif isinstance(value, datetime.datetime):
664
664
  return TimestampParameter(value=value, name=name)
665
665
  elif isinstance(value, datetime.date):
@@ -13,6 +13,7 @@ from databricks.sql.backend.sea.backend import SeaDatabricksClient
13
13
  from databricks.sql.backend.databricks_client import DatabricksClient
14
14
  from databricks.sql.backend.types import SessionId, BackendType
15
15
  from databricks.sql.common.unified_http_client import UnifiedHttpClient
16
+ from databricks.sql.common.agent import detect as detect_agent
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
@@ -64,9 +65,21 @@ class Session:
64
65
  else:
65
66
  self.useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__)
66
67
 
68
+ agent_product = detect_agent()
69
+ if agent_product:
70
+ self.useragent_header += " agent/{}".format(agent_product)
71
+
67
72
  base_headers = [("User-Agent", self.useragent_header)]
68
73
  all_headers = (http_headers or []) + base_headers
69
74
 
75
+ # Extract ?o=<workspaceId> from http_path for SPOG routing.
76
+ # On SPOG hosts, the httpPath contains ?o=<workspaceId> which routes Thrift
77
+ # requests via the URL. For SEA, telemetry, and feature flags (which use
78
+ # separate endpoints), we inject x-databricks-org-id as an HTTP header.
79
+ self._spog_headers = self._extract_spog_headers(http_path, all_headers)
80
+ if self._spog_headers:
81
+ all_headers = all_headers + list(self._spog_headers.items())
82
+
70
83
  self.ssl_options = SSLOptions(
71
84
  # Double negation is generally a bad thing, but we have to keep backward compatibility
72
85
  tls_verify=not kwargs.get(
@@ -131,6 +144,44 @@ class Session:
131
144
  }
132
145
  return databricks_client_class(**common_args)
133
146
 
147
+ @staticmethod
148
+ def _extract_spog_headers(http_path, existing_headers):
149
+ """Extract ?o=<workspaceId> from http_path and return as a header dict for SPOG routing."""
150
+ if not http_path or "?" not in http_path:
151
+ return {}
152
+
153
+ from urllib.parse import parse_qs
154
+
155
+ query_string = http_path.split("?", 1)[1]
156
+ params = parse_qs(query_string)
157
+ org_id = params.get("o", [None])[0]
158
+ if not org_id:
159
+ logger.debug(
160
+ "SPOG header extraction: http_path has query string but no ?o= param, "
161
+ "skipping x-databricks-org-id injection"
162
+ )
163
+ return {}
164
+
165
+ # Don't override if explicitly set
166
+ if any(k == "x-databricks-org-id" for k, _ in existing_headers):
167
+ logger.debug(
168
+ "SPOG header extraction: x-databricks-org-id already set by caller, "
169
+ "not overriding with ?o=%s from http_path",
170
+ org_id,
171
+ )
172
+ return {}
173
+
174
+ logger.debug(
175
+ "SPOG header extraction: injecting x-databricks-org-id=%s "
176
+ "(extracted from ?o= in http_path)",
177
+ org_id,
178
+ )
179
+ return {"x-databricks-org-id": org_id}
180
+
181
+ def get_spog_headers(self):
182
+ """Returns SPOG routing headers (x-databricks-org-id) if ?o= was in http_path."""
183
+ return dict(self._spog_headers)
184
+
134
185
  def open(self):
135
186
  self._session_id = self.backend.open_session(
136
187
  session_configuration=self.session_configuration,
@@ -57,6 +57,7 @@ class DriverConnectionParameters(JsonSerializableMixin):
57
57
  support_many_parameters (bool): Whether many parameters are supported
58
58
  enable_complex_datatype_support (bool): Whether complex datatypes are supported
59
59
  allowed_volume_ingestion_paths (str): Allowed paths for volume ingestion
60
+ query_tags (str): Query tags for tracking and attribution
60
61
  """
61
62
 
62
63
  http_path: str
@@ -84,6 +85,7 @@ class DriverConnectionParameters(JsonSerializableMixin):
84
85
  support_many_parameters: Optional[bool] = None
85
86
  enable_complex_datatype_support: Optional[bool] = None
86
87
  allowed_volume_ingestion_paths: Optional[str] = None
88
+ query_tags: Optional[str] = None
87
89
 
88
90
 
89
91
  @dataclass
@@ -42,6 +42,7 @@ from databricks.sql.telemetry.utils import BaseTelemetryClient
42
42
  from databricks.sql.common.feature_flag import FeatureFlagsContextFactory
43
43
  from databricks.sql.common.unified_http_client import UnifiedHttpClient
44
44
  from databricks.sql.common.http import HttpMethod
45
+ from databricks.sql.exc import RequestError
45
46
  from databricks.sql.telemetry.telemetry_push_client import (
46
47
  ITelemetryPushClient,
47
48
  TelemetryPushClient,
@@ -187,6 +188,7 @@ class TelemetryClient(BaseTelemetryClient):
187
188
  executor,
188
189
  batch_size: int,
189
190
  client_context,
191
+ extra_headers: Optional[Dict[str, str]] = None,
190
192
  ) -> None:
191
193
  logger.debug("Initializing TelemetryClient for connection: %s", session_id_hex)
192
194
  self._telemetry_enabled = telemetry_enabled
@@ -194,6 +196,7 @@ class TelemetryClient(BaseTelemetryClient):
194
196
  self._session_id_hex = session_id_hex
195
197
  self._auth_provider = auth_provider
196
198
  self._user_agent = None
199
+ self._extra_headers = extra_headers or {}
197
200
 
198
201
  # OPTIMIZATION: Use lock-free Queue instead of list + lock
199
202
  # Queue is thread-safe internally and has better performance under concurrency
@@ -286,6 +289,8 @@ class TelemetryClient(BaseTelemetryClient):
286
289
  if self._auth_provider:
287
290
  self._auth_provider.add_headers(headers)
288
291
 
292
+ headers.update(self._extra_headers)
293
+
289
294
  try:
290
295
  logger.debug("Submitting telemetry request to thread pool")
291
296
 
@@ -417,10 +422,38 @@ class TelemetryClient(BaseTelemetryClient):
417
422
  )
418
423
 
419
424
def close(self):
    """Flush any remaining telemetry events; the HTTP client stays open.

    self._http_client is intentionally NOT closed here. Per the design
    rationale for this method:

    - _flush() hands work to the executor, and that work still needs
      _http_client after close() has returned.
    - Closing the client at this point would make those async callbacks
      fail.
    - Pending futures reference this object through bound methods, which
      keeps it (and its _http_client) alive until they finish.
    - Final cleanup of the client is left to __del__ at garbage collection.

    This keeps telemetry asynchronous without racing against in-flight
    requests.
    """
    session_id = self._session_id_hex
    logger.debug("Closing TelemetryClient for connection %s", session_id)
    self._flush()
423
442
 
443
def __del__(self):
    """Close the HTTP client, best-effort, when this object is collected.

    Nothing happens if _http_client was never assigned or is falsy. Any
    exception raised while closing is swallowed on purpose so that
    finalization never propagates an error.
    """
    try:
        http_client = getattr(self, "_http_client", None)
        if http_client:
            http_client.close()
    except Exception:
        # Deliberate best-effort: a finalizer has nowhere to report failure.
        pass
456
+
424
457
 
425
458
  class _TelemetryClientHolder:
426
459
  """
@@ -558,6 +591,7 @@ class TelemetryClientFactory:
558
591
  host_url,
559
592
  batch_size,
560
593
  client_context,
594
+ extra_headers=None,
561
595
  ):
562
596
  """
563
597
  Initialize a telemetry client for a specific connection if telemetry is enabled.
@@ -598,6 +632,7 @@ class TelemetryClientFactory:
598
632
  executor=TelemetryClientFactory._executor,
599
633
  batch_size=batch_size,
600
634
  client_context=client_context,
635
+ extra_headers=extra_headers,
601
636
  )
602
637
  TelemetryClientFactory._clients[
603
638
  host_url
@@ -674,7 +709,8 @@ class TelemetryClientFactory:
674
709
  )
675
710
  try:
676
711
  TelemetryClientFactory._stop_flush_thread()
677
- TelemetryClientFactory._executor.shutdown(wait=True)
712
+ # Use wait=False to allow process to exit immediately
713
+ TelemetryClientFactory._executor.shutdown(wait=False)
678
714
  except Exception as e:
679
715
  logger.debug("Failed to shutdown thread pool executor: %s", e)
680
716
  TelemetryClientFactory._executor = None
@@ -689,9 +725,15 @@ class TelemetryClientFactory:
689
725
  port: int,
690
726
  client_context,
691
727
  user_agent: Optional[str] = None,
728
+ enable_telemetry: bool = True,
692
729
  ):
693
730
  """Send error telemetry when connection creation fails, using provided client context"""
694
731
 
732
+ # Respect user's telemetry preference - don't force-enable
733
+ if not enable_telemetry:
734
+ logger.debug("Telemetry disabled, skipping connection failure log")
735
+ return
736
+
695
737
  UNAUTH_DUMMY_SESSION_ID = "unauth_session_id"
696
738
 
697
739
  TelemetryClientFactory.initialize_telemetry_client(
@@ -38,6 +38,21 @@ DEFAULT_ERROR_CONTEXT = "Unknown error"
38
38
  logger = logging.getLogger(__name__)
39
39
 
40
40
 
41
def get_session_config_value(
    session_configuration: Optional[Dict[str, Any]], key: str
) -> Optional[str]:
    """Look up a session configuration entry, ignoring the case of the key.

    Args:
        session_configuration: Mapping of configuration names to values; may
            be None or empty.
        key: Name to look for, compared after upper-casing both sides.

    Returns:
        The first matching value converted with ``str()``, or None when the
        configuration is missing/empty, no name matches, or the matching
        value is itself None.
    """
    if not session_configuration:
        return None

    wanted = key.upper()
    for name in session_configuration:
        if name.upper() != wanted:
            continue
        raw = session_configuration[name]
        return None if raw is None else str(raw)

    return None
54
+
55
+
41
56
  class ResultSetQueue(ABC):
42
57
  @abstractmethod
43
58
  def next_n_rows(self, num_rows: int):
@@ -880,7 +895,50 @@ def concat_table_chunks(
880
895
  result_table[j].extend(table_chunks[i].column_table[j])
881
896
  return ColumnTable(result_table, table_chunks[0].column_names)
882
897
  else:
883
- return pyarrow.concat_tables(table_chunks)
898
+ return pyarrow.concat_tables(table_chunks, promote_options="default")
899
+
900
+
901
def serialize_query_tags(
    query_tags: Optional[Dict[str, Optional[str]]]
) -> Optional[str]:
    """
    Render a query_tags mapping as a single comma-separated string.

    Each entry becomes "key:value"; entries are joined with commas in the
    mapping's iteration order, e.g. "key1:value1,key2:value2".

    Escaping rules:
    - A tag whose value is None is written as the bare key, with no colon.
    - In values, every backslash, colon and comma is prefixed with a backslash.
    - In keys, only backslashes are escaped (doubled); colons and commas in
      keys are emitted unchanged.

    Args:
        query_tags: Mapping of tag names to optional string values.

    Returns:
        The serialized string, or None if query_tags is None or empty.
    """
    if not query_tags:
        return None

    # One pass over the value; equivalent to escaping the backslash first and
    # then the colon and comma, because each character maps independently.
    value_escapes = {ord("\\"): "\\\\", ord(":"): "\\:", ord(","): "\\,"}

    def render(tag_name: str, tag_value: Optional[str]) -> str:
        """Format one tag as "key" or "key:value" with escaping applied."""
        name_part = tag_name.replace("\\", "\\\\")
        if tag_value is None:
            return name_part
        return name_part + ":" + tag_value.translate(value_escapes)

    return ",".join(render(name, val) for name, val in query_tags.items())
884
942
 
885
943
 
886
944
  def build_client_context(server_hostname: str, version: str, **kwargs):
@@ -899,12 +957,18 @@ def build_client_context(server_hostname: str, version: str, **kwargs):
899
957
  )
900
958
 
901
959
  # Build user agent
960
+ from databricks.sql.common.agent import detect as detect_agent
961
+
902
962
  user_agent_entry = kwargs.get("user_agent_entry", "")
903
963
  if user_agent_entry:
904
964
  user_agent = f"PyDatabricksSqlConnector/{version} ({user_agent_entry})"
905
965
  else:
906
966
  user_agent = f"PyDatabricksSqlConnector/{version}"
907
967
 
968
+ agent_product = detect_agent()
969
+ if agent_product:
970
+ user_agent += f" agent/{agent_product}"
971
+
908
972
  # Explicitly construct ClientContext with proper types
909
973
  return ClientContext(
910
974
  hostname=server_hostname,