databricks-sql-connector 4.2.5__tar.gz → 4.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/CHANGELOG.md +11 -0
  2. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/PKG-INFO +2 -2
  3. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/pyproject.toml +2 -2
  4. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/__init__.py +1 -1
  5. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/databricks_client.py +2 -0
  6. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/backend.py +5 -2
  7. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/requests.py +8 -0
  8. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/thrift_backend.py +17 -5
  9. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/client.py +34 -2
  10. databricks_sql_connector-4.2.6/src/databricks/sql/common/agent.py +52 -0
  11. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/feature_flag.py +1 -0
  12. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/parameters/native.py +1 -1
  13. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/session.py +51 -0
  14. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/telemetry_client.py +6 -0
  15. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/utils.py +50 -1
  16. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/LICENSE +0 -0
  17. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/README.md +0 -0
  18. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/__init__.py +0 -0
  19. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/__init__.py +0 -0
  20. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/auth.py +0 -0
  21. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/auth_utils.py +0 -0
  22. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/authenticators.py +0 -0
  23. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/common.py +0 -0
  24. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/endpoint.py +0 -0
  25. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/oauth.py +0 -0
  26. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  27. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/retry.py +0 -0
  28. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  29. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/auth/token_federation.py +0 -0
  30. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/__init__.py +0 -0
  31. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/base.py +0 -0
  32. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/models/responses.py +0 -0
  33. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/queue.py +0 -0
  34. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/result_set.py +0 -0
  35. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/constants.py +0 -0
  36. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/conversion.py +0 -0
  37. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/filters.py +0 -0
  38. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/http_client.py +0 -0
  39. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/sea/utils/normalize.py +0 -0
  40. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/types.py +0 -0
  41. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/utils/__init__.py +0 -0
  42. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/backend/utils/guid_utils.py +0 -0
  43. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  44. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  45. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/http.py +0 -0
  46. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/http_utils.py +0 -0
  47. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/unified_http_client.py +0 -0
  48. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/common/url_utils.py +0 -0
  49. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/exc.py +0 -0
  50. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/experimental/__init__.py +0 -0
  51. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  52. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/parameters/__init__.py +0 -0
  53. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/parameters/py.typed +0 -0
  54. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/py.typed +0 -0
  55. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/result_set.py +0 -0
  56. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/circuit_breaker_manager.py +0 -0
  57. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/latency_logger.py +0 -0
  58. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/endpoint_models.py +0 -0
  59. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/enums.py +0 -0
  60. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/event.py +0 -0
  61. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/models/frontend_logs.py +0 -0
  62. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/telemetry_push_client.py +0 -0
  63. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/telemetry/utils.py +0 -0
  64. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  65. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  66. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  67. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  68. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  69. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/thrift_api/__init__.py +0 -0
  70. {databricks_sql_connector-4.2.5 → databricks_sql_connector-4.2.6}/src/databricks/sql/types.py +0 -0
@@ -1,5 +1,16 @@
1
1
  # Release History
2
2
 
3
+ # 4.2.6 (2026-04-22)
4
+ - Add SPOG routing support for account-level vanity URLs (databricks/databricks-sql-python#767 by @msrathore-db)
5
+ - Fix dependency_manager: handle PEP 440 ~= compatible release syntax (databricks/databricks-sql-python#776 by @vikrantpuppala)
6
+ - Bump thrift to fix deprecation warning (databricks/databricks-sql-python#733 by @Korijn)
7
+ - Add AI coding agent detection to User-Agent header (databricks/databricks-sql-python#740 by @vikrantpuppala)
8
+ - Add statement-level query_tags support for SEA backend (databricks/databricks-sql-python#754 by @sreekanth-db)
9
+ - Update PyArrow concatenation of tables to use promote_options as default (databricks/databricks-sql-python#751 by @jprakash-db)
10
+ - Fix float inference to use DoubleParameter (64-bit) instead of FloatParameter (databricks/databricks-sql-python#742 by @Shubhambhusate)
11
+ - Allow specifying query_tags as a dict upon connection creation (databricks/databricks-sql-python#749 by @jiabin-hu)
12
+ - Add query_tags parameter support for execute methods (databricks/databricks-sql-python#736 by @jiabin-hu)
13
+
3
14
  # 4.2.5 (2026-02-09)
4
15
  - Fix feature-flag endpoint retries in gov region (databricks/databricks-sql-python#735 by @samikshya-db)
5
16
  - Improve telemetry lifecycle management (databricks/databricks-sql-python#734 by @msrathore-db)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: databricks-sql-connector
3
- Version: 4.2.5
3
+ Version: 4.2.6
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -30,7 +30,7 @@ Requires-Dist: pybreaker (>=1.0.0,<2.0.0)
30
30
  Requires-Dist: pyjwt (>=2.0.0,<3.0.0)
31
31
  Requires-Dist: python-dateutil (>=2.8.0,<3.0.0)
32
32
  Requires-Dist: requests (>=2.18.1,<3.0.0)
33
- Requires-Dist: thrift (>=0.16.0,<0.21.0)
33
+ Requires-Dist: thrift (>=0.22.0,<0.23.0)
34
34
  Requires-Dist: urllib3 (>=1.26)
35
35
  Project-URL: Bug Tracker, https://github.com/databricks/databricks-sql-python/issues
36
36
  Project-URL: Homepage, https://github.com/databricks/databricks-sql-python
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "4.2.5"
3
+ version = "4.2.6"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -10,7 +10,7 @@ include = ["CHANGELOG.md"]
10
10
 
11
11
  [tool.poetry.dependencies]
12
12
  python = "^3.8.0"
13
- thrift = ">=0.16.0,<0.21.0"
13
+ thrift = "~=0.22.0"
14
14
  pandas = [
15
15
  { version = ">=1.2.5,<2.4.0", python = ">=3.8,<3.13" },
16
16
  { version = ">=2.2.3,<2.4.0", python = ">=3.13" }
@@ -71,7 +71,7 @@ DATETIME = DBAPITypeObject("timestamp")
71
71
  DATE = DBAPITypeObject("date")
72
72
  ROWID = DBAPITypeObject()
73
73
 
74
- __version__ = "4.2.5"
74
+ __version__ = "4.2.6"
75
75
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
76
76
 
77
77
  # These two functions are pyhive legacy
@@ -83,6 +83,7 @@ class DatabricksClient(ABC):
83
83
  async_op: bool,
84
84
  enforce_embedded_schema_correctness: bool,
85
85
  row_limit: Optional[int] = None,
86
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
86
87
  ) -> Union[ResultSet, None]:
87
88
  """
88
89
  Executes a SQL command or query within the specified session.
@@ -102,6 +103,7 @@ class DatabricksClient(ABC):
102
103
  async_op: Whether to execute the command asynchronously
103
104
  enforce_embedded_schema_correctness: Whether to enforce schema correctness
104
105
  row_limit: Maximum number of rows in the response.
106
+ query_tags: Optional dictionary of query tags to apply for this query only.
105
107
 
106
108
  Returns:
107
109
  If async_op is False, returns a ResultSet object containing the
@@ -188,8 +188,9 @@ class SeaDatabricksClient(DatabricksClient):
188
188
  ValueError: If the warehouse ID cannot be extracted from the path
189
189
  """
190
190
 
191
- warehouse_pattern = re.compile(r".*/warehouses/(.+)")
192
- endpoint_pattern = re.compile(r".*/endpoints/(.+)")
191
+ # [^?&]+ stops at query params (e.g. ?o= for SPOG routing)
192
+ warehouse_pattern = re.compile(r".*/warehouses/([^?&]+)")
193
+ endpoint_pattern = re.compile(r".*/endpoints/([^?&]+)")
193
194
 
194
195
  for pattern in [warehouse_pattern, endpoint_pattern]:
195
196
  match = pattern.match(http_path)
@@ -463,6 +464,7 @@ class SeaDatabricksClient(DatabricksClient):
463
464
  async_op: bool,
464
465
  enforce_embedded_schema_correctness: bool,
465
466
  row_limit: Optional[int] = None,
467
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
466
468
  ) -> Union[SeaResultSet, None]:
467
469
  """
468
470
  Execute a SQL command using the SEA backend.
@@ -529,6 +531,7 @@ class SeaDatabricksClient(DatabricksClient):
529
531
  row_limit=row_limit,
530
532
  parameters=sea_parameters if sea_parameters else None,
531
533
  result_compression=result_compression,
534
+ query_tags=query_tags,
532
535
  )
533
536
 
534
537
  response_data = self._http_client._make_request(
@@ -31,6 +31,7 @@ class ExecuteStatementRequest:
31
31
  wait_timeout: str = "10s"
32
32
  on_wait_timeout: str = "CONTINUE"
33
33
  row_limit: Optional[int] = None
34
+ query_tags: Optional[Dict[str, Optional[str]]] = None
34
35
 
35
36
  def to_dict(self) -> Dict[str, Any]:
36
37
  """Convert the request to a dictionary for JSON serialization."""
@@ -60,6 +61,13 @@ class ExecuteStatementRequest:
60
61
  for param in self.parameters
61
62
  ]
62
63
 
64
+ # SEA API expects query_tags as an array of {key, value} objects.
65
+ # None/empty values are left to the server to handle as key-only tags.
66
+ if self.query_tags:
67
+ result["query_tags"] = [
68
+ {"key": k, "value": v} for k, v in self.query_tags.items()
69
+ ]
70
+
63
71
  return result
64
72
 
65
73
 
@@ -5,7 +5,7 @@ import logging
5
5
  import math
6
6
  import time
7
7
  import threading
8
- from typing import List, Optional, Union, Any, TYPE_CHECKING
8
+ from typing import Dict, List, Optional, Union, Any, TYPE_CHECKING
9
9
  from uuid import UUID
10
10
 
11
11
  from databricks.sql.common.unified_http_client import UnifiedHttpClient
@@ -53,6 +53,7 @@ from databricks.sql.utils import (
53
53
  convert_arrow_based_set_to_arrow_table,
54
54
  convert_decimals_in_arrow_table,
55
55
  convert_column_based_set_to_arrow_table,
56
+ serialize_query_tags,
56
57
  )
57
58
  from databricks.sql.types import SSLOptions
58
59
  from databricks.sql.backend.databricks_client import DatabricksClient
@@ -1003,6 +1004,7 @@ class ThriftDatabricksClient(DatabricksClient):
1003
1004
  async_op=False,
1004
1005
  enforce_embedded_schema_correctness=False,
1005
1006
  row_limit: Optional[int] = None,
1007
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1006
1008
  ) -> Union["ResultSet", None]:
1007
1009
  thrift_handle = session_id.to_thrift_handle()
1008
1010
  if not thrift_handle:
@@ -1022,6 +1024,19 @@ class ThriftDatabricksClient(DatabricksClient):
1022
1024
  # DBR should be changed to use month_day_nano_interval
1023
1025
  intervalTypesAsArrow=False,
1024
1026
  )
1027
+
1028
+ # Build confOverlay with default configs and query_tags
1029
+ merged_conf_overlay = {
1030
+ # We want to receive proper Timestamp arrow types.
1031
+ "spark.thriftserver.arrowBasedRowSet.timestampAsString": "false"
1032
+ }
1033
+
1034
+ # Serialize and add query_tags to confOverlay if provided
1035
+ if query_tags:
1036
+ serialized_tags = serialize_query_tags(query_tags)
1037
+ if serialized_tags:
1038
+ merged_conf_overlay["query_tags"] = serialized_tags
1039
+
1025
1040
  req = ttypes.TExecuteStatementReq(
1026
1041
  sessionHandle=thrift_handle,
1027
1042
  statement=operation,
@@ -1036,10 +1051,7 @@ class ThriftDatabricksClient(DatabricksClient):
1036
1051
  canReadArrowResult=True if pyarrow else False,
1037
1052
  canDecompressLZ4Result=lz4_compression,
1038
1053
  canDownloadResult=use_cloud_fetch,
1039
- confOverlay={
1040
- # We want to receive proper Timestamp arrow types.
1041
- "spark.thriftserver.arrowBasedRowSet.timestampAsString": "false"
1042
- },
1054
+ confOverlay=merged_conf_overlay,
1043
1055
  useArrowNativeTypes=spark_arrow_types,
1044
1056
  parameters=parameters,
1045
1057
  enforceEmbeddedSchemaCorrectness=enforce_embedded_schema_correctness,
@@ -36,6 +36,7 @@ from databricks.sql.utils import (
36
36
  ColumnQueue,
37
37
  build_client_context,
38
38
  get_session_config_value,
39
+ serialize_query_tags,
39
40
  )
40
41
  from databricks.sql.parameters.native import (
41
42
  DbsqlParameterBase,
@@ -106,6 +107,7 @@ class Connection:
106
107
  schema: Optional[str] = None,
107
108
  _use_arrow_native_complex_types: Optional[bool] = True,
108
109
  ignore_transactions: bool = True,
110
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
109
111
  **kwargs,
110
112
  ) -> None:
111
113
  """
@@ -281,6 +283,15 @@ class Connection:
281
283
  "spark.sql.thriftserver.metadata.metricview.enabled"
282
284
  ] = "true"
283
285
 
286
+ if query_tags is not None:
287
+ if session_configuration is None:
288
+ session_configuration = {}
289
+ serialized = serialize_query_tags(query_tags)
290
+ if serialized:
291
+ session_configuration["QUERY_TAGS"] = serialized
292
+ else:
293
+ session_configuration.pop("QUERY_TAGS", None)
294
+
284
295
  self.disable_pandas = kwargs.get("_disable_pandas", False)
285
296
  self.lz4_compression = kwargs.get("enable_query_result_lz4_compression", True)
286
297
  self.use_cloud_fetch = kwargs.get("use_cloud_fetch", True)
@@ -342,6 +353,7 @@ class Connection:
342
353
  host_url=self.session.host,
343
354
  batch_size=self.telemetry_batch_size,
344
355
  client_context=client_context,
356
+ extra_headers=self.session.get_spog_headers(),
345
357
  )
346
358
 
347
359
  self._telemetry_client = TelemetryClientFactory.get_telemetry_client(
@@ -1263,6 +1275,7 @@ class Cursor:
1263
1275
  parameters: Optional[TParameterCollection] = None,
1264
1276
  enforce_embedded_schema_correctness=False,
1265
1277
  input_stream: Optional[BinaryIO] = None,
1278
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1266
1279
  ) -> "Cursor":
1267
1280
  """
1268
1281
  Execute a query and wait for execution to complete.
@@ -1293,6 +1306,10 @@ class Cursor:
1293
1306
  Both will result in the query equivalent to "SELECT * FROM table WHERE field = 'foo'
1294
1307
  being sent to the server
1295
1308
 
1309
+ :param query_tags: Optional dictionary of query tags to apply for this query only.
1310
+ Tags are key-value pairs that can be used to identify and categorize queries.
1311
+ Example: {"team": "data-eng", "application": "etl"}
1312
+
1296
1313
  :returns self
1297
1314
  """
1298
1315
 
@@ -1333,6 +1350,7 @@ class Cursor:
1333
1350
  async_op=False,
1334
1351
  enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
1335
1352
  row_limit=self.row_limit,
1353
+ query_tags=query_tags,
1336
1354
  )
1337
1355
 
1338
1356
  if self.active_result_set and self.active_result_set.is_staging_operation:
@@ -1349,6 +1367,7 @@ class Cursor:
1349
1367
  operation: str,
1350
1368
  parameters: Optional[TParameterCollection] = None,
1351
1369
  enforce_embedded_schema_correctness=False,
1370
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1352
1371
  ) -> "Cursor":
1353
1372
  """
1354
1373
 
@@ -1356,6 +1375,9 @@ class Cursor:
1356
1375
 
1357
1376
  :param operation:
1358
1377
  :param parameters:
1378
+ :param query_tags: Optional dictionary of query tags to apply for this query only.
1379
+ Tags are key-value pairs that can be used to identify and categorize queries.
1380
+ Example: {"team": "data-eng", "application": "etl"}
1359
1381
  :return:
1360
1382
  """
1361
1383
 
@@ -1392,6 +1414,7 @@ class Cursor:
1392
1414
  async_op=True,
1393
1415
  enforce_embedded_schema_correctness=enforce_embedded_schema_correctness,
1394
1416
  row_limit=self.row_limit,
1417
+ query_tags=query_tags,
1395
1418
  )
1396
1419
 
1397
1420
  return self
@@ -1448,7 +1471,12 @@ class Cursor:
1448
1471
  session_id_hex=self.connection.get_session_id_hex(),
1449
1472
  )
1450
1473
 
1451
- def executemany(self, operation, seq_of_parameters):
1474
+ def executemany(
1475
+ self,
1476
+ operation,
1477
+ seq_of_parameters,
1478
+ query_tags: Optional[Dict[str, Optional[str]]] = None,
1479
+ ):
1452
1480
  """
1453
1481
  Execute the operation once for every set of passed in parameters.
1454
1482
 
@@ -1457,10 +1485,14 @@ class Cursor:
1457
1485
 
1458
1486
  Only the final result set is retained.
1459
1487
 
1488
+ :param query_tags: Optional dictionary of query tags to apply for all queries in this batch.
1489
+ Tags are key-value pairs that can be used to identify and categorize queries.
1490
+ Example: {"team": "data-eng", "application": "etl"}
1491
+
1460
1492
  :returns self
1461
1493
  """
1462
1494
  for parameters in seq_of_parameters:
1463
- self.execute(operation, parameters)
1495
+ self.execute(operation, parameters, query_tags=query_tags)
1464
1496
  return self
1465
1497
 
1466
1498
  @log_latency(StatementType.METADATA)
@@ -0,0 +1,52 @@
1
"""
AI coding agent detection for the Python SQL connector.

Agents export well-known environment variables into the shell processes they
spawn. This module looks those variables up to decide which agent, if any, is
driving the current process.

A result is reported only when exactly one known variable is set; if several
agent environments overlap, attribution would be ambiguous, so nothing is
reported.

To support a new agent, add one entry to KNOWN_AGENTS.

Where each environment variable comes from:
  - ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable.
  - CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1)
  - CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0)
  - CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs)
  - CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist.
  - GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets GEMINI_CLI=1)
  - OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1)
"""

import os

# (environment variable, agent product string) pairs, checked in order.
KNOWN_AGENTS = [
    ("ANTIGRAVITY_AGENT", "antigravity"),
    ("CLAUDECODE", "claude-code"),
    ("CLINE_ACTIVE", "cline"),
    ("CODEX_CI", "codex"),
    ("CURSOR_AGENT", "cursor"),
    ("GEMINI_CLI", "gemini-cli"),
    ("OPENCODE", "opencode"),
]


def detect(env=None):
    """Return the product string of the AI coding agent running this process.

    Args:
        env: Optional dict-like object used for environment variable lookup.
            When None, os.environ is used. Provided so tests can inject a
            fake environment.

    Returns:
        The agent product string when exactly one known agent variable has a
        truthy value; otherwise an empty string (no agent, or more than one).
    """
    environ = os.environ if env is None else env

    matches = []
    for variable, product in KNOWN_AGENTS:
        # A variable that is unset or set to "" does not count as present.
        if environ.get(variable):
            matches.append(product)

    # Zero or multiple hits are both treated as "no unambiguous agent".
    return matches[0] if len(matches) == 1 else ""
@@ -113,6 +113,7 @@ class FeatureFlagsContext:
113
113
  # Authenticate the request
114
114
  self._connection.session.auth_provider.add_headers(headers)
115
115
  headers["User-Agent"] = self._connection.session.useragent_header
116
+ headers.update(self._connection.session.get_spog_headers())
116
117
 
117
118
  response = self._http_client.request(
118
119
  HttpMethod.GET, self._feature_flag_endpoint, headers=headers, timeout=30
@@ -659,7 +659,7 @@ def dbsql_parameter_from_primitive(
659
659
  elif isinstance(value, str):
660
660
  return StringParameter(value=value, name=name)
661
661
  elif isinstance(value, float):
662
- return FloatParameter(value=value, name=name)
662
+ return DoubleParameter(value=value, name=name)
663
663
  elif isinstance(value, datetime.datetime):
664
664
  return TimestampParameter(value=value, name=name)
665
665
  elif isinstance(value, datetime.date):
@@ -13,6 +13,7 @@ from databricks.sql.backend.sea.backend import SeaDatabricksClient
13
13
  from databricks.sql.backend.databricks_client import DatabricksClient
14
14
  from databricks.sql.backend.types import SessionId, BackendType
15
15
  from databricks.sql.common.unified_http_client import UnifiedHttpClient
16
+ from databricks.sql.common.agent import detect as detect_agent
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
@@ -64,9 +65,21 @@ class Session:
64
65
  else:
65
66
  self.useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__)
66
67
 
68
+ agent_product = detect_agent()
69
+ if agent_product:
70
+ self.useragent_header += " agent/{}".format(agent_product)
71
+
67
72
  base_headers = [("User-Agent", self.useragent_header)]
68
73
  all_headers = (http_headers or []) + base_headers
69
74
 
75
+ # Extract ?o=<workspaceId> from http_path for SPOG routing.
76
+ # On SPOG hosts, the httpPath contains ?o=<workspaceId> which routes Thrift
77
+ # requests via the URL. For SEA, telemetry, and feature flags (which use
78
+ # separate endpoints), we inject x-databricks-org-id as an HTTP header.
79
+ self._spog_headers = self._extract_spog_headers(http_path, all_headers)
80
+ if self._spog_headers:
81
+ all_headers = all_headers + list(self._spog_headers.items())
82
+
70
83
  self.ssl_options = SSLOptions(
71
84
  # Double negation is generally a bad thing, but we have to keep backward compatibility
72
85
  tls_verify=not kwargs.get(
@@ -131,6 +144,44 @@ class Session:
131
144
  }
132
145
  return databricks_client_class(**common_args)
133
146
 
147
@staticmethod
def _extract_spog_headers(http_path, existing_headers):
    """Build the SPOG routing header from the ?o=<workspaceId> query parameter.

    Args:
        http_path: The connection's HTTP path, optionally carrying a query
            string such as "/sql/1.0/warehouses/abc?o=123".
        existing_headers: Iterable of (name, value) header pairs already
            supplied for the connection; may be None or empty.

    Returns:
        {"x-databricks-org-id": <workspaceId>} when http_path carries a
        non-empty ?o= value and no such header is already present;
        otherwise an empty dict.
    """
    if not http_path or "?" not in http_path:
        return {}

    from urllib.parse import parse_qs

    query_string = http_path.split("?", 1)[1]
    params = parse_qs(query_string)
    org_id = params.get("o", [None])[0]
    if not org_id:
        logger.debug(
            "SPOG header extraction: http_path has query string but no ?o= param, "
            "skipping x-databricks-org-id injection"
        )
        return {}

    # Don't override if explicitly set. HTTP header names are case-insensitive,
    # so a caller-supplied "X-Databricks-Org-Id" must also count as already set;
    # otherwise a second, possibly conflicting, org-id header would be injected.
    if any(
        str(k).lower() == "x-databricks-org-id" for k, _ in (existing_headers or [])
    ):
        logger.debug(
            "SPOG header extraction: x-databricks-org-id already set by caller, "
            "not overriding with ?o=%s from http_path",
            org_id,
        )
        return {}

    logger.debug(
        "SPOG header extraction: injecting x-databricks-org-id=%s "
        "(extracted from ?o= in http_path)",
        org_id,
    )
    return {"x-databricks-org-id": org_id}
180
+
181
def get_spog_headers(self):
    """Return a fresh copy of the SPOG routing headers held in self._spog_headers.

    A copy is handed out so callers can mutate the result without affecting
    the headers stored on this object.
    """
    headers_copy = {}
    headers_copy.update(self._spog_headers)
    return headers_copy
184
+
134
185
  def open(self):
135
186
  self._session_id = self.backend.open_session(
136
187
  session_configuration=self.session_configuration,
@@ -188,6 +188,7 @@ class TelemetryClient(BaseTelemetryClient):
188
188
  executor,
189
189
  batch_size: int,
190
190
  client_context,
191
+ extra_headers: Optional[Dict[str, str]] = None,
191
192
  ) -> None:
192
193
  logger.debug("Initializing TelemetryClient for connection: %s", session_id_hex)
193
194
  self._telemetry_enabled = telemetry_enabled
@@ -195,6 +196,7 @@ class TelemetryClient(BaseTelemetryClient):
195
196
  self._session_id_hex = session_id_hex
196
197
  self._auth_provider = auth_provider
197
198
  self._user_agent = None
199
+ self._extra_headers = extra_headers or {}
198
200
 
199
201
  # OPTIMIZATION: Use lock-free Queue instead of list + lock
200
202
  # Queue is thread-safe internally and has better performance under concurrency
@@ -287,6 +289,8 @@ class TelemetryClient(BaseTelemetryClient):
287
289
  if self._auth_provider:
288
290
  self._auth_provider.add_headers(headers)
289
291
 
292
+ headers.update(self._extra_headers)
293
+
290
294
  try:
291
295
  logger.debug("Submitting telemetry request to thread pool")
292
296
 
@@ -587,6 +591,7 @@ class TelemetryClientFactory:
587
591
  host_url,
588
592
  batch_size,
589
593
  client_context,
594
+ extra_headers=None,
590
595
  ):
591
596
  """
592
597
  Initialize a telemetry client for a specific connection if telemetry is enabled.
@@ -627,6 +632,7 @@ class TelemetryClientFactory:
627
632
  executor=TelemetryClientFactory._executor,
628
633
  batch_size=batch_size,
629
634
  client_context=client_context,
635
+ extra_headers=extra_headers,
630
636
  )
631
637
  TelemetryClientFactory._clients[
632
638
  host_url
@@ -895,7 +895,50 @@ def concat_table_chunks(
895
895
  result_table[j].extend(table_chunks[i].column_table[j])
896
896
  return ColumnTable(result_table, table_chunks[0].column_names)
897
897
  else:
898
- return pyarrow.concat_tables(table_chunks)
898
+ return pyarrow.concat_tables(table_chunks, promote_options="default")
899
+
900
+
901
def serialize_query_tags(
    query_tags: Optional[Dict[str, Optional[str]]]
) -> Optional[str]:
    """
    Turn a query_tags dictionary into its comma-separated wire string.

    Each tag is rendered as "key:value" and tags are joined with commas, in
    dictionary iteration order, e.g. "key1:value1,key2:value2".

    Rules:
      - A tag whose value is None is rendered as the key alone, with no colon
        (e.g. "key1:value1,key2,key3:value3").
      - In values, backslash, colon and comma are each prefixed with a backslash.
      - In keys, only backslashes are escaped; colons and commas pass through.

    Args:
        query_tags: Mapping of tag name to optional tag value.

    Returns:
        The serialized string, or None when query_tags is None or empty.
    """
    if not query_tags:
        return None

    # One pass over the value is equivalent to escaping the backslash first and
    # then colon and comma: no character is ever escaped twice.
    value_escapes = str.maketrans({"\\": "\\\\", ":": "\\:", ",": "\\,"})

    def render(tag_key: str, tag_value: Optional[str]) -> str:
        """Render a single tag as "key" or "key:value"."""
        safe_key = tag_key.replace("\\", "\\\\")
        if tag_value is None:
            return safe_key
        return safe_key + ":" + tag_value.translate(value_escapes)

    return ",".join(render(name, value) for name, value in query_tags.items())
899
942
 
900
943
 
901
944
  def build_client_context(server_hostname: str, version: str, **kwargs):
@@ -914,12 +957,18 @@ def build_client_context(server_hostname: str, version: str, **kwargs):
914
957
  )
915
958
 
916
959
  # Build user agent
960
+ from databricks.sql.common.agent import detect as detect_agent
961
+
917
962
  user_agent_entry = kwargs.get("user_agent_entry", "")
918
963
  if user_agent_entry:
919
964
  user_agent = f"PyDatabricksSqlConnector/{version} ({user_agent_entry})"
920
965
  else:
921
966
  user_agent = f"PyDatabricksSqlConnector/{version}"
922
967
 
968
+ agent_product = detect_agent()
969
+ if agent_product:
970
+ user_agent += f" agent/{agent_product}"
971
+
923
972
  # Explicitly construct ClientContext with proper types
924
973
  return ClientContext(
925
974
  hostname=server_hostname,