databricks-sql-connector 3.4.0__tar.gz → 3.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/CHANGELOG.md +5 -0
  2. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/PKG-INFO +2 -1
  3. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/pyproject.toml +1 -1
  4. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/__init__.py +1 -1
  5. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/client.py +92 -8
  6. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_backend.py +16 -8
  7. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/utils.py +159 -17
  8. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/LICENSE +0 -0
  9. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/README.md +0 -0
  10. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/__init__.py +0 -0
  11. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/__init__.py +0 -0
  12. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/auth.py +0 -0
  13. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/authenticators.py +0 -0
  14. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/endpoint.py +0 -0
  15. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth.py +0 -0
  16. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  17. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/retry.py +0 -0
  18. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  19. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  20. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/downloader.py +0 -0
  21. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/exc.py +0 -0
  22. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/__init__.py +0 -0
  23. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  24. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/__init__.py +0 -0
  25. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/native.py +0 -0
  26. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/py.typed +0 -0
  27. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/py.typed +0 -0
  28. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  29. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  30. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  31. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  32. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  33. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/__init__.py +0 -0
  34. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/types.py +0 -0
  35. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.sqlalchemy.md +0 -0
  36. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.tests.md +0 -0
  37. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/__init__.py +0 -0
  38. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_ddl.py +0 -0
  39. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_parse.py +0 -0
  40. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_types.py +0 -0
  41. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/base.py +0 -0
  42. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/py.typed +0 -0
  43. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/requirements.py +0 -0
  44. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/setup.cfg +0 -0
  45. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_extra.py +0 -0
  46. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_future.py +0 -0
  47. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_regression.py +0 -0
  48. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_unsupported.py +0 -0
  49. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/conftest.py +0 -0
  50. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -0
  51. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_ctetest.py +0 -0
  52. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/test_suite.py +0 -0
  53. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/__init__.py +0 -0
  54. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/conftest.py +0 -0
  55. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  56. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -0
  57. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_ddl.py +0 -0
  58. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_parsing.py +0 -0
  59. {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_types.py +0 -0
@@ -1,5 +1,10 @@
1
1
  # Release History
2
2
 
3
+ # 3.5.0 (2024-10-18)
4
+
5
+ - Create a non pyarrow flow to handle small results for the column set (databricks/databricks-sql-python#440 by @jprakash-db)
6
+ - Fix: On non-retryable error, ensure PySQL includes useful information in error (databricks/databricks-sql-python#447 by @shivam2680)
7
+
3
8
  # 3.4.0 (2024-08-27)
4
9
 
5
10
  - Unpin pandas to support v2.2.2 (databricks/databricks-sql-python#416 by @kfollesdal)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 3.4.0
3
+ Version: 3.5.0
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
16
17
  Provides-Extra: alembic
17
18
  Provides-Extra: sqlalchemy
18
19
  Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "3.4.0"
3
+ version = "3.5.0"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -68,7 +68,7 @@ DATETIME = DBAPITypeObject("timestamp")
68
68
  DATE = DBAPITypeObject("date")
69
69
  ROWID = DBAPITypeObject()
70
70
 
71
- __version__ = "3.4.0"
71
+ __version__ = "3.5.0"
72
72
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
73
73
 
74
74
  # These two functions are pyhive legacy
@@ -1,7 +1,11 @@
1
1
  from typing import Dict, Tuple, List, Optional, Any, Union, Sequence
2
2
 
3
3
  import pandas
4
- import pyarrow
4
+
5
+ try:
6
+ import pyarrow
7
+ except ImportError:
8
+ pyarrow = None
5
9
  import requests
6
10
  import json
7
11
  import os
@@ -22,6 +26,8 @@ from databricks.sql.utils import (
22
26
  ParamEscaper,
23
27
  inject_parameters,
24
28
  transform_paramstyle,
29
+ ColumnTable,
30
+ ColumnQueue,
25
31
  )
26
32
  from databricks.sql.parameters.native import (
27
33
  DbsqlParameterBase,
@@ -991,14 +997,14 @@ class Cursor:
991
997
  else:
992
998
  raise Error("There is no active result set")
993
999
 
994
- def fetchall_arrow(self) -> pyarrow.Table:
1000
+ def fetchall_arrow(self) -> "pyarrow.Table":
995
1001
  self._check_not_closed()
996
1002
  if self.active_result_set:
997
1003
  return self.active_result_set.fetchall_arrow()
998
1004
  else:
999
1005
  raise Error("There is no active result set")
1000
1006
 
1001
- def fetchmany_arrow(self, size) -> pyarrow.Table:
1007
+ def fetchmany_arrow(self, size) -> "pyarrow.Table":
1002
1008
  self._check_not_closed()
1003
1009
  if self.active_result_set:
1004
1010
  return self.active_result_set.fetchmany_arrow(size)
@@ -1143,6 +1149,18 @@ class ResultSet:
1143
1149
  self.results = results
1144
1150
  self.has_more_rows = has_more_rows
1145
1151
 
1152
def _convert_columnar_table(self, table):
    """
    Turn a columnar table into a list of Row objects, one per table row.

    Row fields are named after the first element of each ``self.description``
    entry; cell values are read through ``table.get_item(col_index, row_index)``.

    :param table: object exposing ``num_rows``, ``num_columns`` and ``get_item``
    :return: list with one Row per row of ``table``
    """
    names = [entry[0] for entry in self.description]
    ResultRow = Row(*names)
    return [
        ResultRow(
            *[table.get_item(col, row) for col in range(table.num_columns)]
        )
        for row in range(table.num_rows)
    ]
1163
+
1146
1164
  def _convert_arrow_table(self, table):
1147
1165
  column_names = [c[0] for c in self.description]
1148
1166
  ResultRow = Row(*column_names)
@@ -1185,7 +1203,7 @@ class ResultSet:
1185
1203
  def rownumber(self):
1186
1204
  return self._next_row_index
1187
1205
 
1188
- def fetchmany_arrow(self, size: int) -> pyarrow.Table:
1206
+ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
1189
1207
  """
1190
1208
  Fetch the next set of rows of a query result, returning a PyArrow table.
1191
1209
 
@@ -1210,7 +1228,49 @@ class ResultSet:
1210
1228
 
1211
1229
  return results
1212
1230
 
1213
- def fetchall_arrow(self) -> pyarrow.Table:
1231
def merge_columnar(self, result1, result2):
    """
    Combine two columnar results into a single ColumnTable.

    Each column of ``result2`` is appended (with ``+``) to the column at the
    same position in ``result1``.

    :param result1: columnar result whose rows come first
    :param result2: columnar result whose rows are appended
    :return: a new ColumnTable carrying ``result1``'s column names
    :raises ValueError: if the two results do not have equal column names
    """
    names = result1.column_names
    if names != result2.column_names:
        raise ValueError("The columns in the results don't match")

    combined = []
    for index in range(result1.num_columns):
        combined.append(result1.column_table[index] + result2.column_table[index])
    return ColumnTable(combined, names)
1247
+
1248
def fetchmany_columnar(self, size: int):
    """
    Fetch the next set of rows of a query result, returning a Columnar Table.

    Rows are taken from the current results queue first. While fewer than
    ``size`` rows have been collected, the result set has not been closed
    server side and ``has_more_rows`` is set, the results buffer is refilled
    and the next chunk is merged into the result.

    An empty result is returned when no more rows are available.

    :param size: maximum number of rows to return; must be >= 0
    :return: the merged columnar result, holding at most ``size`` rows
    :raises ValueError: if ``size`` is negative
    """
    if size < 0:
        # Exceptions do not apply %-style arguments the way logging calls do,
        # so the message has to be formatted before it is raised.
        raise ValueError(
            "size argument for fetchmany is {} but must be >= 0".format(size)
        )

    results = self.results.next_n_rows(size)
    n_remaining_rows = size - results.num_rows
    self._next_row_index += results.num_rows

    while (
        n_remaining_rows > 0
        and not self.has_been_closed_server_side
        and self.has_more_rows
    ):
        self._fill_results_buffer()
        partial_results = self.results.next_n_rows(n_remaining_rows)
        results = self.merge_columnar(results, partial_results)
        n_remaining_rows -= partial_results.num_rows
        self._next_row_index += partial_results.num_rows

    return results
1272
+
1273
+ def fetchall_arrow(self) -> "pyarrow.Table":
1214
1274
  """Fetch all (remaining) rows of a query result, returning them as a PyArrow table."""
1215
1275
  results = self.results.remaining_rows()
1216
1276
  self._next_row_index += results.num_rows
@@ -1223,12 +1283,30 @@ class ResultSet:
1223
1283
 
1224
1284
  return results
1225
1285
 
1286
def fetchall_columnar(self):
    """
    Fetch all (remaining) rows of a query result, returning them as a Columnar table.

    Drains the current results queue, then keeps refilling the buffer and
    merging each new chunk until the result set is closed server side or
    ``has_more_rows`` is no longer set. ``_next_row_index`` is advanced by the
    number of rows in every chunk consumed.
    """
    collected = self.results.remaining_rows()
    self._next_row_index += collected.num_rows

    while True:
        if self.has_been_closed_server_side or not self.has_more_rows:
            break
        self._fill_results_buffer()
        chunk = self.results.remaining_rows()
        collected = self.merge_columnar(collected, chunk)
        self._next_row_index += chunk.num_rows

    return collected
1298
+
1226
1299
  def fetchone(self) -> Optional[Row]:
1227
1300
  """
1228
1301
  Fetch the next row of a query result set, returning a single sequence,
1229
1302
  or None when no more data is available.
1230
1303
  """
1231
- res = self._convert_arrow_table(self.fetchmany_arrow(1))
1304
+
1305
+ if isinstance(self.results, ColumnQueue):
1306
+ res = self._convert_columnar_table(self.fetchmany_columnar(1))
1307
+ else:
1308
+ res = self._convert_arrow_table(self.fetchmany_arrow(1))
1309
+
1232
1310
  if len(res) > 0:
1233
1311
  return res[0]
1234
1312
  else:
@@ -1238,7 +1316,10 @@ class ResultSet:
1238
1316
  """
1239
1317
  Fetch all (remaining) rows of a query result, returning them as a list of rows.
1240
1318
  """
1241
- return self._convert_arrow_table(self.fetchall_arrow())
1319
+ if isinstance(self.results, ColumnQueue):
1320
+ return self._convert_columnar_table(self.fetchall_columnar())
1321
+ else:
1322
+ return self._convert_arrow_table(self.fetchall_arrow())
1242
1323
 
1243
1324
  def fetchmany(self, size: int) -> List[Row]:
1244
1325
  """
@@ -1246,7 +1327,10 @@ class ResultSet:
1246
1327
 
1247
1328
  An empty sequence is returned when no more rows are available.
1248
1329
  """
1249
- return self._convert_arrow_table(self.fetchmany_arrow(size))
1330
+ if isinstance(self.results, ColumnQueue):
1331
+ return self._convert_columnar_table(self.fetchmany_columnar(size))
1332
+ else:
1333
+ return self._convert_arrow_table(self.fetchmany_arrow(size))
1250
1334
 
1251
1335
  def close(self) -> None:
1252
1336
  """
@@ -7,7 +7,10 @@ import uuid
7
7
  import threading
8
8
  from typing import List, Union
9
9
 
10
- import pyarrow
10
+ try:
11
+ import pyarrow
12
+ except ImportError:
13
+ pyarrow = None
11
14
  import thrift.transport.THttpClient
12
15
  import thrift.protocol.TBinaryProtocol
13
16
  import thrift.transport.TSocket
@@ -726,12 +729,17 @@ class ThriftBackend:
726
729
  description = self._hive_schema_to_description(
727
730
  t_result_set_metadata_resp.schema
728
731
  )
729
- schema_bytes = (
730
- t_result_set_metadata_resp.arrowSchema
731
- or self._hive_schema_to_arrow_schema(t_result_set_metadata_resp.schema)
732
- .serialize()
733
- .to_pybytes()
734
- )
732
+
733
+ if pyarrow:
734
+ schema_bytes = (
735
+ t_result_set_metadata_resp.arrowSchema
736
+ or self._hive_schema_to_arrow_schema(t_result_set_metadata_resp.schema)
737
+ .serialize()
738
+ .to_pybytes()
739
+ )
740
+ else:
741
+ schema_bytes = None
742
+
735
743
  lz4_compressed = t_result_set_metadata_resp.lz4Compressed
736
744
  is_staging_operation = t_result_set_metadata_resp.isStagingOperation
737
745
  if direct_results and direct_results.resultSet:
@@ -827,7 +835,7 @@ class ThriftBackend:
827
835
  getDirectResults=ttypes.TSparkGetDirectResults(
828
836
  maxRows=max_rows, maxBytes=max_bytes
829
837
  ),
830
- canReadArrowResult=True,
838
+ canReadArrowResult=True if pyarrow else False,
831
839
  canDecompressLZ4Result=lz4_compression,
832
840
  canDownloadResult=use_cloud_fetch,
833
841
  confOverlay={
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import pytz
3
4
  import datetime
4
5
  import decimal
5
6
  from abc import ABC, abstractmethod
@@ -11,7 +12,11 @@ from typing import Any, Dict, List, Optional, Union
11
12
  import re
12
13
 
13
14
  import lz4.frame
14
- import pyarrow
15
+
16
+ try:
17
+ import pyarrow
18
+ except ImportError:
19
+ pyarrow = None
15
20
 
16
21
  from databricks.sql import OperationalError, exc
17
22
  from databricks.sql.cloudfetch.download_manager import ResultFileDownloadManager
@@ -27,17 +32,18 @@ from databricks.sql.parameters.native import ParameterStructure, TDbsqlParameter
27
32
  import logging
28
33
 
29
34
  BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]
35
+ DEFAULT_ERROR_CONTEXT = "Unknown error"
30
36
 
31
37
  logger = logging.getLogger(__name__)
32
38
 
33
39
 
34
40
  class ResultSetQueue(ABC):
35
41
  @abstractmethod
36
- def next_n_rows(self, num_rows: int) -> pyarrow.Table:
42
+ def next_n_rows(self, num_rows: int):
37
43
  pass
38
44
 
39
45
  @abstractmethod
40
- def remaining_rows(self) -> pyarrow.Table:
46
+ def remaining_rows(self):
41
47
  pass
42
48
 
43
49
 
@@ -76,13 +82,15 @@ class ResultSetQueueFactory(ABC):
76
82
  )
77
83
  return ArrowQueue(converted_arrow_table, n_valid_rows)
78
84
  elif row_set_type == TSparkRowSetType.COLUMN_BASED_SET:
79
- arrow_table, n_valid_rows = convert_column_based_set_to_arrow_table(
85
+ column_table, column_names = convert_column_based_set_to_column_table(
80
86
  t_row_set.columns, description
81
87
  )
82
- converted_arrow_table = convert_decimals_in_arrow_table(
83
- arrow_table, description
88
+
89
+ converted_column_table = convert_to_assigned_datatypes_in_column_table(
90
+ column_table, description
84
91
  )
85
- return ArrowQueue(converted_arrow_table, n_valid_rows)
92
+
93
+ return ColumnQueue(ColumnTable(converted_column_table, column_names))
86
94
  elif row_set_type == TSparkRowSetType.URL_BASED_SET:
87
95
  return CloudFetchQueue(
88
96
  schema_bytes=arrow_schema_bytes,
@@ -97,10 +105,63 @@ class ResultSetQueueFactory(ABC):
97
105
  raise AssertionError("Row set type is not valid")
98
106
 
99
107
 
108
class ColumnTable:
    """
    Column-major table: a sequence of columns plus the matching column names.

    ``column_table[c][r]`` is the value of column ``c`` in row ``r``.
    """

    def __init__(self, column_table, column_names):
        # column_table: sequence of columns, each a sequence of cell values
        # column_names: one name per column
        self.column_table = column_table
        self.column_names = column_names

    @property
    def num_rows(self):
        """Length of the first column, or 0 when the table has no columns."""
        if len(self.column_table) == 0:
            return 0
        return len(self.column_table[0])

    @property
    def num_columns(self):
        """Number of column names."""
        return len(self.column_names)

    def get_item(self, col_index, row_index):
        """Return the cell at column ``col_index``, row ``row_index``."""
        return self.column_table[col_index][row_index]

    def slice(self, curr_index, length):
        """
        Return a new ColumnTable with rows ``curr_index`` to ``curr_index + length``.

        Python slicing is used on every column, so a range running past the
        end is truncated rather than raising.
        """
        sliced_column_table = [
            column[curr_index : curr_index + length] for column in self.column_table
        ]
        return ColumnTable(sliced_column_table, self.column_names)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on the attribute access below.
        if not isinstance(other, ColumnTable):
            return NotImplemented
        return (
            self.column_table == other.column_table
            and self.column_names == other.column_names
        )
138
+
139
+
140
class ColumnQueue(ResultSetQueue):
    """
    Result queue backed by a single ColumnTable.

    Rows are handed out by slicing the table from ``cur_row_index``, which is
    advanced by the number of rows in each slice returned.
    """

    def __init__(self, column_table: ColumnTable):
        self.column_table = column_table
        self.cur_row_index = 0
        self.n_valid_rows = column_table.num_rows

    def next_n_rows(self, num_rows):
        """Return up to ``num_rows`` rows starting at the current position."""
        available = self.n_valid_rows - self.cur_row_index
        return self._take(min(num_rows, available))

    def remaining_rows(self):
        """Return every row from the current position to the last valid row."""
        return self._take(self.n_valid_rows - self.cur_row_index)

    def _take(self, length):
        # Slice ``length`` rows at the cursor and move the cursor past them.
        window = self.column_table.slice(self.cur_row_index, length)
        self.cur_row_index += window.num_rows
        return window
159
+
160
+
100
161
  class ArrowQueue(ResultSetQueue):
101
162
  def __init__(
102
163
  self,
103
- arrow_table: pyarrow.Table,
164
+ arrow_table: "pyarrow.Table",
104
165
  n_valid_rows: int,
105
166
  start_row_index: int = 0,
106
167
  ):
@@ -115,7 +176,7 @@ class ArrowQueue(ResultSetQueue):
115
176
  self.arrow_table = arrow_table
116
177
  self.n_valid_rows = n_valid_rows
117
178
 
118
- def next_n_rows(self, num_rows: int) -> pyarrow.Table:
179
+ def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
119
180
  """Get upto the next n rows of the Arrow dataframe"""
120
181
  length = min(num_rows, self.n_valid_rows - self.cur_row_index)
121
182
  # Note that the table.slice API is not the same as Python's slice
@@ -124,7 +185,7 @@ class ArrowQueue(ResultSetQueue):
124
185
  self.cur_row_index += slice.num_rows
125
186
  return slice
126
187
 
127
- def remaining_rows(self) -> pyarrow.Table:
188
+ def remaining_rows(self) -> "pyarrow.Table":
128
189
  slice = self.arrow_table.slice(
129
190
  self.cur_row_index, self.n_valid_rows - self.cur_row_index
130
191
  )
@@ -184,7 +245,7 @@ class CloudFetchQueue(ResultSetQueue):
184
245
  self.table = self._create_next_table()
185
246
  self.table_row_index = 0
186
247
 
187
- def next_n_rows(self, num_rows: int) -> pyarrow.Table:
248
+ def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
188
249
  """
189
250
  Get up to the next n rows of the cloud fetch Arrow dataframes.
190
251
 
@@ -216,7 +277,7 @@ class CloudFetchQueue(ResultSetQueue):
216
277
  logger.debug("CloudFetchQueue: collected {} next rows".format(results.num_rows))
217
278
  return results
218
279
 
219
- def remaining_rows(self) -> pyarrow.Table:
280
+ def remaining_rows(self) -> "pyarrow.Table":
220
281
  """
221
282
  Get all remaining rows of the cloud fetch Arrow dataframes.
222
283
 
@@ -237,7 +298,7 @@ class CloudFetchQueue(ResultSetQueue):
237
298
  self.table_row_index = 0
238
299
  return results
239
300
 
240
- def _create_next_table(self) -> Union[pyarrow.Table, None]:
301
+ def _create_next_table(self) -> Union["pyarrow.Table", None]:
241
302
  logger.debug(
242
303
  "CloudFetchQueue: Trying to get downloaded file for row {}".format(
243
304
  self.start_row_index
@@ -276,7 +337,7 @@ class CloudFetchQueue(ResultSetQueue):
276
337
 
277
338
  return arrow_table
278
339
 
279
- def _create_empty_table(self) -> pyarrow.Table:
340
+ def _create_empty_table(self) -> "pyarrow.Table":
280
341
  # Create a 0-row table with just the schema bytes
281
342
  return create_arrow_table_from_arrow_file(self.schema_bytes, self.description)
282
343
 
@@ -357,7 +418,12 @@ class RequestErrorInfo(
357
418
  user_friendly_error_message = "{}: {}".format(
358
419
  user_friendly_error_message, self.error_message
359
420
  )
360
- return user_friendly_error_message
421
+ try:
422
+ error_context = str(self.error)
423
+ except:
424
+ error_context = DEFAULT_ERROR_CONTEXT
425
+
426
+ return user_friendly_error_message + ". " + error_context
361
427
 
362
428
 
363
429
  # Taken from PyHive
@@ -515,7 +581,9 @@ def transform_paramstyle(
515
581
  return output
516
582
 
517
583
 
518
- def create_arrow_table_from_arrow_file(file_bytes: bytes, description) -> pyarrow.Table:
584
+ def create_arrow_table_from_arrow_file(
585
+ file_bytes: bytes, description
586
+ ) -> "pyarrow.Table":
519
587
  arrow_table = convert_arrow_based_file_to_arrow_table(file_bytes)
520
588
  return convert_decimals_in_arrow_table(arrow_table, description)
521
589
 
@@ -542,7 +610,7 @@ def convert_arrow_based_set_to_arrow_table(arrow_batches, lz4_compressed, schema
542
610
  return arrow_table, n_rows
543
611
 
544
612
 
545
- def convert_decimals_in_arrow_table(table, description) -> pyarrow.Table:
613
+ def convert_decimals_in_arrow_table(table, description) -> "pyarrow.Table":
546
614
  for i, col in enumerate(table.itercolumns()):
547
615
  if description[i][1] == "decimal":
548
616
  decimal_col = col.to_pandas().apply(
@@ -560,6 +628,37 @@ def convert_decimals_in_arrow_table(table, description) -> pyarrow.Table:
560
628
  return table
561
629
 
562
630
 
631
def convert_to_assigned_datatypes_in_column_table(column_table, description):
    """
    Convert string-encoded column values to Python types named in ``description``.

    The type name is read from ``description[i][1]`` for column ``i``:

    * ``"decimal"``   -> ``Decimal``
    * ``"date"``      -> ``datetime.date`` (ISO format)
    * ``"timestamp"`` -> ``datetime.datetime`` tagged with ``pytz.UTC``

    ``None`` values are kept as ``None``. Columns of any other type are passed
    through unchanged (the same object, not a copy).

    :param column_table: sequence of columns, each a sequence of values
    :param description: sequence whose entries carry the type name at index 1
    :return: list of columns; converted columns are tuples
    :raises ValueError: if a date or timestamp string cannot be parsed
    """

    def _parse_timestamp(value):
        # "%f" needs a fractional part to match, so pick the pattern by shape
        # and accept "YYYY-MM-DD HH:MM:SS" as well as the fractional form.
        if "." in value:
            pattern = "%Y-%m-%d %H:%M:%S.%f"
        else:
            pattern = "%Y-%m-%d %H:%M:%S"
        return datetime.datetime.strptime(value, pattern).replace(tzinfo=pytz.UTC)

    converted_column_table = []
    for i, col in enumerate(column_table):
        type_name = description[i][1]
        if type_name == "decimal":
            convert = Decimal
        elif type_name == "date":
            convert = datetime.date.fromisoformat
        elif type_name == "timestamp":
            convert = _parse_timestamp
        else:
            converted_column_table.append(col)
            continue

        converted_column_table.append(
            tuple(v if v is None else convert(v) for v in col)
        )

    return converted_column_table
660
+
661
+
563
662
  def convert_column_based_set_to_arrow_table(columns, description):
564
663
  arrow_table = pyarrow.Table.from_arrays(
565
664
  [_convert_column_to_arrow_array(c) for c in columns],
@@ -571,6 +670,13 @@ def convert_column_based_set_to_arrow_table(columns, description):
571
670
  return arrow_table, arrow_table.num_rows
572
671
 
573
672
 
673
def convert_column_based_set_to_column_table(columns, description):
    """
    Build a column table and its column names from a column-based row set.

    :param columns: iterable of column objects, each converted with
        ``_convert_column_to_list``
    :param description: sequence whose entries carry the column name at index 0
    :return: ``(column_table, column_names)``
    """
    names = [entry[0] for entry in description]
    return [_convert_column_to_list(t_col) for t_col in columns], names
678
+
679
+
574
680
  def _convert_column_to_arrow_array(t_col):
575
681
  """
576
682
  Return a pyarrow array from the values in a TColumn instance.
@@ -595,6 +701,26 @@ def _convert_column_to_arrow_array(t_col):
595
701
  raise OperationalError("Empty TColumn instance {}".format(t_col))
596
702
 
597
703
 
704
def _convert_column_to_list(t_col):
    """
    Return the values of a TColumn instance as a Python tuple.

    The typed fields are probed in a fixed order and the first truthy one is
    converted with ``_create_python_tuple``.

    :raises OperationalError: if none of the supported fields is set
    """
    field_names = (
        "boolVal",
        "byteVal",
        "i16Val",
        "i32Val",
        "i64Val",
        "doubleVal",
        "stringVal",
        "binaryVal",
    )

    for name in field_names:
        candidate = getattr(t_col, name)
        if not candidate:
            continue
        return _create_python_tuple(candidate)

    raise OperationalError("Empty TColumn instance {}".format(t_col))
722
+
723
+
598
724
  def _create_arrow_array(t_col_value_wrapper, arrow_type):
599
725
  result = t_col_value_wrapper.values
600
726
  nulls = t_col_value_wrapper.nulls # bitfield describing which values are null
@@ -609,3 +735,19 @@ def _create_arrow_array(t_col_value_wrapper, arrow_type):
609
735
  result[i] = None
610
736
 
611
737
  return pyarrow.array(result, type=arrow_type)
738
+
739
+
740
def _create_python_tuple(t_col_value_wrapper):
    """
    Return the wrapper's values as a tuple, with null positions set to None.

    ``nulls`` is a bitfield: value ``i`` is null when bit ``i & 7`` of byte
    ``i >> 3`` is set. The wrapper's ``values`` list is left untouched.

    :param t_col_value_wrapper: object with ``values`` (sequence) and
        ``nulls`` (bytes) attributes
    :return: tuple of the values, with None wherever the bitfield marks a null
    """
    values = t_col_value_wrapper.values
    nulls = t_col_value_wrapper.nulls  # bitfield describing which values are null
    assert isinstance(nulls, bytes)

    # The number of bits in nulls can be both larger or smaller than the number
    # of elements in values; positions past the end of the bitfield are not null.
    n_flagged = len(nulls) * 8

    return tuple(
        None if i < n_flagged and nulls[i >> 3] & (1 << (i & 0x7)) else value
        for i, value in enumerate(values)
    )