databricks-sql-connector 3.4.0__tar.gz → 3.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/CHANGELOG.md +5 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/PKG-INFO +2 -1
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/pyproject.toml +1 -1
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/__init__.py +1 -1
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/client.py +92 -8
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_backend.py +16 -8
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/utils.py +159 -17
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/LICENSE +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/README.md +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/auth.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/authenticators.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/endpoint.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/retry.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/thrift_http_client.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/cloudfetch/downloader.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/exc.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/native.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/parameters/py.typed +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/py.typed +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/thrift_api/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/types.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.sqlalchemy.md +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/README.tests.md +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_ddl.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_parse.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_types.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/base.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/py.typed +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/requirements.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/setup.cfg +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_extra.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_future.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_regression.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/_unsupported.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/conftest.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/overrides/_ctetest.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test/test_suite.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/__init__.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/conftest.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_ddl.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_parsing.py +0 -0
- {databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/test_local/test_types.py +0 -0
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
# Release History
|
|
2
2
|
|
|
3
|
+
# 3.5.0 (2024-10-18)
|
|
4
|
+
|
|
5
|
+
- Create a non pyarrow flow to handle small results for the column set (databricks/databricks-sql-python#440 by @jprakash-db)
|
|
6
|
+
- Fix: On non-retryable error, ensure PySQL includes useful information in error (databricks/databricks-sql-python#447 by @shivam2680)
|
|
7
|
+
|
|
3
8
|
# 3.4.0 (2024-08-27)
|
|
4
9
|
|
|
5
10
|
- Unpin pandas to support v2.2.2 (databricks/databricks-sql-python#416 by @kfollesdal)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: databricks-sql-connector
|
|
3
|
-
Version: 3.4.0
|
|
3
|
+
Version: 3.5.0
|
|
4
4
|
Summary: Databricks SQL Connector for Python
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Databricks
|
|
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
17
|
Provides-Extra: alembic
|
|
17
18
|
Provides-Extra: sqlalchemy
|
|
18
19
|
Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/client.py
RENAMED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
from typing import Dict, Tuple, List, Optional, Any, Union, Sequence
|
|
2
2
|
|
|
3
3
|
import pandas
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
import pyarrow
|
|
7
|
+
except ImportError:
|
|
8
|
+
pyarrow = None
|
|
5
9
|
import requests
|
|
6
10
|
import json
|
|
7
11
|
import os
|
|
@@ -22,6 +26,8 @@ from databricks.sql.utils import (
|
|
|
22
26
|
ParamEscaper,
|
|
23
27
|
inject_parameters,
|
|
24
28
|
transform_paramstyle,
|
|
29
|
+
ColumnTable,
|
|
30
|
+
ColumnQueue,
|
|
25
31
|
)
|
|
26
32
|
from databricks.sql.parameters.native import (
|
|
27
33
|
DbsqlParameterBase,
|
|
@@ -991,14 +997,14 @@ class Cursor:
|
|
|
991
997
|
else:
|
|
992
998
|
raise Error("There is no active result set")
|
|
993
999
|
|
|
994
|
-
def fetchall_arrow(self) -> pyarrow.Table:
|
|
1000
|
+
def fetchall_arrow(self) -> "pyarrow.Table":
|
|
995
1001
|
self._check_not_closed()
|
|
996
1002
|
if self.active_result_set:
|
|
997
1003
|
return self.active_result_set.fetchall_arrow()
|
|
998
1004
|
else:
|
|
999
1005
|
raise Error("There is no active result set")
|
|
1000
1006
|
|
|
1001
|
-
def fetchmany_arrow(self, size) -> pyarrow.Table:
|
|
1007
|
+
def fetchmany_arrow(self, size) -> "pyarrow.Table":
|
|
1002
1008
|
self._check_not_closed()
|
|
1003
1009
|
if self.active_result_set:
|
|
1004
1010
|
return self.active_result_set.fetchmany_arrow(size)
|
|
@@ -1143,6 +1149,18 @@ class ResultSet:
|
|
|
1143
1149
|
self.results = results
|
|
1144
1150
|
self.has_more_rows = has_more_rows
|
|
1145
1151
|
|
|
1152
|
+
def _convert_columnar_table(self, table):
|
|
1153
|
+
column_names = [c[0] for c in self.description]
|
|
1154
|
+
ResultRow = Row(*column_names)
|
|
1155
|
+
result = []
|
|
1156
|
+
for row_index in range(table.num_rows):
|
|
1157
|
+
curr_row = []
|
|
1158
|
+
for col_index in range(table.num_columns):
|
|
1159
|
+
curr_row.append(table.get_item(col_index, row_index))
|
|
1160
|
+
result.append(ResultRow(*curr_row))
|
|
1161
|
+
|
|
1162
|
+
return result
|
|
1163
|
+
|
|
1146
1164
|
def _convert_arrow_table(self, table):
|
|
1147
1165
|
column_names = [c[0] for c in self.description]
|
|
1148
1166
|
ResultRow = Row(*column_names)
|
|
@@ -1185,7 +1203,7 @@ class ResultSet:
|
|
|
1185
1203
|
def rownumber(self):
|
|
1186
1204
|
return self._next_row_index
|
|
1187
1205
|
|
|
1188
|
-
def fetchmany_arrow(self, size: int) -> pyarrow.Table:
|
|
1206
|
+
def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
|
|
1189
1207
|
"""
|
|
1190
1208
|
Fetch the next set of rows of a query result, returning a PyArrow table.
|
|
1191
1209
|
|
|
@@ -1210,7 +1228,49 @@ class ResultSet:
|
|
|
1210
1228
|
|
|
1211
1229
|
return results
|
|
1212
1230
|
|
|
1213
|
-
def fetchall_arrow(self) -> pyarrow.Table:
|
|
1231
|
+
def merge_columnar(self, result1, result2):
|
|
1232
|
+
"""
|
|
1233
|
+
Function to merge / combining the columnar results into a single result
|
|
1234
|
+
:param result1:
|
|
1235
|
+
:param result2:
|
|
1236
|
+
:return:
|
|
1237
|
+
"""
|
|
1238
|
+
|
|
1239
|
+
if result1.column_names != result2.column_names:
|
|
1240
|
+
raise ValueError("The columns in the results don't match")
|
|
1241
|
+
|
|
1242
|
+
merged_result = [
|
|
1243
|
+
result1.column_table[i] + result2.column_table[i]
|
|
1244
|
+
for i in range(result1.num_columns)
|
|
1245
|
+
]
|
|
1246
|
+
return ColumnTable(merged_result, result1.column_names)
|
|
1247
|
+
|
|
1248
|
+
def fetchmany_columnar(self, size: int):
|
|
1249
|
+
"""
|
|
1250
|
+
Fetch the next set of rows of a query result, returning a Columnar Table.
|
|
1251
|
+
An empty sequence is returned when no more rows are available.
|
|
1252
|
+
"""
|
|
1253
|
+
if size < 0:
|
|
1254
|
+
raise ValueError("size argument for fetchmany is %s but must be >= 0", size)
|
|
1255
|
+
|
|
1256
|
+
results = self.results.next_n_rows(size)
|
|
1257
|
+
n_remaining_rows = size - results.num_rows
|
|
1258
|
+
self._next_row_index += results.num_rows
|
|
1259
|
+
|
|
1260
|
+
while (
|
|
1261
|
+
n_remaining_rows > 0
|
|
1262
|
+
and not self.has_been_closed_server_side
|
|
1263
|
+
and self.has_more_rows
|
|
1264
|
+
):
|
|
1265
|
+
self._fill_results_buffer()
|
|
1266
|
+
partial_results = self.results.next_n_rows(n_remaining_rows)
|
|
1267
|
+
results = self.merge_columnar(results, partial_results)
|
|
1268
|
+
n_remaining_rows -= partial_results.num_rows
|
|
1269
|
+
self._next_row_index += partial_results.num_rows
|
|
1270
|
+
|
|
1271
|
+
return results
|
|
1272
|
+
|
|
1273
|
+
def fetchall_arrow(self) -> "pyarrow.Table":
|
|
1214
1274
|
"""Fetch all (remaining) rows of a query result, returning them as a PyArrow table."""
|
|
1215
1275
|
results = self.results.remaining_rows()
|
|
1216
1276
|
self._next_row_index += results.num_rows
|
|
@@ -1223,12 +1283,30 @@ class ResultSet:
|
|
|
1223
1283
|
|
|
1224
1284
|
return results
|
|
1225
1285
|
|
|
1286
|
+
def fetchall_columnar(self):
|
|
1287
|
+
"""Fetch all (remaining) rows of a query result, returning them as a Columnar table."""
|
|
1288
|
+
results = self.results.remaining_rows()
|
|
1289
|
+
self._next_row_index += results.num_rows
|
|
1290
|
+
|
|
1291
|
+
while not self.has_been_closed_server_side and self.has_more_rows:
|
|
1292
|
+
self._fill_results_buffer()
|
|
1293
|
+
partial_results = self.results.remaining_rows()
|
|
1294
|
+
results = self.merge_columnar(results, partial_results)
|
|
1295
|
+
self._next_row_index += partial_results.num_rows
|
|
1296
|
+
|
|
1297
|
+
return results
|
|
1298
|
+
|
|
1226
1299
|
def fetchone(self) -> Optional[Row]:
|
|
1227
1300
|
"""
|
|
1228
1301
|
Fetch the next row of a query result set, returning a single sequence,
|
|
1229
1302
|
or None when no more data is available.
|
|
1230
1303
|
"""
|
|
1231
|
-
|
|
1304
|
+
|
|
1305
|
+
if isinstance(self.results, ColumnQueue):
|
|
1306
|
+
res = self._convert_columnar_table(self.fetchmany_columnar(1))
|
|
1307
|
+
else:
|
|
1308
|
+
res = self._convert_arrow_table(self.fetchmany_arrow(1))
|
|
1309
|
+
|
|
1232
1310
|
if len(res) > 0:
|
|
1233
1311
|
return res[0]
|
|
1234
1312
|
else:
|
|
@@ -1238,7 +1316,10 @@ class ResultSet:
|
|
|
1238
1316
|
"""
|
|
1239
1317
|
Fetch all (remaining) rows of a query result, returning them as a list of rows.
|
|
1240
1318
|
"""
|
|
1241
|
-
|
|
1319
|
+
if isinstance(self.results, ColumnQueue):
|
|
1320
|
+
return self._convert_columnar_table(self.fetchall_columnar())
|
|
1321
|
+
else:
|
|
1322
|
+
return self._convert_arrow_table(self.fetchall_arrow())
|
|
1242
1323
|
|
|
1243
1324
|
def fetchmany(self, size: int) -> List[Row]:
|
|
1244
1325
|
"""
|
|
@@ -1246,7 +1327,10 @@ class ResultSet:
|
|
|
1246
1327
|
|
|
1247
1328
|
An empty sequence is returned when no more rows are available.
|
|
1248
1329
|
"""
|
|
1249
|
-
|
|
1330
|
+
if isinstance(self.results, ColumnQueue):
|
|
1331
|
+
return self._convert_columnar_table(self.fetchmany_columnar(size))
|
|
1332
|
+
else:
|
|
1333
|
+
return self._convert_arrow_table(self.fetchmany_arrow(size))
|
|
1250
1334
|
|
|
1251
1335
|
def close(self) -> None:
|
|
1252
1336
|
"""
|
|
@@ -7,7 +7,10 @@ import uuid
|
|
|
7
7
|
import threading
|
|
8
8
|
from typing import List, Union
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
try:
|
|
11
|
+
import pyarrow
|
|
12
|
+
except ImportError:
|
|
13
|
+
pyarrow = None
|
|
11
14
|
import thrift.transport.THttpClient
|
|
12
15
|
import thrift.protocol.TBinaryProtocol
|
|
13
16
|
import thrift.transport.TSocket
|
|
@@ -726,12 +729,17 @@ class ThriftBackend:
|
|
|
726
729
|
description = self._hive_schema_to_description(
|
|
727
730
|
t_result_set_metadata_resp.schema
|
|
728
731
|
)
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
732
|
+
|
|
733
|
+
if pyarrow:
|
|
734
|
+
schema_bytes = (
|
|
735
|
+
t_result_set_metadata_resp.arrowSchema
|
|
736
|
+
or self._hive_schema_to_arrow_schema(t_result_set_metadata_resp.schema)
|
|
737
|
+
.serialize()
|
|
738
|
+
.to_pybytes()
|
|
739
|
+
)
|
|
740
|
+
else:
|
|
741
|
+
schema_bytes = None
|
|
742
|
+
|
|
735
743
|
lz4_compressed = t_result_set_metadata_resp.lz4Compressed
|
|
736
744
|
is_staging_operation = t_result_set_metadata_resp.isStagingOperation
|
|
737
745
|
if direct_results and direct_results.resultSet:
|
|
@@ -827,7 +835,7 @@ class ThriftBackend:
|
|
|
827
835
|
getDirectResults=ttypes.TSparkGetDirectResults(
|
|
828
836
|
maxRows=max_rows, maxBytes=max_bytes
|
|
829
837
|
),
|
|
830
|
-
canReadArrowResult=True,
|
|
838
|
+
canReadArrowResult=True if pyarrow else False,
|
|
831
839
|
canDecompressLZ4Result=lz4_compression,
|
|
832
840
|
canDownloadResult=use_cloud_fetch,
|
|
833
841
|
confOverlay={
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/utils.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import pytz
|
|
3
4
|
import datetime
|
|
4
5
|
import decimal
|
|
5
6
|
from abc import ABC, abstractmethod
|
|
@@ -11,7 +12,11 @@ from typing import Any, Dict, List, Optional, Union
|
|
|
11
12
|
import re
|
|
12
13
|
|
|
13
14
|
import lz4.frame
|
|
14
|
-
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
import pyarrow
|
|
18
|
+
except ImportError:
|
|
19
|
+
pyarrow = None
|
|
15
20
|
|
|
16
21
|
from databricks.sql import OperationalError, exc
|
|
17
22
|
from databricks.sql.cloudfetch.download_manager import ResultFileDownloadManager
|
|
@@ -27,17 +32,18 @@ from databricks.sql.parameters.native import ParameterStructure, TDbsqlParameter
|
|
|
27
32
|
import logging
|
|
28
33
|
|
|
29
34
|
BIT_MASKS = [1, 2, 4, 8, 16, 32, 64, 128]
|
|
35
|
+
DEFAULT_ERROR_CONTEXT = "Unknown error"
|
|
30
36
|
|
|
31
37
|
logger = logging.getLogger(__name__)
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
class ResultSetQueue(ABC):
|
|
35
41
|
@abstractmethod
|
|
36
|
-
def next_n_rows(self, num_rows: int) -> pyarrow.Table:
|
|
42
|
+
def next_n_rows(self, num_rows: int):
|
|
37
43
|
pass
|
|
38
44
|
|
|
39
45
|
@abstractmethod
|
|
40
|
-
def remaining_rows(self) -> pyarrow.Table:
|
|
46
|
+
def remaining_rows(self):
|
|
41
47
|
pass
|
|
42
48
|
|
|
43
49
|
|
|
@@ -76,13 +82,15 @@ class ResultSetQueueFactory(ABC):
|
|
|
76
82
|
)
|
|
77
83
|
return ArrowQueue(converted_arrow_table, n_valid_rows)
|
|
78
84
|
elif row_set_type == TSparkRowSetType.COLUMN_BASED_SET:
|
|
79
|
-
|
|
85
|
+
column_table, column_names = convert_column_based_set_to_column_table(
|
|
80
86
|
t_row_set.columns, description
|
|
81
87
|
)
|
|
82
|
-
|
|
83
|
-
|
|
88
|
+
|
|
89
|
+
converted_column_table = convert_to_assigned_datatypes_in_column_table(
|
|
90
|
+
column_table, description
|
|
84
91
|
)
|
|
85
|
-
|
|
92
|
+
|
|
93
|
+
return ColumnQueue(ColumnTable(converted_column_table, column_names))
|
|
86
94
|
elif row_set_type == TSparkRowSetType.URL_BASED_SET:
|
|
87
95
|
return CloudFetchQueue(
|
|
88
96
|
schema_bytes=arrow_schema_bytes,
|
|
@@ -97,10 +105,63 @@ class ResultSetQueueFactory(ABC):
|
|
|
97
105
|
raise AssertionError("Row set type is not valid")
|
|
98
106
|
|
|
99
107
|
|
|
108
|
+
class ColumnTable:
|
|
109
|
+
def __init__(self, column_table, column_names):
|
|
110
|
+
self.column_table = column_table
|
|
111
|
+
self.column_names = column_names
|
|
112
|
+
|
|
113
|
+
@property
|
|
114
|
+
def num_rows(self):
|
|
115
|
+
if len(self.column_table) == 0:
|
|
116
|
+
return 0
|
|
117
|
+
else:
|
|
118
|
+
return len(self.column_table[0])
|
|
119
|
+
|
|
120
|
+
@property
|
|
121
|
+
def num_columns(self):
|
|
122
|
+
return len(self.column_names)
|
|
123
|
+
|
|
124
|
+
def get_item(self, col_index, row_index):
|
|
125
|
+
return self.column_table[col_index][row_index]
|
|
126
|
+
|
|
127
|
+
def slice(self, curr_index, length):
|
|
128
|
+
sliced_column_table = [
|
|
129
|
+
column[curr_index : curr_index + length] for column in self.column_table
|
|
130
|
+
]
|
|
131
|
+
return ColumnTable(sliced_column_table, self.column_names)
|
|
132
|
+
|
|
133
|
+
def __eq__(self, other):
|
|
134
|
+
return (
|
|
135
|
+
self.column_table == other.column_table
|
|
136
|
+
and self.column_names == other.column_names
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class ColumnQueue(ResultSetQueue):
|
|
141
|
+
def __init__(self, column_table: ColumnTable):
|
|
142
|
+
self.column_table = column_table
|
|
143
|
+
self.cur_row_index = 0
|
|
144
|
+
self.n_valid_rows = column_table.num_rows
|
|
145
|
+
|
|
146
|
+
def next_n_rows(self, num_rows):
|
|
147
|
+
length = min(num_rows, self.n_valid_rows - self.cur_row_index)
|
|
148
|
+
|
|
149
|
+
slice = self.column_table.slice(self.cur_row_index, length)
|
|
150
|
+
self.cur_row_index += slice.num_rows
|
|
151
|
+
return slice
|
|
152
|
+
|
|
153
|
+
def remaining_rows(self):
|
|
154
|
+
slice = self.column_table.slice(
|
|
155
|
+
self.cur_row_index, self.n_valid_rows - self.cur_row_index
|
|
156
|
+
)
|
|
157
|
+
self.cur_row_index += slice.num_rows
|
|
158
|
+
return slice
|
|
159
|
+
|
|
160
|
+
|
|
100
161
|
class ArrowQueue(ResultSetQueue):
|
|
101
162
|
def __init__(
|
|
102
163
|
self,
|
|
103
|
-
arrow_table: pyarrow.Table,
|
|
164
|
+
arrow_table: "pyarrow.Table",
|
|
104
165
|
n_valid_rows: int,
|
|
105
166
|
start_row_index: int = 0,
|
|
106
167
|
):
|
|
@@ -115,7 +176,7 @@ class ArrowQueue(ResultSetQueue):
|
|
|
115
176
|
self.arrow_table = arrow_table
|
|
116
177
|
self.n_valid_rows = n_valid_rows
|
|
117
178
|
|
|
118
|
-
def next_n_rows(self, num_rows: int) -> pyarrow.Table:
|
|
179
|
+
def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
|
|
119
180
|
"""Get upto the next n rows of the Arrow dataframe"""
|
|
120
181
|
length = min(num_rows, self.n_valid_rows - self.cur_row_index)
|
|
121
182
|
# Note that the table.slice API is not the same as Python's slice
|
|
@@ -124,7 +185,7 @@ class ArrowQueue(ResultSetQueue):
|
|
|
124
185
|
self.cur_row_index += slice.num_rows
|
|
125
186
|
return slice
|
|
126
187
|
|
|
127
|
-
def remaining_rows(self) -> pyarrow.Table:
|
|
188
|
+
def remaining_rows(self) -> "pyarrow.Table":
|
|
128
189
|
slice = self.arrow_table.slice(
|
|
129
190
|
self.cur_row_index, self.n_valid_rows - self.cur_row_index
|
|
130
191
|
)
|
|
@@ -184,7 +245,7 @@ class CloudFetchQueue(ResultSetQueue):
|
|
|
184
245
|
self.table = self._create_next_table()
|
|
185
246
|
self.table_row_index = 0
|
|
186
247
|
|
|
187
|
-
def next_n_rows(self, num_rows: int) -> pyarrow.Table:
|
|
248
|
+
def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
|
|
188
249
|
"""
|
|
189
250
|
Get up to the next n rows of the cloud fetch Arrow dataframes.
|
|
190
251
|
|
|
@@ -216,7 +277,7 @@ class CloudFetchQueue(ResultSetQueue):
|
|
|
216
277
|
logger.debug("CloudFetchQueue: collected {} next rows".format(results.num_rows))
|
|
217
278
|
return results
|
|
218
279
|
|
|
219
|
-
def remaining_rows(self) -> pyarrow.Table:
|
|
280
|
+
def remaining_rows(self) -> "pyarrow.Table":
|
|
220
281
|
"""
|
|
221
282
|
Get all remaining rows of the cloud fetch Arrow dataframes.
|
|
222
283
|
|
|
@@ -237,7 +298,7 @@ class CloudFetchQueue(ResultSetQueue):
|
|
|
237
298
|
self.table_row_index = 0
|
|
238
299
|
return results
|
|
239
300
|
|
|
240
|
-
def _create_next_table(self) -> Union[pyarrow.Table, None]:
|
|
301
|
+
def _create_next_table(self) -> Union["pyarrow.Table", None]:
|
|
241
302
|
logger.debug(
|
|
242
303
|
"CloudFetchQueue: Trying to get downloaded file for row {}".format(
|
|
243
304
|
self.start_row_index
|
|
@@ -276,7 +337,7 @@ class CloudFetchQueue(ResultSetQueue):
|
|
|
276
337
|
|
|
277
338
|
return arrow_table
|
|
278
339
|
|
|
279
|
-
def _create_empty_table(self) -> pyarrow.Table:
|
|
340
|
+
def _create_empty_table(self) -> "pyarrow.Table":
|
|
280
341
|
# Create a 0-row table with just the schema bytes
|
|
281
342
|
return create_arrow_table_from_arrow_file(self.schema_bytes, self.description)
|
|
282
343
|
|
|
@@ -357,7 +418,12 @@ class RequestErrorInfo(
|
|
|
357
418
|
user_friendly_error_message = "{}: {}".format(
|
|
358
419
|
user_friendly_error_message, self.error_message
|
|
359
420
|
)
|
|
360
|
-
|
|
421
|
+
try:
|
|
422
|
+
error_context = str(self.error)
|
|
423
|
+
except:
|
|
424
|
+
error_context = DEFAULT_ERROR_CONTEXT
|
|
425
|
+
|
|
426
|
+
return user_friendly_error_message + ". " + error_context
|
|
361
427
|
|
|
362
428
|
|
|
363
429
|
# Taken from PyHive
|
|
@@ -515,7 +581,9 @@ def transform_paramstyle(
|
|
|
515
581
|
return output
|
|
516
582
|
|
|
517
583
|
|
|
518
|
-
def create_arrow_table_from_arrow_file(file_bytes: bytes, description) -> pyarrow.Table:
|
|
584
|
+
def create_arrow_table_from_arrow_file(
|
|
585
|
+
file_bytes: bytes, description
|
|
586
|
+
) -> "pyarrow.Table":
|
|
519
587
|
arrow_table = convert_arrow_based_file_to_arrow_table(file_bytes)
|
|
520
588
|
return convert_decimals_in_arrow_table(arrow_table, description)
|
|
521
589
|
|
|
@@ -542,7 +610,7 @@ def convert_arrow_based_set_to_arrow_table(arrow_batches, lz4_compressed, schema
|
|
|
542
610
|
return arrow_table, n_rows
|
|
543
611
|
|
|
544
612
|
|
|
545
|
-
def convert_decimals_in_arrow_table(table, description) -> pyarrow.Table:
|
|
613
|
+
def convert_decimals_in_arrow_table(table, description) -> "pyarrow.Table":
|
|
546
614
|
for i, col in enumerate(table.itercolumns()):
|
|
547
615
|
if description[i][1] == "decimal":
|
|
548
616
|
decimal_col = col.to_pandas().apply(
|
|
@@ -560,6 +628,37 @@ def convert_decimals_in_arrow_table(table, description) -> pyarrow.Table:
|
|
|
560
628
|
return table
|
|
561
629
|
|
|
562
630
|
|
|
631
|
+
def convert_to_assigned_datatypes_in_column_table(column_table, description):
|
|
632
|
+
|
|
633
|
+
converted_column_table = []
|
|
634
|
+
for i, col in enumerate(column_table):
|
|
635
|
+
if description[i][1] == "decimal":
|
|
636
|
+
converted_column_table.append(
|
|
637
|
+
tuple(v if v is None else Decimal(v) for v in col)
|
|
638
|
+
)
|
|
639
|
+
elif description[i][1] == "date":
|
|
640
|
+
converted_column_table.append(
|
|
641
|
+
tuple(v if v is None else datetime.date.fromisoformat(v) for v in col)
|
|
642
|
+
)
|
|
643
|
+
elif description[i][1] == "timestamp":
|
|
644
|
+
converted_column_table.append(
|
|
645
|
+
tuple(
|
|
646
|
+
(
|
|
647
|
+
v
|
|
648
|
+
if v is None
|
|
649
|
+
else datetime.datetime.strptime(
|
|
650
|
+
v, "%Y-%m-%d %H:%M:%S.%f"
|
|
651
|
+
).replace(tzinfo=pytz.UTC)
|
|
652
|
+
)
|
|
653
|
+
for v in col
|
|
654
|
+
)
|
|
655
|
+
)
|
|
656
|
+
else:
|
|
657
|
+
converted_column_table.append(col)
|
|
658
|
+
|
|
659
|
+
return converted_column_table
|
|
660
|
+
|
|
661
|
+
|
|
563
662
|
def convert_column_based_set_to_arrow_table(columns, description):
|
|
564
663
|
arrow_table = pyarrow.Table.from_arrays(
|
|
565
664
|
[_convert_column_to_arrow_array(c) for c in columns],
|
|
@@ -571,6 +670,13 @@ def convert_column_based_set_to_arrow_table(columns, description):
|
|
|
571
670
|
return arrow_table, arrow_table.num_rows
|
|
572
671
|
|
|
573
672
|
|
|
673
|
+
def convert_column_based_set_to_column_table(columns, description):
|
|
674
|
+
column_names = [c[0] for c in description]
|
|
675
|
+
column_table = [_convert_column_to_list(c) for c in columns]
|
|
676
|
+
|
|
677
|
+
return column_table, column_names
|
|
678
|
+
|
|
679
|
+
|
|
574
680
|
def _convert_column_to_arrow_array(t_col):
|
|
575
681
|
"""
|
|
576
682
|
Return a pyarrow array from the values in a TColumn instance.
|
|
@@ -595,6 +701,26 @@ def _convert_column_to_arrow_array(t_col):
|
|
|
595
701
|
raise OperationalError("Empty TColumn instance {}".format(t_col))
|
|
596
702
|
|
|
597
703
|
|
|
704
|
+
def _convert_column_to_list(t_col):
|
|
705
|
+
SUPPORTED_FIELD_TYPES = (
|
|
706
|
+
"boolVal",
|
|
707
|
+
"byteVal",
|
|
708
|
+
"i16Val",
|
|
709
|
+
"i32Val",
|
|
710
|
+
"i64Val",
|
|
711
|
+
"doubleVal",
|
|
712
|
+
"stringVal",
|
|
713
|
+
"binaryVal",
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
for field in SUPPORTED_FIELD_TYPES:
|
|
717
|
+
wrapper = getattr(t_col, field)
|
|
718
|
+
if wrapper:
|
|
719
|
+
return _create_python_tuple(wrapper)
|
|
720
|
+
|
|
721
|
+
raise OperationalError("Empty TColumn instance {}".format(t_col))
|
|
722
|
+
|
|
723
|
+
|
|
598
724
|
def _create_arrow_array(t_col_value_wrapper, arrow_type):
|
|
599
725
|
result = t_col_value_wrapper.values
|
|
600
726
|
nulls = t_col_value_wrapper.nulls # bitfield describing which values are null
|
|
@@ -609,3 +735,19 @@ def _create_arrow_array(t_col_value_wrapper, arrow_type):
|
|
|
609
735
|
result[i] = None
|
|
610
736
|
|
|
611
737
|
return pyarrow.array(result, type=arrow_type)
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def _create_python_tuple(t_col_value_wrapper):
|
|
741
|
+
result = t_col_value_wrapper.values
|
|
742
|
+
nulls = t_col_value_wrapper.nulls # bitfield describing which values are null
|
|
743
|
+
assert isinstance(nulls, bytes)
|
|
744
|
+
|
|
745
|
+
# The number of bits in nulls can be both larger or smaller than the number of
|
|
746
|
+
# elements in result, so take the minimum of both to iterate over.
|
|
747
|
+
length = min(len(result), len(nulls) * 8)
|
|
748
|
+
|
|
749
|
+
for i in range(length):
|
|
750
|
+
if nulls[i >> 3] & BIT_MASKS[i & 0x7]:
|
|
751
|
+
result[i] = None
|
|
752
|
+
|
|
753
|
+
return tuple(result)
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/auth.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/oauth.py
RENAMED
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/auth/retry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sql/types.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/_ddl.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/base.py
RENAMED
|
File without changes
|
{databricks_sql_connector-3.4.0 → databricks_sql_connector-3.5.0}/src/databricks/sqlalchemy/py.typed
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|