databricks-sql-connector 3.5.0__tar.gz → 3.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/CHANGELOG.md +12 -0
  2. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/PKG-INFO +4 -5
  3. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/pyproject.toml +4 -4
  4. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/__init__.py +1 -1
  5. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/retry.py +16 -10
  6. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/client.py +109 -0
  7. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/cloudfetch/downloader.py +1 -0
  8. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/thrift_backend.py +84 -8
  9. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/LICENSE +0 -0
  10. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/README.md +0 -0
  11. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/__init__.py +0 -0
  12. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/__init__.py +0 -0
  13. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/auth.py +0 -0
  14. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/authenticators.py +0 -0
  15. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/endpoint.py +0 -0
  16. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/oauth.py +0 -0
  17. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/oauth_http_handler.py +0 -0
  18. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/auth/thrift_http_client.py +0 -0
  19. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/cloudfetch/download_manager.py +0 -0
  20. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/exc.py +0 -0
  21. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/experimental/__init__.py +0 -0
  22. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/experimental/oauth_persistence.py +0 -0
  23. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/parameters/__init__.py +0 -0
  24. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/parameters/native.py +0 -0
  25. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/parameters/py.typed +0 -0
  26. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/py.typed +0 -0
  27. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService-remote +0 -0
  28. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/thrift_api/TCLIService/TCLIService.py +0 -0
  29. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/thrift_api/TCLIService/__init__.py +0 -0
  30. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/thrift_api/TCLIService/constants.py +0 -0
  31. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/thrift_api/TCLIService/ttypes.py +0 -0
  32. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/thrift_api/__init__.py +0 -0
  33. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/types.py +0 -0
  34. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sql/utils.py +0 -0
  35. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/README.sqlalchemy.md +0 -0
  36. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/README.tests.md +0 -0
  37. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/__init__.py +0 -0
  38. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/_ddl.py +0 -0
  39. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/_parse.py +0 -0
  40. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/_types.py +0 -0
  41. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/base.py +0 -0
  42. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/py.typed +0 -0
  43. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/requirements.py +0 -0
  44. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/setup.cfg +0 -0
  45. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/_extra.py +0 -0
  46. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/_future.py +0 -0
  47. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/_regression.py +0 -0
  48. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/_unsupported.py +0 -0
  49. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/conftest.py +0 -0
  50. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/overrides/_componentreflectiontest.py +0 -0
  51. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/overrides/_ctetest.py +0 -0
  52. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test/test_suite.py +0 -0
  53. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test_local/__init__.py +0 -0
  54. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test_local/conftest.py +0 -0
  55. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test_local/e2e/MOCK_DATA.xlsx +0 -0
  56. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test_local/e2e/test_basic.py +0 -0
  57. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test_local/test_ddl.py +0 -0
  58. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test_local/test_parsing.py +0 -0
  59. {databricks_sql_connector-3.5.0 → databricks_sql_connector-3.7.0}/src/databricks/sqlalchemy/test_local/test_types.py +0 -0
@@ -1,5 +1,17 @@
1
1
  # Release History
2
2
 
3
+ # 3.7.0 (2024-12-23)
4
+
5
+ - Fix: Incorrect number of rows fetched in inline results when fetching results with FETCH_NEXT orientation (databricks/databricks-sql-python#479 by @jprakash-db)
6
+ - Updated the doc to specify native parameters are not supported in PUT operation (databricks/databricks-sql-python#477 by @jprakash-db)
7
+ - Relax `pyarrow` and `numpy` pin (databricks/databricks-sql-python#452 by @arredond)
8
+ - Feature: Support for async execute has been added (databricks/databricks-sql-python#463 by @jprakash-db)
9
+ - Updated the HTTP retry logic to be similar to the other Databricks drivers (databricks/databricks-sql-python#467 by @jprakash-db)
10
+
11
+ # 3.6.0 (2024-10-25)
12
+
13
+ - Support encryption headers in the cloud fetch request (https://github.com/databricks/databricks-sql-python/pull/460 by @jackyhu-db)
14
+
3
15
  # 3.5.0 (2024-10-18)
4
16
 
5
17
  - Create a non pyarrow flow to handle small results for the column set (databricks/databricks-sql-python#440 by @jprakash-db)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: databricks-sql-connector
3
- Version: 3.5.0
3
+ Version: 3.7.0
4
4
  Summary: Databricks SQL Connector for Python
5
5
  License: Apache-2.0
6
6
  Author: Databricks
@@ -13,17 +13,16 @@ Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
- Classifier: Programming Language :: Python :: 3.13
17
16
  Provides-Extra: alembic
18
17
  Provides-Extra: sqlalchemy
19
18
  Requires-Dist: alembic (>=1.0.11,<2.0.0) ; extra == "alembic"
20
19
  Requires-Dist: lz4 (>=4.0.2,<5.0.0)
21
- Requires-Dist: numpy (>=1.16.6,<2.0.0) ; python_version >= "3.8" and python_version < "3.11"
22
- Requires-Dist: numpy (>=1.23.4,<2.0.0) ; python_version >= "3.11"
20
+ Requires-Dist: numpy (>=1.16.6) ; python_version >= "3.8" and python_version < "3.11"
21
+ Requires-Dist: numpy (>=1.23.4) ; python_version >= "3.11"
23
22
  Requires-Dist: oauthlib (>=3.1.0,<4.0.0)
24
23
  Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
25
24
  Requires-Dist: pandas (>=1.2.5,<2.3.0) ; python_version >= "3.8"
26
- Requires-Dist: pyarrow (>=14.0.1,<17)
25
+ Requires-Dist: pyarrow (>=14.0.1)
27
26
  Requires-Dist: requests (>=2.18.1,<3.0.0)
28
27
  Requires-Dist: sqlalchemy (>=2.0.21) ; extra == "sqlalchemy" or extra == "alembic"
29
28
  Requires-Dist: thrift (>=0.16.0,<0.21.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "databricks-sql-connector"
3
- version = "3.5.0"
3
+ version = "3.7.0"
4
4
  description = "Databricks SQL Connector for Python"
5
5
  authors = ["Databricks <databricks-sql-connector-maintainers@databricks.com>"]
6
6
  license = "Apache-2.0"
@@ -14,14 +14,14 @@ thrift = ">=0.16.0,<0.21.0"
14
14
  pandas = [
15
15
  { version = ">=1.2.5,<2.3.0", python = ">=3.8" }
16
16
  ]
17
- pyarrow = ">=14.0.1,<17"
17
+ pyarrow = ">=14.0.1"
18
18
 
19
19
  lz4 = "^4.0.2"
20
20
  requests = "^2.18.1"
21
21
  oauthlib = "^3.1.0"
22
22
  numpy = [
23
- { version = "^1.16.6", python = ">=3.8,<3.11" },
24
- { version = "^1.23.4", python = ">=3.11" },
23
+ { version = ">=1.16.6", python = ">=3.8,<3.11" },
24
+ { version = ">=1.23.4", python = ">=3.11" },
25
25
  ]
26
26
  sqlalchemy = { version = ">=2.0.21", optional = true }
27
27
  openpyxl = "^3.0.10"
@@ -68,7 +68,7 @@ DATETIME = DBAPITypeObject("timestamp")
68
68
  DATE = DBAPITypeObject("date")
69
69
  ROWID = DBAPITypeObject()
70
70
 
71
- __version__ = "3.5.0"
71
+ __version__ = "3.7.0"
72
72
  USER_AGENT_NAME = "PyDatabricksSqlConnector"
73
73
 
74
74
  # These two functions are pyhive legacy
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import random
2
3
  import time
3
4
  import typing
4
5
  from enum import Enum
@@ -285,25 +286,30 @@ class DatabricksRetryPolicy(Retry):
285
286
  """
286
287
  retry_after = self.get_retry_after(response)
287
288
  if retry_after:
288
- backoff = self.get_backoff_time()
289
- proposed_wait = max(backoff, retry_after)
290
- self.check_proposed_wait(proposed_wait)
291
- time.sleep(proposed_wait)
292
- return True
289
+ proposed_wait = retry_after
290
+ else:
291
+ proposed_wait = self.get_backoff_time()
293
292
 
294
- return False
293
+ proposed_wait = min(proposed_wait, self.delay_max)
294
+ self.check_proposed_wait(proposed_wait)
295
+ time.sleep(proposed_wait)
296
+ return True
295
297
 
296
298
  def get_backoff_time(self) -> float:
297
- """Calls urllib3's built-in get_backoff_time.
299
+ """
300
+ This method implements the exponential backoff algorithm to calculate the delay between retries.
298
301
 
299
302
  Never returns a value larger than self.delay_max
300
303
  A MaxRetryDurationError will be raised if the calculated backoff would exceed self.max_attempts_duration
301
304
 
302
- Note: within urllib3, a backoff is only calculated in cases where a Retry-After header is not present
303
- in the previous unsuccessful request and `self.respect_retry_after_header` is True (which is always true)
305
+ :return:
304
306
  """
305
307
 
306
- proposed_backoff = super().get_backoff_time()
308
+ current_attempt = self.stop_after_attempts_count - int(self.total or 0)
309
+ proposed_backoff = (2**current_attempt) * self.delay_min
310
+ if self.backoff_jitter != 0.0:
311
+ proposed_backoff += random.random() * self.backoff_jitter
312
+
307
313
  proposed_backoff = min(proposed_backoff, self.delay_max)
308
314
  self.check_proposed_wait(proposed_backoff)
309
315
 
@@ -1,3 +1,4 @@
1
+ import time
1
2
  from typing import Dict, Tuple, List, Optional, Any, Union, Sequence
2
3
 
3
4
  import pandas
@@ -47,6 +48,7 @@ from databricks.sql.experimental.oauth_persistence import OAuthPersistence
47
48
 
48
49
  from databricks.sql.thrift_api.TCLIService.ttypes import (
49
50
  TSparkParameter,
51
+ TOperationState,
50
52
  )
51
53
 
52
54
 
@@ -430,6 +432,8 @@ class Cursor:
430
432
  self.escaper = ParamEscaper()
431
433
  self.lastrowid = None
432
434
 
435
+ self.ASYNC_DEFAULT_POLLING_INTERVAL = 2
436
+
433
437
  # The ideal return type for this method is perhaps Self, but that was not added until 3.11, and we support pre-3.11 pythons, currently.
434
438
  def __enter__(self) -> "Cursor":
435
439
  return self
@@ -796,6 +800,7 @@ class Cursor:
796
800
  cursor=self,
797
801
  use_cloud_fetch=self.connection.use_cloud_fetch,
798
802
  parameters=prepared_params,
803
+ async_op=False,
799
804
  )
800
805
  self.active_result_set = ResultSet(
801
806
  self.connection,
@@ -803,6 +808,7 @@ class Cursor:
803
808
  self.thrift_backend,
804
809
  self.buffer_size_bytes,
805
810
  self.arraysize,
811
+ self.connection.use_cloud_fetch,
806
812
  )
807
813
 
808
814
  if execute_response.is_staging_operation:
@@ -812,6 +818,106 @@ class Cursor:
812
818
 
813
819
  return self
814
820
 
821
+ def execute_async(
822
+ self,
823
+ operation: str,
824
+ parameters: Optional[TParameterCollection] = None,
825
+ ) -> "Cursor":
826
+ """
827
+
828
+ Execute a query and do not wait for it to complete and just move ahead
829
+
830
+ :param operation:
831
+ :param parameters:
832
+ :return:
833
+ """
834
+ param_approach = self._determine_parameter_approach(parameters)
835
+ if param_approach == ParameterApproach.NONE:
836
+ prepared_params = NO_NATIVE_PARAMS
837
+ prepared_operation = operation
838
+
839
+ elif param_approach == ParameterApproach.INLINE:
840
+ prepared_operation, prepared_params = self._prepare_inline_parameters(
841
+ operation, parameters
842
+ )
843
+ elif param_approach == ParameterApproach.NATIVE:
844
+ normalized_parameters = self._normalize_tparametercollection(parameters)
845
+ param_structure = self._determine_parameter_structure(normalized_parameters)
846
+ transformed_operation = transform_paramstyle(
847
+ operation, normalized_parameters, param_structure
848
+ )
849
+ prepared_operation, prepared_params = self._prepare_native_parameters(
850
+ transformed_operation, normalized_parameters, param_structure
851
+ )
852
+
853
+ self._check_not_closed()
854
+ self._close_and_clear_active_result_set()
855
+ self.thrift_backend.execute_command(
856
+ operation=prepared_operation,
857
+ session_handle=self.connection._session_handle,
858
+ max_rows=self.arraysize,
859
+ max_bytes=self.buffer_size_bytes,
860
+ lz4_compression=self.connection.lz4_compression,
861
+ cursor=self,
862
+ use_cloud_fetch=self.connection.use_cloud_fetch,
863
+ parameters=prepared_params,
864
+ async_op=True,
865
+ )
866
+
867
+ return self
868
+
869
+ def get_query_state(self) -> "TOperationState":
870
+ """
871
+ Get the state of the async executing query or basically poll the status of the query
872
+
873
+ :return:
874
+ """
875
+ self._check_not_closed()
876
+ return self.thrift_backend.get_query_state(self.active_op_handle)
877
+
878
+ def get_async_execution_result(self):
879
+ """
880
+
881
+ Checks for the status of the async executing query and fetches the result if the query is finished
882
+ Otherwise it will keep polling the status of the query till there is a Not pending state
883
+ :return:
884
+ """
885
+ self._check_not_closed()
886
+
887
+ def is_executing(operation_state) -> "bool":
888
+ return not operation_state or operation_state in [
889
+ ttypes.TOperationState.RUNNING_STATE,
890
+ ttypes.TOperationState.PENDING_STATE,
891
+ ]
892
+
893
+ while is_executing(self.get_query_state()):
894
+ # Poll after some default time
895
+ time.sleep(self.ASYNC_DEFAULT_POLLING_INTERVAL)
896
+
897
+ operation_state = self.get_query_state()
898
+ if operation_state == ttypes.TOperationState.FINISHED_STATE:
899
+ execute_response = self.thrift_backend.get_execution_result(
900
+ self.active_op_handle, self
901
+ )
902
+ self.active_result_set = ResultSet(
903
+ self.connection,
904
+ execute_response,
905
+ self.thrift_backend,
906
+ self.buffer_size_bytes,
907
+ self.arraysize,
908
+ )
909
+
910
+ if execute_response.is_staging_operation:
911
+ self._handle_staging_operation(
912
+ staging_allowed_local_path=self.thrift_backend.staging_allowed_local_path
913
+ )
914
+
915
+ return self
916
+ else:
917
+ raise Error(
918
+ f"get_execution_result failed with Operation status {operation_state}"
919
+ )
920
+
815
921
  def executemany(self, operation, seq_of_parameters):
816
922
  """
817
923
  Execute the operation once for every set of passed in parameters.
@@ -1097,6 +1203,7 @@ class ResultSet:
1097
1203
  thrift_backend: ThriftBackend,
1098
1204
  result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
1099
1205
  arraysize: int = 10000,
1206
+ use_cloud_fetch: bool = True,
1100
1207
  ):
1101
1208
  """
1102
1209
  A ResultSet manages the results of a single command.
@@ -1118,6 +1225,7 @@ class ResultSet:
1118
1225
  self.description = execute_response.description
1119
1226
  self._arrow_schema_bytes = execute_response.arrow_schema_bytes
1120
1227
  self._next_row_index = 0
1228
+ self._use_cloud_fetch = use_cloud_fetch
1121
1229
 
1122
1230
  if execute_response.arrow_queue:
1123
1231
  # In this case the server has taken the fast path and returned an initial batch of
@@ -1145,6 +1253,7 @@ class ResultSet:
1145
1253
  lz4_compressed=self.lz4_compressed,
1146
1254
  arrow_schema_bytes=self._arrow_schema_bytes,
1147
1255
  description=self.description,
1256
+ use_cloud_fetch=self._use_cloud_fetch,
1148
1257
  )
1149
1258
  self.results = results
1150
1259
  self.has_more_rows = has_more_rows
@@ -100,6 +100,7 @@ class ResultSetDownloadHandler:
100
100
  self.link.fileLink,
101
101
  timeout=self.settings.download_timeout,
102
102
  verify=self._ssl_options.tls_verify,
103
+ headers=self.link.httpHeaders
103
104
  # TODO: Pass cert from `self._ssl_options`
104
105
  )
105
106
  response.raise_for_status()
@@ -7,6 +7,8 @@ import uuid
7
7
  import threading
8
8
  from typing import List, Union
9
9
 
10
+ from databricks.sql.thrift_api.TCLIService.ttypes import TOperationState
11
+
10
12
  try:
11
13
  import pyarrow
12
14
  except ImportError:
@@ -64,8 +66,8 @@ DEFAULT_SOCKET_TIMEOUT = float(900)
64
66
  # - 900s attempts-duration lines up w ODBC/JDBC drivers (for cluster startup > 10 mins)
65
67
  _retry_policy = { # (type, default, min, max)
66
68
  "_retry_delay_min": (float, 1, 0.1, 60),
67
- "_retry_delay_max": (float, 60, 5, 3600),
68
- "_retry_stop_after_attempts_count": (int, 30, 1, 60),
69
+ "_retry_delay_max": (float, 30, 5, 3600),
70
+ "_retry_stop_after_attempts_count": (int, 5, 1, 60),
69
71
  "_retry_stop_after_attempts_duration": (float, 900, 1, 86400),
70
72
  "_retry_delay_default": (float, 5, 1, 60),
71
73
  }
@@ -319,7 +321,7 @@ class ThriftBackend:
319
321
 
320
322
  # FUTURE: Consider moving to https://github.com/litl/backoff or
321
323
  # https://github.com/jd/tenacity for retry logic.
322
- def make_request(self, method, request):
324
+ def make_request(self, method, request, retryable=True):
323
325
  """Execute given request, attempting retries when
324
326
  1. Receiving HTTP 429/503 from server
325
327
  2. OSError is raised during a GetOperationStatus
@@ -458,7 +460,7 @@ class ThriftBackend:
458
460
  # return on success
459
461
  # if available: bounded delay and retry
460
462
  # if not: raise error
461
- max_attempts = self._retry_stop_after_attempts_count
463
+ max_attempts = self._retry_stop_after_attempts_count if retryable else 1
462
464
 
463
465
  # use index-1 counting for logging/human consistency
464
466
  for attempt in range(1, max_attempts + 1):
@@ -769,6 +771,63 @@ class ThriftBackend:
769
771
  arrow_schema_bytes=schema_bytes,
770
772
  )
771
773
 
774
+ def get_execution_result(self, op_handle, cursor):
775
+
776
+ assert op_handle is not None
777
+
778
+ req = ttypes.TFetchResultsReq(
779
+ operationHandle=ttypes.TOperationHandle(
780
+ op_handle.operationId,
781
+ op_handle.operationType,
782
+ False,
783
+ op_handle.modifiedRowCount,
784
+ ),
785
+ maxRows=cursor.arraysize,
786
+ maxBytes=cursor.buffer_size_bytes,
787
+ orientation=ttypes.TFetchOrientation.FETCH_NEXT,
788
+ includeResultSetMetadata=True,
789
+ )
790
+
791
+ resp = self.make_request(self._client.FetchResults, req)
792
+
793
+ t_result_set_metadata_resp = resp.resultSetMetadata
794
+
795
+ lz4_compressed = t_result_set_metadata_resp.lz4Compressed
796
+ is_staging_operation = t_result_set_metadata_resp.isStagingOperation
797
+ has_more_rows = resp.hasMoreRows
798
+ description = self._hive_schema_to_description(
799
+ t_result_set_metadata_resp.schema
800
+ )
801
+
802
+ schema_bytes = (
803
+ t_result_set_metadata_resp.arrowSchema
804
+ or self._hive_schema_to_arrow_schema(t_result_set_metadata_resp.schema)
805
+ .serialize()
806
+ .to_pybytes()
807
+ )
808
+
809
+ queue = ResultSetQueueFactory.build_queue(
810
+ row_set_type=resp.resultSetMetadata.resultFormat,
811
+ t_row_set=resp.results,
812
+ arrow_schema_bytes=schema_bytes,
813
+ max_download_threads=self.max_download_threads,
814
+ lz4_compressed=lz4_compressed,
815
+ description=description,
816
+ ssl_options=self._ssl_options,
817
+ )
818
+
819
+ return ExecuteResponse(
820
+ arrow_queue=queue,
821
+ status=resp.status,
822
+ has_been_closed_server_side=False,
823
+ has_more_rows=has_more_rows,
824
+ lz4_compressed=lz4_compressed,
825
+ is_staging_operation=is_staging_operation,
826
+ command_handle=op_handle,
827
+ description=description,
828
+ arrow_schema_bytes=schema_bytes,
829
+ )
830
+
772
831
  def _wait_until_command_done(self, op_handle, initial_operation_status_resp):
773
832
  if initial_operation_status_resp:
774
833
  self._check_command_not_in_error_or_closed_state(
@@ -787,6 +846,12 @@ class ThriftBackend:
787
846
  self._check_command_not_in_error_or_closed_state(op_handle, poll_resp)
788
847
  return operation_state
789
848
 
849
+ def get_query_state(self, op_handle) -> "TOperationState":
850
+ poll_resp = self._poll_for_status(op_handle)
851
+ operation_state = poll_resp.operationState
852
+ self._check_command_not_in_error_or_closed_state(op_handle, poll_resp)
853
+ return operation_state
854
+
790
855
  @staticmethod
791
856
  def _check_direct_results_for_error(t_spark_direct_results):
792
857
  if t_spark_direct_results:
@@ -817,6 +882,7 @@ class ThriftBackend:
817
882
  cursor,
818
883
  use_cloud_fetch=True,
819
884
  parameters=[],
885
+ async_op=False,
820
886
  ):
821
887
  assert session_handle is not None
822
888
 
@@ -846,7 +912,11 @@ class ThriftBackend:
846
912
  parameters=parameters,
847
913
  )
848
914
  resp = self.make_request(self._client.ExecuteStatement, req)
849
- return self._handle_execute_response(resp, cursor)
915
+
916
+ if async_op:
917
+ self._handle_execute_response_async(resp, cursor)
918
+ else:
919
+ return self._handle_execute_response(resp, cursor)
850
920
 
851
921
  def get_catalogs(self, session_handle, max_rows, max_bytes, cursor):
852
922
  assert session_handle is not None
@@ -945,6 +1015,10 @@ class ThriftBackend:
945
1015
 
946
1016
  return self._results_message_to_execute_response(resp, final_operation_state)
947
1017
 
1018
+ def _handle_execute_response_async(self, resp, cursor):
1019
+ cursor.active_op_handle = resp.operationHandle
1020
+ self._check_direct_results_for_error(resp.directResults)
1021
+
948
1022
  def fetch_results(
949
1023
  self,
950
1024
  op_handle,
@@ -954,6 +1028,7 @@ class ThriftBackend:
954
1028
  lz4_compressed,
955
1029
  arrow_schema_bytes,
956
1030
  description,
1031
+ use_cloud_fetch=True,
957
1032
  ):
958
1033
  assert op_handle is not None
959
1034
 
@@ -970,10 +1045,11 @@ class ThriftBackend:
970
1045
  includeResultSetMetadata=True,
971
1046
  )
972
1047
 
973
- resp = self.make_request(self._client.FetchResults, req)
1048
+ # Fetch results in Inline mode with FETCH_NEXT orientation are not idempotent and hence not retried
1049
+ resp = self.make_request(self._client.FetchResults, req, use_cloud_fetch)
974
1050
  if resp.results.startRowOffset > expected_row_start_offset:
975
- logger.warning(
976
- "Expected results to start from {} but they instead start at {}".format(
1051
+ raise DataError(
1052
+ "fetch_results failed due to inconsistency in the state between the client and the server. Expected results to start from {} but they instead start at {}, some result batches must have been skipped".format(
977
1053
  expected_row_start_offset, resp.results.startRowOffset
978
1054
  )
979
1055
  )