dataproc-spark-connect 1.0.0rc1.tar.gz → 1.0.0rc3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/PKG-INFO +1 -1
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/dataproc_spark_connect.egg-info/PKG-INFO +1 -1
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/environment.py +13 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/session.py +131 -4
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/setup.py +1 -1
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/LICENSE +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/README.md +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/dataproc_spark_connect.egg-info/SOURCES.txt +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/dataproc_spark_connect.egg-info/requires.txt +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/__init__.py +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/client/core.py +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/client/proxy.py +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/exceptions.py +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/google/cloud/dataproc_spark_connect/pypi_artifacts.py +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/pyproject.toml +0 -0
- {dataproc_spark_connect-1.0.0rc1 → dataproc_spark_connect-1.0.0rc3}/setup.cfg +0 -0
google/cloud/dataproc_spark_connect/environment.py

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import os
+import sys
 from typing import Callable, Tuple, List
 
 
```
```diff
@@ -46,6 +47,18 @@ def is_jetbrains_ide() -> bool:
     return "jetbrains" in os.getenv("TERMINAL_EMULATOR", "").lower()
 
 
+def is_interactive():
+    return hasattr(sys, "ps1")
+
+
+def is_terminal():
+    return sys.stdin.isatty()
+
+
+def is_interactive_terminal():
+    return is_interactive() and is_terminal()
+
+
 def get_client_environment_label() -> str:
     """
     Map current environment to a standardized client label.
```
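These three helpers gate the progress-bar and link-printing behavior added further down. `hasattr(sys, "ps1")` is true only in an interactive interpreter, and `sys.stdin.isatty()` only when stdin is attached to a real terminal, so a notebook kernel counts as interactive but not as a terminal. A standalone sketch of the same checks:

```python
import sys

# True in an interactive REPL: the interpreter sets sys.ps1 at the prompt.
interactive = hasattr(sys, "ps1")

# True when stdin is a terminal; false when input is piped or redirected.
terminal = sys.stdin.isatty()

# A Jupyter kernel is interactive but has no TTY, so only the first flag is
# set there; a plain `python script.py` run sets neither.
print(f"interactive={interactive}, terminal={terminal}")
```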
google/cloud/dataproc_spark_connect/session.py

```diff
@@ -24,8 +24,11 @@ import threading
 import time
 import uuid
 import tqdm
+from packaging import version
+from tqdm import tqdm as cli_tqdm
+from tqdm.notebook import tqdm as notebook_tqdm
 from types import MethodType
-from typing import Any, cast, ClassVar, Dict, Optional, Union
+from typing import Any, cast, ClassVar, Dict, Iterable, Optional, Union
 
 from google.api_core import retry
 from google.api_core.client_options import ClientOptions
```
```diff
@@ -104,12 +107,14 @@ class DataprocSparkSession(SparkSession):
     """
 
     _DEFAULT_RUNTIME_VERSION = "3.0"
+    _MIN_RUNTIME_VERSION = "3.0"
 
     _active_s8s_session_uuid: ClassVar[Optional[str]] = None
     _project_id = None
     _region = None
     _client_options = None
     _active_s8s_session_id: ClassVar[Optional[str]] = None
+    _execution_progress_bar = dict()
 
     class Builder(SparkSession.Builder):
 
```
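`_execution_progress_bar` holds one progress bar per in-flight operation, and the handler added later in this diff mutates it under `self._lock`, since progress callbacks can fire from multiple threads. A minimal sketch of that lock-guarded registry pattern; the names `upsert` and `finish` are illustrative, not part of the package:

```python
import threading

_lock = threading.Lock()
_bars: dict = {}  # one entry per operation_id

def upsert(operation_id, make_bar):
    # Create the bar lazily on the first progress event for this operation.
    with _lock:
        if operation_id not in _bars:
            _bars[operation_id] = make_bar()
        return _bars[operation_id]

def finish(operation_id):
    # Close and forget the bar once the operation reports done.
    with _lock:
        bar = _bars.pop(operation_id, None)
        if bar is not None:
            bar.close()
```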
```diff
@@ -247,7 +252,11 @@
             assert self._channel_builder is not None
             session = DataprocSparkSession(connection=self._channel_builder)
 
+            # Register handler for Cell Execution Progress bar
+            session._register_progress_execution_handler()
+
             DataprocSparkSession._set_default_and_active_session(session)
+
             return session
 
         def __create(self) -> "DataprocSparkSession":
```
```diff
@@ -262,6 +271,9 @@
 
             dataproc_config: Session = self._get_dataproc_config()
 
+            # Check runtime version compatibility before creating session
+            self._check_runtime_compatibility(dataproc_config)
+
             session_id = self.generate_dataproc_session_id()
             dataproc_config.name = f"projects/{self._project_id}/locations/{self._region}/sessions/{session_id}"
             logger.debug(
```
```diff
@@ -593,6 +605,43 @@
             stacklevel=3,
         )
 
+    def _check_runtime_compatibility(self, dataproc_config):
+        """Check if runtime version 3.0 client is compatible with older runtime versions.
+
+        Runtime version 3.0 clients do not support older runtime versions (pre-3.0).
+        There is no backward or forward compatibility between different runtime versions.
+
+        Args:
+            dataproc_config: The Session configuration containing runtime version
+
+        Raises:
+            DataprocSparkConnectException: If server is using pre-3.0 runtime version
+        """
+        runtime_version = dataproc_config.runtime_config.version
+
+        if not runtime_version:
+            return
+
+        logger.debug(f"Detected server runtime version: {runtime_version}")
+
+        # Parse runtime version to check if it's below minimum supported version
+        try:
+            server_version = version.parse(runtime_version)
+            min_version = version.parse(
+                DataprocSparkSession._MIN_RUNTIME_VERSION
+            )
+
+            if server_version < min_version:
+                raise DataprocSparkConnectException(
+                    f"Specified {runtime_version} Dataproc Runtime version is not supported, "
+                    f"use {DataprocSparkSession._MIN_RUNTIME_VERSION} version or higher."
+                )
+        except version.InvalidVersion:
+            # If we can't parse the version, log a warning but continue
+            logger.warning(
+                f"Could not parse runtime version: {runtime_version}"
+            )
+
     def _display_view_session_details_button(self, session_id):
         try:
             session_url = f"https://console.cloud.google.com/dataproc/interactive/sessions/{session_id}/locations/{self._region}?project={self._project_id}"
```
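The guard relies on `packaging.version.parse`, which orders release strings numerically rather than lexically. A quick illustration of the comparisons involved; the version strings here are arbitrary examples:

```python
from packaging import version

# Numeric ordering: "2.10" > "2.9", which plain string comparison gets wrong.
assert version.parse("2.10") > version.parse("2.9")

# The check in _check_runtime_compatibility reduces to this comparison:
assert version.parse("2.2") < version.parse("3.0")

# Pre-releases sort below the final release, so an rc still precedes 3.0:
assert version.parse("3.0rc1") < version.parse("3.0")

# Unparseable strings raise InvalidVersion, which the client only logs.
try:
    version.parse("not-a-version")
except version.InvalidVersion as e:
    print(f"unparseable: {e}")
```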
```diff
@@ -682,6 +731,78 @@
             execute_and_fetch_as_iterator_wrapped_method, self.client
         )
 
+        # Patching clearProgressHandlers method to not remove Dataproc Progress Handler
+        clearProgressHandlers_base_method = self.clearProgressHandlers
+
+        def clearProgressHandlers_wrapper_method(_, *args, **kwargs):
+            clearProgressHandlers_base_method(*args, **kwargs)
+
+            self._register_progress_execution_handler()
+
+        self.clearProgressHandlers = MethodType(
+            clearProgressHandlers_wrapper_method, self
+        )
+
+    def _register_progress_execution_handler(self):
+        from pyspark.sql.connect.shell.progress import StageInfo
+
+        def handler(
+            stages: Optional[Iterable[StageInfo]],
+            inflight_tasks: int,
+            operation_id: Optional[str],
+            done: bool,
+        ):
+            if operation_id is None:
+                return
+
+            # Don't build / render progress bar for non-interactive (despite
+            # Ipython or non-IPython)
+            if not environment.is_interactive():
+                return
+
+            total_tasks = 0
+            completed_tasks = 0
+
+            for stage in stages or []:
+                total_tasks += stage.num_tasks
+                completed_tasks += stage.num_completed_tasks
+
+            tqdm_pbar = notebook_tqdm
+            if environment.is_interactive_terminal():
+                tqdm_pbar = cli_tqdm
+
+            # Use a lock to ensure only one thread can access and modify
+            # the shared dictionaries at a time.
+            with self._lock:
+                if operation_id in self._execution_progress_bar:
+                    pbar = self._execution_progress_bar[operation_id]
+                    if pbar.total != total_tasks:
+                        pbar.reset(
+                            total=total_tasks
+                        )  # This force resets the progress bar % too on next refresh
+                else:
+                    pbar = tqdm_pbar(
+                        total=total_tasks,
+                        leave=True,
+                        dynamic_ncols=True,
+                        bar_format="{l_bar}{bar} {n_fmt}/{total_fmt} Tasks",
+                    )
+                    self._execution_progress_bar[operation_id] = pbar
+
+                # To handle skipped or failed tasks.
+                # StageInfo proto doesn't have skipped and failed tasks information to process.
+                if done and completed_tasks < total_tasks:
+                    completed_tasks = total_tasks
+
+                pbar.n = completed_tasks
+                pbar.refresh()
+
+                if done:
+                    pbar.close()
+                    self._execution_progress_bar.pop(operation_id, None)
+
+        self.registerProgressHandler(handler)
+
     @staticmethod
     def _sql_lazy_transformation(req):
         # Select SQL command
```
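The handler keeps one bar per `operation_id` and drives it through `reset`, direct assignment to `n`, `refresh`, and `close`. A standalone sketch of that lifecycle with a plain `tqdm` bar and fabricated task counts, no Spark involved:

```python
from tqdm import tqdm

# Simulated progress events for one operation: (total_tasks, completed_tasks, done)
events = [(10, 2, False), (10, 7, False), (12, 9, False), (12, 10, True)]

pbar = None
for total, completed, done in events:
    if pbar is None:
        pbar = tqdm(total=total, leave=True, dynamic_ncols=True,
                    bar_format="{l_bar}{bar} {n_fmt}/{total_fmt} Tasks")
    elif pbar.total != total:
        pbar.reset(total=total)  # new stages appeared; restart the percentage

    if done and completed < total:
        completed = total        # count skipped/failed tasks as finished

    pbar.n = completed           # set absolute progress, then redraw
    pbar.refresh()

pbar.close()
```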
```diff
@@ -711,6 +832,11 @@
         """
 
     def _display_operation_link(self, operation_id: str):
+        # Don't print per-operation Spark UI link for non-interactive (despite
+        # Ipython or non-IPython)
+        if not environment.is_interactive():
+            return
+
         assert all(
             [
                 operation_id is not None,
```
```diff
@@ -726,12 +852,13 @@
             f"associatedSqlOperationId={operation_id}?project={self._project_id}"
         )
 
+        if environment.is_interactive_terminal():
+            print(f"Spark Query: {url}")
+            return
+
         try:
             from IPython.display import display, HTML
-            from IPython.core.interactiveshell import InteractiveShell
 
-            if not InteractiveShell.initialized():
-                return
             html_element = f"""
                 <div>
                     <p><a href="{url}">Spark Query</a> (Operation: {operation_id})</p>
```
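The method now picks the output channel up front: plain `print` for interactive terminals, HTML via IPython's display machinery otherwise, which makes the old `InteractiveShell.initialized()` check redundant since `environment.is_interactive()` already filters non-interactive runs at the top. A minimal sketch of the same branching, where `interactive_terminal` stands in for `environment.is_interactive_terminal()`:

```python
def show_link(url: str, interactive_terminal: bool) -> None:
    """Render a link as plain text in a terminal, as HTML in a notebook."""
    if interactive_terminal:
        print(f"Spark Query: {url}")
        return
    try:
        from IPython.display import display, HTML
        display(HTML(f'<a href="{url}">Spark Query</a>'))
    except ImportError:
        # No IPython installed: fall back to plain text.
        print(f"Spark Query: {url}")
```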
setup.py

```diff
@@ -20,7 +20,7 @@ long_description = (this_directory / "README.md").read_text()
 
 setup(
     name="dataproc-spark-connect",
-    version="1.0.0rc1",
+    version="1.0.0rc3",
     description="Dataproc client library for Spark Connect",
     long_description=long_description,
     author="Google LLC",
```