dataproc-spark-connect 1.0.0rc2__tar.gz → 1.0.0rc4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/PKG-INFO +1 -1
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/dataproc_spark_connect.egg-info/PKG-INFO +1 -1
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/environment.py +9 -1
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/session.py +264 -16
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/setup.py +1 -1
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/LICENSE +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/README.md +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/dataproc_spark_connect.egg-info/SOURCES.txt +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/dataproc_spark_connect.egg-info/requires.txt +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/__init__.py +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/client/core.py +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/client/proxy.py +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/exceptions.py +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/google/cloud/dataproc_spark_connect/pypi_artifacts.py +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/pyproject.toml +0 -0
- {dataproc_spark_connect-1.0.0rc2 → dataproc_spark_connect-1.0.0rc4}/setup.cfg +0 -0
google/cloud/dataproc_spark_connect/environment.py

@@ -48,7 +48,15 @@ def is_jetbrains_ide() -> bool:


 def is_interactive():
-    return hasattr(sys, "ps1") or sys.flags.interactive
+    try:
+        from IPython import get_ipython
+
+        if get_ipython() is not None:
+            return True
+    except ImportError:
+        pass
+
+    return hasattr(sys, "ps1") or sys.flags.interactive


 def is_terminal():
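
For reference, the new detection order can be exercised standalone: probe IPython first, then fall back to the plain-interpreter checks. The sketch below mirrors the added lines; the __main__ harness is illustrative and not part of the package.

import sys


def is_interactive() -> bool:
    # get_ipython() returns the active shell inside IPython/Jupyter,
    # or None when IPython is installed but not currently running.
    try:
        from IPython import get_ipython

        if get_ipython() is not None:
            return True
    except ImportError:
        pass

    # Plain-interpreter fallbacks: sys.ps1 exists in a REPL, and
    # sys.flags.interactive reflects `python -i`.
    return hasattr(sys, "ps1") or sys.flags.interactive


if __name__ == "__main__":
    # False under `python probe.py`; True under `python -i` or in a notebook.
    print(bool(is_interactive()))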
google/cloud/dataproc_spark_connect/session.py

@@ -24,6 +24,7 @@ import threading
 import time
 import uuid
 import tqdm
+from packaging import version
 from tqdm import tqdm as cli_tqdm
 from tqdm.notebook import tqdm as notebook_tqdm
 from types import MethodType
@@ -45,6 +46,7 @@ from google.cloud.dataproc_spark_connect.pypi_artifacts import PyPiArtifacts
 from google.cloud.dataproc_v1 import (
     AuthenticationConfig,
     CreateSessionRequest,
+    DeleteSessionRequest,
     GetSessionRequest,
     Session,
     SessionControllerClient,
@@ -86,6 +88,22 @@ def _is_valid_label_value(value: str) -> bool:
     return bool(re.match(pattern, value))


+def _is_valid_session_id(session_id: str) -> bool:
+    """
+    Validates if a string complies with Google Cloud session ID format.
+    - Must be 4-63 characters
+    - Only lowercase letters, numbers, and dashes are allowed
+    - Must start with a lowercase letter
+    - Cannot end with a dash
+    """
+    if not session_id:
+        return False
+
+    # The pattern is sufficient for validation and already enforces length constraints.
+    pattern = r"^[a-z][a-z0-9-]{2,61}[a-z0-9]$"
+    return bool(re.match(pattern, session_id))
+
+
 class DataprocSparkSession(SparkSession):
     """The entry point to programming Spark with the Dataset and DataFrame API.

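
The regex alone enforces everything the docstring promises: ^[a-z] pins the first character, [a-z0-9]$ forbids a trailing dash, and 1 + {2,61} + 1 characters bounds the total length to 4-63. A quick standalone check against the same pattern (the candidate IDs are illustrative):

import re

# Same pattern as _is_valid_session_id above.
pattern = re.compile(r"^[a-z][a-z0-9-]{2,61}[a-z0-9]$")

candidates = ["spark-demo-01", "abcd", "abc", "9starts-with-digit", "ends-with-dash-"]
for candidate in candidates:
    print(f"{candidate!r}: {bool(pattern.match(candidate))}")
# spark-demo-01 and abcd pass; abc fails the 4-character minimum,
# and the last two violate the first/last-character rules.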
@@ -106,12 +124,14 @@ class DataprocSparkSession(SparkSession):
     """

     _DEFAULT_RUNTIME_VERSION = "3.0"
+    _MIN_RUNTIME_VERSION = "3.0"

     _active_s8s_session_uuid: ClassVar[Optional[str]] = None
     _project_id = None
     _region = None
     _client_options = None
     _active_s8s_session_id: ClassVar[Optional[str]] = None
+    _active_session_uses_custom_id: ClassVar[bool] = False
     _execution_progress_bar = dict()

     class Builder(SparkSession.Builder):
@@ -120,6 +140,7 @@
             self._options: Dict[str, Any] = {}
             self._channel_builder: Optional[DataprocChannelBuilder] = None
             self._dataproc_config: Optional[Session] = None
+            self._custom_session_id: Optional[str] = None
             self._project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
             self._region = os.getenv("GOOGLE_CLOUD_REGION")
             self._client_options = ClientOptions(
@@ -128,6 +149,18 @@
                     f"{self._region}-dataproc.googleapis.com",
                 )
             )
+            self._session_controller_client: Optional[
+                SessionControllerClient
+            ] = None
+
+        @property
+        def session_controller_client(self) -> SessionControllerClient:
+            """Get or create a SessionControllerClient instance."""
+            if self._session_controller_client is None:
+                self._session_controller_client = SessionControllerClient(
+                    client_options=self._client_options
+                )
+            return self._session_controller_client

         def projectId(self, project_id):
             self._project_id = project_id
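
The new property builds the SessionControllerClient on first access and caches it, so builders that never touch the Sessions API skip client construction entirely. A generic sketch of the same lazy-initialization pattern (Client and Owner are illustrative stand-ins, not package classes):

from typing import Optional


class Client:
    """Stand-in for an API client that is expensive to construct."""

    def __init__(self, endpoint: str) -> None:
        self.endpoint = endpoint


class Owner:
    def __init__(self, endpoint: str) -> None:
        self._endpoint = endpoint
        self._client: Optional[Client] = None

    @property
    def client(self) -> Client:
        # Build once on first access, then return the cached instance.
        if self._client is None:
            self._client = Client(self._endpoint)
        return self._client


owner = Owner("us-central1-dataproc.googleapis.com")
assert owner.client is owner.client  # constructed exactly once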
@@ -141,6 +174,35 @@
             )
             return self

+        def dataprocSessionId(self, session_id: str):
+            """
+            Set a custom session ID for creating or reusing sessions.
+
+            The session ID must:
+            - Be 4-63 characters long
+            - Start with a lowercase letter
+            - Contain only lowercase letters, numbers, and hyphens
+            - Not end with a hyphen
+
+            Args:
+                session_id: The custom session ID to use
+
+            Returns:
+                This Builder instance for method chaining
+
+            Raises:
+                ValueError: If the session ID format is invalid
+            """
+            if not _is_valid_session_id(session_id):
+                raise ValueError(
+                    f"Invalid session ID: '{session_id}'. "
+                    "Session ID must be 4-63 characters, start with a lowercase letter, "
+                    "contain only lowercase letters, numbers, and hyphens, "
+                    "and not end with a hyphen."
+                )
+            self._custom_session_id = session_id
+            return self
+
         def dataprocSessionConfig(self, dataproc_config: Session):
             self._dataproc_config = dataproc_config
             for k, v in dataproc_config.runtime_config.properties.items():
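
A hedged usage sketch of the new builder hook, assuming the package's public import path and ambient Google Cloud credentials; the project, region, and session ID values are placeholders:

import os

from google.cloud.dataproc_spark_connect import DataprocSparkSession

# The Builder reads these in __init__, per the diff above.
os.environ["GOOGLE_CLOUD_PROJECT"] = "my-project"  # placeholder
os.environ["GOOGLE_CLOUD_REGION"] = "us-central1"  # placeholder

spark = (
    DataprocSparkSession.builder
    .dataprocSessionId("team-shared-session")  # must satisfy the ID rules above
    .getOrCreate()
)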
@@ -254,6 +316,7 @@
             session._register_progress_execution_handler()

             DataprocSparkSession._set_default_and_active_session(session)
+
             return session

         def __create(self) -> "DataprocSparkSession":
@@ -268,7 +331,16 @@

             dataproc_config: Session = self._get_dataproc_config()

-            session_id = self.generate_dataproc_session_id()
+            # Check runtime version compatibility before creating session
+            self._check_runtime_compatibility(dataproc_config)
+
+            # Use custom session ID if provided, otherwise generate one
+            session_id = (
+                self._custom_session_id
+                if self._custom_session_id
+                else self.generate_dataproc_session_id()
+            )
+
             dataproc_config.name = f"projects/{self._project_id}/locations/{self._region}/sessions/{session_id}"
             logger.debug(
                 f"Dataproc Session configuration:\n{dataproc_config}"
@@ -283,6 +355,10 @@

             logger.debug("Creating Dataproc Session")
             DataprocSparkSession._active_s8s_session_id = session_id
+            # Track whether this session uses a custom ID (unmanaged) or auto-generated ID (managed)
+            DataprocSparkSession._active_session_uses_custom_id = (
+                self._custom_session_id is not None
+            )
             s8s_creation_start_time = time.time()

             stop_create_session_pbar_event = threading.Event()
@@ -373,6 +449,7 @@
                 if create_session_pbar_thread.is_alive():
                     create_session_pbar_thread.join()
                 DataprocSparkSession._active_s8s_session_id = None
+                DataprocSparkSession._active_session_uses_custom_id = False
                 raise DataprocSparkConnectException(
                     f"Error while creating Dataproc Session: {e.message}"
                 )
@@ -381,6 +458,7 @@
                 if create_session_pbar_thread.is_alive():
                     create_session_pbar_thread.join()
                 DataprocSparkSession._active_s8s_session_id = None
+                DataprocSparkSession._active_session_uses_custom_id = False
                 raise RuntimeError(
                     f"Error while creating Dataproc Session"
                 ) from e
@@ -422,16 +500,19 @@
             :param html_element: HTML element to display for interactive IPython
                 environment
             """
+            # Don't print any output (Rich or Plain) for non-interactive
+            if not environment.is_interactive():
+                return
+
+            if environment.is_interactive_terminal():
+                print(plain_message)
+                return
+
             try:
                 from IPython.display import display, HTML
-                from IPython.core.interactiveshell import InteractiveShell

-                if not InteractiveShell.initialized():
-                    raise DataprocSparkConnectException(
-                        "Not in an Interactive IPython Environment"
-                    )
                 display(HTML(html_element))
-            except
+            except ImportError:
                 print(plain_message)

         def _get_exiting_active_session(
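
The rewritten helper routes output by environment instead of raising when IPython is not initialized: silence for non-interactive runs, plain text for interactive terminals, and HTML with a plain-text fallback elsewhere. The same routing, condensed into a standalone function (the two booleans stand in for the environment.* checks):

def emit(plain_message: str, html_element: str,
         interactive: bool, interactive_terminal: bool) -> None:
    # Batch and other non-interactive runs: stay silent.
    if not interactive:
        return

    # Interactive terminals: plain text only.
    if interactive_terminal:
        print(plain_message)
        return

    # Notebook-like environments: rich HTML, plain-text fallback.
    try:
        from IPython.display import display, HTML

        display(HTML(html_element))
    except ImportError:
        print(plain_message)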
@@ -471,11 +552,30 @@

         def getOrCreate(self) -> "DataprocSparkSession":
             with DataprocSparkSession._lock:
+                # Handle custom session ID by setting it early and letting existing logic handle it
+                if self._custom_session_id:
+                    self._handle_custom_session_id()
+
                 session = self._get_exiting_active_session()
                 if session is None:
                     session = self.__create()
                 return session

+        def _handle_custom_session_id(self):
+            """Handle custom session ID by checking if it exists and setting _active_s8s_session_id."""
+            session_response = self._get_session_by_id(self._custom_session_id)
+            if session_response is not None:
+                # Found an active session with the custom ID, set it as the active session
+                DataprocSparkSession._active_s8s_session_id = (
+                    self._custom_session_id
+                )
+                # Mark that this session uses a custom ID
+                DataprocSparkSession._active_session_uses_custom_id = True
+            else:
+                # No existing session found, clear any existing active session ID
+                # so we'll create a new one with the custom ID
+                DataprocSparkSession._active_s8s_session_id = None
+
         def _get_dataproc_config(self):
             # Use the property to ensure we always have a config
             dataproc_config = self.dataproc_config
@@ -599,7 +699,52 @@
                 stacklevel=3,
             )

+        def _check_runtime_compatibility(self, dataproc_config):
+            """Check if runtime version 3.0 client is compatible with older runtime versions.
+
+            Runtime version 3.0 clients do not support older runtime versions (pre-3.0).
+            There is no backward or forward compatibility between different runtime versions.
+
+            Args:
+                dataproc_config: The Session configuration containing runtime version
+
+            Raises:
+                DataprocSparkConnectException: If server is using pre-3.0 runtime version
+            """
+            runtime_version = dataproc_config.runtime_config.version
+
+            if not runtime_version:
+                return
+
+            logger.debug(f"Detected server runtime version: {runtime_version}")
+
+            # Parse runtime version to check if it's below minimum supported version
+            try:
+                server_version = version.parse(runtime_version)
+                min_version = version.parse(
+                    DataprocSparkSession._MIN_RUNTIME_VERSION
+                )
+
+                if server_version < min_version:
+                    raise DataprocSparkConnectException(
+                        f"Specified {runtime_version} Dataproc Runtime version is not supported, "
+                        f"use {DataprocSparkSession._MIN_RUNTIME_VERSION} version or higher."
+                    )
+            except version.InvalidVersion:
+                # If we can't parse the version, log a warning but continue
+                logger.warning(
+                    f"Could not parse runtime version: {runtime_version}"
+                )
+
         def _display_view_session_details_button(self, session_id):
+            # Display button is only supported in colab enterprise
+            if not environment.is_colab_enterprise():
+                return
+
+            # Skip button display for colab enterprise IPython terminals
+            if environment.is_interactive_terminal():
+                return
+
             try:
                 session_url = f"https://console.cloud.google.com/dataproc/interactive/sessions/{session_id}/locations/{self._region}?project={self._project_id}"
                 from IPython.core.interactiveshell import InteractiveShell
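
The check leans on packaging.version, which orders release segments numerically ("3.0" sorts before "3.10", unlike plain string comparison) and raises InvalidVersion on unparseable input. A trimmed sketch of the same logic, with ValueError standing in for DataprocSparkConnectException:

from packaging import version

MIN_RUNTIME_VERSION = "3.0"


def check_runtime(runtime_version: str) -> None:
    try:
        if version.parse(runtime_version) < version.parse(MIN_RUNTIME_VERSION):
            # The package raises DataprocSparkConnectException here.
            raise ValueError(
                f"Specified {runtime_version} Dataproc Runtime version is not "
                f"supported, use {MIN_RUNTIME_VERSION} version or higher."
            )
    except version.InvalidVersion:
        # Unparseable versions are logged and tolerated, not fatal.
        print(f"Could not parse runtime version: {runtime_version}")


check_runtime("3.1")      # passes silently
check_runtime("unknown")  # warns and continues
try:
    check_runtime("2.2")  # below the minimum
except ValueError as exc:
    print(exc)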
@@ -615,6 +760,90 @@
             except ImportError as e:
                 logger.debug(f"Import error: {e}")

+        def _get_session_by_id(self, session_id: str) -> Optional[Session]:
+            """
+            Get existing session by ID.
+
+            Returns:
+                Session if ACTIVE/CREATING, None if not found or not usable
+            """
+            session_name = f"projects/{self._project_id}/locations/{self._region}/sessions/{session_id}"
+
+            try:
+                get_request = GetSessionRequest(name=session_name)
+                session = self.session_controller_client.get_session(
+                    get_request
+                )
+
+                logger.debug(
+                    f"Found existing session {session_id} in state: {session.state}"
+                )
+
+                if session.state in [
+                    Session.State.ACTIVE,
+                    Session.State.CREATING,
+                ]:
+                    # Reuse the active session
+                    logger.info(f"Reusing existing session: {session_id}")
+                    return session
+                else:
+                    # Session exists but is not usable (terminated/failed/terminating)
+                    logger.info(
+                        f"Session {session_id} in {session.state.name} state, cannot reuse"
+                    )
+                    return None
+
+            except NotFound:
+                # Session doesn't exist, can create new one
+                logger.debug(
+                    f"Session {session_id} not found, can create new one"
+                )
+                return None
+            except Exception as e:
+                logger.error(f"Error checking session {session_id}: {e}")
+                return None
+
+        def _delete_session(self, session_name: str):
+            """Delete a session to free up the session ID for reuse."""
+            try:
+                delete_request = DeleteSessionRequest(name=session_name)
+                self.session_controller_client.delete_session(delete_request)
+                logger.debug(f"Deleted session: {session_name}")
+            except NotFound:
+                logger.debug(f"Session already deleted: {session_name}")
+
+        def _wait_for_termination(self, session_name: str, timeout: int = 180):
+            """Wait for a session to finish terminating."""
+            start_time = time.time()
+
+            while time.time() - start_time < timeout:
+                try:
+                    get_request = GetSessionRequest(name=session_name)
+                    session = self.session_controller_client.get_session(
+                        get_request
+                    )
+
+                    if session.state in [
+                        Session.State.TERMINATED,
+                        Session.State.FAILED,
+                    ]:
+                        return
+                    elif session.state != Session.State.TERMINATING:
+                        # Session is in unexpected state
+                        logger.warning(
+                            f"Session {session_name} in unexpected state while waiting for termination: {session.state}"
+                        )
+                        return
+
+                    time.sleep(2)
+                except NotFound:
+                    # Session was deleted
+                    return
+
+            logger.warning(
+                f"Timeout waiting for session {session_name} to terminate"
+            )
+
         @staticmethod
         def generate_dataproc_session_id():
             timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
@@ -789,6 +1018,11 @@
         """

     def _display_operation_link(self, operation_id: str):
+        # Don't print the per-operation Spark UI link for non-interactive
+        # runs (whether IPython or non-IPython)
+        if not environment.is_interactive():
+            return
+
         assert all(
             [
                 operation_id is not None,
@@ -804,12 +1038,13 @@
             f"associatedSqlOperationId={operation_id}?project={self._project_id}"
         )

+        if environment.is_interactive_terminal():
+            print(f"Spark Query: {url}")
+            return
+
         try:
             from IPython.display import display, HTML
-            from IPython.core.interactiveshell import InteractiveShell

-            if not InteractiveShell.initialized():
-                return
             html_element = f"""
             <div>
                 <p><a href="{url}">Spark Query</a> (Operation: {operation_id})</p>
@@ -893,16 +1128,29 @@
     def stop(self) -> None:
         with DataprocSparkSession._lock:
             if DataprocSparkSession._active_s8s_session_id is not None:
-                terminate_s8s_session(
-                    DataprocSparkSession._project_id,
-                    DataprocSparkSession._region,
-                    DataprocSparkSession._active_s8s_session_id,
-                    self._client_options,
-                )
+                # Check if this is a managed session (auto-generated ID) or unmanaged session (custom ID)
+                if DataprocSparkSession._active_session_uses_custom_id:
+                    # Unmanaged session (custom ID): Only clean up client-side state
+                    # Don't terminate as it might be in use by other notebooks or clients
+                    logger.debug(
+                        f"Stopping unmanaged session {DataprocSparkSession._active_s8s_session_id} without termination"
+                    )
+                else:
+                    # Managed session (auto-generated ID): Use original behavior and terminate
+                    logger.debug(
+                        f"Terminating managed session {DataprocSparkSession._active_s8s_session_id}"
+                    )
+                    terminate_s8s_session(
+                        DataprocSparkSession._project_id,
+                        DataprocSparkSession._region,
+                        DataprocSparkSession._active_s8s_session_id,
+                        self._client_options,
+                    )

             self._remove_stopped_session_from_file()
             DataprocSparkSession._active_s8s_session_uuid = None
             DataprocSparkSession._active_s8s_session_id = None
+            DataprocSparkSession._active_session_uses_custom_id = False
             DataprocSparkSession._project_id = None
             DataprocSparkSession._region = None
             DataprocSparkSession._client_options = None
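
The caller-visible effect, sketched with illustrative values: stop() on an auto-named session still terminates it server-side, while stop() on a session attached via dataprocSessionId only resets local state, since other clients may share it.

from google.cloud.dataproc_spark_connect import DataprocSparkSession

# Managed: the library generated the session ID, so stop() also
# terminates the server-side session.
spark = DataprocSparkSession.builder.getOrCreate()
spark.stop()

# Unmanaged: a user-supplied ID may be shared across notebooks, so
# stop() clears local state and leaves the server-side session running.
spark = (
    DataprocSparkSession.builder
    .dataprocSessionId("team-shared-session")  # placeholder ID
    .getOrCreate()
)
spark.stop()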
setup.py

@@ -20,7 +20,7 @@ long_description = (this_directory / "README.md").read_text()

 setup(
     name="dataproc-spark-connect",
-    version="1.0.0rc2",
+    version="1.0.0rc4",
     description="Dataproc client library for Spark Connect",
     long_description=long_description,
     author="Google LLC",