dataproc-spark-connect 0.8.3__tar.gz → 1.0.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/PKG-INFO +2 -2
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/PKG-INFO +2 -2
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/SOURCES.txt +1 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/requires.txt +1 -1
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/client/core.py +5 -3
- dataproc_spark_connect-1.0.0rc1/google/cloud/dataproc_spark_connect/environment.py +76 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/session.py +130 -31
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/setup.py +2 -2
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/LICENSE +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/README.md +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/__init__.py +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/client/proxy.py +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/exceptions.py +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/pypi_artifacts.py +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/pyproject.toml +0 -0
- {dataproc_spark_connect-0.8.3 → dataproc_spark_connect-1.0.0rc1}/setup.cfg +0 -0

--- dataproc_spark_connect-0.8.3/PKG-INFO
+++ dataproc_spark_connect-1.0.0rc1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataproc-spark-connect
-Version: 0.8.3
+Version: 1.0.0rc1
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
@@ -9,7 +9,7 @@ License-File: LICENSE
 Requires-Dist: google-api-core>=2.19
 Requires-Dist: google-cloud-dataproc>=5.18
 Requires-Dist: packaging>=20.0
-Requires-Dist: pyspark[connect]~=
+Requires-Dist: pyspark[connect]~=4.0.0
 Requires-Dist: tqdm>=4.67
 Requires-Dist: websockets>=14.0
 Dynamic: author

--- dataproc_spark_connect-0.8.3/dataproc_spark_connect.egg-info/PKG-INFO
+++ dataproc_spark_connect-1.0.0rc1/dataproc_spark_connect.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataproc-spark-connect
-Version: 0.8.3
+Version: 1.0.0rc1
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
@@ -9,7 +9,7 @@ License-File: LICENSE
 Requires-Dist: google-api-core>=2.19
 Requires-Dist: google-cloud-dataproc>=5.18
 Requires-Dist: packaging>=20.0
-Requires-Dist: pyspark[connect]~=
+Requires-Dist: pyspark[connect]~=4.0.0
 Requires-Dist: tqdm>=4.67
 Requires-Dist: websockets>=14.0
 Dynamic: author

--- dataproc_spark_connect-0.8.3/dataproc_spark_connect.egg-info/SOURCES.txt
+++ dataproc_spark_connect-1.0.0rc1/dataproc_spark_connect.egg-info/SOURCES.txt
@@ -9,6 +9,7 @@ dataproc_spark_connect.egg-info/dependency_links.txt
 dataproc_spark_connect.egg-info/requires.txt
 dataproc_spark_connect.egg-info/top_level.txt
 google/cloud/dataproc_spark_connect/__init__.py
+google/cloud/dataproc_spark_connect/environment.py
 google/cloud/dataproc_spark_connect/exceptions.py
 google/cloud/dataproc_spark_connect/pypi_artifacts.py
 google/cloud/dataproc_spark_connect/session.py

--- dataproc_spark_connect-0.8.3/google/cloud/dataproc_spark_connect/client/core.py
+++ dataproc_spark_connect-1.0.0rc1/google/cloud/dataproc_spark_connect/client/core.py
@@ -15,14 +15,14 @@ import logging
 
 import google
 import grpc
-from pyspark.sql.connect.client import 
+from pyspark.sql.connect.client import DefaultChannelBuilder
 
 from . import proxy
 
 logger = logging.getLogger(__name__)
 
 
-class DataprocChannelBuilder(
+class DataprocChannelBuilder(DefaultChannelBuilder):
     """
     This is a helper class that is used to create a GRPC channel based on the given
     connection string per the documentation of Spark Connect.
@@ -88,7 +88,9 @@ class ProxiedChannel(grpc.Channel):
         self._proxy = proxy.DataprocSessionProxy(0, target_host)
         self._proxy.start()
         self._proxied_connect_url = f"sc://localhost:{self._proxy.port}"
-        self._wrapped = 
+        self._wrapped = DefaultChannelBuilder(
+            self._proxied_connect_url
+        ).toChannel()
 
     def __enter__(self):
         return self
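
The client now builds its gRPC channel through PySpark 4.0's `DefaultChannelBuilder` when wrapping the local session proxy. A minimal sketch of that pattern, modeled on the hunk above; it assumes `pyspark[connect]~=4.0.0` is installed and uses a placeholder `sc://` URL rather than a real Dataproc endpoint:

```python
# Sketch only: turn a Spark Connect connection string into a gRPC channel,
# the same way ProxiedChannel wraps its proxied URL in the diff above.
from pyspark.sql.connect.client import DefaultChannelBuilder

connect_url = "sc://localhost:15002"  # placeholder Spark Connect endpoint
channel = DefaultChannelBuilder(connect_url).toChannel()
print(type(channel))  # a grpc.Channel usable by the Spark Connect client
```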

--- /dev/null
+++ dataproc_spark_connect-1.0.0rc1/google/cloud/dataproc_spark_connect/environment.py
@@ -0,0 +1,76 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from typing import Callable, Tuple, List
+
+
+def is_vscode() -> bool:
+    """True if running inside VS Code at all."""
+    return os.getenv("VSCODE_PID") is not None
+
+
+def is_jupyter() -> bool:
+    """True if running in a Jupyter environment."""
+    return os.getenv("JPY_PARENT_PID") is not None
+
+
+def is_colab_enterprise() -> bool:
+    """True if running in Colab Enterprise (Vertex AI)."""
+    return os.getenv("VERTEX_PRODUCT") == "COLAB_ENTERPRISE"
+
+
+def is_colab() -> bool:
+    """True if running in Google Colab."""
+    return os.getenv("COLAB_RELEASE_TAG") is not None
+
+
+def is_workbench() -> bool:
+    """True if running in AI Workbench (managed Jupyter)."""
+    return os.getenv("VERTEX_PRODUCT") == "WORKBENCH_INSTANCE"
+
+
+def is_jetbrains_ide() -> bool:
+    """True if running inside any JetBrains IDE."""
+    return "jetbrains" in os.getenv("TERMINAL_EMULATOR", "").lower()
+
+
+def get_client_environment_label() -> str:
+    """
+    Map current environment to a standardized client label.
+
+    Priority order:
+    1. Colab Enterprise ("colab-enterprise")
+    2. Colab ("colab")
+    3. Workbench ("workbench-jupyter")
+    4. VS Code ("vscode")
+    5. JetBrains IDE ("jetbrains")
+    6. Jupyter ("jupyter")
+    7. Unknown ("unknown")
+    """
+    checks: List[Tuple[Callable[[], bool], str]] = [
+        (is_colab_enterprise, "colab-enterprise"),
+        (is_colab, "colab"),
+        (is_workbench, "workbench-jupyter"),
+        (is_vscode, "vscode"),
+        (is_jetbrains_ide, "jetbrains"),
+        (is_jupyter, "jupyter"),
+    ]
+    for detector, label in checks:
+        try:
+            if detector():
+                return label
+        except Exception:
+            pass
+    return "unknown"
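
The new `environment` module only reads environment variables, so its detection logic can be exercised directly. A small usage sketch (the variable values below are illustrative, not produced by the library):

```python
import os

from google.cloud.dataproc_spark_connect import environment

# Simulate Colab Enterprise purely through environment variables.
os.environ["VERTEX_PRODUCT"] = "COLAB_ENTERPRISE"
print(environment.is_colab_enterprise())           # True
print(environment.get_client_environment_label())  # "colab-enterprise"

# With none of the recognized variables set, the label falls back to "unknown".
os.environ.pop("VERTEX_PRODUCT", None)
```

The detectors run in the priority order listed in the docstring, and a detector that raises is skipped rather than failing the whole lookup.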

--- dataproc_spark_connect-0.8.3/google/cloud/dataproc_spark_connect/session.py
+++ dataproc_spark_connect-1.0.0rc1/google/cloud/dataproc_spark_connect/session.py
@@ -49,6 +49,7 @@ from google.cloud.dataproc_v1 import (
     TerminateSessionRequest,
 )
 from google.cloud.dataproc_v1.types import sessions
+from google.cloud.dataproc_spark_connect import environment
 from pyspark.sql.connect.session import SparkSession
 from pyspark.sql.utils import to_str
 
@@ -56,6 +57,12 @@ from pyspark.sql.utils import to_str
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# System labels that should not be overridden by user
+SYSTEM_LABELS = {
+    "dataproc-session-client",
+    "goog-colab-notebook-id",
+}
+
 
 def _is_valid_label_value(value: str) -> bool:
     """
@@ -96,7 +103,7 @@ class DataprocSparkSession(SparkSession):
     ... ) # doctest: +SKIP
     """
 
-    _DEFAULT_RUNTIME_VERSION = "
+    _DEFAULT_RUNTIME_VERSION = "3.0"
 
     _active_s8s_session_uuid: ClassVar[Optional[str]] = None
     _project_id = None
@@ -132,11 +139,76 @@ class DataprocSparkSession(SparkSession):
         return self
 
     def dataprocSessionConfig(self, dataproc_config: Session):
+        self._dataproc_config = dataproc_config
+        for k, v in dataproc_config.runtime_config.properties.items():
+            self._options[cast(str, k)] = to_str(v)
+        return self
+
+    @property
+    def dataproc_config(self):
         with self._lock:
-            self._dataproc_config = 
-
-
-
+            self._dataproc_config = self._dataproc_config or Session()
+            return self._dataproc_config
+
+    def runtimeVersion(self, version: str):
+        self.dataproc_config.runtime_config.version = version
+        return self
+
+    def serviceAccount(self, account: str):
+        self.dataproc_config.environment_config.execution_config.service_account = (
+            account
+        )
+        return self
+
+    def subnetwork(self, subnet: str):
+        self.dataproc_config.environment_config.execution_config.subnetwork_uri = (
+            subnet
+        )
+        return self
+
+    def ttl(self, duration: datetime.timedelta):
+        """Set the time-to-live (TTL) for the session using a timedelta object."""
+        return self.ttlSeconds(int(duration.total_seconds()))
+
+    def ttlSeconds(self, seconds: int):
+        """Set the time-to-live (TTL) for the session in seconds."""
+        self.dataproc_config.environment_config.execution_config.ttl = {
+            "seconds": seconds
+        }
+        return self
+
+    def idleTtl(self, duration: datetime.timedelta):
+        """Set the idle time-to-live (idle TTL) for the session using a timedelta object."""
+        return self.idleTtlSeconds(int(duration.total_seconds()))
+
+    def idleTtlSeconds(self, seconds: int):
+        """Set the idle time-to-live (idle TTL) for the session in seconds."""
+        self.dataproc_config.environment_config.execution_config.idle_ttl = {
+            "seconds": seconds
+        }
+        return self
+
+    def sessionTemplate(self, template: str):
+        self.dataproc_config.session_template = template
+        return self
+
+    def label(self, key: str, value: str):
+        """Add a single label to the session."""
+        return self.labels({key: value})
+
+    def labels(self, labels: Dict[str, str]):
+        # Filter out system labels and warn user
+        filtered_labels = {}
+        for key, value in labels.items():
+            if key in SYSTEM_LABELS:
+                logger.warning(
+                    f"Label '{key}' is a system label and cannot be overridden by user. Ignoring."
+                )
+            else:
+                filtered_labels[key] = value
+
+        self.dataproc_config.labels.update(filtered_labels)
+        return self
 
     def remote(self, url: Optional[str] = None) -> "SparkSession.Builder":
         if url:
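
The hunk above gives the builder a fluent configuration surface (runtime version, service account, subnetwork, TTLs, session template, and labels) and filters out the `SYSTEM_LABELS`. A usage sketch, under the assumption that these methods are exposed via `DataprocSparkSession.builder` as in earlier releases; the values are placeholders:

```python
import datetime

from google.cloud.dataproc_spark_connect import DataprocSparkSession

builder = (
    DataprocSparkSession.builder
    .runtimeVersion("3.0")
    .ttl(datetime.timedelta(hours=4))         # same as .ttlSeconds(14400)
    .idleTtl(datetime.timedelta(minutes=30))  # same as .idleTtlSeconds(1800)
    .label("team", "analytics")               # placeholder label
)

# System labels are dropped with a warning instead of being overridden.
builder.labels({"dataproc-session-client": "spoofed", "env": "dev"})

spark = builder.getOrCreate()  # creates or reuses a Dataproc session
```

The remaining session.py hunks continue below.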

@@ -258,8 +330,7 @@ class DataprocSparkSession(SparkSession):
             client_options=self._client_options
         ).create_session(session_request)
         self._display_session_link_on_creation(session_id)
-
-        # self._display_view_session_details_button(session_id)
+        self._display_view_session_details_button(session_id)
         create_session_pbar_thread.start()
         session_response: Session = operation.result(
             polling=retry.Retry(
@@ -377,8 +448,7 @@ class DataprocSparkSession(SparkSession):
             print(
                 f"Using existing Dataproc Session (configuration changes may not be applied): https://console.cloud.google.com/dataproc/interactive/{self._region}/{s8s_session_id}?project={self._project_id}"
             )
-
-            # self._display_view_session_details_button(s8s_session_id)
+            self._display_view_session_details_button(s8s_session_id)
         if session is None:
             session = self.__create_spark_connect_session_from_s8s(
                 session_response, session_name
@@ -401,11 +471,10 @@ class DataprocSparkSession(SparkSession):
         return session
 
     def _get_dataproc_config(self):
-
-
-
-
-            dataproc_config.runtime_config.properties[k] = v
+        # Use the property to ensure we always have a config
+        dataproc_config = self.dataproc_config
+        for k, v in self._options.items():
+            dataproc_config.runtime_config.properties[k] = v
         dataproc_config.spark_connect_session = (
             sessions.SparkConnectConfig()
         )
@@ -413,6 +482,11 @@ class DataprocSparkSession(SparkSession):
             dataproc_config.runtime_config.version = (
                 DataprocSparkSession._DEFAULT_RUNTIME_VERSION
             )
+
+        # Check for Python version mismatch with runtime for UDF compatibility
+        self._check_python_version_compatibility(
+            dataproc_config.runtime_config.version
+        )
         if (
             not dataproc_config.environment_config.execution_config.authentication_config.user_workload_authentication_type
             and "DATAPROC_SPARK_CONNECT_AUTH_TYPE" in os.environ
@@ -452,6 +526,10 @@ class DataprocSparkSession(SparkSession):
                 os.getenv("DATAPROC_SPARK_CONNECT_IDLE_TTL_SECONDS")
             )
         }
+        client_environment = environment.get_client_environment_label()
+        dataproc_config.labels["dataproc-session-client"] = (
+            client_environment
+        )
         if "COLAB_NOTEBOOK_ID" in os.environ:
             colab_notebook_name = os.environ["COLAB_NOTEBOOK_ID"]
             # Extract the last part of the path, which is the ID
@@ -466,34 +544,55 @@ class DataprocSparkSession(SparkSession):
                     f"Only lowercase letters, numbers, and dashes are allowed. "
                    f"The value must start with lowercase letter or number and end with a lowercase letter or number. "
                    f"Maximum length is 63 characters. "
-                    f"
+                    f"Ignoring notebook ID label."
                 )
         default_datasource = os.getenv(
             "DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"
         )
-
-
-
-
-
-        bq_datasource_properties = {
+        match default_datasource:
+            case "bigquery":
+                # Merge default configs with existing properties,
+                # user configs take precedence
+                for k, v in {
                     "spark.datasource.bigquery.viewsEnabled": "true",
                     "spark.datasource.bigquery.writeMethod": "direct",
                     "spark.sql.catalog.spark_catalog": "com.google.cloud.spark.bigquery.BigQuerySparkSessionCatalog",
-                    "spark.sql.legacy.createHiveTableByDefault": "false",
                     "spark.sql.sources.default": "bigquery",
-        }
-        # Merge default configs with existing properties, user configs take precedence
-        for k, v in bq_datasource_properties.items():
+                }.items():
                     if k not in dataproc_config.runtime_config.properties:
                         dataproc_config.runtime_config.properties[k] = v
-
-
-
-
-
+            case _:
+                if default_datasource:
+                    logger.warning(
+                        f"DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE is set to an invalid value:"
+                        f" {default_datasource}. Supported value is 'bigquery'."
+                    )
         return dataproc_config
 
+    def _check_python_version_compatibility(self, runtime_version):
+        """Check if client Python version matches server Python version for UDF compatibility."""
+        import sys
+        import warnings
+
+        # Runtime version to server Python version mapping
+        RUNTIME_PYTHON_MAP = {
+            "3.0": (3, 11),
+        }
+
+        client_python = sys.version_info[:2]  # (major, minor)
+
+        if runtime_version in RUNTIME_PYTHON_MAP:
+            server_python = RUNTIME_PYTHON_MAP[runtime_version]
+
+            if client_python != server_python:
+                warnings.warn(
+                    f"Python version mismatch detected: Client is using Python {client_python[0]}.{client_python[1]}, "
+                    f"but Dataproc runtime {runtime_version} uses Python {server_python[0]}.{server_python[1]}. "
+                    f"This mismatch may cause issues with Python UDF (User Defined Function) compatibility. "
+                    f"Consider using Python {server_python[0]}.{server_python[1]} for optimal UDF execution.",
+                    stacklevel=3,
+                )
+
     def _display_view_session_details_button(self, session_id):
         try:
             session_url = f"https://console.cloud.google.com/dataproc/interactive/sessions/{session_id}/locations/{self._region}?project={self._project_id}"
@@ -690,7 +789,7 @@ class DataprocSparkSession(SparkSession):
         This is an API dedicated to Spark Connect client only. With regular Spark Session, it throws
         an exception.
         Regarding pypi: Popular packages are already pre-installed in s8s runtime.
-        https://cloud.google.com/dataproc-serverless/docs/concepts/versions/spark-runtime-2.
+        https://cloud.google.com/dataproc-serverless/docs/concepts/versions/spark-runtime-2.3#python_libraries
         If there are conflicts/package doesn't exist, it throws an exception.
         """
         if sum([pypi, file, pyfile, archive]) > 1:
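
The `_check_python_version_compatibility` helper added above warns when the local interpreter does not match the Python shipped with the selected runtime (runtime 3.0 maps to Python 3.11). A standalone restatement of that check, kept separate from the library so it can be run anywhere:

```python
import sys
import warnings

# Mirrors the mapping added in the diff: runtime "3.0" bundles Python 3.11.
RUNTIME_PYTHON_MAP = {"3.0": (3, 11)}

def check_runtime_python(runtime_version: str) -> None:
    client = sys.version_info[:2]
    server = RUNTIME_PYTHON_MAP.get(runtime_version)
    if server and client != server:
        warnings.warn(
            f"Client Python {client[0]}.{client[1]} differs from runtime "
            f"{runtime_version}'s Python {server[0]}.{server[1]}; "
            "Python UDFs may not be compatible."
        )

check_runtime_python("3.0")  # warns unless run under Python 3.11
```

The BigQuery defaults in the same file remain opt-in: they are merged only when `DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE` is set to `bigquery`, and user-supplied properties always take precedence over the merged defaults.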

--- dataproc_spark_connect-0.8.3/setup.py
+++ dataproc_spark_connect-1.0.0rc1/setup.py
@@ -20,7 +20,7 @@ long_description = (this_directory / "README.md").read_text()
 
 setup(
     name="dataproc-spark-connect",
-    version="0.8.3",
+    version="1.0.0rc1",
     description="Dataproc client library for Spark Connect",
     long_description=long_description,
     author="Google LLC",
@@ -31,7 +31,7 @@ setup(
         "google-api-core>=2.19",
         "google-cloud-dataproc>=5.18",
         "packaging>=20.0",
-        "pyspark[connect]~=
+        "pyspark[connect]~=4.0.0",
         "tqdm>=4.67",
         "websockets>=14.0",
     ],
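
With the pyspark[connect] pin moving to `~=4.0.0`, a quick local check that the installed PySpark satisfies the new requirement can use `packaging`, which is already a declared dependency (a sketch; it assumes pyspark is installed in the current environment):

```python
from importlib.metadata import version

from packaging.specifiers import SpecifierSet

installed = version("pyspark")
print(installed, installed in SpecifierSet("~=4.0.0"))  # e.g. "4.0.0 True"
```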