dataproc-spark-connect 0.7.0__py2.py3-none-any.whl → 0.7.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataproc_spark_connect-0.7.0.dist-info → dataproc_spark_connect-0.7.2.dist-info}/METADATA +2 -2
- {dataproc_spark_connect-0.7.0.dist-info → dataproc_spark_connect-0.7.2.dist-info}/RECORD +6 -6
- google/cloud/dataproc_spark_connect/session.py +2 -39
- {dataproc_spark_connect-0.7.0.dist-info → dataproc_spark_connect-0.7.2.dist-info}/LICENSE +0 -0
- {dataproc_spark_connect-0.7.0.dist-info → dataproc_spark_connect-0.7.2.dist-info}/WHEEL +0 -0
- {dataproc_spark_connect-0.7.0.dist-info → dataproc_spark_connect-0.7.2.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dataproc-spark-connect
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.2
|
|
4
4
|
Summary: Dataproc client library for Spark Connect
|
|
5
5
|
Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
|
|
6
6
|
Author: Google LLC
|
|
@@ -11,7 +11,7 @@ Requires-Dist: google-cloud-dataproc>=5.18
|
|
|
11
11
|
Requires-Dist: packaging>=20.0
|
|
12
12
|
Requires-Dist: pyspark[connect]>=3.5
|
|
13
13
|
Requires-Dist: tqdm>=4.67
|
|
14
|
-
Requires-Dist: websockets>=
|
|
14
|
+
Requires-Dist: websockets>=14.0
|
|
15
15
|
|
|
16
16
|
# Dataproc Spark Connect Client
|
|
17
17
|
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
google/cloud/dataproc_spark_connect/__init__.py,sha256=dIqHNWVWWrSuRf26x11kX5e9yMKSHCtmI_GBj1-FDdE,1101
|
|
2
2
|
google/cloud/dataproc_spark_connect/exceptions.py,sha256=ilGyHD5M_yBQ3IC58-Y5miRGIQVJsLaNKvEGcHuk_BE,969
|
|
3
3
|
google/cloud/dataproc_spark_connect/pypi_artifacts.py,sha256=gd-VMwiVP-EJuPp9Vf9Shx8pqps3oSKp0hBcSSZQS-A,1575
|
|
4
|
-
google/cloud/dataproc_spark_connect/session.py,sha256=
|
|
4
|
+
google/cloud/dataproc_spark_connect/session.py,sha256=7CgRmYVp8M4oVosvxz08jp72BU4ZtAKvxjtd0101GHs,24952
|
|
5
5
|
google/cloud/dataproc_spark_connect/client/__init__.py,sha256=6hCNSsgYlie6GuVpc5gjFsPnyeMTScTpXSPYqp1fplY,615
|
|
6
6
|
google/cloud/dataproc_spark_connect/client/core.py,sha256=m3oXTKBm3sBy6jhDu9GRecrxLb5CdEM53SgMlnJb6ag,4616
|
|
7
7
|
google/cloud/dataproc_spark_connect/client/proxy.py,sha256=qUZXvVY1yn934vE6nlO495XUZ53AUx9O74a9ozkGI9U,8976
|
|
8
|
-
dataproc_spark_connect-0.7.
|
|
9
|
-
dataproc_spark_connect-0.7.
|
|
10
|
-
dataproc_spark_connect-0.7.
|
|
11
|
-
dataproc_spark_connect-0.7.
|
|
12
|
-
dataproc_spark_connect-0.7.
|
|
8
|
+
dataproc_spark_connect-0.7.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
dataproc_spark_connect-0.7.2.dist-info/METADATA,sha256=EtPp009uOCvR1I3tiT-x1FJ4bWpPHhEqZxvJO4rfwKk,3328
|
|
10
|
+
dataproc_spark_connect-0.7.2.dist-info/WHEEL,sha256=OpXWERl2xLPRHTvd2ZXo_iluPEQd8uSbYkJ53NAER_Y,109
|
|
11
|
+
dataproc_spark_connect-0.7.2.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
|
|
12
|
+
dataproc_spark_connect-0.7.2.dist-info/RECORD,,
|
|
@@ -77,13 +77,6 @@ class DataprocSparkSession(SparkSession):
|
|
|
77
77
|
|
|
78
78
|
class Builder(SparkSession.Builder):
|
|
79
79
|
|
|
80
|
-
_dataproc_runtime_to_spark_version = {
|
|
81
|
-
"1.2": "3.5",
|
|
82
|
-
"2.2": "3.5",
|
|
83
|
-
"2.3": "3.5",
|
|
84
|
-
"3.0": "4.0",
|
|
85
|
-
}
|
|
86
|
-
|
|
87
80
|
_session_static_configs = [
|
|
88
81
|
"spark.executor.cores",
|
|
89
82
|
"spark.executor.memoryOverhead",
|
|
@@ -188,8 +181,6 @@ class DataprocSparkSession(SparkSession):
|
|
|
188
181
|
|
|
189
182
|
dataproc_config: Session = self._get_dataproc_config()
|
|
190
183
|
|
|
191
|
-
self._validate_version(dataproc_config)
|
|
192
|
-
|
|
193
184
|
session_id = self.generate_dataproc_session_id()
|
|
194
185
|
dataproc_config.name = f"projects/{self._project_id}/locations/{self._region}/sessions/{session_id}"
|
|
195
186
|
logger.debug(
|
|
@@ -304,6 +295,8 @@ class DataprocSparkSession(SparkSession):
|
|
|
304
295
|
raise RuntimeError(
|
|
305
296
|
f"Error while creating Dataproc Session"
|
|
306
297
|
) from e
|
|
298
|
+
finally:
|
|
299
|
+
stop_create_session_pbar = True
|
|
307
300
|
|
|
308
301
|
logger.debug(
|
|
309
302
|
f"Dataproc Session created: {session_id} in {int(time.time() - s8s_creation_start_time)} seconds"
|
|
@@ -417,36 +410,6 @@ class DataprocSparkSession(SparkSession):
|
|
|
417
410
|
]
|
|
418
411
|
return dataproc_config
|
|
419
412
|
|
|
420
|
-
def _validate_version(self, dataproc_config):
|
|
421
|
-
trim_version = lambda v: ".".join(v.split(".")[:2])
|
|
422
|
-
|
|
423
|
-
version = dataproc_config.runtime_config.version
|
|
424
|
-
if (
|
|
425
|
-
trim_version(version)
|
|
426
|
-
not in self._dataproc_runtime_to_spark_version
|
|
427
|
-
):
|
|
428
|
-
raise ValueError(
|
|
429
|
-
f"Specified {version} Dataproc Spark runtime version is not supported. "
|
|
430
|
-
f"Supported runtime versions: {self._dataproc_runtime_to_spark_version.keys()}"
|
|
431
|
-
)
|
|
432
|
-
|
|
433
|
-
server_version = self._dataproc_runtime_to_spark_version[
|
|
434
|
-
trim_version(version)
|
|
435
|
-
]
|
|
436
|
-
|
|
437
|
-
import importlib.metadata
|
|
438
|
-
|
|
439
|
-
dataproc_connect_version = importlib.metadata.version(
|
|
440
|
-
"dataproc-spark-connect"
|
|
441
|
-
)
|
|
442
|
-
client_version = importlib.metadata.version("pyspark")
|
|
443
|
-
if trim_version(client_version) != trim_version(server_version):
|
|
444
|
-
print(
|
|
445
|
-
f"Spark Connect client and server use different versions:\n"
|
|
446
|
-
f"- Dataproc Spark Connect client {dataproc_connect_version} (PySpark {client_version})\n"
|
|
447
|
-
f"- Dataproc Spark runtime {version} (Spark {server_version})"
|
|
448
|
-
)
|
|
449
|
-
|
|
450
413
|
@staticmethod
|
|
451
414
|
def generate_dataproc_session_id():
|
|
452
415
|
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
|
|
File without changes
|
|
File without changes
|
{dataproc_spark_connect-0.7.0.dist-info → dataproc_spark_connect-0.7.2.dist-info}/top_level.txt
RENAMED
|
File without changes
|