dataproc-spark-connect 0.7.2-py2.py3-none-any.whl → 0.7.4-py2.py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- {dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/METADATA +1 -1
- {dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/RECORD +6 -6
- google/cloud/dataproc_spark_connect/session.py +32 -8
- {dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/LICENSE +0 -0
- {dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/WHEEL +0 -0
- {dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/top_level.txt +0 -0
{dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/RECORD RENAMED
@@ -1,12 +1,12 @@
 google/cloud/dataproc_spark_connect/__init__.py,sha256=dIqHNWVWWrSuRf26x11kX5e9yMKSHCtmI_GBj1-FDdE,1101
 google/cloud/dataproc_spark_connect/exceptions.py,sha256=ilGyHD5M_yBQ3IC58-Y5miRGIQVJsLaNKvEGcHuk_BE,969
 google/cloud/dataproc_spark_connect/pypi_artifacts.py,sha256=gd-VMwiVP-EJuPp9Vf9Shx8pqps3oSKp0hBcSSZQS-A,1575
-google/cloud/dataproc_spark_connect/session.py,sha256=
+google/cloud/dataproc_spark_connect/session.py,sha256=kMCZWmi_-ScJy9NO7NFrHaHDTXKxMwaCSDbdqGxEngk,26390
 google/cloud/dataproc_spark_connect/client/__init__.py,sha256=6hCNSsgYlie6GuVpc5gjFsPnyeMTScTpXSPYqp1fplY,615
 google/cloud/dataproc_spark_connect/client/core.py,sha256=m3oXTKBm3sBy6jhDu9GRecrxLb5CdEM53SgMlnJb6ag,4616
 google/cloud/dataproc_spark_connect/client/proxy.py,sha256=qUZXvVY1yn934vE6nlO495XUZ53AUx9O74a9ozkGI9U,8976
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
+dataproc_spark_connect-0.7.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+dataproc_spark_connect-0.7.4.dist-info/METADATA,sha256=viQwCWio0b-xja72qtR447f9Ol7nl0k5d6bx1j2BAEk,3328
+dataproc_spark_connect-0.7.4.dist-info/WHEEL,sha256=OpXWERl2xLPRHTvd2ZXo_iluPEQd8uSbYkJ53NAER_Y,109
+dataproc_spark_connect-0.7.4.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
+dataproc_spark_connect-0.7.4.dist-info/RECORD,,
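For reference, each RECORD entry has the form path,sha256=<hash>,<size>, where the hash is an unpadded urlsafe-base64 SHA-256 digest of the file, per the wheel RECORD format. A minimal sketch for recomputing an entry's hash (the helper name is ours, not from this package):

    import base64
    import hashlib

    def record_hash(path: str) -> str:
        # Hash the file exactly as wheel RECORD entries do:
        # SHA-256, urlsafe base64, '=' padding stripped.
        with open(path, "rb") as f:
            digest = hashlib.sha256(f.read()).digest()
        return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

    # e.g. record_hash("google/cloud/dataproc_spark_connect/session.py") should
    # reproduce the new value above when run against the unpacked 0.7.4 wheel.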
google/cloud/dataproc_spark_connect/session.py CHANGED
@@ -67,7 +67,7 @@ class DataprocSparkSession(SparkSession):
         ... ) # doctest: +SKIP
         """
 
-    _DEFAULT_RUNTIME_VERSION = "2.
+    _DEFAULT_RUNTIME_VERSION = "2.3"
 
     _active_s8s_session_uuid: ClassVar[Optional[str]] = None
     _project_id = None
@@ -198,7 +198,7 @@ class DataprocSparkSession(SparkSession):
         DataprocSparkSession._active_s8s_session_id = session_id
         s8s_creation_start_time = time.time()
 
-
+        stop_create_session_pbar_event = threading.Event()
 
         def create_session_pbar():
             iterations = 150
@@ -208,12 +208,12 @@ class DataprocSparkSession(SparkSession):
                 ncols=80,
             )
             for i in pbar:
-                if
+                if stop_create_session_pbar_event.is_set():
                     break
                 # Last iteration
                 if i >= iterations - 1:
                     # Sleep until session created
-                    while not
+                    while not stop_create_session_pbar_event.is_set():
                         time.sleep(1)
                 else:
                     time.sleep(1)
@@ -258,7 +258,7 @@ class DataprocSparkSession(SparkSession):
                     timeout=600, # seconds
                 )
             )
-
+            stop_create_session_pbar_event.set()
             create_session_pbar_thread.join()
             print("Dataproc Session was successfully created")
             file_path = (
@@ -280,7 +280,7 @@ class DataprocSparkSession(SparkSession):
                     f"Exception while writing active session to file {file_path}, {e}"
                 )
         except (InvalidArgument, PermissionDenied) as e:
-
+            stop_create_session_pbar_event.set()
             if create_session_pbar_thread.is_alive():
                 create_session_pbar_thread.join()
             DataprocSparkSession._active_s8s_session_id = None
@@ -288,7 +288,7 @@ class DataprocSparkSession(SparkSession):
                 f"Error while creating Dataproc Session: {e.message}"
             )
         except Exception as e:
-
+            stop_create_session_pbar_event.set()
             if create_session_pbar_thread.is_alive():
                 create_session_pbar_thread.join()
             DataprocSparkSession._active_s8s_session_id = None
@@ -296,7 +296,7 @@ class DataprocSparkSession(SparkSession):
                 f"Error while creating Dataproc Session"
             ) from e
         finally:
-
+            stop_create_session_pbar_event.set()
 
         logger.debug(
             f"Dataproc Session created: {session_id} in {int(time.time() - s8s_creation_start_time)} seconds"
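Taken together, these hunks coordinate the progress-bar thread through a shared threading.Event: the bar thread polls is_set(), and the main thread calls set() on success, on InvalidArgument/PermissionDenied, on any other exception, and again in finally, so the bar thread always terminates. A minimal standalone sketch of the pattern (simplified names; the real code drives a tqdm bar over 150 iterations):

    import threading
    import time

    stop_event = threading.Event()

    def progress_worker():
        # Tick until the main thread signals completion or failure.
        while not stop_event.is_set():
            print(".", end="", flush=True)
            time.sleep(1)

    worker = threading.Thread(target=progress_worker, daemon=True)
    worker.start()
    try:
        time.sleep(3)  # stand-in for the blocking create-session RPC
    finally:
        # Setting the event in `finally` guarantees the worker exits
        # no matter how the RPC ends.
        stop_event.set()
        worker.join()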
@@ -408,6 +408,30 @@ class DataprocSparkSession(SparkSession):
             dataproc_config.labels["colab-notebook-kernel-id"] = os.environ[
                 "COLAB_NOTEBOOK_KERNEL_ID"
             ]
+        default_datasource = os.getenv(
+            "DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"
+        )
+        if (
+            default_datasource
+            and dataproc_config.runtime_config.version == "2.3"
+        ):
+            if default_datasource == "bigquery":
+                bq_datasource_properties = {
+                    "spark.datasource.bigquery.viewsEnabled": "true",
+                    "spark.datasource.bigquery.writeMethod": "direct",
+                    "spark.sql.catalog.spark_catalog": "com.google.cloud.spark.bigquery.BigQuerySparkSessionCatalog",
+                    "spark.sql.legacy.createHiveTableByDefault": "false",
+                    "spark.sql.sources.default": "bigquery",
+                }
+                # Merge default configs with existing properties, user configs take precedence
+                for k, v in bq_datasource_properties.items():
+                    if k not in dataproc_config.runtime_config.properties:
+                        dataproc_config.runtime_config.properties[k] = v
+            else:
+                logger.warning(
+                    f"DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE is set to an invalid value:"
+                    f" {default_datasource}. Supported value is 'bigquery'."
+                )
         return dataproc_config
 
     @staticmethod
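This new block opts runtime-2.3 sessions into BigQuery as the default Spark SQL datasource when the DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE environment variable is set, skipping any property the user already configured. A hedged usage sketch (the builder call follows the package's documented flow and is not part of this diff):

    import os

    # Opt in before building the session; only "bigquery" is recognized,
    # and the defaults apply only when the runtime version is "2.3".
    os.environ["DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"] = "bigquery"

    from google.cloud.dataproc_spark_connect import DataprocSparkSession

    spark = DataprocSparkSession.builder.getOrCreate()
    # spark.sql.sources.default now resolves to "bigquery" unless the
    # user overrode any of the merged properties.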
{dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/LICENSE RENAMED
File without changes
{dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/WHEEL RENAMED
File without changes
{dataproc_spark_connect-0.7.2.dist-info → dataproc_spark_connect-0.7.4.dist-info}/top_level.txt RENAMED
File without changes