dataproc-spark-connect 0.7.3__py2.py3-none-any.whl → 0.7.5__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/METADATA +2 -2
- {dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/RECORD +6 -6
- google/cloud/dataproc_spark_connect/session.py +24 -0
- {dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/LICENSE +0 -0
- {dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/WHEEL +0 -0
- {dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/top_level.txt +0 -0
{dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dataproc-spark-connect
-Version: 0.7.3
+Version: 0.7.5
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
@@ -9,7 +9,7 @@ License-File: LICENSE
 Requires-Dist: google-api-core>=2.19
 Requires-Dist: google-cloud-dataproc>=5.18
 Requires-Dist: packaging>=20.0
-Requires-Dist: pyspark[connect]
+Requires-Dist: pyspark[connect]~=3.5.1
 Requires-Dist: tqdm>=4.67
 Requires-Dist: websockets>=14.0
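The functional dependency change here is the new compatible-release pin on pyspark[connect]. Per PEP 440, "~=3.5.1" means ">=3.5.1, ==3.5.*", so 0.7.5 accepts 3.5.x releases from 3.5.1 up but rejects 3.6 and later. An illustrative check (not part of the package) using the packaging library, which is already a declared dependency:

    from packaging.specifiers import SpecifierSet

    # "~=3.5.1" is the PEP 440 compatible-release operator: >=3.5.1, ==3.5.*
    spec = SpecifierSet("~=3.5.1")
    print("3.5.1" in spec)  # True
    print("3.5.4" in spec)  # True
    print("3.6.0" in spec)  # False
    print("4.0.0" in spec)  # False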
{dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/RECORD
RENAMED

@@ -1,12 +1,12 @@
 google/cloud/dataproc_spark_connect/__init__.py,sha256=dIqHNWVWWrSuRf26x11kX5e9yMKSHCtmI_GBj1-FDdE,1101
 google/cloud/dataproc_spark_connect/exceptions.py,sha256=ilGyHD5M_yBQ3IC58-Y5miRGIQVJsLaNKvEGcHuk_BE,969
 google/cloud/dataproc_spark_connect/pypi_artifacts.py,sha256=gd-VMwiVP-EJuPp9Vf9Shx8pqps3oSKp0hBcSSZQS-A,1575
-google/cloud/dataproc_spark_connect/session.py,sha256=
+google/cloud/dataproc_spark_connect/session.py,sha256=kMCZWmi_-ScJy9NO7NFrHaHDTXKxMwaCSDbdqGxEngk,26390
 google/cloud/dataproc_spark_connect/client/__init__.py,sha256=6hCNSsgYlie6GuVpc5gjFsPnyeMTScTpXSPYqp1fplY,615
 google/cloud/dataproc_spark_connect/client/core.py,sha256=m3oXTKBm3sBy6jhDu9GRecrxLb5CdEM53SgMlnJb6ag,4616
 google/cloud/dataproc_spark_connect/client/proxy.py,sha256=qUZXvVY1yn934vE6nlO495XUZ53AUx9O74a9ozkGI9U,8976
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
-dataproc_spark_connect-0.7.
+dataproc_spark_connect-0.7.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+dataproc_spark_connect-0.7.5.dist-info/METADATA,sha256=byc2dTo3PdkmbMZHyaJ9A-WVhSQTHn1ZAX_Jqz9jmd0,3330
+dataproc_spark_connect-0.7.5.dist-info/WHEEL,sha256=OpXWERl2xLPRHTvd2ZXo_iluPEQd8uSbYkJ53NAER_Y,109
+dataproc_spark_connect-0.7.5.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
+dataproc_spark_connect-0.7.5.dist-info/RECORD,,
google/cloud/dataproc_spark_connect/session.py

@@ -408,6 +408,30 @@ class DataprocSparkSession(SparkSession):
             dataproc_config.labels["colab-notebook-kernel-id"] = os.environ[
                 "COLAB_NOTEBOOK_KERNEL_ID"
             ]
+        default_datasource = os.getenv(
+            "DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"
+        )
+        if (
+            default_datasource
+            and dataproc_config.runtime_config.version == "2.3"
+        ):
+            if default_datasource == "bigquery":
+                bq_datasource_properties = {
+                    "spark.datasource.bigquery.viewsEnabled": "true",
+                    "spark.datasource.bigquery.writeMethod": "direct",
+                    "spark.sql.catalog.spark_catalog": "com.google.cloud.spark.bigquery.BigQuerySparkSessionCatalog",
+                    "spark.sql.legacy.createHiveTableByDefault": "false",
+                    "spark.sql.sources.default": "bigquery",
+                }
+                # Merge default configs with existing properties, user configs take precedence
+                for k, v in bq_datasource_properties.items():
+                    if k not in dataproc_config.runtime_config.properties:
+                        dataproc_config.runtime_config.properties[k] = v
+            else:
+                logger.warning(
+                    f"DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE is set to an invalid value:"
+                    f" {default_datasource}. Supported value is 'bigquery'."
+                )
         return dataproc_config

     @staticmethod
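The added block only takes effect when the DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE environment variable is set and the session's runtime version is "2.3"; any other value for the variable just logs a warning, and properties the user has already set are never overwritten. A minimal usage sketch, assuming the builder entry point exposed by the package (project, region, and credential setup omitted):

    import os

    from google.cloud.dataproc_spark_connect import DataprocSparkSession

    # Opt in to the BigQuery defaults added in 0.7.5. Per the diff above, they
    # are injected only for runtime version 2.3 and only for property keys the
    # user has not already set, so explicit session properties take precedence.
    os.environ["DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"] = "bigquery"

    spark = DataprocSparkSession.builder.getOrCreate()

    # With the defaults applied, "spark.sql.sources.default" is "bigquery",
    # so unqualified table reads and writes go through the BigQuery connector.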
{dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/LICENSE
RENAMED
File without changes

{dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/WHEEL
RENAMED
File without changes

{dataproc_spark_connect-0.7.3.dist-info → dataproc_spark_connect-0.7.5.dist-info}/top_level.txt
RENAMED
File without changes