dataproc-spark-connect 0.7.3__py2.py3-none-any.whl → 0.7.4__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataproc-spark-connect
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: Dataproc client library for Spark Connect
5
5
  Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
6
6
  Author: Google LLC
@@ -1,12 +1,12 @@
1
1
  google/cloud/dataproc_spark_connect/__init__.py,sha256=dIqHNWVWWrSuRf26x11kX5e9yMKSHCtmI_GBj1-FDdE,1101
2
2
  google/cloud/dataproc_spark_connect/exceptions.py,sha256=ilGyHD5M_yBQ3IC58-Y5miRGIQVJsLaNKvEGcHuk_BE,969
3
3
  google/cloud/dataproc_spark_connect/pypi_artifacts.py,sha256=gd-VMwiVP-EJuPp9Vf9Shx8pqps3oSKp0hBcSSZQS-A,1575
4
- google/cloud/dataproc_spark_connect/session.py,sha256=34hEgR57qi8vyM0_j749TCcpPeprJrEYXOzuAAGUrRg,25020
4
+ google/cloud/dataproc_spark_connect/session.py,sha256=kMCZWmi_-ScJy9NO7NFrHaHDTXKxMwaCSDbdqGxEngk,26390
5
5
  google/cloud/dataproc_spark_connect/client/__init__.py,sha256=6hCNSsgYlie6GuVpc5gjFsPnyeMTScTpXSPYqp1fplY,615
6
6
  google/cloud/dataproc_spark_connect/client/core.py,sha256=m3oXTKBm3sBy6jhDu9GRecrxLb5CdEM53SgMlnJb6ag,4616
7
7
  google/cloud/dataproc_spark_connect/client/proxy.py,sha256=qUZXvVY1yn934vE6nlO495XUZ53AUx9O74a9ozkGI9U,8976
8
- dataproc_spark_connect-0.7.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
- dataproc_spark_connect-0.7.3.dist-info/METADATA,sha256=pj2QInvZOehYc9RLMp0XUfz9nzA9rZ76PKaZDAx0-nA,3328
10
- dataproc_spark_connect-0.7.3.dist-info/WHEEL,sha256=OpXWERl2xLPRHTvd2ZXo_iluPEQd8uSbYkJ53NAER_Y,109
11
- dataproc_spark_connect-0.7.3.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
12
- dataproc_spark_connect-0.7.3.dist-info/RECORD,,
8
+ dataproc_spark_connect-0.7.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
+ dataproc_spark_connect-0.7.4.dist-info/METADATA,sha256=viQwCWio0b-xja72qtR447f9Ol7nl0k5d6bx1j2BAEk,3328
10
+ dataproc_spark_connect-0.7.4.dist-info/WHEEL,sha256=OpXWERl2xLPRHTvd2ZXo_iluPEQd8uSbYkJ53NAER_Y,109
11
+ dataproc_spark_connect-0.7.4.dist-info/top_level.txt,sha256=_1QvSJIhFAGfxb79D6DhB7SUw2X6T4rwnz_LLrbcD3c,7
12
+ dataproc_spark_connect-0.7.4.dist-info/RECORD,,
@@ -408,6 +408,30 @@ class DataprocSparkSession(SparkSession):
408
408
  dataproc_config.labels["colab-notebook-kernel-id"] = os.environ[
409
409
  "COLAB_NOTEBOOK_KERNEL_ID"
410
410
  ]
411
+ default_datasource = os.getenv(
412
+ "DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"
413
+ )
414
+ if (
415
+ default_datasource
416
+ and dataproc_config.runtime_config.version == "2.3"
417
+ ):
418
+ if default_datasource == "bigquery":
419
+ bq_datasource_properties = {
420
+ "spark.datasource.bigquery.viewsEnabled": "true",
421
+ "spark.datasource.bigquery.writeMethod": "direct",
422
+ "spark.sql.catalog.spark_catalog": "com.google.cloud.spark.bigquery.BigQuerySparkSessionCatalog",
423
+ "spark.sql.legacy.createHiveTableByDefault": "false",
424
+ "spark.sql.sources.default": "bigquery",
425
+ }
426
+ # Merge default configs with existing properties, user configs take precedence
427
+ for k, v in bq_datasource_properties.items():
428
+ if k not in dataproc_config.runtime_config.properties:
429
+ dataproc_config.runtime_config.properties[k] = v
430
+ else:
431
+ logger.warning(
432
+ f"DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE is set to an invalid value:"
433
+ f" {default_datasource}. Supported value is 'bigquery'."
434
+ )
411
435
  return dataproc_config
412
436
 
413
437
  @staticmethod