dataproc-spark-connect 0.7.3__tar.gz → 0.7.4__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (18):
  1. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/PKG-INFO +1 -1
  2. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/dataproc_spark_connect.egg-info/PKG-INFO +1 -1
  3. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/google/cloud/dataproc_spark_connect/session.py +24 -0
  4. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/setup.py +1 -1
  5. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/LICENSE +0 -0
  6. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/README.md +0 -0
  7. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/dataproc_spark_connect.egg-info/SOURCES.txt +0 -0
  8. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
  9. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/dataproc_spark_connect.egg-info/requires.txt +0 -0
  10. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
  11. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/google/cloud/dataproc_spark_connect/__init__.py +0 -0
  12. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
  13. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/google/cloud/dataproc_spark_connect/client/core.py +0 -0
  14. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/google/cloud/dataproc_spark_connect/client/proxy.py +0 -0
  15. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/google/cloud/dataproc_spark_connect/exceptions.py +0 -0
  16. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/google/cloud/dataproc_spark_connect/pypi_artifacts.py +0 -0
  17. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/pyproject.toml +0 -0
  18. {dataproc_spark_connect-0.7.3 → dataproc_spark_connect-0.7.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataproc-spark-connect
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: Dataproc client library for Spark Connect
5
5
  Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
6
6
  Author: Google LLC
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dataproc-spark-connect
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: Dataproc client library for Spark Connect
5
5
  Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
6
6
  Author: Google LLC
@@ -408,6 +408,30 @@ class DataprocSparkSession(SparkSession):
408
408
  dataproc_config.labels["colab-notebook-kernel-id"] = os.environ[
409
409
  "COLAB_NOTEBOOK_KERNEL_ID"
410
410
  ]
411
+ default_datasource = os.getenv(
412
+ "DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"
413
+ )
414
+ if (
415
+ default_datasource
416
+ and dataproc_config.runtime_config.version == "2.3"
417
+ ):
418
+ if default_datasource == "bigquery":
419
+ bq_datasource_properties = {
420
+ "spark.datasource.bigquery.viewsEnabled": "true",
421
+ "spark.datasource.bigquery.writeMethod": "direct",
422
+ "spark.sql.catalog.spark_catalog": "com.google.cloud.spark.bigquery.BigQuerySparkSessionCatalog",
423
+ "spark.sql.legacy.createHiveTableByDefault": "false",
424
+ "spark.sql.sources.default": "bigquery",
425
+ }
426
+ # Merge default configs with existing properties, user configs take precedence
427
+ for k, v in bq_datasource_properties.items():
428
+ if k not in dataproc_config.runtime_config.properties:
429
+ dataproc_config.runtime_config.properties[k] = v
430
+ else:
431
+ logger.warning(
432
+ f"DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE is set to an invalid value:"
433
+ f" {default_datasource}. Supported value is 'bigquery'."
434
+ )
411
435
  return dataproc_config
412
436
 
413
437
  @staticmethod
@@ -20,7 +20,7 @@ long_description = (this_directory / "README.md").read_text()
20
20
 
21
21
  setup(
22
22
  name="dataproc-spark-connect",
23
- version="0.7.3",
23
+ version="0.7.4",
24
24
  description="Dataproc client library for Spark Connect",
25
25
  long_description=long_description,
26
26
  author="Google LLC",