dataproc-spark-connect 0.9.0__tar.gz → 1.0.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/PKG-INFO +2 -2
  2. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/PKG-INFO +2 -2
  3. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/requires.txt +1 -1
  4. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/client/core.py +5 -3
  5. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/session.py +17 -41
  6. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/setup.py +2 -2
  7. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/LICENSE +0 -0
  8. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/README.md +0 -0
  9. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/SOURCES.txt +0 -0
  10. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
  11. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
  12. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/__init__.py +0 -0
  13. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
  14. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/client/proxy.py +0 -0
  15. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/environment.py +0 -0
  16. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/exceptions.py +0 -0
  17. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/google/cloud/dataproc_spark_connect/pypi_artifacts.py +0 -0
  18. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/pyproject.toml +0 -0
  19. {dataproc_spark_connect-0.9.0 → dataproc_spark_connect-1.0.0rc1}/setup.cfg +0 -0
--- dataproc_spark_connect-0.9.0/PKG-INFO
+++ dataproc_spark_connect-1.0.0rc1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataproc-spark-connect
-Version: 0.9.0
+Version: 1.0.0rc1
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
@@ -9,7 +9,7 @@ License-File: LICENSE
 Requires-Dist: google-api-core>=2.19
 Requires-Dist: google-cloud-dataproc>=5.18
 Requires-Dist: packaging>=20.0
-Requires-Dist: pyspark[connect]~=3.5.1
+Requires-Dist: pyspark[connect]~=4.0.0
 Requires-Dist: tqdm>=4.67
 Requires-Dist: websockets>=14.0
 Dynamic: author
--- dataproc_spark_connect-0.9.0/dataproc_spark_connect.egg-info/PKG-INFO
+++ dataproc_spark_connect-1.0.0rc1/dataproc_spark_connect.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dataproc-spark-connect
-Version: 0.9.0
+Version: 1.0.0rc1
 Summary: Dataproc client library for Spark Connect
 Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
 Author: Google LLC
@@ -9,7 +9,7 @@ License-File: LICENSE
 Requires-Dist: google-api-core>=2.19
 Requires-Dist: google-cloud-dataproc>=5.18
 Requires-Dist: packaging>=20.0
-Requires-Dist: pyspark[connect]~=3.5.1
+Requires-Dist: pyspark[connect]~=4.0.0
 Requires-Dist: tqdm>=4.67
 Requires-Dist: websockets>=14.0
 Dynamic: author
--- dataproc_spark_connect-0.9.0/dataproc_spark_connect.egg-info/requires.txt
+++ dataproc_spark_connect-1.0.0rc1/dataproc_spark_connect.egg-info/requires.txt
@@ -1,6 +1,6 @@
 google-api-core>=2.19
 google-cloud-dataproc>=5.18
 packaging>=20.0
-pyspark[connect]~=3.5.1
+pyspark[connect]~=4.0.0
 tqdm>=4.67
 websockets>=14.0
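
The only dependency change is the pyspark pin. Under PEP 440, "~=4.0.0" is a compatible-release specifier equivalent to ">=4.0.0, <4.1.0", so the client now requires a Spark Connect 4.0.x client and no longer resolves against 3.5.x. A minimal check with the packaging library (itself a declared dependency) illustrates the semantics; the probe versions are arbitrary:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet("~=4.0.0")  # same as ">=4.0.0, <4.1.0"
    print("4.0.1" in spec)  # True:  patch releases still match
    print("4.1.0" in spec)  # False: the next minor version is excluded
    print("3.5.1" in spec)  # False: the previous pin no longer matches
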
--- dataproc_spark_connect-0.9.0/google/cloud/dataproc_spark_connect/client/core.py
+++ dataproc_spark_connect-1.0.0rc1/google/cloud/dataproc_spark_connect/client/core.py
@@ -15,14 +15,14 @@ import logging
 
 import google
 import grpc
-from pyspark.sql.connect.client import ChannelBuilder
+from pyspark.sql.connect.client import DefaultChannelBuilder
 
 from . import proxy
 
 logger = logging.getLogger(__name__)
 
 
-class DataprocChannelBuilder(ChannelBuilder):
+class DataprocChannelBuilder(DefaultChannelBuilder):
     """
     This is a helper class that is used to create a GRPC channel based on the given
     connection string per the documentation of Spark Connect.
@@ -88,7 +88,9 @@ class ProxiedChannel(grpc.Channel):
         self._proxy = proxy.DataprocSessionProxy(0, target_host)
         self._proxy.start()
         self._proxied_connect_url = f"sc://localhost:{self._proxy.port}"
-        self._wrapped = ChannelBuilder(self._proxied_connect_url).toChannel()
+        self._wrapped = DefaultChannelBuilder(
+            self._proxied_connect_url
+        ).toChannel()
 
     def __enter__(self):
         return self
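
Both changes in core.py are the same rename: in pyspark 4.x the concrete builder that parses sc:// connection strings is DefaultChannelBuilder (importable from pyspark.sql.connect.client, as the new import shows), and DataprocChannelBuilder and ProxiedChannel now build on it. A minimal sketch of the pattern the proxied channel uses, assuming pyspark 4.0 is installed; the localhost URL is a placeholder:

    from pyspark.sql.connect.client import DefaultChannelBuilder

    # Placeholder endpoint; ProxiedChannel points this at its local proxy port.
    url = "sc://localhost:15002"
    channel = DefaultChannelBuilder(url).toChannel()  # returns a grpc.Channel
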
--- dataproc_spark_connect-0.9.0/google/cloud/dataproc_spark_connect/session.py
+++ dataproc_spark_connect-1.0.0rc1/google/cloud/dataproc_spark_connect/session.py
@@ -103,7 +103,7 @@ class DataprocSparkSession(SparkSession):
     ... ) # doctest: +SKIP
     """
 
-    _DEFAULT_RUNTIME_VERSION = "2.3"
+    _DEFAULT_RUNTIME_VERSION = "3.0"
 
     _active_s8s_session_uuid: ClassVar[Optional[str]] = None
     _project_id = None
@@ -158,19 +158,6 @@ class DataprocSparkSession(SparkSession):
             self.dataproc_config.environment_config.execution_config.service_account = (
                 account
             )
-            # Automatically set auth type to SERVICE_ACCOUNT when service account is provided
-            # This overrides any env var setting to simplify user experience
-            self.dataproc_config.environment_config.execution_config.authentication_config.user_workload_authentication_type = (
-                AuthenticationConfig.AuthenticationType.SERVICE_ACCOUNT
-            )
-            return self
-
-        def authType(
-            self, auth_type: "AuthenticationConfig.AuthenticationType"
-        ):
-            self.dataproc_config.environment_config.execution_config.authentication_config.user_workload_authentication_type = (
-                auth_type
-            )
             return self
 
         def subnetwork(self, subnet: str):
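
With this removal, serviceAccount() now only records the account on the execution config and returns the builder; it no longer forces the authentication type to SERVICE_ACCOUNT, and the authType() builder method is gone altogether. A hedged usage sketch, assuming the builder usage from the package README; the account email is a placeholder:

    from google.cloud.dataproc_spark_connect import DataprocSparkSession

    builder = DataprocSparkSession.builder
    # As of 1.0.0rc1 this sets only execution_config.service_account;
    # authentication_config is left untouched.
    builder.serviceAccount("my-sa@my-project.iam.gserviceaccount.com")
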
@@ -181,10 +168,7 @@ class DataprocSparkSession(SparkSession):
 
         def ttl(self, duration: datetime.timedelta):
             """Set the time-to-live (TTL) for the session using a timedelta object."""
-            self.dataproc_config.environment_config.execution_config.ttl = {
-                "seconds": int(duration.total_seconds())
-            }
-            return self
+            return self.ttlSeconds(int(duration.total_seconds()))
 
         def ttlSeconds(self, seconds: int):
             """Set the time-to-live (TTL) for the session in seconds."""
@@ -195,10 +179,7 @@ class DataprocSparkSession(SparkSession):
 
         def idleTtl(self, duration: datetime.timedelta):
             """Set the idle time-to-live (idle TTL) for the session using a timedelta object."""
-            self.dataproc_config.environment_config.execution_config.idle_ttl = {
-                "seconds": int(duration.total_seconds())
-            }
-            return self
+            return self.idleTtlSeconds(int(duration.total_seconds()))
 
         def idleTtlSeconds(self, seconds: int):
             """Set the idle time-to-live (idle TTL) for the session in seconds."""
@@ -568,27 +549,24 @@ class DataprocSparkSession(SparkSession):
             default_datasource = os.getenv(
                 "DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE"
             )
-            if (
-                default_datasource
-                and dataproc_config.runtime_config.version == "2.3"
-            ):
-                if default_datasource == "bigquery":
-                    bq_datasource_properties = {
+            match default_datasource:
+                case "bigquery":
+                    # Merge default configs with existing properties,
+                    # user configs take precedence
+                    for k, v in {
                         "spark.datasource.bigquery.viewsEnabled": "true",
                         "spark.datasource.bigquery.writeMethod": "direct",
                         "spark.sql.catalog.spark_catalog": "com.google.cloud.spark.bigquery.BigQuerySparkSessionCatalog",
-                        "spark.sql.legacy.createHiveTableByDefault": "false",
                         "spark.sql.sources.default": "bigquery",
-                    }
-                    # Merge default configs with existing properties, user configs take precedence
-                    for k, v in bq_datasource_properties.items():
+                    }.items():
                         if k not in dataproc_config.runtime_config.properties:
                             dataproc_config.runtime_config.properties[k] = v
-                else:
-                    logger.warning(
-                        f"DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE is set to an invalid value:"
-                        f" {default_datasource}. Supported value is 'bigquery'."
-                    )
+                case _:
+                    if default_datasource:
+                        logger.warning(
+                            f"DATAPROC_SPARK_CONNECT_DEFAULT_DATASOURCE is set to an invalid value:"
+                            f" {default_datasource}. Supported value is 'bigquery'."
+                        )
             return dataproc_config
 
         def _check_python_version_compatibility(self, runtime_version):
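
Besides switching to a match statement (which requires Python 3.10 or newer on the client), the rewrite drops two things: the guard that applied the defaults only on runtime version 2.3, and the spark.sql.legacy.createHiveTableByDefault default. The merge rule itself is unchanged: defaults only fill keys the user has not already set. A standalone sketch of that rule:

    # Defaults never override user-supplied properties.
    defaults = {"spark.sql.sources.default": "bigquery"}
    properties = {"spark.sql.sources.default": "parquet"}  # set by the user

    for k, v in defaults.items():
        if k not in properties:
            properties[k] = v

    print(properties)  # {'spark.sql.sources.default': 'parquet'}: user value wins
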
@@ -598,9 +576,7 @@ class DataprocSparkSession(SparkSession):
 
             # Runtime version to server Python version mapping
             RUNTIME_PYTHON_MAP = {
-                "1.2": (3, 12),
-                "2.2": (3, 12),
-                "2.3": (3, 11),
+                "3.0": (3, 11),
             }
 
             client_python = sys.version_info[:2]  # (major, minor)
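
With only runtime 3.0 left in the map, the compatibility check compares the client interpreter against the server-side Python 3.11 that runtime expects. A sketch of the comparison implied by the surrounding code; the real method's warning text may differ:

    import sys

    RUNTIME_PYTHON_MAP = {"3.0": (3, 11)}  # runtime version -> server Python

    client_python = sys.version_info[:2]
    server_python = RUNTIME_PYTHON_MAP.get("3.0")
    if server_python and client_python != server_python:
        print(f"Client Python {client_python} differs from server {server_python}")
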
@@ -813,7 +789,7 @@ class DataprocSparkSession(SparkSession):
         This is an API dedicated to Spark Connect client only. With regular Spark Session, it throws
         an exception.
         Regarding pypi: Popular packages are already pre-installed in s8s runtime.
-        https://cloud.google.com/dataproc-serverless/docs/concepts/versions/spark-runtime-2.2#python_libraries
+        https://cloud.google.com/dataproc-serverless/docs/concepts/versions/spark-runtime-2.3#python_libraries
         If there are conflicts/package doesn't exist, it throws an exception.
         """
         if sum([pypi, file, pyfile, archive]) > 1:
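
The docstring now points at the runtime 2.3 package list. Given the sum([...]) > 1 guard visible below it, at most one of the pypi/file/pyfile/archive flags may be set per call. A hedged example based on the package README; the package spec is illustrative:

    from google.cloud.dataproc_spark_connect import DataprocSparkSession

    spark = DataprocSparkSession.builder.getOrCreate()
    # Install an extra PyPI package into the running serverless session;
    # at most one of pypi/file/pyfile/archive may be true per call.
    spark.addArtifacts("spacy==3.7.2", pypi=True)
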
--- dataproc_spark_connect-0.9.0/setup.py
+++ dataproc_spark_connect-1.0.0rc1/setup.py
@@ -20,7 +20,7 @@ long_description = (this_directory / "README.md").read_text()
 
 setup(
     name="dataproc-spark-connect",
-    version="0.9.0",
+    version="1.0.0rc1",
     description="Dataproc client library for Spark Connect",
     long_description=long_description,
     author="Google LLC",
@@ -31,7 +31,7 @@ setup(
         "google-api-core>=2.19",
         "google-cloud-dataproc>=5.18",
         "packaging>=20.0",
-        "pyspark[connect]~=3.5.1",
+        "pyspark[connect]~=4.0.0",
         "tqdm>=4.67",
         "websockets>=14.0",
     ],
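
Because 1.0.0rc1 is a pre-release under PEP 440, pip skips it unless asked: installing it requires the --pre flag or an explicit pin such as dataproc-spark-connect==1.0.0rc1. The ordering is easy to verify with packaging:

    from packaging.version import Version

    v = Version("1.0.0rc1")
    print(v.is_prerelease)       # True: "rc1" marks a release candidate
    print(v < Version("1.0.0"))  # True: it sorts before the final release
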