dataproc-spark-connect 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. dataproc_spark_connect-0.2.0/PKG-INFO +114 -0
  2. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/README.md +51 -43
  3. dataproc_spark_connect-0.2.0/dataproc_spark_connect.egg-info/PKG-INFO +114 -0
  4. dataproc_spark_connect-0.2.0/dataproc_spark_connect.egg-info/requires.txt +7 -0
  5. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/google/cloud/dataproc_spark_connect/client/proxy.py +59 -20
  6. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/google/cloud/dataproc_spark_connect/session.py +2 -2
  7. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/setup.py +12 -1
  8. dataproc_spark_connect-0.1.0/PKG-INFO +0 -10
  9. dataproc_spark_connect-0.1.0/dataproc_spark_connect.egg-info/PKG-INFO +0 -10
  10. dataproc_spark_connect-0.1.0/dataproc_spark_connect.egg-info/requires.txt +0 -5
  11. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/LICENSE +0 -0
  12. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/dataproc_spark_connect.egg-info/SOURCES.txt +0 -0
  13. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
  14. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
  15. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/google/cloud/dataproc_spark_connect/__init__.py +0 -0
  16. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
  17. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/google/cloud/dataproc_spark_connect/client/core.py +0 -0
  18. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/pyproject.toml +0 -0
  19. {dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/setup.cfg +0 -0

dataproc_spark_connect-0.2.0/PKG-INFO
@@ -0,0 +1,114 @@
+ Metadata-Version: 2.1
+ Name: dataproc-spark-connect
+ Version: 0.2.0
+ Summary: Dataproc client library for Spark Connect
+ Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
+ Author: Google LLC
+ License: Apache 2.0
+ License-File: LICENSE
+ Requires-Dist: google-api-core>=2.19.1
+ Requires-Dist: google-cloud-dataproc>=5.15.1
+ Requires-Dist: wheel
+ Requires-Dist: websockets
+ Requires-Dist: pyspark>=3.5
+ Requires-Dist: pandas
+ Requires-Dist: pyarrow
+
+ # Dataproc Spark Connect Client
+
+ A wrapper of the Apache [Spark Connect](https://spark.apache.org/spark-connect/) client with
+ additional functionalities that allow applications to communicate with a remote Dataproc
+ Spark cluster using the Spark Connect protocol without requiring additional steps.
+
+ ## Install
+
+ .. code-block:: console
+
+ pip install dataproc_spark_connect
+
+ ## Uninstall
+
+ .. code-block:: console
+
+ pip uninstall dataproc_spark_connect
+
+
+ ## Setup
+ This client requires permissions to manage [Dataproc sessions and session templates](https://cloud.google.com/dataproc-serverless/docs/concepts/iam).
+ If you are running the client outside of Google Cloud, you must set following environment variables:
+
+ * GOOGLE_CLOUD_PROJECT - The Google Cloud project you use to run Spark workloads
+ * GOOGLE_CLOUD_REGION - The Compute Engine [region](https://cloud.google.com/compute/docs/regions-zones#available) where you run the Spark workload.
+ * GOOGLE_APPLICATION_CREDENTIALS - Your [Application Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc)
+ * DATAPROC_SPARK_CONNECT_SESSION_DEFAULT_CONFIG (Optional) - The config location, such as `tests/integration/resources/session.textproto`
+
+ ## Usage
+
+ 1. Install the latest version of Dataproc Python client and Dataproc Spark Connect modules:
+
+ .. code-block:: console
+
+ pip install google_cloud_dataproc --force-reinstall
+ pip install dataproc_spark_connect --force-reinstall
+
+ 2. Add the required import into your PySpark application or notebook:
+
+ .. code-block:: python
+
+ from google.cloud.dataproc_spark_connect import DataprocSparkSession
+
+ 3. There are two ways to create a spark session,
+
+ 1. Start a Spark session using properties defined in `DATAPROC_SPARK_CONNECT_SESSION_DEFAULT_CONFIG`:
+
+ .. code-block:: python
+
+ spark = DataprocSparkSession.builder.getOrCreate()
+
+ 2. Start a Spark session with the following code instead of using a config file:
+
+ .. code-block:: python
+
+ from google.cloud.dataproc_v1 import SparkConnectConfig
+ from google.cloud.dataproc_v1 import Session
+ dataproc_config = Session()
+ dataproc_config.spark_connect_session = SparkConnectConfig()
+ dataproc_config.environment_config.execution_config.subnetwork_uri = "<subnet>"
+ dataproc_config.runtime_config.version = '3.0'
+ spark = DataprocSparkSession.builder.dataprocConfig(dataproc_config).getOrCreate()
+
+ ## Billing
+ As this client runs the spark workload on Dataproc, your project will be billed as per [Dataproc Serverless Pricing](https://cloud.google.com/dataproc-serverless/pricing).
+ This will happen even if you are running the client from a non-GCE instance.
+
+ ## Contributing
+ ### Building and Deploying SDK
+
+ 1. Install the requirements in virtual environment.
+
+ .. code-block:: console
+
+ pip install -r requirements.txt
+
+ 2. Build the code.
+
+ .. code-block:: console
+
+ python setup.py sdist bdist_wheel
+
+
+ 3. Copy the generated `.whl` file to Cloud Storage. Use the version specified in the `setup.py` file.
+
+ .. code-block:: console
+
+ VERSION=<version> gsutil cp dist/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl gs://<your_bucket_name>
+
+ 4. Download the new SDK on Vertex, then uninstall the old version and install the new one.
+
+ .. code-block:: console
+
+ %%bash
+ export VERSION=<version>
+ gsutil cp gs://<your_bucket_name>/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl .
+ yes | pip uninstall dataproc_spark_connect
+ pip install dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl

{dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/README.md
@@ -6,15 +6,16 @@ Spark cluster using the Spark Connect protocol without requiring additional step
  
  ## Install
  
- ```
- pip install dataproc_spark_connect
- ```
+ .. code-block:: console
+
+ pip install dataproc_spark_connect
  
  ## Uninstall
  
- ```
- pip uninstall dataproc_spark_connect
- ```
+ .. code-block:: console
+
+ pip uninstall dataproc_spark_connect
+
  
  ## Setup
  This client requires permissions to manage [Dataproc sessions and session templates](https://cloud.google.com/dataproc-serverless/docs/concepts/iam).
@@ -28,33 +29,37 @@ If you are running the client outside of Google Cloud, you must set following en
  ## Usage
  
  1. Install the latest version of Dataproc Python client and Dataproc Spark Connect modules:
- ```
- pip install google_cloud_dataproc --force-reinstall
- pip install dataproc_spark_connect --force-reinstall
- ```
+
+ .. code-block:: console
+
+ pip install google_cloud_dataproc --force-reinstall
+ pip install dataproc_spark_connect --force-reinstall
  
  2. Add the required import into your PySpark application or notebook:
- ```python
- from google.cloud.dataproc_spark_connect import DataprocSparkSession
  
- ```
+ .. code-block:: python
+
+ from google.cloud.dataproc_spark_connect import DataprocSparkSession
  
  3. There are two ways to create a spark session,
+
  1. Start a Spark session using properties defined in `DATAPROC_SPARK_CONNECT_SESSION_DEFAULT_CONFIG`:
- ```python
- spark = DataprocSparkSession.builder.getOrCreate()
- ```
+
+ .. code-block:: python
+
+ spark = DataprocSparkSession.builder.getOrCreate()
  
  2. Start a Spark session with the following code instead of using a config file:
- ```python
- from google.cloud.dataproc_v1 import SparkConnectConfig
- from google.cloud.dataproc_v1 import Session
- dataproc_config = Session()
- dataproc_config.spark_connect_session = SparkConnectConfig()
- dataproc_config.environment_config.execution_config.subnetwork_uri = "<subnet>"
- dataproc_config.runtime_config.version = '3.0'
- spark = DataprocSparkSession.builder.dataprocConfig(dataproc_config).getOrCreate()
- ```
+
+ .. code-block:: python
+
+ from google.cloud.dataproc_v1 import SparkConnectConfig
+ from google.cloud.dataproc_v1 import Session
+ dataproc_config = Session()
+ dataproc_config.spark_connect_session = SparkConnectConfig()
+ dataproc_config.environment_config.execution_config.subnetwork_uri = "<subnet>"
+ dataproc_config.runtime_config.version = '3.0'
+ spark = DataprocSparkSession.builder.dataprocConfig(dataproc_config).getOrCreate()
  
  ## Billing
  As this client runs the spark workload on Dataproc, your project will be billed as per [Dataproc Serverless Pricing](https://cloud.google.com/dataproc-serverless/pricing).
@@ -62,29 +67,32 @@ This will happen even if you are running the client from a non-GCE instance.
  
  ## Contributing
  ### Building and Deploying SDK
+
  1. Install the requirements in virtual environment.
  
- ```
- pip install -r requirements.txt
- ```
+ .. code-block:: console
+
+ pip install -r requirements.txt
+
  2. Build the code.
  
- ```
- python setup.py sdist bdist_wheel
- ```
+ .. code-block:: console
+
+ python setup.py sdist bdist_wheel
+
+
+ 3. Copy the generated `.whl` file to Cloud Storage. Use the version specified in the `setup.py` file.
+
+ .. code-block:: console
  
- 2. Copy the generated `.whl` file to Cloud Storage. Use the version specified in the `setup.py` file.
+ VERSION=<version> gsutil cp dist/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl gs://<your_bucket_name>
  
- ```
- VERSION=<version> gsutil cp dist/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl gs://<your_bucket_name>
- ```
+ 4. Download the new SDK on Vertex, then uninstall the old version and install the new one.
  
- 3. Download the new SDK on Vertex, then uninstall the old version and install the new one.
+ .. code-block:: console
  
- ```
- %%bash
- export VERSION=<version>
- gsutil cp gs://<your_bucket_name>/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl .
- yes | pip uninstall dataproc_spark_connect
- pip install dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl
- ```
+ %%bash
+ export VERSION=<version>
+ gsutil cp gs://<your_bucket_name>/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl .
+ yes | pip uninstall dataproc_spark_connect
+ pip install dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl

dataproc_spark_connect-0.2.0/dataproc_spark_connect.egg-info/PKG-INFO
@@ -0,0 +1,114 @@
+ Metadata-Version: 2.1
+ Name: dataproc-spark-connect
+ Version: 0.2.0
+ Summary: Dataproc client library for Spark Connect
+ Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
+ Author: Google LLC
+ License: Apache 2.0
+ License-File: LICENSE
+ Requires-Dist: google-api-core>=2.19.1
+ Requires-Dist: google-cloud-dataproc>=5.15.1
+ Requires-Dist: wheel
+ Requires-Dist: websockets
+ Requires-Dist: pyspark>=3.5
+ Requires-Dist: pandas
+ Requires-Dist: pyarrow
+
+ # Dataproc Spark Connect Client
+
+ A wrapper of the Apache [Spark Connect](https://spark.apache.org/spark-connect/) client with
+ additional functionalities that allow applications to communicate with a remote Dataproc
+ Spark cluster using the Spark Connect protocol without requiring additional steps.
+
+ ## Install
+
+ .. code-block:: console
+
+ pip install dataproc_spark_connect
+
+ ## Uninstall
+
+ .. code-block:: console
+
+ pip uninstall dataproc_spark_connect
+
+
+ ## Setup
+ This client requires permissions to manage [Dataproc sessions and session templates](https://cloud.google.com/dataproc-serverless/docs/concepts/iam).
+ If you are running the client outside of Google Cloud, you must set following environment variables:
+
+ * GOOGLE_CLOUD_PROJECT - The Google Cloud project you use to run Spark workloads
+ * GOOGLE_CLOUD_REGION - The Compute Engine [region](https://cloud.google.com/compute/docs/regions-zones#available) where you run the Spark workload.
+ * GOOGLE_APPLICATION_CREDENTIALS - Your [Application Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc)
+ * DATAPROC_SPARK_CONNECT_SESSION_DEFAULT_CONFIG (Optional) - The config location, such as `tests/integration/resources/session.textproto`
+
+ ## Usage
+
+ 1. Install the latest version of Dataproc Python client and Dataproc Spark Connect modules:
+
+ .. code-block:: console
+
+ pip install google_cloud_dataproc --force-reinstall
+ pip install dataproc_spark_connect --force-reinstall
+
+ 2. Add the required import into your PySpark application or notebook:
+
+ .. code-block:: python
+
+ from google.cloud.dataproc_spark_connect import DataprocSparkSession
+
+ 3. There are two ways to create a spark session,
+
+ 1. Start a Spark session using properties defined in `DATAPROC_SPARK_CONNECT_SESSION_DEFAULT_CONFIG`:
+
+ .. code-block:: python
+
+ spark = DataprocSparkSession.builder.getOrCreate()
+
+ 2. Start a Spark session with the following code instead of using a config file:
+
+ .. code-block:: python
+
+ from google.cloud.dataproc_v1 import SparkConnectConfig
+ from google.cloud.dataproc_v1 import Session
+ dataproc_config = Session()
+ dataproc_config.spark_connect_session = SparkConnectConfig()
+ dataproc_config.environment_config.execution_config.subnetwork_uri = "<subnet>"
+ dataproc_config.runtime_config.version = '3.0'
+ spark = DataprocSparkSession.builder.dataprocConfig(dataproc_config).getOrCreate()
+
+ ## Billing
+ As this client runs the spark workload on Dataproc, your project will be billed as per [Dataproc Serverless Pricing](https://cloud.google.com/dataproc-serverless/pricing).
+ This will happen even if you are running the client from a non-GCE instance.
+
+ ## Contributing
+ ### Building and Deploying SDK
+
+ 1. Install the requirements in virtual environment.
+
+ .. code-block:: console
+
+ pip install -r requirements.txt
+
+ 2. Build the code.
+
+ .. code-block:: console
+
+ python setup.py sdist bdist_wheel
+
+
+ 3. Copy the generated `.whl` file to Cloud Storage. Use the version specified in the `setup.py` file.
+
+ .. code-block:: console
+
+ VERSION=<version> gsutil cp dist/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl gs://<your_bucket_name>
+
+ 4. Download the new SDK on Vertex, then uninstall the old version and install the new one.
+
+ .. code-block:: console
+
+ %%bash
+ export VERSION=<version>
+ gsutil cp gs://<your_bucket_name>/dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl .
+ yes | pip uninstall dataproc_spark_connect
+ pip install dataproc_spark_connect-${VERSION}-py2.py3-none-any.whl

dataproc_spark_connect-0.2.0/dataproc_spark_connect.egg-info/requires.txt
@@ -0,0 +1,7 @@
+ google-api-core>=2.19.1
+ google-cloud-dataproc>=5.15.1
+ wheel
+ websockets
+ pyspark>=3.5
+ pandas
+ pyarrow

{dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/google/cloud/dataproc_spark_connect/client/proxy.py
@@ -43,13 +43,22 @@ class bridged_socket(object):
          self._conn = websocket_conn
  
      def recv(self, buff_size):
-         msg = self._conn.recv()
+         # N.B. The websockets [recv method](https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv)
+         # does not support the buff_size parameter, but it does add a `timeout` keyword parameter not supported by normal
+         # socket objects.
+         #
+         # We set that timeout to 60 seconds to prevent any scenarios where we wind up stuck waiting for a message from a websocket connection
+         # that never comes.
+         msg = self._conn.recv(timeout=60)
          return bytes.fromhex(msg)
  
      def send(self, msg_bytes):
          msg = bytes.hex(msg_bytes)
          self._conn.send(msg)
  
+     def close(self):
+         return self._conn.close()
+
  
  def connect_tcp_bridge(hostname):
      """Create a socket-like connection to the given hostname using websocket.
@@ -93,12 +102,51 @@ def forward_bytes(name, from_sock, to_sock):
              bs = from_sock.recv(1024)
              if not bs:
                  return
-             to_sock.send(bs)
+             while bs:
+                 try:
+                     to_sock.send(bs)
+                     bs = None
+                 except TimeoutError:
+                     # On timeouts during a send, we retry just the send
+                     # to make sure we don't lose any bytes.
+                     pass
+         except TimeoutError:
+             # On timeouts during a receive, we retry the entire flow.
+             pass
          except Exception as ex:
              logger.debug(f"[{name}] Exception forwarding bytes: {ex}")
+             to_sock.close()
              return
  
  
+ def connect_sockets(conn_number, from_sock, to_sock):
+     """Create a connection between the two given ports.
+
+     This method continuously streams bytes in both directions between the
+     given `from_sock` and `to_sock` socket-like objects.
+
+     The caller is responsible for creating and closing the supplied socekts.
+     """
+     forward_name = f"{conn_number}-forward"
+     t1 = threading.Thread(
+         name=forward_name,
+         target=forward_bytes,
+         args=[forward_name, from_sock, to_sock],
+         daemon=True,
+     )
+     t1.start()
+     backward_name = f"{conn_number}-backward"
+     t2 = threading.Thread(
+         name=backward_name,
+         target=forward_bytes,
+         args=[backward_name, to_sock, from_sock],
+         daemon=True,
+     )
+     t2.start()
+     t1.join()
+     t2.join()
+
+
  def forward_connection(conn_number, conn, addr, target_host):
      """Create a connection to the target and forward `conn` to it.
  
@@ -115,24 +163,7 @@ def forward_connection(conn_number, conn, addr, target_host):
      with conn:
          with connect_tcp_bridge(target_host) as websocket_conn:
              backend_socket = bridged_socket(websocket_conn)
-             forward_name = f"{conn_number}-forward"
-             t1 = threading.Thread(
-                 name=forward_name,
-                 target=forward_bytes,
-                 args=[forward_name, conn, backend_socket],
-                 daemon=True,
-             )
-             t1.start()
-             backward_name = f"{conn_number}-backward"
-             t2 = threading.Thread(
-                 name=backward_name,
-                 target=forward_bytes,
-                 args=[backward_name, backend_socket, conn],
-                 daemon=True,
-             )
-             t2.start()
-             t1.join()
-             t2.join()
+             connect_sockets(conn_number, conn, backend_socket)
  
  
  class DataprocSessionProxy(object):
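
The refactor above extracts the per-direction forwarding threads into `connect_sockets()`, which `forward_connection()` now calls. A hypothetical usage sketch, assuming the 0.2.0 module layout shown in this diff and substituting `socket.socketpair()` for the real frontend connection and websocket bridge:

```python
import socket
import threading

from google.cloud.dataproc_spark_connect.client.proxy import connect_sockets

# Two local socket pairs stand in for the accepted client connection (a*)
# and the bridged backend connection (b*).
a1, a2 = socket.socketpair()
b1, b2 = socket.socketpair()

# connect_sockets() blocks until both directions finish, so run it in a
# background thread, just as forward_connection() does per accepted connection.
bridge = threading.Thread(target=connect_sockets, args=[0, a2, b1], daemon=True)
bridge.start()

a1.sendall(b"ping")      # travels a1 -> a2 -> b1 -> b2
print(b2.recv(1024))     # b"ping"
b2.sendall(b"pong")      # and back in the other direction
print(a1.recv(1024))     # b"pong"
```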
@@ -179,6 +210,14 @@ class DataprocSessionProxy(object):
          s.release()
          while not self._killed:
              conn, addr = frontend_socket.accept()
+             # Set a timeout on how long we will allow send/recv calls to block
+             #
+             # The code that reads and writes to this connection will retry
+             # on timeouts, so this is a safe change.
+             #
+             # The chosen timeout is a very short one because it allows us
+             # to more quickly detect when a connection has been closed.
+             conn.settimeout(1)
              logger.debug(f"Accepted a connection from {addr}...")
              self._conn_number += 1
              threading.Thread(
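
The 1-second `settimeout()` above relies on the `TimeoutError` handlers added to `forward_bytes`: since Python 3.10, `socket.timeout` is an alias of the built-in `TimeoutError`, so a timed-out `recv()` or `send()` is caught and retried rather than tearing the connection down. A small standalone sketch of that behavior (values are illustrative):

```python
import socket

# On Python 3.10+ the socket timeout exception is the built-in TimeoutError.
assert socket.timeout is TimeoutError

a, b = socket.socketpair()
a.settimeout(1)  # same idea as conn.settimeout(1) in the proxy accept loop
try:
    a.recv(1024)  # nothing was sent, so this raises after roughly one second
except TimeoutError:
    print("recv timed out; the forwarding loop would simply retry")
```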

{dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/google/cloud/dataproc_spark_connect/session.py
@@ -196,13 +196,13 @@ class DataprocSparkSession(SparkSession):
          session_id = self.generate_dataproc_session_id()
  
          session_request.session_id = session_id
-         dataproc_config.name = f"projects/{self._project_id}/regions/{self._region}/sessions/{session_id}"
+         dataproc_config.name = f"projects/{self._project_id}/locations/{self._region}/sessions/{session_id}"
          logger.debug(
              f"Configurations used to create serverless session:\n {dataproc_config}"
          )
          session_request.session = dataproc_config
          session_request.parent = (
-             f"projects/{self._project_id}/regions/{self._region}"
+             f"projects/{self._project_id}/locations/{self._region}"
          )
  
          logger.debug("Creating serverless session")

{dataproc_spark_connect-0.1.0 → dataproc_spark_connect-0.2.0}/setup.py
@@ -12,13 +12,24 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  from setuptools import find_namespace_packages, setup
+ from pathlib import Path
+
+ this_directory = Path(__file__).parent
+ long_description = (this_directory / "README.md").read_text()
+
  
  setup(
      name="dataproc-spark-connect",
-     version="0.1.0",
+     version="0.2.0",
      description="Dataproc client library for Spark Connect",
+     long_description=long_description,
+     author="Google LLC",
+     url="https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python",
+     license="Apache 2.0",
      packages=find_namespace_packages(include=["google.*"]),
      install_requires=[
+         "google-api-core>=2.19.1",
+         "google-cloud-dataproc>=5.15.1",
          "wheel",
          "websockets",
          "pyspark>=3.5",

dataproc_spark_connect-0.1.0/PKG-INFO
@@ -1,10 +0,0 @@
- Metadata-Version: 2.1
- Name: dataproc-spark-connect
- Version: 0.1.0
- Summary: Dataproc client library for Spark Connect
- License-File: LICENSE
- Requires-Dist: wheel
- Requires-Dist: websockets
- Requires-Dist: pyspark>=3.5
- Requires-Dist: pandas
- Requires-Dist: pyarrow

dataproc_spark_connect-0.1.0/dataproc_spark_connect.egg-info/PKG-INFO
@@ -1,10 +0,0 @@
- Metadata-Version: 2.1
- Name: dataproc-spark-connect
- Version: 0.1.0
- Summary: Dataproc client library for Spark Connect
- License-File: LICENSE
- Requires-Dist: wheel
- Requires-Dist: websockets
- Requires-Dist: pyspark>=3.5
- Requires-Dist: pandas
- Requires-Dist: pyarrow

dataproc_spark_connect-0.1.0/dataproc_spark_connect.egg-info/requires.txt
@@ -1,5 +0,0 @@
- wheel
- websockets
- pyspark>=3.5
- pandas
- pyarrow