dataproc-spark-connect 0.2.1__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataproc_spark_connect-0.7.0/PKG-INFO +98 -0
- dataproc_spark_connect-0.7.0/README.md +83 -0
- dataproc_spark_connect-0.7.0/dataproc_spark_connect.egg-info/PKG-INFO +98 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/dataproc_spark_connect.egg-info/SOURCES.txt +2 -0
- dataproc_spark_connect-0.7.0/dataproc_spark_connect.egg-info/requires.txt +6 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/google/cloud/dataproc_spark_connect/__init__.py +14 -8
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/google/cloud/dataproc_spark_connect/client/core.py +34 -8
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/google/cloud/dataproc_spark_connect/client/proxy.py +15 -12
- dataproc_spark_connect-0.7.0/google/cloud/dataproc_spark_connect/exceptions.py +27 -0
- dataproc_spark_connect-0.7.0/google/cloud/dataproc_spark_connect/pypi_artifacts.py +48 -0
- dataproc_spark_connect-0.7.0/google/cloud/dataproc_spark_connect/session.py +637 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/setup.py +7 -8
- dataproc_spark_connect-0.2.1/PKG-INFO +0 -119
- dataproc_spark_connect-0.2.1/README.md +0 -103
- dataproc_spark_connect-0.2.1/dataproc_spark_connect.egg-info/PKG-INFO +0 -119
- dataproc_spark_connect-0.2.1/dataproc_spark_connect.egg-info/requires.txt +0 -7
- dataproc_spark_connect-0.2.1/google/cloud/dataproc_spark_connect/session.py +0 -540
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/LICENSE +0 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/dataproc_spark_connect.egg-info/dependency_links.txt +0 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/dataproc_spark_connect.egg-info/top_level.txt +0 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/google/cloud/dataproc_spark_connect/client/__init__.py +0 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/pyproject.toml +0 -0
- {dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/setup.cfg +0 -0
dataproc_spark_connect-0.7.0/PKG-INFO (new file)
@@ -0,0 +1,98 @@
+Metadata-Version: 2.1
+Name: dataproc-spark-connect
+Version: 0.7.0
+Summary: Dataproc client library for Spark Connect
+Home-page: https://github.com/GoogleCloudDataproc/dataproc-spark-connect-python
+Author: Google LLC
+License: Apache 2.0
+License-File: LICENSE
+Requires-Dist: google-api-core>=2.19
+Requires-Dist: google-cloud-dataproc>=5.18
+Requires-Dist: packaging>=20.0
+Requires-Dist: pyspark[connect]>=3.5
+Requires-Dist: tqdm>=4.67
+Requires-Dist: websockets>=15.0
+
+# Dataproc Spark Connect Client
+
+A wrapper of the Apache [Spark Connect](https://spark.apache.org/spark-connect/)
+client with additional functionalities that allow applications to communicate
+with a remote Dataproc Spark Session using the Spark Connect protocol without
+requiring additional steps.
+
+## Install
+
+```sh
+pip install dataproc_spark_connect
+```
+
+## Uninstall
+
+```sh
+pip uninstall dataproc_spark_connect
+```
+
+## Setup
+
+This client requires permissions to
+manage [Dataproc Sessions and Session Templates](https://cloud.google.com/dataproc-serverless/docs/concepts/iam).
+If you are running the client outside of Google Cloud, you must set following
+environment variables:
+
+* `GOOGLE_CLOUD_PROJECT` - The Google Cloud project you use to run Spark
+workloads
+* `GOOGLE_CLOUD_REGION` - The Compute
+Engine [region](https://cloud.google.com/compute/docs/regions-zones#available)
+where you run the Spark workload.
+* `GOOGLE_APPLICATION_CREDENTIALS` -
+Your [Application Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc)
+
+## Usage
+
+1. Install the latest version of Dataproc Python client and Dataproc Spark
+Connect modules:
+
+```sh
+pip install google_cloud_dataproc dataproc_spark_connect --force-reinstall
+```
+
+2. Add the required imports into your PySpark application or notebook and start
+a Spark session with the following code instead of using
+environment variables:
+
+```python
+from google.cloud.dataproc_spark_connect import DataprocSparkSession
+from google.cloud.dataproc_v1 import Session
+session_config = Session()
+session_config.environment_config.execution_config.subnetwork_uri = '<subnet>'
+session_config.runtime_config.version = '2.2'
+spark = DataprocSparkSession.builder.dataprocSessionConfig(session_config).getOrCreate()
+```
+
+## Developing
+
+For development instructions see [guide](DEVELOPING.md).
+
+## Contributing
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+### Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+### Code reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
dataproc_spark_connect-0.7.0/README.md (new file)
@@ -0,0 +1,83 @@
(The 83 added lines are identical to the README body of PKG-INFO above, from "# Dataproc Spark Connect Client" through "information on using pull requests.")
dataproc_spark_connect-0.7.0/dataproc_spark_connect.egg-info/PKG-INFO (new file)
@@ -0,0 +1,98 @@
(The 98 added lines are identical to dataproc_spark_connect-0.7.0/PKG-INFO above.)
{dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/dataproc_spark_connect.egg-info/SOURCES.txt
@@ -9,6 +9,8 @@ dataproc_spark_connect.egg-info/dependency_links.txt
 dataproc_spark_connect.egg-info/requires.txt
 dataproc_spark_connect.egg-info/top_level.txt
 google/cloud/dataproc_spark_connect/__init__.py
+google/cloud/dataproc_spark_connect/exceptions.py
+google/cloud/dataproc_spark_connect/pypi_artifacts.py
 google/cloud/dataproc_spark_connect/session.py
 google/cloud/dataproc_spark_connect/client/__init__.py
 google/cloud/dataproc_spark_connect/client/core.py
{dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/google/cloud/dataproc_spark_connect/__init__.py
@@ -11,13 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import importlib.metadata
 import warnings
 
-
-
-
-
-
-
-
+from .session import DataprocSparkSession
+
+old_package_name = "google-spark-connect"
+current_package_name = "dataproc-spark-connect"
+try:
+    importlib.metadata.distribution(old_package_name)
+    warnings.warn(
+        f"Package '{old_package_name}' is already installed in your environment. "
+        f"This might cause conflicts with '{current_package_name}'. "
+        f"Consider uninstalling '{old_package_name}' and only install '{current_package_name}'."
+    )
+except:
+    pass
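The conflict check added to `__init__.py` runs at import time and reports the problem through Python's standard `warnings` machinery. A minimal sketch of how an application could observe that warning, assuming a fresh interpreter where the package has not been imported yet:

```python
import warnings

# Capture warnings emitted during import. If the legacy
# "google-spark-connect" distribution is also installed, the package's
# import-time check emits a UserWarning about the conflict.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    import google.cloud.dataproc_spark_connect  # noqa: F401

for w in caught:
    print(f"import-time warning: {w.message}")
```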
{dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/google/cloud/dataproc_spark_connect/client/core.py
@@ -11,12 +11,16 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import logging
+
 import google
 import grpc
 from pyspark.sql.connect.client import ChannelBuilder
 
 from . import proxy
 
+logger = logging.getLogger(__name__)
+
 
 class DataprocChannelBuilder(ChannelBuilder):
     """
@@ -36,6 +40,10 @@ class DataprocChannelBuilder(ChannelBuilder):
     True
     """
 
+    def __init__(self, url, is_active_callback=None):
+        self._is_active_callback = is_active_callback
+        super().__init__(url)
+
     def toChannel(self) -> grpc.Channel:
         """
         Applies the parameters of the connection string and creates a new
@@ -51,7 +59,7 @@ class DataprocChannelBuilder(ChannelBuilder):
         return self._proxied_channel()
 
     def _proxied_channel(self) -> grpc.Channel:
-        return ProxiedChannel(self.host)
+        return ProxiedChannel(self.host, self._is_active_callback)
 
     def _direct_channel(self) -> grpc.Channel:
         destination = f"{self.host}:{self.port}"
@@ -75,7 +83,8 @@ class DataprocChannelBuilder(ChannelBuilder):
 
 class ProxiedChannel(grpc.Channel):
 
-    def __init__(self, target_host):
+    def __init__(self, target_host, is_active_callback):
+        self._is_active_callback = is_active_callback
         self._proxy = proxy.DataprocSessionProxy(0, target_host)
         self._proxy.start()
         self._proxied_connect_url = f"sc://localhost:{self._proxy.port}"
@@ -94,20 +103,37 @@ class ProxiedChannel(grpc.Channel):
         self._proxy.stop()
         return ret
 
+    def _wrap_method(self, wrapped_method):
+        if self._is_active_callback is None:
+            return wrapped_method
+
+        def checked_method(*margs, **mkwargs):
+            if (
+                self._is_active_callback is not None
+                and not self._is_active_callback()
+            ):
+                logger.warning(f"Session is no longer active")
+                raise RuntimeError(
+                    "Session not active. Please create a new session"
+                )
+            return wrapped_method(*margs, **mkwargs)
+
+        return checked_method
+
     def stream_stream(self, *args, **kwargs):
-        return self._wrapped.stream_stream(*args, **kwargs)
+        return self._wrap_method(self._wrapped.stream_stream(*args, **kwargs))
 
     def stream_unary(self, *args, **kwargs):
-        return self._wrapped.stream_unary(*args, **kwargs)
+        return self._wrap_method(self._wrapped.stream_unary(*args, **kwargs))
 
     def subscribe(self, *args, **kwargs):
-        return self._wrapped.subscribe(*args, **kwargs)
+        return self._wrap_method(self._wrapped.subscribe(*args, **kwargs))
 
     def unary_stream(self, *args, **kwargs):
-        return self._wrapped.unary_stream(*args, **kwargs)
+        return self._wrap_method(self._wrapped.unary_stream(*args, **kwargs))
 
     def unary_unary(self, *args, **kwargs):
-        return self._wrapped.unary_unary(*args, **kwargs)
+        return self._wrap_method(self._wrapped.unary_unary(*args, **kwargs))
 
     def unsubscribe(self, *args, **kwargs):
-        return self._wrapped.unsubscribe(*args, **kwargs)
+        return self._wrap_method(self._wrapped.unsubscribe(*args, **kwargs))
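The `is_active_callback` threaded through `DataprocChannelBuilder` and `ProxiedChannel` makes every RPC check session liveness before it runs, so a terminated Dataproc session surfaces as an immediate error instead of a hung call. A standalone sketch of that gating pattern under the same contract (a zero-argument callable returning a bool); the helper names below are illustrative, not part of the package:

```python
from typing import Callable


def gate_on_session(method: Callable, is_active: Callable[[], bool]) -> Callable:
    """Wrap a callable so it refuses to run once the session is inactive.

    Mirrors the shape of ProxiedChannel._wrap_method: the liveness check
    happens on every invocation, raising instead of issuing the RPC.
    """
    def checked(*args, **kwargs):
        if not is_active():
            raise RuntimeError("Session not active. Please create a new session")
        return method(*args, **kwargs)
    return checked


# Usage sketch with a toy "RPC" and a mutable session flag.
state = {"active": True}
send = gate_on_session(lambda payload: f"sent {payload}", lambda: state["active"])
print(send("ping"))        # runs while the session is active
state["active"] = False    # simulate session termination
# send("ping") would now raise RuntimeError instead of attempting the call
```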
{dataproc_spark_connect-0.2.1 → dataproc_spark_connect-0.7.0}/google/cloud/dataproc_spark_connect/client/proxy.py
@@ -18,7 +18,6 @@ import contextlib
 import logging
 import socket
 import threading
-import time
 
 import websockets.sync.client as websocketclient
 
@@ -81,6 +80,7 @@ def connect_tcp_bridge(hostname):
     return websocketclient.connect(
         f"wss://{hostname}/{path}",
         additional_headers={"Authorization": f"Bearer {creds.token}"},
+        open_timeout=30,
     )
 
 
@@ -94,6 +94,7 @@ def forward_bytes(name, from_sock, to_sock):
     This method is intended to be run in a separate thread of execution.
 
     Args:
+        name: forwarding thread name
         from_sock: A socket-like object to stream bytes from.
         to_sock: A socket-like object to stream bytes to.
     """
@@ -101,8 +102,11 @@
         try:
            bs = from_sock.recv(1024)
            if not bs:
+                to_sock.close()
                return
-
+            attempt = 0
+            while bs and (attempt < 10):
+                attempt += 1
                try:
                    to_sock.send(bs)
                    bs = None
@@ -110,6 +114,8 @@
                    # On timeouts during a send, we retry just the send
                    # to make sure we don't lose any bytes.
                    pass
+            if bs:
+                raise Exception(f"Failed to forward bytes for {name}")
        except TimeoutError:
            # On timeouts during a receive, we retry the entire flow.
            pass
@@ -125,7 +131,7 @@ def connect_sockets(conn_number, from_sock, to_sock):
     This method continuously streams bytes in both directions between the
     given `from_sock` and `to_sock` socket-like objects.
 
-    The caller is responsible for creating and closing the supplied
+    The caller is responsible for creating and closing the supplied sockets.
     """
     forward_name = f"{conn_number}-forward"
     t1 = threading.Thread(
@@ -157,12 +163,17 @@ def forward_connection(conn_number, conn, addr, target_host):
     Both the supplied incoming connection (`conn`) and the created outgoing
     connection are automatically closed when this method terminates.
 
-    This method should be run inside
+    This method should be run inside a daemon thread so that it will not
     block program termination.
     """
     with conn:
         with connect_tcp_bridge(target_host) as websocket_conn:
             backend_socket = bridged_socket(websocket_conn)
+            # Set a timeout on how long we will allow send/recv calls to block
+            #
+            # The code that reads and writes to this connection will retry
+            # on timeouts, so this is a safe change.
+            conn.settimeout(10)
             connect_sockets(conn_number, conn, backend_socket)
 
 
@@ -210,14 +221,6 @@ class DataprocSessionProxy(object):
         s.release()
         while not self._killed:
             conn, addr = frontend_socket.accept()
-            # Set a timeout on how long we will allow send/recv calls to block
-            #
-            # The code that reads and writes to this connection will retry
-            # on timeouts, so this is a safe change.
-            #
-            # The chosen timeout is a very short one because it allows us
-            # to more quickly detect when a connection has been closed.
-            conn.settimeout(1)
             logger.debug(f"Accepted a connection from {addr}...")
             self._conn_number += 1
             threading.Thread(
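The reworked `forward_bytes` loop bounds the send retries: a timed-out `send` is retried up to 10 times, after which the forwarder raises instead of spinning forever. A self-contained sketch of that bounded-retry idea (the socket object and limits here are stand-ins, not the proxy's internals):

```python
import socket


def send_with_bounded_retries(sock: socket.socket, data: bytes, max_attempts: int = 10) -> None:
    """Retry a timed-out send a fixed number of times, then fail loudly.

    Mirrors the shape of the 0.7.0 forwarding loop: a TimeoutError on send
    is retried so bytes are not silently dropped, but only up to
    max_attempts before the connection is treated as broken.
    """
    for _ in range(max_attempts):
        try:
            sock.sendall(data)
            return
        except TimeoutError:
            continue  # retry just the send; the caller still holds the payload
    raise RuntimeError("failed to forward bytes after repeated send timeouts")
```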
dataproc_spark_connect-0.7.0/google/cloud/dataproc_spark_connect/exceptions.py (new file)
@@ -0,0 +1,27 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class DataprocSparkConnectException(Exception):
+    """A custom exception class to only print the error messages.
+    This would be used for exceptions where the stack trace
+    doesn't provide any additional information.
+    """
+
+    def __init__(self, message):
+        self.message = message
+        super().__init__(message)
+
+    def _render_traceback_(self):
+        return self.message
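`_render_traceback_` is the hook IPython looks for when it formats an exception, which is how `DataprocSparkConnectException` keeps notebook output down to the message itself. A small usage sketch; the failure text and helper below are hypothetical, not from the package:

```python
from google.cloud.dataproc_spark_connect.exceptions import (
    DataprocSparkConnectException,
)


def create_session_or_fail(ok: bool) -> str:
    # Hypothetical helper: the client wraps errors whose stack trace adds
    # nothing (e.g. a failed session creation) in this exception type.
    if not ok:
        raise DataprocSparkConnectException(
            "Error while creating the Dataproc Session: permission denied"
        )
    return "session ready"


try:
    create_session_or_fail(ok=False)
except DataprocSparkConnectException as e:
    print(e.message)  # the text _render_traceback_ hands back to IPython
```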
dataproc_spark_connect-0.7.0/google/cloud/dataproc_spark_connect/pypi_artifacts.py (new file)
@@ -0,0 +1,48 @@
+import json
+import logging
+import os
+import tempfile
+
+from packaging.requirements import Requirement
+
+logger = logging.getLogger(__name__)
+
+
+class PyPiArtifacts:
+    """
+    This is a helper class to serialize the PYPI package installation request with a "magic" file name
+    that Spark Connect server understands
+    """
+
+    @staticmethod
+    def __try_parsing_package(packages: set[str]) -> list[Requirement]:
+        reqs = [Requirement(p) for p in packages]
+        if 0 in [len(req.specifier) for req in reqs]:
+            logger.info("It is recommended to pin the version of the package")
+        return reqs
+
+    def __init__(self, packages: set[str]):
+        self.requirements = PyPiArtifacts.__try_parsing_package(packages)
+
+    def write_packages_config(self, s8s_session_uuid: str) -> str:
+        """
+        Can't use the same file-name as Spark throws exception that file already exists
+        Keep the filename/format in sync with server
+        """
+        dependencies = {
+            "version": "0.5",
+            "packageType": "PYPI",
+            "packages": [str(req) for req in self.requirements],
+        }
+
+        file_path = os.path.join(
+            tempfile.gettempdir(),
+            s8s_session_uuid,
+            "add-artifacts-1729-" + self.__str__() + ".json",
+        )
+
+        os.makedirs(os.path.dirname(file_path), exist_ok=True)
+        with open(file_path, "w") as json_file:
+            json.dump(dependencies, json_file, indent=4)
+        logger.debug("Dumping dependencies request in file: " + file_path)
+        return file_path
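`PyPiArtifacts` only serializes a requirements set into a JSON request file under a per-session temp directory, using a file name the Spark Connect server recognizes. A quick usage sketch; the package pins and the session UUID are placeholders:

```python
import json
import uuid

from google.cloud.dataproc_spark_connect.pypi_artifacts import PyPiArtifacts

# Placeholder id; in the client this is the server-side session's UUID.
session_uuid = str(uuid.uuid4())

artifacts = PyPiArtifacts({"pandas==2.2.2", "requests>=2.31"})
config_path = artifacts.write_packages_config(session_uuid)

with open(config_path) as f:
    print(json.dumps(json.load(f), indent=2))
# Expected shape: {"version": "0.5", "packageType": "PYPI", "packages": [...]}
```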