flyteplugins-ray 0.2.0b30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of flyteplugins-ray might be problematic.
flyteplugins/ray/task.py
ADDED
@@ -0,0 +1,143 @@
import base64
import json
import os
import typing
from dataclasses import dataclass
from typing import Any, Dict, Optional

import yaml
from flyte import PodTemplate, Resources
from flyte._tools import is_in_cluster
from flyte.extend import AsyncFunctionTaskTemplate, TaskPluginRegistry, pod_spec_from_resources
from flyte.models import SerializationContext
from flyteidl.plugins.ray_pb2 import HeadGroupSpec, RayCluster, RayJob, WorkerGroupSpec
from google.protobuf.json_format import MessageToDict

import ray

if typing.TYPE_CHECKING:
    pass


_RAY_HEAD_CONTAINER_NAME = "ray-head"
_RAY_WORKER_CONTAINER_NAME = "ray-worker"


@dataclass
class HeadNodeConfig:
    ray_start_params: typing.Optional[typing.Dict[str, str]] = None
    pod_template: typing.Optional[PodTemplate] = None
    requests: Optional[Resources] = None
    limits: Optional[Resources] = None


@dataclass
class WorkerNodeConfig:
    group_name: str
    replicas: int
    min_replicas: typing.Optional[int] = None
    max_replicas: typing.Optional[int] = None
    ray_start_params: typing.Optional[typing.Dict[str, str]] = None
    pod_template: typing.Optional[PodTemplate] = None
    requests: Optional[Resources] = None
    limits: Optional[Resources] = None


@dataclass
class RayJobConfig:
    worker_node_config: typing.List[WorkerNodeConfig]
    head_node_config: typing.Optional[HeadNodeConfig] = None
    enable_autoscaling: bool = False
    runtime_env: typing.Optional[dict] = None
    address: typing.Optional[str] = None
    shutdown_after_job_finishes: bool = False
    ttl_seconds_after_finished: typing.Optional[int] = None


@dataclass(kw_only=True)
class RayFunctionTask(AsyncFunctionTaskTemplate):
    """
    Actual plugin that transforms the local Python code for execution within a Ray job.
    """

    task_type: str = "ray"
    plugin_config: RayJobConfig

    async def pre(self, *args, **kwargs) -> Dict[str, Any]:
        # Connect to the Ray cluster before the task body runs.
        init_params = {"address": self.plugin_config.address}

        if is_in_cluster():
            working_dir = os.getcwd()
            init_params["runtime_env"] = {
                "working_dir": working_dir,
                "excludes": ["script_mode.tar.gz", "fast*.tar.gz", ".python_history"],
            }

        if not ray.is_initialized():
            ray.init(**init_params)
        return {}

    def custom_config(self, sctx: SerializationContext) -> Optional[Dict[str, Any]]:
        # Serialize the plugin config into the RayJob proto consumed by the backend plugin.
        cfg = self.plugin_config
        # Deprecated: runtime_env was removed in KubeRay >= 1.1.0; it is replaced by runtime_env_yaml.
        runtime_env = base64.b64encode(json.dumps(cfg.runtime_env).encode()).decode() if cfg.runtime_env else None
        runtime_env_yaml = yaml.dump(cfg.runtime_env) if cfg.runtime_env else None

        head_group_spec = None
        if cfg.head_node_config:
            if cfg.head_node_config.requests or cfg.head_node_config.limits:
                head_pod_template = PodTemplate(
                    pod_spec=pod_spec_from_resources(
                        primary_container_name=_RAY_HEAD_CONTAINER_NAME,
                        requests=cfg.head_node_config.requests,
                        limits=cfg.head_node_config.limits,
                    )
                )
            else:
                head_pod_template = cfg.head_node_config.pod_template

            head_group_spec = HeadGroupSpec(
                ray_start_params=cfg.head_node_config.ray_start_params,
                k8s_pod=head_pod_template.to_k8s_pod() if head_pod_template else None,
            )

        worker_group_spec: typing.List[WorkerGroupSpec] = []
        for c in cfg.worker_node_config:
            if c.requests or c.limits:
                worker_pod_template = PodTemplate(
                    pod_spec=pod_spec_from_resources(
                        primary_container_name=_RAY_WORKER_CONTAINER_NAME,
                        requests=c.requests,
                        limits=c.limits,
                    )
                )
            else:
                worker_pod_template = c.pod_template

            worker_group_spec.append(
                WorkerGroupSpec(
                    group_name=c.group_name,
                    replicas=c.replicas,
                    min_replicas=c.min_replicas,
                    max_replicas=c.max_replicas,
                    ray_start_params=c.ray_start_params,
                    k8s_pod=worker_pod_template.to_k8s_pod() if worker_pod_template else None,
                )
            )

        ray_job = RayJob(
            ray_cluster=RayCluster(
                head_group_spec=head_group_spec,
                worker_group_spec=worker_group_spec,
                enable_autoscaling=(cfg.enable_autoscaling if cfg.enable_autoscaling else False),
            ),
            runtime_env=runtime_env,
            runtime_env_yaml=runtime_env_yaml,
            ttl_seconds_after_finished=cfg.ttl_seconds_after_finished,
            shutdown_after_job_finishes=cfg.shutdown_after_job_finishes,
        )

        return MessageToDict(ray_job)


TaskPluginRegistry.register(config_type=RayJobConfig, plugin=RayFunctionTask)
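As an aside, the two `runtime_env` encodings produced in `custom_config` above can be reproduced in isolation. A minimal sketch follows; the sample `runtime_env` dict is illustrative, not taken from this package:

```python
import base64
import json

import yaml

# Illustrative runtime env; any dict accepted by ray.init(runtime_env=...) works here.
runtime_env = {"pip": ["numpy"], "env_vars": {"LOG_LEVEL": "debug"}}

# Legacy field (KubeRay < 1.1.0): base64-encoded JSON, as in custom_config above.
runtime_env_b64 = base64.b64encode(json.dumps(runtime_env).encode()).decode()

# Replacement field (KubeRay >= 1.1.0): the same dict serialized as YAML.
runtime_env_yaml = yaml.dump(runtime_env)

print(runtime_env_b64)
print(runtime_env_yaml)
```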
flyteplugins_ray-0.2.0b30.dist-info/METADATA
ADDED
@@ -0,0 +1,22 @@
Metadata-Version: 2.4
Name: flyteplugins-ray
Version: 0.2.0b30
Summary: Ray plugin for flyte
Author-email: Kevin Su <pingsutw@users.noreply.github.com>
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: ray[default]

# Flyte Ray Plugin

Union can execute Ray jobs natively on a Kubernetes cluster,
managing the virtual Ray cluster's lifecycle: spin-up and teardown.
It leverages the open-source KubeRay operator and can be enabled without signing up for any service.
This is equivalent to running a transient Ray cluster:
one spun up for a specific Ray job and torn down after completion.

To install the plugin, run the following command:

```bash
pip install flyteplugins-ray
```
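For orientation, here is a minimal sketch of how the dataclasses from `task.py` above might be attached to a task. Only `RayJobConfig`, `WorkerNodeConfig`, `HeadNodeConfig`, and `Resources` come from this package and its dependencies; the task-declaration API shown (`flyte.TaskEnvironment` with a `plugin_config` parameter) is an assumption about the surrounding flyte SDK, not something this diff confirms:

```python
# Hypothetical usage sketch: flyte.TaskEnvironment(plugin_config=...) is assumed;
# only the RayJobConfig/WorkerNodeConfig/HeadNodeConfig dataclasses are from this package.
import flyte
import ray
from flyte import Resources
from flyteplugins.ray.task import HeadNodeConfig, RayJobConfig, WorkerNodeConfig

ray_config = RayJobConfig(
    head_node_config=HeadNodeConfig(requests=Resources(cpu=2, memory="4Gi")),
    worker_node_config=[
        WorkerNodeConfig(
            group_name="workers",
            replicas=2,
            requests=Resources(cpu=2, memory="4Gi"),
        )
    ],
    shutdown_after_job_finishes=True,
    ttl_seconds_after_finished=300,
)

env = flyte.TaskEnvironment(name="ray-demo", plugin_config=ray_config)  # assumed API


@env.task
async def count_nodes() -> int:
    # RayFunctionTask.pre() has already called ray.init() by the time the body runs.
    return len(ray.nodes())
```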
flyteplugins_ray-0.2.0b30.dist-info/RECORD
ADDED
@@ -0,0 +1,6 @@
flyteplugins/ray/__init__.py,sha256=6FlHBUczAg13S-Lkd1DhwmjMzznpM_wYLemyOEkH1IE,147
flyteplugins/ray/task.py,sha256=iOF5rKtW8KhZrGDkMLLIlpDK_Wv4VYFSAEiygoXsxdg,5120
flyteplugins_ray-0.2.0b30.dist-info/METADATA,sha256=HZk3OlSvJ8lyKsuG3OwkwML6f4e30tuva8_Ro-QstWU,713
flyteplugins_ray-0.2.0b30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
flyteplugins_ray-0.2.0b30.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
flyteplugins_ray-0.2.0b30.dist-info/RECORD,,
flyteplugins_ray-0.2.0b30.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
flyteplugins