flyteplugins-dask 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
import flyte
|
|
6
|
+
from distributed import Client, SchedulerPlugin, WorkerPlugin
|
|
7
|
+
from flyte import Resources
|
|
8
|
+
from flyte.extend import AsyncFunctionTaskTemplate, TaskPluginRegistry, download_code_bundle, get_proto_resources
|
|
9
|
+
from flyte.models import CodeBundle, SerializationContext
|
|
10
|
+
from flyteidl2.plugins.dask_pb2 import DaskJob, DaskScheduler, DaskWorkerGroup
|
|
11
|
+
from google.protobuf.json_format import MessageToDict
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Scheduler:
    """
    Settings for the pod that runs the dask scheduler.

    :param image: Custom image to use. If ``None``, will use the same image the task was registered with. Optional,
        defaults to None. The image must have ``dask[distributed]`` installed and should have the same Python
        environment as the rest of the cluster (job runner pod + worker pods).
    :param resources: Resources to request for the scheduler pod. Optional, defaults to None.
    """

    # None means "reuse the image the task was registered with".
    image: Optional[str] = None
    # None means "use the platform defaults for the scheduler pod".
    resources: Optional[Resources] = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class WorkerGroup:
    """
    Settings for one group of dask worker pods.

    :param number_of_workers: Number of workers to use. Optional, defaults to 1.
    :param image: Custom image to use. If ``None``, will use the same image the task was registered with. Optional,
        defaults to None. The image must have ``dask[distributed]`` installed. The provided image should have the
        same Python environment as the job runner/driver as well as the scheduler.
    :param resources: Resources to request for the worker pods. Optional, defaults to None.
    """

    # Size of this worker group; the backend decides what an unset (None) value means.
    number_of_workers: Optional[int] = 1
    # None means "reuse the image the task was registered with".
    image: Optional[str] = None
    # None means "use the platform defaults for each worker pod".
    resources: Optional[Resources] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class Dask:
    """
    Top-level configuration for a dask task: one scheduler plus a default worker group.

    :param scheduler: Configuration for the scheduler pod. Optional, defaults to ``Scheduler()``.
    :param workers: Configuration for the pods of the default worker group. Optional, defaults to ``WorkerGroup()``.
    """

    # default_factory keeps each Dask instance with its own (mutable) sub-configs.
    scheduler: Scheduler = field(default_factory=Scheduler)
    workers: WorkerGroup = field(default_factory=WorkerGroup)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class DownloadCodeBundleSchedulerPlugin(SchedulerPlugin):
    """
    Dask scheduler plugin that fetches the task's code bundle when the scheduler starts,
    so the user's code is importable on the scheduler process.
    """

    def __init__(self, code_bundle: CodeBundle):
        # Bundle describing where the task's code lives; downloaded in start().
        self.code_bundle = code_bundle

    async def start(self, scheduler):
        # Put the working directory on the import path, then pull the bundle into it.
        sys.path.insert(0, ".")
        await download_code_bundle(self.code_bundle)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class DownloadCodeBundleWorkerPlugin(WorkerPlugin):
    """
    Dask worker plugin that fetches the task's code bundle on every worker,
    so the user's code is importable on each worker process.
    """

    def __init__(self, code_bundle: CodeBundle):
        # Bundle describing where the task's code lives; downloaded in setup().
        self.code_bundle = code_bundle

    async def setup(self, worker):
        """
        Runs on each worker as it is initialized.
        """
        # Put the working directory on the import path, then pull the bundle into it.
        sys.path.insert(0, ".")
        await download_code_bundle(self.code_bundle)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass(kw_only=True)
class DaskTask(AsyncFunctionTaskTemplate):
    """
    Plugin that transforms the local python code for execution on an ephemeral dask cluster.

    Before the user function runs, ``pre`` registers plugins that distribute the task's
    code bundle to the scheduler and every worker, keeping the Python environment
    consistent across the cluster. ``custom_config`` serializes the cluster topology
    into the ``DaskJob`` protobuf consumed by the backend.
    """

    # Cluster layout supplied by the user (scheduler + default worker group).
    plugin_config: Dask
    task_type: str = "dask"
    debuggable: bool = True

    async def pre(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Hook run before the user function. When executing inside the cluster with a
        code bundle available, connect a dask ``Client`` and register the bundle
        download plugins for the scheduler and the workers.

        :return: An empty dict; no extra kwargs are injected into the user function.
        """
        ctx = flyte.ctx()
        code_bundle = ctx.code_bundle
        if ctx.is_in_cluster() and code_bundle:
            # Client() connects to the cluster and becomes the default client;
            # register_plugin distributes the plugins to scheduler/workers.
            client = Client()
            client.register_plugin(DownloadCodeBundleWorkerPlugin(code_bundle))
            client.register_plugin(DownloadCodeBundleSchedulerPlugin(code_bundle))

        return {}

    def custom_config(self, sctx: SerializationContext) -> Dict[str, Any]:
        """
        Serialize this task's dask configuration into the dict form of the
        ``DaskJob`` protobuf message.

        :param sctx: Serialization context (unused here, required by the interface).
        :return: ``DaskJob`` rendered as a JSON-compatible dict.
        """
        scheduler = self.plugin_config.scheduler
        wg = self.plugin_config.workers

        job = DaskJob(
            scheduler=DaskScheduler(image=scheduler.image, resources=get_proto_resources(scheduler.resources)),
            workers=DaskWorkerGroup(
                number_of_workers=wg.number_of_workers, image=wg.image, resources=get_proto_resources(wg.resources)
            ),
        )

        return MessageToDict(job)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Register the plugin so tasks configured with ``Dask`` are executed via ``DaskTask``.
TaskPluginRegistry.register(Dask, DaskTask)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flyteplugins-dask
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Dask plugin for flyte
|
|
5
|
+
Author-email: Kevin Su <pingsutw@users.noreply.github.com>
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: dask[distributed]>=2022.10.2
|
|
9
|
+
Requires-Dist: flyte
|
|
10
|
+
Requires-Dist: bokeh
|
|
11
|
+
|
|
12
|
+
# Union Dask Plugin
|
|
13
|
+
|
|
14
|
+
Flyte can execute `dask` jobs natively on a Kubernetes Cluster, which manages the virtual `dask` cluster's lifecycle
|
|
15
|
+
(spin-up and tear down). It leverages the open-source Kubernetes Dask Operator and can be enabled without signing up
|
|
16
|
+
for any service. This is like running a transient (ephemeral) `dask` cluster - a type of cluster spun up for a specific
|
|
17
|
+
task and torn down after completion. This helps ensure that the Python environment is the same on the job runner
(driver), the scheduler, and the workers.
|
|
19
|
+
|
|
20
|
+
To install the plugin, run the following command:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install --pre flyteplugins-dask
|
|
24
|
+
```
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
flyteplugins/dask/__init__.py,sha256=UwmJjPRVqDyCTobQlT5a2g2oZJvK0vlDiv6rwugNs-g,112
|
|
2
|
+
flyteplugins/dask/task.py,sha256=4llWxQCBsiMAoWkxO39O_heDUSTOm3Hp7bx1jbsriSk,4174
|
|
3
|
+
flyteplugins_dask-2.0.0.dist-info/METADATA,sha256=PXfcy9b818ZOJn1F2Ve-u0iHqstAI2pSHYtjtT5Wz9s,935
|
|
4
|
+
flyteplugins_dask-2.0.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
5
|
+
flyteplugins_dask-2.0.0.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
|
|
6
|
+
flyteplugins_dask-2.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
flyteplugins
|