flyteplugins-dask 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Re-export the plugin's configuration types so they can be imported
# directly from the ``flyteplugins.dask`` package.
from flyteplugins.dask.task import Dask, Scheduler, WorkerGroup

__all__ = ["Dask", "Scheduler", "WorkerGroup"]
@@ -0,0 +1,122 @@
1
+ import sys
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Dict, Optional
4
+
5
+ import flyte
6
+ from distributed import Client, SchedulerPlugin, WorkerPlugin
7
+ from flyte import Resources
8
+ from flyte.extend import AsyncFunctionTaskTemplate, TaskPluginRegistry, download_code_bundle, get_proto_resources
9
+ from flyte.models import CodeBundle, SerializationContext
10
+ from flyteidl2.plugins.dask_pb2 import DaskJob, DaskScheduler, DaskWorkerGroup
11
+ from google.protobuf.json_format import MessageToDict
12
+
13
+
14
@dataclass
class Scheduler:
    """
    Settings for the dask scheduler pod.

    :param image: Container image for the scheduler pod. When left as ``None`` (the default), the image the
        task was registered with is used. The image must have ``dask[distributed]`` installed and should share
        its Python environment with the rest of the cluster (job runner pod + worker pods).
    :param resources: Resources to request for the scheduler pod. Defaults to ``None``.
    """

    image: Optional[str] = None
    resources: Optional[Resources] = None
27
+
28
+
29
@dataclass
class WorkerGroup:
    """
    Settings for one group of dask worker pods.

    :param number_of_workers: How many workers the group contains. Defaults to 1.
    :param image: Container image for the worker pods. When left as ``None`` (the default), the image the
        task was registered with is used. The image must have ``dask[distributed]`` installed and should have
        the same Python environment as the job runner/driver and the scheduler.
    :param resources: Resources to request for the worker pods. Defaults to ``None``.
    """

    number_of_workers: Optional[int] = 1
    image: Optional[str] = None
    resources: Optional[Resources] = None
44
+
45
+
46
@dataclass
class Dask:
    """
    Top-level configuration of a dask task.

    :param scheduler: Settings for the scheduler pod. A fresh ``Scheduler()`` is created when omitted.
    :param workers: Settings for the pods of the default worker group. A fresh ``WorkerGroup()`` is created
        when omitted.
    """

    # default_factory gives every ``Dask`` instance its own Scheduler / WorkerGroup object.
    scheduler: Scheduler = field(default_factory=Scheduler)
    workers: WorkerGroup = field(default_factory=WorkerGroup)
57
+
58
+
59
class DownloadCodeBundleSchedulerPlugin(SchedulerPlugin):
    """
    Dask scheduler plugin that downloads and sets up the code bundle on the scheduler.
    """

    def __init__(self, code_bundle: CodeBundle):
        # Kept on the instance so ``start`` can fetch it later.
        self.code_bundle = code_bundle

    async def start(self, scheduler):
        """
        Prepend ``"."`` to ``sys.path`` and download the stored code bundle.

        :param scheduler: The scheduler passed in by dask; not used here.
        """
        # Presumably makes the downloaded bundle importable from the working directory.
        sys.path.insert(0, ".")
        bundle = self.code_bundle
        await download_code_bundle(bundle)
70
+
71
+
72
class DownloadCodeBundleWorkerPlugin(WorkerPlugin):
    """
    Dask worker plugin that downloads and sets up the code bundle on each worker.
    """

    def __init__(self, code_bundle: CodeBundle):
        # Kept on the instance so ``setup`` can fetch it later.
        self.code_bundle = code_bundle

    async def setup(self, worker):
        """
        Executed on every worker while it is being initialized.

        Prepends ``"."`` to ``sys.path`` and downloads the stored code bundle.

        :param worker: The worker passed in by dask; not used here.
        """
        # Presumably makes the downloaded bundle importable from the working directory.
        sys.path.insert(0, ".")
        bundle = self.code_bundle
        await download_code_bundle(bundle)
86
+
87
+
88
@dataclass(kw_only=True)
class DaskTask(AsyncFunctionTaskTemplate):
    """
    Task template that runs the wrapped Python function as a ``dask`` task.

    :param plugin_config: The :class:`Dask` configuration describing the scheduler pod and the default
        worker group.
    :param task_type: Task type identifier. Defaults to ``"dask"``.
    :param debuggable: Defaults to ``True``.
    """

    plugin_config: Dask
    task_type: str = "dask"
    debuggable: bool = True

    async def pre(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Register the code-bundle download plugins with the dask cluster.

        The plugins are registered only when the current Flyte context reports in-cluster execution and
        carries a code bundle; otherwise nothing is done.

        :return: Always an empty dictionary.
        """
        ctx = flyte.ctx()
        code_bundle = ctx.code_bundle
        if ctx.is_in_cluster() and code_bundle:
            # NOTE(review): the client is created with default arguments and never closed here.
            # Confirm whether it is meant to stay alive for the user's task code before changing that.
            client = Client()
            client.register_plugin(DownloadCodeBundleWorkerPlugin(code_bundle))
            client.register_plugin(DownloadCodeBundleSchedulerPlugin(code_bundle))

        return {}

    def custom_config(self, sctx: SerializationContext) -> Dict[str, Any]:
        """
        Build the ``DaskJob`` message for this task and return it as a dictionary.

        :param sctx: Serialization context; not used by this implementation.
        :return: The ``DaskJob`` message converted with ``MessageToDict``.
        """
        scheduler = self.plugin_config.scheduler
        wg = self.plugin_config.workers

        job = DaskJob(
            scheduler=DaskScheduler(image=scheduler.image, resources=get_proto_resources(scheduler.resources)),
            workers=DaskWorkerGroup(
                number_of_workers=wg.number_of_workers, image=wg.image, resources=get_proto_resources(wg.resources)
            ),
        )

        return MessageToDict(job)
120
+
121
+
122
+ TaskPluginRegistry.register(Dask, DaskTask)
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: flyteplugins-dask
3
+ Version: 2.0.0
4
+ Summary: Dask plugin for flyte
5
+ Author-email: Kevin Su <pingsutw@users.noreply.github.com>
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: dask[distributed]>=2022.10.2
9
+ Requires-Dist: flyte
10
+ Requires-Dist: bokeh
11
+
12
+ # Union Dask Plugin
13
+
14
+ Flyte can execute `dask` jobs natively on a Kubernetes Cluster, which manages the virtual `dask` cluster's lifecycle
15
+ (spin-up and tear-down). It leverages the open-source Kubernetes Dask Operator and can be enabled without signing up
16
+ for any service. This is like running a transient (ephemeral) `dask` cluster - a type of cluster spun up for a specific
17
+ task and torn down after completion. This helps in making sure that the Python environment is the same on the job-runner
18
+ (driver), scheduler and the workers.
19
+
20
+ To install the plugin, run the following command:
21
+
22
+ ```bash
23
+ pip install --pre flyteplugins-dask
24
+ ```
@@ -0,0 +1,6 @@
1
+ flyteplugins/dask/__init__.py,sha256=UwmJjPRVqDyCTobQlT5a2g2oZJvK0vlDiv6rwugNs-g,112
2
+ flyteplugins/dask/task.py,sha256=4llWxQCBsiMAoWkxO39O_heDUSTOm3Hp7bx1jbsriSk,4174
3
+ flyteplugins_dask-2.0.0.dist-info/METADATA,sha256=PXfcy9b818ZOJn1F2Ve-u0iHqstAI2pSHYtjtT5Wz9s,935
4
+ flyteplugins_dask-2.0.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
5
+ flyteplugins_dask-2.0.0.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
6
+ flyteplugins_dask-2.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ flyteplugins