flyteplugins-dask 2.0.0b11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ __all__ = ["Dask", "Scheduler", "WorkerGroup"]
2
+
3
+ from flyteplugins.dask.task import Dask, Scheduler, WorkerGroup
@@ -0,0 +1,91 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Any, Dict, Optional
3
+
4
+ import flyte
5
+ from flyte import Resources
6
+ from flyte._internal.runtime.resources_serde import get_proto_resources
7
+ from flyte.extend import AsyncFunctionTaskTemplate, TaskPluginRegistry
8
+ from flyte.models import SerializationContext
9
+ from flyteidl.plugins.dask_pb2 import DaskJob, DaskScheduler, DaskWorkerGroup
10
+ from google.protobuf.json_format import MessageToDict
11
+
12
+
13
@dataclass
class Scheduler:
    """
    Settings for the dask scheduler pod.

    :param image: Container image for the scheduler. When left as ``None``, the image the task was registered
        with is used. The image must have ``dask[distributed]`` installed and should provide the same Python
        environment as the rest of the cluster (job runner pod and worker pods). Defaults to ``None``.
    :param resources: Resources requested for the scheduler pod. Defaults to ``None``.
    """

    image: Optional[str] = None
    resources: Optional[Resources] = None
26
+
27
+
28
@dataclass
class WorkerGroup:
    """
    Settings for one group of dask worker pods.

    :param number_of_workers: How many workers the group runs. Defaults to 1.
    :param image: Container image for the workers. When left as ``None``, the image the task was registered
        with is used. The image must have ``dask[distributed]`` installed and should provide the same Python
        environment as the job runner/driver and the scheduler. Defaults to ``None``.
    :param resources: Resources requested for each worker pod. Defaults to ``None``.
    """

    number_of_workers: Optional[int] = 1
    image: Optional[str] = None
    resources: Optional[Resources] = None
43
+
44
+
45
@dataclass
class Dask:
    """
    Top-level plugin configuration for a dask task.

    :param scheduler: Settings for the scheduler pod. A new ``Scheduler()`` is created when omitted.
    :param workers: Settings for the pods of the default worker group. A new ``WorkerGroup()`` is created
        when omitted.
    """

    # The classes themselves serve as factories, so every Dask instance gets its own default objects.
    scheduler: Scheduler = field(default_factory=Scheduler)
    workers: WorkerGroup = field(default_factory=WorkerGroup)
56
+
57
+
58
@dataclass(kw_only=True)
class DaskTask(AsyncFunctionTaskTemplate):
    """
    Plugin task template that runs the wrapped Python function as a Flyte ``dask`` task.

    :param plugin_config: Scheduler and worker-group settings that are serialized into the task's custom config.
    :param task_type: Task type identifier. Defaults to ``"dask"``.
    """

    plugin_config: Dask
    task_type: str = "dask"

    async def pre(self, *args, **kwargs) -> Dict[str, Any]:
        """
        Register the task's code bundle with the dask cluster before the task body runs.

        When the current Flyte context reports an in-cluster run and carries a code bundle, a
        ``distributed.Client`` is created with default arguments and an ``UploadDirectory`` plugin is
        registered for the bundle's destination directory. In every other case nothing is done.

        :param args: Accepted but not used here.
        :param kwargs: Accepted but not used here.
        :return: Always an empty dict.
        """
        # Function-scope imports keep ``distributed`` from being imported when this module is loaded.
        from distributed import Client
        from distributed.diagnostics.plugin import UploadDirectory

        # Resolve the context once and treat it as optional, so a missing task context skips the
        # upload instead of failing with ``AttributeError`` on ``None``.
        ctx = flyte.ctx()
        if ctx is not None and ctx.is_in_cluster() and ctx.code_bundle:
            # NOTE(review): the client is never closed here - confirm whether it is meant to stay
            # alive for the duration of the task body.
            client = Client()
            client.register_plugin(UploadDirectory(ctx.code_bundle.destination))

        return {}

    def custom_config(self, sctx: SerializationContext) -> Dict[str, Any]:
        """
        Build the ``DaskJob`` message from ``plugin_config`` and return it as a plain dict.

        :param sctx: Serialization context; not read by this implementation.
        :return: The ``DaskJob`` message converted with ``MessageToDict``.
        """
        scheduler = self.plugin_config.scheduler
        wg = self.plugin_config.workers

        job = DaskJob(
            scheduler=DaskScheduler(image=scheduler.image, resources=get_proto_resources(scheduler.resources)),
            workers=DaskWorkerGroup(
                number_of_workers=wg.number_of_workers, image=wg.image, resources=get_proto_resources(wg.resources)
            ),
        )

        return MessageToDict(job)
89
+
90
+
91
+ TaskPluginRegistry.register(Dask, DaskTask)  # pair the Dask config type with the DaskTask template in the plugin registry
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: flyteplugins-dask
3
+ Version: 2.0.0b11
4
+ Summary: Dask plugin for flyte
5
+ Author-email: Kevin Su <pingsutw@users.noreply.github.com>
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: dask[distributed]>=2022.10.2
9
+ Requires-Dist: flyte
10
+ Requires-Dist: bokeh
11
+
12
+ # Union Dask Plugin
13
+
14
+ Flyte can execute `dask` jobs natively on a Kubernetes Cluster, which manages the virtual `dask` cluster's lifecycle
15
+ (spin-up and tear down). It leverages the open-source Kubernetes Dask Operator and can be enabled without signing up
16
+ for any service. This is like running a transient (ephemeral) `dask` cluster - a type of cluster spun up for a specific
17
+ task and torn down after completion. This helps in making sure that the Python environment is the same on the job-runner
18
+ (driver), scheduler and the workers.
19
+
20
+ To install the plugin, run the following command:
21
+
22
+ ```bash
23
+ pip install --pre flyteplugins-dask
24
+ ```
@@ -0,0 +1,6 @@
1
+ flyteplugins/dask/__init__.py,sha256=UwmJjPRVqDyCTobQlT5a2g2oZJvK0vlDiv6rwugNs-g,112
2
+ flyteplugins/dask/task.py,sha256=5jYtNY0wHE_EwoTYZDQ1vdlWjfrBzix3fxMz3Q4Rivo,3307
3
+ flyteplugins_dask-2.0.0b11.dist-info/METADATA,sha256=NylwL_73XHQmvpjl_vdAUNeHsQwH729laps9VDCQ_Vs,938
4
+ flyteplugins_dask-2.0.0b11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
+ flyteplugins_dask-2.0.0b11.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
6
+ flyteplugins_dask-2.0.0b11.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ flyteplugins