flyteplugins-spark 0.2.0b30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of flyteplugins-spark might be problematic.

flyteplugins/spark/__init__.py
@@ -0,0 +1,3 @@
+ __all__ = ["Spark"]
+
+ from flyteplugins.spark.task import Spark
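Because `__init__.py` re-exports the class, user code can import `Spark` from the package root rather than from `flyteplugins.spark.task`; a trivial check, assuming the wheel is installed:

```python
# Both imports resolve to the same class thanks to the re-export above.
from flyteplugins.spark import Spark
from flyteplugins.spark.task import Spark as TaskSpark

assert Spark is TaskSpark
```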
flyteplugins/spark/task.py
@@ -0,0 +1,86 @@
+ import os
+ import shutil
+ import tempfile
+ from dataclasses import dataclass
+ from typing import Any, Dict, Optional
+
+ from flyte import PodTemplate
+ from flyte._initialize import is_initialized
+ from flyte.extend import AsyncFunctionTaskTemplate, TaskPluginRegistry
+ from flyte.models import SerializationContext
+ from flyteidl.plugins.spark_pb2 import SparkApplication, SparkJob
+ from google.protobuf.json_format import MessageToDict
+
+
+ @dataclass
+ class Spark(object):
+     """
+     Use this to configure a SparkContext for your task. Tasks marked with this will automatically
+     execute natively on K8s as a distributed Spark job.
+
+     Attributes:
+         spark_conf (Optional[Dict[str, str]]): Spark configuration dictionary.
+         hadoop_conf (Optional[Dict[str, str]]): Hadoop configuration dictionary.
+         executor_path (Optional[str]): Path to the Python binary for PySpark execution.
+         applications_path (Optional[str]): Path to the main application file.
+         driver_pod (Optional[PodTemplate]): Pod template for the driver pod.
+         executor_pod (Optional[PodTemplate]): Pod template for the executor pods.
+     """
+
+     spark_conf: Optional[Dict[str, str]] = None
+     hadoop_conf: Optional[Dict[str, str]] = None
+     executor_path: Optional[str] = None
+     applications_path: Optional[str] = None
+     driver_pod: Optional[PodTemplate] = None
+     executor_pod: Optional[PodTemplate] = None
+
+     def __post_init__(self):
+         if self.spark_conf is None:
+             self.spark_conf = {}
+
+         if self.hadoop_conf is None:
+             self.hadoop_conf = {}
+
+
+ @dataclass(kw_only=True)
+ class PysparkFunctionTask(AsyncFunctionTaskTemplate):
+     """
+     The plugin that transforms local Python code for execution within a Spark context.
+     """
+
+     plugin_config: Spark
+     task_type: str = "spark"
+
+     async def pre(self, *args, **kwargs) -> Dict[str, Any]:
+         import pyspark as _pyspark
+
+         sess = _pyspark.sql.SparkSession.builder.appName("FlyteSpark").getOrCreate()
+
+         if is_initialized():
+             base_dir = tempfile.mkdtemp()
+             file_name = "flyte_wf"
+             file_format = "zip"
+             shutil.make_archive(f"{base_dir}/{file_name}", file_format, os.getcwd())
+             sess.sparkContext.addPyFile(f"{base_dir}/{file_name}.{file_format}")
+
+         return {"spark_session": sess}
+
+     def custom_config(self, sctx: SerializationContext) -> Dict[str, Any]:
+         driver_pod = self.plugin_config.driver_pod.to_k8s_pod() if self.plugin_config.driver_pod else None
+         executor_pod = self.plugin_config.executor_pod.to_k8s_pod() if self.plugin_config.executor_pod else None
+
+         job = SparkJob(
+             sparkConf=self.plugin_config.spark_conf,
+             hadoopConf=self.plugin_config.hadoop_conf,
+             mainApplicationFile=self.plugin_config.applications_path,
+             executorPath=self.plugin_config.executor_path,
+             mainClass="",
+             applicationType=SparkApplication.PYTHON,
+             driverPod=driver_pod,
+             executorPod=executor_pod,
+         )
+
+         return MessageToDict(job)
+
+
+ TaskPluginRegistry.register(Spark, PysparkFunctionTask)
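For context, a minimal sketch of what `custom_config` produces for a given config; the `spark_conf` keys below are illustrative, and the sketch mirrors the `SparkJob` construction above rather than calling the plugin class directly:

```python
from flyteidl.plugins.spark_pb2 import SparkApplication, SparkJob
from google.protobuf.json_format import MessageToDict

from flyteplugins.spark import Spark

# __post_init__ replaces the None defaults with empty dicts.
config = Spark(spark_conf={"spark.executor.instances": "2"})
assert config.hadoop_conf == {}

# Mirrors PysparkFunctionTask.custom_config for a config without pod templates.
job = SparkJob(
    sparkConf=config.spark_conf,
    hadoopConf=config.hadoop_conf,
    mainApplicationFile=config.applications_path,  # None -> field left unset
    executorPath=config.executor_path,
    mainClass="",
    applicationType=SparkApplication.PYTHON,
)

# MessageToDict omits proto3 default values, so only the non-default
# fields survive, e.g. {'sparkConf': {'spark.executor.instances': '2'}}.
print(MessageToDict(job))
```

The final `TaskPluginRegistry.register` call is what routes any task configured with a `Spark` instance to `PysparkFunctionTask`.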
flyteplugins_spark-0.2.0b30.dist-info/METADATA
@@ -0,0 +1,18 @@
+ Metadata-Version: 2.4
+ Name: flyteplugins-spark
+ Version: 0.2.0b30
+ Summary: Spark plugin for flyte
+ Author-email: Kevin Su <pingsutw@users.noreply.github.com>
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: pyspark
+
+ # Union Spark Plugin
+
+ Union can execute Spark jobs natively on a Kubernetes cluster, managing the virtual cluster's lifecycle, spin-up, and tear-down. It leverages the open-source Spark On K8s Operator and can be enabled without signing up for any service. This is akin to running a transient Spark cluster: a cluster spun up for a specific Spark job and torn down after completion.
+
+ To install the plugin, run the following command:
+
+ ```bash
+ pip install flyteplugins-spark
+ ```
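After installation, a task is pointed at the plugin via its config class. The wiring below is a hedged sketch: `flyte.TaskEnvironment` and its `plugin_config` parameter are assumptions about the flyte SDK, and the injected `spark_session` argument follows from the `pre` hook shown earlier.

```python
import flyte
from flyteplugins.spark import Spark

# Hypothetical wiring: the exact way a plugin config attaches to a task
# depends on the flyte SDK version in use.
env = flyte.TaskEnvironment(
    name="spark-demo",
    plugin_config=Spark(spark_conf={"spark.executor.instances": "2"}),
)

@env.task
async def count_rows(spark_session) -> int:
    # PysparkFunctionTask.pre injects the active SparkSession.
    return spark_session.range(100).count()
```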
flyteplugins_spark-0.2.0b30.dist-info/RECORD
@@ -0,0 +1,6 @@
+ flyteplugins/spark/__init__.py,sha256=7q06oXtVhOuNW4Nv-YhJvt8ORqhXmMPNYa-m4IkougI,63
+ flyteplugins/spark/task.py,sha256=3fnw8QzcROAqmSAZIJLPc90Q-AvENV1fVdHChMFWdhs,3174
+ flyteplugins_spark-0.2.0b30.dist-info/METADATA,sha256=PmUrpy-01jPgkUUYkcYhljrjbErp2CVrmeAfGviv4YU,736
+ flyteplugins_spark-0.2.0b30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ flyteplugins_spark-0.2.0b30.dist-info/top_level.txt,sha256=cgd779rPu9EsvdtuYgUxNHHgElaQvPn74KhB5XSeMBE,13
+ flyteplugins_spark-0.2.0b30.dist-info/RECORD,,
flyteplugins_spark-0.2.0b30.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
flyteplugins_spark-0.2.0b30.dist-info/top_level.txt
@@ -0,0 +1 @@
+ flyteplugins