FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +17 -2
- flowerpower/cfg/__init__.py +201 -149
- flowerpower/cfg/base.py +122 -24
- flowerpower/cfg/pipeline/__init__.py +254 -0
- flowerpower/cfg/pipeline/adapter.py +66 -0
- flowerpower/cfg/pipeline/run.py +40 -11
- flowerpower/cfg/pipeline/schedule.py +69 -79
- flowerpower/cfg/project/__init__.py +149 -0
- flowerpower/cfg/project/adapter.py +57 -0
- flowerpower/cfg/project/job_queue.py +165 -0
- flowerpower/cli/__init__.py +92 -37
- flowerpower/cli/job_queue.py +878 -0
- flowerpower/cli/mqtt.py +32 -1
- flowerpower/cli/pipeline.py +559 -406
- flowerpower/cli/utils.py +29 -18
- flowerpower/flowerpower.py +12 -8
- flowerpower/fs/__init__.py +20 -2
- flowerpower/fs/base.py +350 -26
- flowerpower/fs/ext.py +797 -216
- flowerpower/fs/storage_options.py +1097 -55
- flowerpower/io/base.py +13 -18
- flowerpower/io/loader/__init__.py +28 -0
- flowerpower/io/loader/deltatable.py +7 -10
- flowerpower/io/metadata.py +1 -0
- flowerpower/io/saver/__init__.py +28 -0
- flowerpower/io/saver/deltatable.py +4 -3
- flowerpower/job_queue/__init__.py +252 -0
- flowerpower/job_queue/apscheduler/__init__.py +11 -0
- flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
- flowerpower/job_queue/apscheduler/manager.py +1063 -0
- flowerpower/job_queue/apscheduler/setup.py +524 -0
- flowerpower/job_queue/apscheduler/trigger.py +169 -0
- flowerpower/job_queue/apscheduler/utils.py +309 -0
- flowerpower/job_queue/base.py +382 -0
- flowerpower/job_queue/rq/__init__.py +10 -0
- flowerpower/job_queue/rq/_trigger.py +37 -0
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
- flowerpower/job_queue/rq/manager.py +1449 -0
- flowerpower/job_queue/rq/setup.py +150 -0
- flowerpower/job_queue/rq/utils.py +69 -0
- flowerpower/pipeline/__init__.py +5 -0
- flowerpower/pipeline/base.py +118 -0
- flowerpower/pipeline/io.py +407 -0
- flowerpower/pipeline/job_queue.py +505 -0
- flowerpower/pipeline/manager.py +1586 -0
- flowerpower/pipeline/registry.py +560 -0
- flowerpower/pipeline/runner.py +560 -0
- flowerpower/pipeline/visualizer.py +142 -0
- flowerpower/plugins/mqtt/__init__.py +12 -0
- flowerpower/plugins/mqtt/cfg.py +16 -0
- flowerpower/plugins/mqtt/manager.py +789 -0
- flowerpower/settings.py +110 -0
- flowerpower/utils/logging.py +21 -0
- flowerpower/utils/misc.py +57 -9
- flowerpower/utils/sql.py +122 -24
- flowerpower/utils/templates.py +2 -142
- flowerpower-1.0.0b1.dist-info/METADATA +324 -0
- flowerpower-1.0.0b1.dist-info/RECORD +94 -0
- flowerpower/_web/__init__.py +0 -61
- flowerpower/_web/routes/config.py +0 -103
- flowerpower/_web/routes/pipelines.py +0 -173
- flowerpower/_web/routes/scheduler.py +0 -136
- flowerpower/cfg/pipeline/tracker.py +0 -14
- flowerpower/cfg/project/open_telemetry.py +0 -8
- flowerpower/cfg/project/tracker.py +0 -11
- flowerpower/cfg/project/worker.py +0 -19
- flowerpower/cli/scheduler.py +0 -309
- flowerpower/cli/web.py +0 -44
- flowerpower/event_handler.py +0 -23
- flowerpower/mqtt.py +0 -609
- flowerpower/pipeline.py +0 -2499
- flowerpower/scheduler.py +0 -680
- flowerpower/tui.py +0 -79
- flowerpower/utils/datastore.py +0 -186
- flowerpower/utils/eventbroker.py +0 -127
- flowerpower/utils/executor.py +0 -58
- flowerpower/utils/trigger.py +0 -140
- flowerpower-0.9.13.1.dist-info/METADATA +0 -586
- flowerpower-0.9.13.1.dist-info/RECORD +0 -76
- /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
flowerpower/pipeline.py
DELETED
@@ -1,2499 +0,0 @@
|
|
1
|
-
import datetime as dt
|
2
|
-
import importlib
|
3
|
-
import importlib.util
|
4
|
-
import os
|
5
|
-
import posixpath
|
6
|
-
import sys
|
7
|
-
from typing import Any, Callable
|
8
|
-
from uuid import UUID
|
9
|
-
|
10
|
-
from fsspec.spec import AbstractFileSystem
|
11
|
-
from hamilton import driver
|
12
|
-
from hamilton.execution import executors
|
13
|
-
from hamilton.telemetry import disable_telemetry
|
14
|
-
|
15
|
-
if importlib.util.find_spec("opentelemetry"):
|
16
|
-
from hamilton.plugins import h_opentelemetry
|
17
|
-
|
18
|
-
from .utils.open_telemetry import init_tracer
|
19
|
-
|
20
|
-
else:
|
21
|
-
h_opentelemetry = None
|
22
|
-
init_tracer = None
|
23
|
-
import rich
|
24
|
-
from hamilton.plugins import h_tqdm
|
25
|
-
from hamilton_sdk.adapters import HamiltonTracker
|
26
|
-
from hamilton.plugins.h_threadpool import FutureAdapter
|
27
|
-
from loguru import logger
|
28
|
-
from rich.console import Console
|
29
|
-
from rich.panel import Panel
|
30
|
-
from rich.syntax import Syntax
|
31
|
-
from rich.table import Table
|
32
|
-
from rich.tree import Tree
|
33
|
-
|
34
|
-
from .cfg import ( # PipelineRunConfig,; PipelineScheduleConfig,; PipelineTrackerConfig,
|
35
|
-
Config,
|
36
|
-
PipelineConfig,
|
37
|
-
)
|
38
|
-
from .fs import get_filesystem
|
39
|
-
from .fs.storage_options import BaseStorageOptions
|
40
|
-
from .utils.misc import view_img
|
41
|
-
from .utils.templates import HOOK_TEMPLATE__MQTT_BUILD_CONFIG, PIPELINE_PY_TEMPLATE
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
if importlib.util.find_spec("apscheduler"):
|
46
|
-
from .scheduler import SchedulerManager
|
47
|
-
else:
|
48
|
-
SchedulerManager = None
|
49
|
-
from pathlib import Path
|
50
|
-
from types import TracebackType
|
51
|
-
|
52
|
-
# if importlib.util.find_spec("paho"):
|
53
|
-
# from .mqtt import MQTTClient
|
54
|
-
# else:
|
55
|
-
# MQTTClient = None
|
56
|
-
from munch import Munch
|
57
|
-
|
58
|
-
from .utils.executor import get_executor
|
59
|
-
from .utils.trigger import get_trigger # , ALL_TRIGGER_KWARGS
|
60
|
-
|
61
|
-
from enum import Enum
|
62
|
-
|
63
|
-
class HookType(str, Enum):
    """Enumeration of hook kinds supported by FlowerPower pipelines.

    Each member's value is the hyphenated identifier used on the command
    line; :meth:`default_function_name` maps it to a valid Python
    function name.
    """

    MQTT_BUILD_CONFIG = "mqtt-build-config"

    def default_function_name(self) -> str:
        """Return the default hook function name for this hook type.

        The hyphenated enum value is converted to snake_case so that it
        can be used directly as a Python identifier, e.g.
        ``"mqtt-build-config"`` -> ``"mqtt_build_config"``.
        """
        # The original match statement only handled MQTT_BUILD_CONFIG and
        # implicitly returned None for any other member; converting the
        # value directly gives every (future) member a sensible default.
        return self.value.replace("-", "_")
|
70
|
-
|
71
|
-
class PipelineManager:
|
72
|
-
def __init__(
    self,
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    telemetry: bool = True,
):
    """Initialize the PipelineManager.

    Args:
        base_dir (str | None): The FlowerPower base path. Defaults to the
            current working directory when None.
        storage_options (dict | Munch | BaseStorageOptions | None): Storage
            options for the filesystem. Defaults to None (treated as {}).
        fs (AbstractFileSystem | None): The fsspec filesystem to use. Built
            from `base_dir` and `storage_options` when None.
        cfg_dir (str): Name of the configuration directory. Defaults to "conf".
        pipelines_dir (str): Name of the pipelines directory. Defaults to
            "pipelines".
        telemetry (bool): Telemetry flag stored on the instance.

    Returns:
        None
    """
    self._telemetry = telemetry
    self._base_dir = base_dir or str(Path.cwd())
    # Fix: the original used a mutable default argument (`{}`), which is
    # shared across calls; a None sentinel with `or {}` is equivalent and safe.
    self._storage_options = storage_options or {}
    if fs is None:
        fs = get_filesystem(self._base_dir, **self._storage_options)
    self._fs = fs

    self._cfg_dir = cfg_dir
    self._pipelines_dir = pipelines_dir

    try:
        self._fs.makedirs(f"{self._cfg_dir}/pipelines", exist_ok=True)
        self._fs.makedirs(self._pipelines_dir, exist_ok=True)
    except Exception as e:
        # Directory creation is best-effort (e.g. read-only filesystems);
        # failures are logged rather than raised, matching prior behavior.
        logger.error(f"Error creating directories: {e}")

    self._sync_fs()
    self.load_config()
|
110
|
-
|
111
|
-
def __enter__(self) -> "PipelineManager":
    """Enter the context manager and hand back the manager itself."""
    return self
|
113
|
-
|
114
|
-
def __exit__(
    self,
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb: TracebackType | None,
) -> None:
    """Leave the context manager.

    No resources are held that need releasing; returning None lets any
    in-flight exception propagate to the caller.
    """
    # Intentionally a no-op — placeholder for future cleanup logic.
    return None
|
122
|
-
|
123
|
-
def _get_schedules(self):
    """Fetch every schedule known to this project's scheduler backend."""
    with SchedulerManager(fs=self._fs, role="scheduler") as manager:
        return manager.get_schedules()
|
129
|
-
|
130
|
-
def _sync_fs(self):
    """Synchronize a caching filesystem and expose the pipelines directory.

    When the filesystem is a cache filesystem it is synced first; the
    pipelines directory is then appended to ``sys.path`` (once) so pipeline
    modules can be imported by name.

    Returns:
        None
    """
    if self._fs.is_cache_fs:
        self._fs.sync()

    pipelines_path = posixpath.join(self._fs.path, self._pipelines_dir)
    if pipelines_path not in sys.path:
        sys.path.append(pipelines_path)
|
143
|
-
|
144
|
-
def load_module(self, name: str, reload: bool = False):
    """Import the pipeline module ``name`` from the pipelines directory.

    Args:
        name (str): The module name of the pipeline to import.
        reload (bool): Re-execute the already-loaded module when True.

    Returns:
        None. The imported module is stored on ``self._module``.
    """
    modules_path = posixpath.join(self._fs.path, self._pipelines_dir)
    # Fix: the original appended unconditionally, growing sys.path on
    # every call; guard against duplicates instead.
    if modules_path not in sys.path:
        sys.path.append(modules_path)

    if not hasattr(self, "_module"):
        self._module = importlib.import_module(name)
    elif reload:
        # importlib.reload re-executes the module in place and returns the
        # same module object, so rebinding is behavior-preserving.
        self._module = importlib.reload(self._module)
|
162
|
-
|
163
|
-
def load_config(self, name: str | None = None, reload: bool = False):
    """Load the project (and optionally pipeline) configuration onto ``self.cfg``.

    Args:
        name (str | None): Name of the pipeline whose configuration should be
            loaded alongside the project configuration. Defaults to None.
        reload (bool): Drop the currently cached configuration before loading.

    Returns:
        None
    """
    if reload:
        # Discard the cached configuration so it is rebuilt from storage.
        del self.cfg
    self.cfg = Config.load(base_dir=self._base_dir, pipeline_name=name, fs=self._fs)
|
179
|
-
|
180
|
-
def _get_driver(
    self,
    name: str,
    executor: str | None = None,
    with_tracker: bool = False,
    with_opentelemetry: bool = False,
    with_progressbar: bool = False,
    config: dict | None = None,
    reload: bool = False,
    **kwargs,
) -> tuple[driver.Driver, Callable | None]:
    """Build the Hamilton driver and optional executor-shutdown callback.

    Args:
        name (str): The name of the pipeline.
        executor (str | None): The executor to use. Defaults to None ("local").
        with_tracker (bool): Whether to attach the Hamilton UI tracker.
        with_opentelemetry (bool): Whether to attach an OpenTelemetry tracer.
        with_progressbar (bool): Whether to attach a tqdm progress bar.
        config (dict | None): Config for the Hamilton driver. Defaults to None.
        reload (bool): Whether to reload the config and module first.
        **kwargs: Additional keyword arguments.

    Keyword Args:
        max_tasks (int): Maximum number of tasks. Defaults to 20.
        num_cpus (int): Number of CPUs. Defaults to 4.
        project_id (str): Project ID for the tracker (required with tracker).
        username (str): Username for the tracker.
        dag_name (str): DAG name for the tracker.
        tags (str): Tags for the tracker.
        api_url (str): API URL for the tracker.
        ui_url (str): UI URL for the tracker.
        host (str): OpenTelemetry collector host. Defaults to "localhost".
        port (int): OpenTelemetry collector port. Defaults to 6831.

    Returns:
        tuple[driver.Driver, Callable | None]: The driver and an optional
        shutdown callable for the remote executor.

    Raises:
        ValueError: If the tracker is requested without a ``project_id``.
    """
    # Fix: the original used a mutable `{}` default argument for `config`.
    config = config or {}
    if not self.cfg.pipeline.name == name or reload:
        self.load_config(name=name, reload=reload)
    if not hasattr(self, "_module") or reload:
        self.load_module(name=name, reload=reload)
    if self._telemetry:
        # NOTE(review): Hamilton telemetry is disabled when self._telemetry
        # is truthy — this looks inverted; confirm the intended semantics
        # of the `telemetry` flag before changing it.
        disable_telemetry()

    max_tasks = kwargs.pop("max_tasks", 20)
    num_cpus = kwargs.pop("num_cpus", 4)
    executor_, shutdown = get_executor(
        executor or "local", max_tasks=max_tasks, num_cpus=num_cpus
    )
    adapters = []
    if with_tracker:
        # Pipeline-level tracker settings are overlaid on project-level ones;
        # explicit kwargs win over either.
        tracker_cfg = {
            **self.cfg.pipeline.tracker.to_dict(),
            **self.cfg.project.tracker.to_dict(),
        }
        tracker_kwargs = {
            key: kwargs.pop(key, None) or tracker_cfg.get(key, None)
            for key in tracker_cfg
        }
        tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
        tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)

        if tracker_kwargs.get("project_id", None) is None:
            raise ValueError(
                "Please provide a project_id if you want to use the tracker"
            )

        tracker = HamiltonTracker(**tracker_kwargs)
        adapters.append(tracker)

    if with_opentelemetry and h_opentelemetry is not None:
        trace = init_tracer(
            host=kwargs.pop("host", "localhost"),
            port=kwargs.pop("port", 6831),
            name=f"{self.cfg.project.name}.{name}",
        )
        tracer = trace.get_tracer(__name__)
        adapters.append(h_opentelemetry.OpenTelemetryTracer(tracer=tracer))

    if with_progressbar:
        adapters.append(h_tqdm.ProgressBar(desc=f"{self.cfg.project.name}.{name}"))

    if executor == "future_adapter":
        adapters.append(FutureAdapter())

    dr = (
        driver.Builder()
        .enable_dynamic_execution(allow_experimental_mode=True)
        .with_modules(self._module)
        .with_config(config)
        .with_local_executor(executors.SynchronousLocalTaskExecutor())
    )

    if executor_ is not None:
        dr = dr.with_remote_executor(executor_)

    if len(adapters):
        dr = dr.with_adapters(*adapters)

    dr = dr.build()
    return dr, shutdown
|
284
|
-
|
285
|
-
def run(
    self,
    name: str,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    reload: bool = False,
    **kwargs,
) -> dict[str, Any]:
    """Run the pipeline with the given parameters.

    Args:
        name (str): The name of the pipeline.
        inputs (dict | None): Inputs for the pipeline; merged over (and
            overriding) the configured run inputs. Defaults to None.
        final_vars (list | None): Final variables for the pipeline. Defaults
            to the configured run final_vars when None.
        config (dict | None): Config for the Hamilton driver; merged over the
            configured run config. Defaults to None.
        executor (str | None): The executor to use. Defaults to None.
        with_tracker (bool | None): Whether to use a tracker. Defaults to None.
        with_opentelemetry (bool | None): Whether to use OpenTelemetry.
            Defaults to None.
        with_progressbar (bool | None): Whether to use a progress bar.
            Defaults to None.
        reload (bool): Whether to reload config and module. Defaults to False.
        **kwargs: Additional keyword arguments forwarded to the driver build.

    Returns:
        dict[str, Any]: The result of executing the pipeline.

    Examples:
        ```python
        pm = PipelineManager()
        final_vars = pm.run("my_pipeline")
        ```
    """
    if not self.cfg.pipeline.name == name or reload:
        self.load_config(name=name, reload=reload)

    if reload or not hasattr(self, "_module"):
        self.load_module(name=name, reload=reload)

    logger.info(
        f"Starting pipeline {self.cfg.project.name}.{name}"
    )  # in environment {environment}")

    run_params = self.cfg.pipeline.run

    final_vars = final_vars or run_params.final_vars
    inputs = {
        **(run_params.inputs or {}),
        **(inputs or {}),
    }  # <-- inputs override and adds to run_params
    config = {
        **(run_params.config or {}),
        **(config or {}),
    }
    # Fix: replaced the original eval()-based lookup with an explicit
    # mapping — call-site overrides win, otherwise the configured value.
    overrides = {
        "executor": executor,
        "with_tracker": with_tracker,
        "with_opentelemetry": with_opentelemetry,
        "with_progressbar": with_progressbar,
    }
    for arg, value in overrides.items():
        kwargs[arg] = value if value is not None else getattr(run_params, arg)

    kwargs["config"] = config

    dr, shutdown = self._get_driver(
        name=name,
        **kwargs,
    )

    res = dr.execute(final_vars=final_vars, inputs=inputs)

    logger.success(f"Finished pipeline {self.cfg.project.name}.{name}")

    if shutdown is not None:
        shutdown()

    return res
|
370
|
-
|
371
|
-
def run_job(
    self,
    name: str,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    reload: bool = False,
    **kwargs,
) -> dict[str, Any]:
    """Submit a run of the pipeline to the worker and wait for its result.

    The job is executed immediately and the result of the execution is
    returned.

    Args:
        name (str): The name of the job.
        inputs (dict | None): The inputs for the job. Defaults to None.
        final_vars (list | None): The final variables for the job. Defaults to None.
        config (dict | None): The configuration for the job. Defaults to None.
        executor (str | None): The executor to use for the job. Defaults to None.
        with_tracker (bool | None): Whether to use a tracker. Defaults to None.
        with_opentelemetry (bool | None): Whether to use OpenTelemetry. Defaults to None.
        with_progressbar (bool | None): Whether to use a progress bar. Defaults to None.
        reload (bool): Whether to reload the job. Defaults to False.
        **kwargs: Additional keyword arguments.

    Returns:
        dict[str, Any]: The result of the job execution.

    Raises:
        ValueError: If APScheduler4 is not installed.

    Examples:
        ```python
        pm = PipelineManager()
        final_vars = pm.run_job("my_job")
        ```
    """
    if SchedulerManager is None:
        raise ValueError(
            "APScheduler4 not installed. Please install it first. "
            "Run `pip install 'flowerpower[scheduler]'`."
        )

    with SchedulerManager(
        name=f"{self.cfg.project.name}.{name}",
        fs=self._fs,
        role="scheduler",
    ) as sm:
        # Fix: replaced the original eval()-based collection of locals with
        # an explicit dict of the call arguments.
        kwargs.update(
            {
                "name": name,
                "inputs": inputs,
                "final_vars": final_vars,
                "config": config,
                "executor": executor,
                "with_tracker": with_tracker,
                "with_opentelemetry": with_opentelemetry,
                "with_progressbar": with_progressbar,
                "reload": reload,
            }
        )
        # Only genuine APScheduler executors are passed through; anything
        # else (including "future_adapter") falls back to "threadpool".
        # (The original nested ternary always produced "threadpool" here.)
        job_executor = (
            executor
            if executor in ("async", "threadpool", "processpool", "")
            else "threadpool"
        )
        return sm.run_job(
            self.run,
            kwargs=kwargs,
            job_executor=job_executor,
        )
|
445
|
-
|
446
|
-
def add_job(
    self,
    name: str,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    reload: bool = False,
    result_expiration_time: float | dt.timedelta = 0,
    **kwargs,
) -> UUID:
    """Add a run of the pipeline to the worker data store.

    The job is executed immediately and the job id (UUID) is returned. The
    job result is kept in the data store for ``result_expiration_time`` and
    can be fetched using that id.

    Args:
        name (str): The name of the job.
        inputs (dict | None): The inputs for the job. Defaults to None.
        final_vars (list | None): The final variables for the job. Defaults to None.
        config (dict | None): The configuration for the job. Defaults to None.
        executor (str | None): The executor for the job. Defaults to None.
        with_tracker (bool | None): Whether to use a tracker. Defaults to None.
        with_opentelemetry (bool | None): Whether to use OpenTelemetry. Defaults to None.
        with_progressbar (bool | None): Whether to use a progress bar. Defaults to None.
        reload (bool): Whether to reload the job. Defaults to False.
        result_expiration_time (float | dt.timedelta): How long the result is
            retained in the data store. Defaults to 0.
        **kwargs: Additional keyword arguments.

    Returns:
        UUID: The UUID of the added job.

    Raises:
        ValueError: If APScheduler4 is not installed.

    Examples:
        ```python
        pm = PipelineManager()
        job_id = pm.add_job("my_job")
        ```
    """
    if SchedulerManager is None:
        raise ValueError(
            "APScheduler4 not installed. Please install it first. "
            "Run `pip install 'flowerpower[scheduler]'`."
        )

    with SchedulerManager(
        name=f"{self.cfg.project.name}.{name}",
        fs=self._fs,
        role="scheduler",
    ) as sm:
        # Fix: replaced the original eval()-based collection of locals with
        # an explicit dict of the call arguments.
        kwargs.update(
            {
                "name": name,
                "inputs": inputs,
                "final_vars": final_vars,
                "config": config,
                "executor": executor,
                "with_tracker": with_tracker,
                "with_opentelemetry": with_opentelemetry,
                "with_progressbar": with_progressbar,
                "reload": reload,
            }
        )
        # Only genuine APScheduler executors are passed through; anything
        # else (including "future_adapter") falls back to "threadpool".
        # (The original nested ternary always produced "threadpool" here.)
        job_executor = (
            executor
            if executor in ("async", "threadpool", "processpool", "")
            else "threadpool"
        )
        id_ = sm.add_job(
            self.run,
            kwargs=kwargs,
            job_executor=job_executor,
            result_expiration_time=result_expiration_time,
        )
        rich.print(
            f"✅ Successfully added job for "
            f"[blue]{self.cfg.project.name}.{name}[/blue] with ID [green]{id_}[/green]"
        )
        return id_
|
530
|
-
|
531
|
-
def schedule(
    self,
    name: str,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    trigger_type: str | None = None,
    id_: str | None = None,
    paused: bool = False,
    coalesce: str = "latest",
    misfire_grace_time: float | dt.timedelta | None = None,
    max_jitter: float | dt.timedelta | None = None,
    max_running_jobs: int | None = None,
    conflict_policy: str = "do_nothing",
    overwrite: bool = False,
    **kwargs,
) -> str:
    """Schedule a pipeline for execution.

    Args:
        name (str): The name of the pipeline.
        inputs (dict | None): The inputs for the pipeline. Defaults to None.
        final_vars (list | None): The final variables for the pipeline. Defaults to None.
        config (dict | None): The config for the Hamilton driver. Defaults to None.
        executor (str | None): The executor for running the pipeline. Defaults to None.
        with_tracker (bool | None): Whether to include a tracker. Defaults to None.
        with_opentelemetry (bool | None): Whether to include OpenTelemetry. Defaults to None.
        with_progressbar (bool | None): Whether to include a progress bar. Defaults to None.
        trigger_type (str | None): The type of trigger. Defaults to the configured trigger.
        id_ (str | None): The ID of the scheduled pipeline. Auto-generated when None.
        paused (bool): Whether the schedule starts paused. Defaults to False.
        coalesce (str): The coalesce strategy. Defaults to "latest".
        misfire_grace_time (float | dt.timedelta | None): Grace time for misfired jobs.
        max_jitter (float | dt.timedelta | None): Maximum seconds to randomly add.
        max_running_jobs (int | None): Maximum number of concurrently running jobs.
        conflict_policy (str): The conflict policy. Defaults to "do_nothing".
        overwrite (bool): Whether to overwrite an existing schedule with the
            same name. Defaults to False.
        **kwargs: Additional trigger keyword arguments.

    Returns:
        str: The ID of the scheduled pipeline.

    Raises:
        ValueError: If APScheduler4 is not installed.

    Examples:
        ```python
        pm = PipelineManager()
        schedule_id = pm.schedule("my_pipeline")
        ```
    """
    if SchedulerManager is None:
        raise ValueError(
            "APScheduler4 not installed. Please install it first. "
            "Run `pip install 'flowerpower[scheduler]'`."
        )

    if not self.cfg.pipeline.name == name:
        self.load_config(name=name)

    schedule_cfg = self.cfg.pipeline.schedule  # .copy()
    run_cfg = self.cfg.pipeline.run

    # Fix: explicit snapshot of call arguments replaces the original
    # eval(arg) lookups — call-site values win, configured values are the
    # fallback. Unknown config keys now fall back instead of raising
    # NameError.
    call_args = {
        "name": name,
        "inputs": inputs,
        "final_vars": final_vars,
        "config": config,
        "executor": executor,
        "with_tracker": with_tracker,
        "with_opentelemetry": with_opentelemetry,
        "with_progressbar": with_progressbar,
        "trigger_type": trigger_type,
        "id_": id_,
        "paused": paused,
        "coalesce": coalesce,
        "misfire_grace_time": misfire_grace_time,
        "max_jitter": max_jitter,
        "max_running_jobs": max_running_jobs,
        "conflict_policy": conflict_policy,
        "overwrite": overwrite,
    }

    kwargs.update(
        {arg: call_args.get(arg) or getattr(run_cfg, arg) for arg in run_cfg.to_dict()}
    )
    trigger_type = trigger_type or schedule_cfg.trigger.type_

    trigger_cfg = getattr(schedule_cfg.trigger, trigger_type)
    trigger_kwargs = {
        key: kwargs.pop(key, None) or getattr(trigger_cfg, key)
        for key in trigger_cfg.to_dict()
    }
    trigger_kwargs.pop("type_", None)

    schedule_kwargs = {
        arg: call_args.get(arg) or getattr(schedule_cfg.run, arg)
        for arg in schedule_cfg.run.to_dict()
    }
    executor = executor or schedule_cfg.run.executor
    # id_ = id_ or schedule_cfg.run.id_

    def _get_id() -> str:
        # Keep an explicit id; otherwise derive "<name>-<n>" from the
        # highest existing counter, or start at 1.
        if id_:
            return id_

        if overwrite:
            return f"{name}-1"

        ids = [schedule.id for schedule in self._get_schedules()]
        if any(name in existing for existing in ids):
            id_num = sorted(existing for existing in ids if name in existing)[-1].split("-")[-1]
            return f"{name}-{int(id_num) + 1}"
        return f"{name}-1"

    id_ = _get_id()

    schedule_kwargs.pop("executor", None)
    schedule_kwargs.pop("id_", None)

    with SchedulerManager(
        name=f"{self.cfg.project.name}.{name}",
        fs=self._fs,
        role="scheduler",
    ) as sm:
        trigger = get_trigger(type_=trigger_type, **trigger_kwargs)

        if overwrite:
            sm.remove_schedule(id_)

        # Only genuine APScheduler executors are passed through; anything
        # else falls back to "threadpool" (the original nested ternary
        # always produced "threadpool" here).
        id_ = sm.add_schedule(
            func_or_task_id=self.run,
            trigger=trigger,
            id=id_,
            args=(name,),  # inputs, final_vars, config, executor, with_tracker),
            kwargs=kwargs,
            job_executor=(
                executor
                if executor in ("async", "threadpool", "processpool", "")
                else "threadpool"
            ),
            **schedule_kwargs,
        )
        rich.print(
            f"✅ Successfully added schedule for "
            f"[blue]{self.cfg.project.name}.{name}[/blue] with ID [green]{id_}[/green]"
        )
        return id_
|
672
|
-
|
673
|
-
def schedule_all(
    self,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    trigger_type: str | None = None,
    id_: str | None = None,
    paused: bool = False,
    coalesce: str = "latest",
    misfire_grace_time: float | dt.timedelta | None = None,
    max_jitter: float | dt.timedelta | None = None,
    max_running_jobs: int | None = None,
    conflict_policy: str = "do_nothing",
    overwrite: bool = False,
    **kwargs,
):
    """Schedule every known pipeline with one shared set of options.

    All arguments are forwarded unchanged to :meth:`schedule` for each
    pipeline name returned by ``self._get_names()``.
    """
    shared_options = dict(
        inputs=inputs,
        final_vars=final_vars,
        config=config,
        executor=executor,
        with_tracker=with_tracker,
        with_opentelemetry=with_opentelemetry,
        with_progressbar=with_progressbar,
        trigger_type=trigger_type,
        id_=id_,
        paused=paused,
        coalesce=coalesce,
        misfire_grace_time=misfire_grace_time,
        max_jitter=max_jitter,
        max_running_jobs=max_running_jobs,
        conflict_policy=conflict_policy,
        overwrite=overwrite,
    )
    for pipeline_name in self._get_names():
        self.schedule(name=pipeline_name, **shared_options, **kwargs)
|
715
|
-
|
716
|
-
def new(
    self,
    name: str,
    overwrite: bool = False,
):
    """
    Add a pipeline with the given name.

    Creates the pipeline module from the template, an empty pipeline
    configuration, and a hooks directory. Refuses to clobber existing
    artefacts unless ``overwrite`` is True.

    Args:
        name (str): The name of the pipeline. Dots are treated as path
            separators (e.g. ``"a.b"`` becomes ``a/b.py``).
        overwrite (bool, optional): Whether to overwrite an existing pipeline
            with the same name. Defaults to False.

    Returns:
        None

    Raises:
        ValueError: If the configuration path or pipeline path does not
            exist, or if the pipeline already exists and ``overwrite`` is
            False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.new("my_pipeline")
        ```
    """
    if not self._fs.exists(self._cfg_dir):
        raise ValueError(
            f"Configuration path {self._cfg_dir} does not exist. Please run flowerpower init first."
        )
    if not self._fs.exists(self._pipelines_dir):
        raise ValueError(
            f"Pipeline path {self._pipelines_dir} does not exist. Please run flowerpower init first."
        )

    formatted_name = name.replace(".", "/")
    pipeline_path = f"{self._pipelines_dir}/{formatted_name}.py"
    cfg_path = f"{self._cfg_dir}/pipelines/{formatted_name}.yml"
    hook_path = f"hooks/{formatted_name}"

    # One loop instead of three copy-pasted checks; fixes the typos
    # ("alreads exists", missing space before "to overwrite") and the
    # Python-3.12-only same-quote f-string nesting of the original.
    for existing_path in (pipeline_path, cfg_path, hook_path):
        if self._fs.exists(existing_path):
            if overwrite:
                # The hooks path is a directory, hence recursive removal.
                self._fs.rm(existing_path, recursive=existing_path == hook_path)
            else:
                raise ValueError(
                    f"Pipeline {self.cfg.project.name}.{formatted_name} already exists. "
                    "Use `overwrite=True` to overwrite."
                )

    self._fs.makedirs(pipeline_path.rsplit("/", 1)[0], exist_ok=True)
    self._fs.makedirs(cfg_path.rsplit("/", 1)[0], exist_ok=True)
    self._fs.makedirs(hook_path, exist_ok=True)

    with self._fs.open(pipeline_path, "w") as f:
        f.write(
            PIPELINE_PY_TEMPLATE.format(
                name=name,
                date=dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            )
        )

    self.cfg.pipeline = PipelineConfig(name=name)
    self.cfg.save()

    rich.print(
        f"🔧 Created new pipeline [bold blue]{self.cfg.project.name}.{name}[/bold blue]"
    )
|
797
|
-
|
798
|
-
def import_pipeline(
    self,
    name: str,
    path: str,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Import a pipeline from a given path.

    The path could be a local path or a remote path like an S3 bucket or
    GitHub repository. Any readable fsspec filesystem is supported.

    Args:
        name (str): The name of the pipeline.
        path (str): The path to import the pipeline from.
        cfg_dir (str, optional): The configuration directory at the source.
            Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory at the source.
            Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional):
            The storage options. Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use.
            Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline
            with the same name. Defaults to False.

    Raises:
        ValueError: If the source paths do not exist, or if the pipeline
            already exists locally and ``overwrite`` is False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.import_pipeline(
            "my_pipeline",
            "s3://bucket/path",
            storage_options={
                "key": "my_key",
                "secret": "my_secret",
                "endpoint_url": "http://minio:9000"
            }
        )
        ```
    """
    if fs is not None:
        fs = get_filesystem(path, fs=fs)
    else:
        # storage_options defaults to None; unpack an empty mapping then
        # (the original `**storage_options` raised TypeError on None).
        fs = get_filesystem(path, **(storage_options or {}))

    conf_path = f"{fs.fs.protocol}://{fs.path}/{cfg_dir}"
    pipeline_path = f"{fs.fs.protocol}://{fs.path}/{pipelines_dir}"
    if not fs.exists(cfg_dir):
        raise ValueError(f"Configuration path {conf_path} does not exist.")
    if not fs.exists(pipelines_dir):
        raise ValueError(f"Pipeline path {pipeline_path} does not exist.")

    formatted_name = name.replace(".", "/")

    # Check the *local* destination paths (self._pipelines_dir/self._cfg_dir),
    # which are where the files are written below. The original checked the
    # source-relative directories on the local filesystem instead.
    if self._fs.exists(f"{self._pipelines_dir}/{formatted_name}.py"):
        if overwrite:
            self._fs.rm(f"{self._pipelines_dir}/{formatted_name}.py")
        else:
            raise ValueError(
                f"Pipeline {name} already exists at {self._fs.fs.protocol}://{fs.path}. "
                "Use `overwrite=True` to overwrite."
            )
    if self._fs.exists(f"{self._cfg_dir}/pipelines/{formatted_name}.yml"):
        if overwrite:
            self._fs.rm(f"{self._cfg_dir}/pipelines/{formatted_name}.yml")
        else:
            raise ValueError(
                f"Pipeline {name} already exists at {self._fs.fs.protocol}://{fs.path}. "
                "Use `overwrite=True` to overwrite."
            )

    # Copy module and config byte-for-byte from the source filesystem.
    self._fs.write_bytes(
        f"{self._pipelines_dir}/{formatted_name}.py",
        fs.read_bytes(f"{pipelines_dir}/{formatted_name}.py"),
    )
    self._fs.write_bytes(
        f"{self._cfg_dir}/pipelines/{formatted_name}.yml",
        fs.read_bytes(f"{cfg_dir}/pipelines/{formatted_name}.yml"),
    )

    rich.print(
        f"🔧 Imported pipeline [bold blue]{name}[/bold blue] from {fs.fs.protocol}://{fs.path}"
    )
|
887
|
-
|
888
|
-
def import_many(
    self,
    names: list[str],
    path: str,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Import several pipelines from a given path.

    The path could be a local path or a remote path like an S3 bucket or
    GitHub repository. Any readable fsspec filesystem is supported. Each
    name is delegated to :meth:`import_pipeline` with identical options.

    Args:
        names (list[str]): The names of the pipelines.
        path (str): The path to import the pipelines from.
        cfg_dir (str, optional): The configuration directory. Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional):
            The storage options. Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use.
            Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline
            with the same name. Defaults to False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.import_many(
            ["my_pipeline", "my_pipeline2"],
            "s3://bucket/path",
            storage_options={
                "key": "my_key",
                "secret": "my_secret",
                "endpoint_url": "http://minio:9000"
            }
        )
        ```
    """
    # Forward the same option set for every requested pipeline.
    for pipeline_name in names:
        self.import_pipeline(
            name=pipeline_name,
            path=path,
            cfg_dir=cfg_dir,
            pipelines_dir=pipelines_dir,
            storage_options=storage_options,
            fs=fs,
            overwrite=overwrite,
        )
|
938
|
-
|
939
|
-
def import_all(
    self,
    path: str,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Import all pipelines from a given path.

    The path could be a local path or a remote path like an S3 bucket or
    GitHub repository. Any readable fsspec filesystem is supported.

    Args:
        path (str): The path to import the pipelines from.
        cfg_dir (str, optional): The configuration directory. Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional):
            The storage options. Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use.
            Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline
            with the same name. Defaults to False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.import_all(
            "s3://bucket/path",
            storage_options={
                "key": "my_key",
                "secret": "my_secret",
                "endpoint_url": "http://minio:9000"
            }
        )
        ```
    """
    # The original called fs.glob on a possibly-None fs; build one from the
    # path/storage_options when the caller did not supply a filesystem.
    if fs is None:
        fs = get_filesystem(path, **(storage_options or {}))

    # Map "pipelines/a/b.py" -> "a.b". `removesuffix` replaces the
    # original nonexistent `.rstric(".py")` call (AttributeError).
    names = [
        fn.replace(pipelines_dir, "").lstrip("/").removesuffix(".py").replace("/", ".")
        for fn in fs.glob(f"{pipelines_dir}/**/*.py")
    ]
    self.import_many(
        path=path,
        names=names,
        cfg_dir=cfg_dir,
        pipelines_dir=pipelines_dir,
        storage_options=storage_options,
        fs=fs,
        overwrite=overwrite,
    )
|
989
|
-
|
990
|
-
def export(
    self,
    name: str,
    path: str,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Export a pipeline to a given path.

    The path could be a local path or a remote path like an S3 bucket or
    GitHub repository. Any writable fsspec filesystem is supported.

    Args:
        name (str): The name of the pipeline.
        path (str): The path to export the pipeline to.
        cfg_dir (str, optional): The configuration directory at the target.
            Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory at the target.
            Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional):
            The storage options. Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use.
            Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline
            with the same name. Defaults to False.

    Raises:
        ValueError: If the pipeline already exists at the target and
            ``overwrite`` is False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.export(
            "my_pipeline",
            "s3://bucket/path",
            storage_options={
                "key": "my_key",
                "secret": "my_secret",
                "endpoint_url": "http://minio:9000"
            }
        )
        ```
    """
    # storage_options defaults to None; the original `**storage_options`
    # raised TypeError when no options were supplied.
    fs = fs or get_filesystem(path, **(storage_options or {}))

    remote_module = posixpath.join(pipelines_dir, name.replace(".", "/") + ".py")
    remote_config = posixpath.join(
        cfg_dir, "pipelines", name.replace(".", "/") + ".yml"
    )

    # Refuse to clobber existing target files unless overwrite is set.
    for target in (remote_module, remote_config):
        if fs.exists(target):
            if overwrite:
                fs.rm(target)
            else:
                raise ValueError(
                    f"Pipeline {name} already exists at {fs.fs.protocol}://{fs.path}. "
                    "Use `overwrite=True` to overwrite."
                )

    fs.put_file(
        posixpath.join(self._pipelines_dir, name.replace(".", "/") + ".py"),
        remote_module,
    )
    fs.put_file(
        posixpath.join(self._cfg_dir, "pipelines", name.replace(".", "/") + ".yml"),
        remote_config,
    )

    rich.print(
        f"🔧 Exported pipeline [bold blue]{name}[/bold blue] to {fs.fs.protocol}://{fs.path}"
    )
|
1068
|
-
|
1069
|
-
def export_many(
    self,
    path: str,
    names: list[str],
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Export several pipelines to a given path.

    The path could be a local path or a remote path like an S3 bucket or
    GitHub repository. Any writable fsspec filesystem is supported. Each
    name is delegated to :meth:`export` with identical options.

    Args:
        path (str): The path to export the pipelines to.
        names (list[str]): The names of the pipelines.
        cfg_dir (str, optional): The configuration directory. Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional):
            The storage options. Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use.
            Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline
            with the same name. Defaults to False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.export_many(
            "s3://bucket/path",
            ["my_pipeline", "my_pipeline2"],
            storage_options={
                "key": "my_key",
                "secret": "my_secret",
                "endpoint_url": "http://minio:9000"
            }
        )
        ```
    """
    # Forward the same option set for every requested pipeline.
    for pipeline_name in names:
        self.export(
            name=pipeline_name,
            path=path,
            cfg_dir=cfg_dir,
            pipelines_dir=pipelines_dir,
            storage_options=storage_options,
            fs=fs,
            overwrite=overwrite,
        )
|
1118
|
-
|
1119
|
-
def export_all(
    self,
    path: str,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Export all pipelines to a given path.

    The path could be a local path or a remote path like an S3 bucket or
    GitHub repository. Any writable fsspec filesystem is supported.

    Args:
        path (str): The path to export the pipelines to.
        cfg_dir (str, optional): The configuration directory. Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional):
            The storage options. Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use.
            Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline
            with the same name. Defaults to False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.export_all(
            "s3://bucket/path",
            storage_options={
                "key": "my_key",
                "secret": "my_secret",
                "endpoint_url": "http://minio:9000"
            }
        )
        ```
    """
    # Map "<pipelines_dir>/a/b.py" -> "a.b". `removesuffix` replaces the
    # original nonexistent `.rstric(".py")` call (AttributeError).
    names = [
        fn.replace(self._pipelines_dir, "")
        .lstrip("/")
        .removesuffix(".py")
        .replace("/", ".")
        for fn in self._fs.glob(f"{self._pipelines_dir}/**/*.py")
    ]
    self.export_many(
        path=path,
        names=names,
        cfg_dir=cfg_dir,
        pipelines_dir=pipelines_dir,
        storage_options=storage_options,
        fs=fs,
        overwrite=overwrite,
    )
|
1171
|
-
|
1172
|
-
def delete(self, name: str, cfg: bool = True, module: bool = False, hooks: bool = True):
    """
    Delete a pipeline's artefacts.

    Args:
        name (str): The name of the pipeline to delete.
        cfg (bool, optional): Whether to delete the pipeline configuration.
            Defaults to True.
        module (bool, optional): Whether to delete the pipeline module file.
            Defaults to False.
        hooks (bool, optional): Whether to delete the pipeline's hooks.
            Defaults to True.

    Returns:
        None

    Examples:
        ```python
        pm = PipelineManager()
        pm.delete("my_pipeline")
        ```
    """
    cfg_file = f"{self._cfg_dir}/pipelines/{name}.yml"
    if cfg and self._fs.exists(cfg_file):
        self._fs.rm(cfg_file)
        rich.print(f"🗑️ Deleted pipeline config for {name}")

    module_file = f"{self._pipelines_dir}/{name}.py"
    if module and self._fs.exists(module_file):
        self._fs.rm(module_file)
        rich.print(
            f"🗑️ Deleted pipeline module for {self.cfg.project.name}.{name}"
        )

    hooks_dir = f"hooks/{name}/"
    if hooks and self._fs.exists(hooks_dir):
        # Hooks live in a directory, so remove it recursively.
        self._fs.rm(hooks_dir, recursive=True)
        rich.print(
            f"🗑️ Deleted pipeline hooks for {self.cfg.project.name}.{name}"
        )
|
1210
|
-
|
1211
|
-
|
1212
|
-
def _display_all_function(self, name: str, reload: bool = True, config: dict | None = None):
    """Build a driver for *name* and return its rendered function graph.

    Args:
        name (str): The pipeline name.
        reload (bool, optional): Whether to reload the module. Defaults to True.
        config (dict | None, optional): Driver configuration. Defaults to None.

    Returns:
        The graph object produced by the driver's ``display_all_functions``.
    """
    driver, _ = self._get_driver(
        name=name, executor=None, with_tracker=False, reload=reload, config=config
    )
    return driver.display_all_functions()
|
1217
|
-
|
1218
|
-
def save_dag(
    self,
    name: str,
    format: str = "png",
    reload: bool = False,
    config: dict | None = None,
):
    """
    Save an image of the function graph for a given pipeline.

    Args:
        name (str): The name of the pipeline.
        format (str, optional): The format of the graph file. Defaults to "png".
        reload (bool, optional): Whether to reload the graph data. Defaults to False.
        config (dict | None, optional): Driver configuration. Defaults to None.

    Returns:
        None

    Examples:
        ```python
        pm = PipelineManager()
        pm.save_dag("my_pipeline")
        ```
    """
    graph = self._display_all_function(name=name, reload=reload, config=config)

    # Ensure the output directory exists before rendering into it.
    self._fs.makedirs("graphs", exist_ok=True)
    graph.render(
        posixpath.join(self._base_dir, f"graphs/{name}"),
        format=format,
        cleanup=True,
    )
    rich.print(
        f"📊 Saved graph for {name} to {self._base_dir}/graphs/{name}.{format}"
    )
|
1253
|
-
|
1254
|
-
def show_dag(
    self,
    name: str,
    format: str = "png",
    reload: bool = False,
    raw: bool = False,
    config: dict | None = None,
):
    """
    Display the function graph for a given pipeline.

    When ``raw`` is True the graph object itself is returned instead of being
    shown. The chosen ``format`` determines which application displays it.

    Args:
        name (str): The name of the pipeline.
        format (str, optional): The format of the graph file. Defaults to "png".
        reload (bool, optional): Whether to reload the graph data. Defaults to False.
        raw (bool, optional): Whether to return the graph object. Defaults to False.
        config (dict | None, optional): Driver configuration. Defaults to None.

    Returns:
        The graph object when ``raw`` is True; otherwise None.

    Examples:
        ```python
        pm = PipelineManager()
        pm.show_dag("my_pipeline")
        ```
    """
    graph = self._display_all_function(name=name, reload=reload, config=config)
    if raw:
        return graph
    # Render to bytes and hand them to the image viewer.
    view_img(graph.pipe(format), format=format)
|
1286
|
-
|
1287
|
-
def _get_files(self) -> list[str]:
    """
    List the pipeline module files.

    Returns:
        list[str]: Paths of all ``.py`` entries in the pipelines directory.
    """
    entries = self._fs.ls(self._pipelines_dir)
    return [entry for entry in entries if entry.endswith(".py")]
|
1295
|
-
|
1296
|
-
def _get_names(self) -> list[str]:
    """
    List the pipeline names.

    Returns:
        list[str]: Pipeline names derived from the module file basenames
        (extension stripped).
    """
    names = []
    for file_path in self._get_files():
        stem, _ = posixpath.splitext(posixpath.basename(file_path))
        names.append(stem)
    return names
|
1304
|
-
|
1305
|
-
def get_summary(
    self, name: str | None = None, cfg: bool = True, module: bool = True
) -> dict[str, dict | str]:
    """
    Get a summary of the pipelines.

    Args:
        name (str | None, optional): The name of the pipeline. Defaults to None
            (summarize all pipelines).
        cfg (bool, optional): Whether to include the configuration. Defaults to True.
        module (bool, optional): Whether to include the module source. Defaults to True.

    Returns:
        dict[str, dict | str]: Mapping of pipeline name to a dict with the
        requested ``"cfg"`` and/or ``"module"`` entries.

    Examples:
        ```python
        pm = PipelineManager()
        summary = pm.get_summary()
        ```
    """
    pipeline_names = [name] if name else self._get_names()

    pipeline_summary = {}
    for pipeline_name in pipeline_names:
        self.load_config(pipeline_name)
        # Build the entry first, then assign — the original only created
        # the key under `if cfg:` and crashed with KeyError when
        # cfg=False and module=True.
        entry: dict[str, dict | str] = {}
        if cfg:
            entry["cfg"] = self.cfg.pipeline.to_dict()
        if module:
            entry["module"] = self._fs.cat(
                f"{self._pipelines_dir}/{pipeline_name}.py"
            ).decode()
        pipeline_summary[pipeline_name] = entry
    return pipeline_summary
|
1343
|
-
|
1344
|
-
def show_summary(
    self,
    name: str | None = None,
    cfg: bool = True,
    module: bool = True,
    to_html: bool = False,
    to_svg: bool = False,
) -> None | str:
    """
    Show a summary of the pipelines.

    Args:
        name (str | None, optional): The name of the pipeline. Defaults to None.
        cfg (bool, optional): Whether to show the configuration. Defaults to True.
        module (bool, optional): Whether to show the module. Defaults to True.
        to_html (bool, optional): Whether to export the summary to HTML. Defaults to False.
        to_svg (bool, optional): Whether to export the summary to SVG. Defaults to False.

    Returns:
        None | str: The summary of the pipelines. If `to_html` is True, returns the
            HTML string. If `to_svg` is True, returns the SVG string.

    Examples:
        ```python
        pm = PipelineManager()
        pm.show_summary()
        ```
    """
    pipeline_summary = self.get_summary(name=name, cfg=cfg, module=module)

    def add_dict_to_tree(tree, dict_data, style="green"):
        # Recursively mirror a nested dict as a rich Tree.
        for key, value in dict_data.items():
            if isinstance(value, dict):
                branch = tree.add(f"[cyan]{key}:", style="bold cyan")
                add_dict_to_tree(branch, value, style)
            else:
                tree.add(f"[cyan]{key}:[/] [green]{value}[/]")

    # record=True is required for export_html/export_svg to work
    # (matches the Console usage in _all_pipelines).
    console = Console(record=True)
    for pipeline, info in pipeline_summary.items():
        # Only touch info["cfg"]/info["module"] when the corresponding flag
        # is set — get_summary omits the keys otherwise (the original
        # accessed both unconditionally and raised KeyError).
        if cfg:
            config_tree = Tree("📋 Configuration", style="bold magenta")
            add_dict_to_tree(config_tree, info["cfg"])
            console.print(
                Panel(
                    config_tree,
                    title=f"🔄 Pipeline: {pipeline}",
                    subtitle="Configuration",
                    border_style="blue",
                    padding=(2, 2),
                )
            )
            console.print("\n")

        if module:
            code_view = Syntax(
                info["module"],
                "python",
                theme="default",
                line_numbers=False,
                word_wrap=True,
                code_width=80,
                padding=2,
            )
            console.print(
                Panel(
                    code_view,
                    title=f"🔄 Pipeline: {pipeline}",
                    subtitle="Module",
                    border_style="blue",
                    padding=(2, 2),
                )
            )
            console.print("\n")
    if to_html:
        return console.export_html()
    elif to_svg:
        return console.export_svg()
|
1429
|
-
|
1430
|
-
@property
def summary(self) -> dict[str, dict | str]:
    """
    Get a summary of the pipelines.

    Convenience property equivalent to calling :meth:`get_summary` with its
    default arguments (all pipelines, configuration and module included).

    Returns:
        dict: A dictionary containing the pipeline summary.
    """
    return self.get_summary()
|
1439
|
-
|
1440
|
-
def _all_pipelines(
    self, show: bool = True, to_html: bool = False, to_svg: bool = False
) -> list[str] | None:
    """
    Print all available pipelines in a formatted table, or return their info.

    Args:
        show (bool, optional): Whether to print the table. Defaults to True.
            Forced to True when ``to_html`` or ``to_svg`` is set.
        to_html (bool, optional): Whether to export the table to HTML. Defaults to False.
        to_svg (bool, optional): Whether to export the table to SVG. Defaults to False.

    Returns:
        When ``show`` is False, a list of per-pipeline info dicts with keys
        ``name``, ``path``, ``mod_time`` and ``size`` (NOTE(review): the
        declared ``list[str]`` annotation does not match this — confirm
        intended return type). When exporting, the HTML/SVG string.
        Otherwise None.

    Examples:
        ```python
        pm = PipelineManager()
        all_pipelines = pm._pipelines(show=False)
        ```
    """
    # Exporting requires the table to be rendered first.
    if to_html or to_svg:
        show = True

    pipeline_files = [
        f for f in self._fs.ls(self._pipelines_dir) if f.endswith(".py")
    ]
    # "pipelines/a/b.py" -> "a.b"
    pipeline_names = [
        posixpath.splitext(f)[0]
        .replace(self._pipelines_dir, "")
        .lstrip("/")
        .replace("/", ".")
        for f in pipeline_files
    ]

    if not pipeline_files:
        rich.print("[yellow]No pipelines found[/yellow]")
        return

    pipeline_info = []

    for path, name in zip(pipeline_files, pipeline_names):
        # path = posixpath.join( f)
        # Some fsspec backends do not implement modified(); degrade gracefully.
        try:
            mod_time = self._fs.modified(path).strftime("%Y-%m-%d %H:%M:%S")
        except NotImplementedError:
            mod_time = "N/A"
        size = f"{self._fs.size(path) / 1024:.1f} KB"
        pipeline_info.append(
            {"name": name, "path": path, "mod_time": mod_time, "size": size}
        )

    if show:
        table = Table(title="Available Pipelines")
        table.add_column("Pipeline Name", style="blue")
        table.add_column("Path", style="magenta")
        table.add_column("Last Modified", style="green")
        table.add_column("Size", style="cyan")

        for info in pipeline_info:
            table.add_row(
                info["name"], info["path"], info["mod_time"], info["size"]
            )
        # record=True enables export_html/export_svg below.
        console = Console(record=True)
        console.print(table)
        if to_html:
            return console.export_html()
        elif to_svg:
            return console.export_svg()

    else:
        return pipeline_info
|
1511
|
-
|
1512
|
-
def show_pipelines(self) -> None:
    """
    Print all available pipelines in a formatted table.

    Thin wrapper around :meth:`_all_pipelines` with printing enabled.

    Examples:
        ```python
        pm = PipelineManager()
        pm.show_pipelines()
        ```
    """
    self._all_pipelines(show=True)
|
1523
|
-
|
1524
|
-
def list_pipelines(self) -> list[str]:
    """
    Get a list of all available pipelines.

    Returns:
        list[str] | None: A list of pipeline entries. NOTE(review):
            `_all_pipelines(show=False)` returns per-pipeline info dicts,
            not plain name strings — confirm the intended return type.

    Examples:
        ```python
        pm = PipelineManager()
        pipelines = pm.list_pipelines()
        ```
    """
    return self._all_pipelines(show=False)
|
1538
|
-
|
1539
|
-
@property
def pipelines(self) -> list[str]:
    """
    Get a list of all available pipelines.

    Returns:
        list[str] | None: A list of pipeline entries. NOTE(review):
            `_all_pipelines(show=False)` returns per-pipeline info dicts,
            not plain name strings — confirm the intended return type.

    Examples:
        ```python
        pm = PipelineManager()
        pipelines = pm.pipelines
        ```
    """
    return self._all_pipelines(show=False)
|
1554
|
-
|
1555
|
-
def add_hook(self, name: str, type: HookType, to: str | None = None, function_name: str|None = None):
|
1556
|
-
"""
|
1557
|
-
Add a hook to the pipeline module.
|
1558
|
-
|
1559
|
-
Args:
|
1560
|
-
name (str): The name of the pipeline
|
1561
|
-
type (HookType): The type of the hook.
|
1562
|
-
to (str | None, optional): The name of the file to add the hook to. Defaults to the hook.py file in the pipelines hooks folder.
|
1563
|
-
function_name (str | None, optional): The name of the function. If not provided uses default name of hook type.
|
1564
|
-
|
1565
|
-
Returns:
|
1566
|
-
None
|
1567
|
-
|
1568
|
-
Examples:
|
1569
|
-
```python
|
1570
|
-
pm = PipelineManager()
|
1571
|
-
pm.add_hook(HookType.PRE_EXECUTE)
|
1572
|
-
```
|
1573
|
-
"""
|
1574
|
-
|
1575
|
-
|
1576
|
-
if to is None:
|
1577
|
-
to = f"hooks/{name}/hook.py"
|
1578
|
-
else:
|
1579
|
-
to = f"hooks/{name}/{to}"
|
1580
|
-
|
1581
|
-
match type:
|
1582
|
-
case HookType.MQTT_BUILD_CONFIG:
|
1583
|
-
template = HOOK_TEMPLATE__MQTT_BUILD_CONFIG
|
1584
|
-
|
1585
|
-
if function_name is None:
|
1586
|
-
function_name = type.default_function_name()
|
1587
|
-
|
1588
|
-
if not self._fs.exists(to):
|
1589
|
-
self._fs.makedirs(os.path.dirname(to), exist_ok=True)
|
1590
|
-
|
1591
|
-
with self._fs.open(to, "a") as f:
|
1592
|
-
f.write(
|
1593
|
-
template.format(
|
1594
|
-
function_name=function_name
|
1595
|
-
)
|
1596
|
-
)
|
1597
|
-
|
1598
|
-
rich.print(f"🔧 Added hook [bold blue]{type.value}[/bold blue] to {to} as {function_name} for {name}")
|
1599
|
-
|
1600
|
-
|
1601
|
-
class Pipeline:
|
1602
|
-
def __init__(
|
1603
|
-
self,
|
1604
|
-
name: str,
|
1605
|
-
base_dir: str | None = None,
|
1606
|
-
storage_options: dict | Munch | BaseStorageOptions = {},
|
1607
|
-
fs: AbstractFileSystem | None = None,
|
1608
|
-
):
|
1609
|
-
"""
|
1610
|
-
Initializes the Pipeline object.
|
1611
|
-
|
1612
|
-
Args:
|
1613
|
-
name (str): The name of the pipeline.
|
1614
|
-
base_dir (str | None): The flowerpower base path. Defaults to None.
|
1615
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
1616
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
1617
|
-
|
1618
|
-
Returns:
|
1619
|
-
None
|
1620
|
-
"""
|
1621
|
-
# super().__init__(base_dir=base_dir, storage_options=storage_options, fs=fs)
|
1622
|
-
self.name = name
|
1623
|
-
self._base_dir = base_dir or os.getcwd()
|
1624
|
-
self._storage_options = storage_options or {}
|
1625
|
-
if fs is None:
|
1626
|
-
fs = get_filesystem(self._base_dir, **self._storage_options)
|
1627
|
-
self._fs = fs
|
1628
|
-
# self.load_module()
|
1629
|
-
# self.load_config(name)
|
1630
|
-
|
1631
|
-
def __enter__(self) -> "PipelineManager":
|
1632
|
-
return self
|
1633
|
-
|
1634
|
-
def __exit__(
|
1635
|
-
self,
|
1636
|
-
exc_type: type[BaseException] | None,
|
1637
|
-
exc_val: BaseException | None,
|
1638
|
-
exc_tb: TracebackType | None,
|
1639
|
-
) -> None:
|
1640
|
-
# Add any cleanup code here if needed
|
1641
|
-
pass
|
1642
|
-
|
1643
|
-
def run(
|
1644
|
-
self,
|
1645
|
-
inputs: dict | None = None,
|
1646
|
-
final_vars: list | None = None,
|
1647
|
-
config: dict | None = None,
|
1648
|
-
executor: str | None = None,
|
1649
|
-
with_tracker: bool = False,
|
1650
|
-
with_opentelemetry: bool = False,
|
1651
|
-
with_progressbar: bool = False,
|
1652
|
-
reload: bool = False,
|
1653
|
-
**kwargs,
|
1654
|
-
) -> dict[str, Any]:
|
1655
|
-
"""Run the pipeline.
|
1656
|
-
|
1657
|
-
Args:
|
1658
|
-
inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
|
1659
|
-
final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
|
1660
|
-
config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
|
1661
|
-
Defaults to None.
|
1662
|
-
executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
|
1663
|
-
with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
|
1664
|
-
with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline.
|
1665
|
-
Defaults to False.
|
1666
|
-
with_progressbar (bool, optional): Whether to include a progress bar for the pipeline.
|
1667
|
-
reload (bool, optional): Whether to reload the pipeline. Defaults to False.
|
1668
|
-
|
1669
|
-
Returns:
|
1670
|
-
dict[str, Any]: The final variables for the pipeline.
|
1671
|
-
|
1672
|
-
Examples:
|
1673
|
-
```python
|
1674
|
-
p = Pipeline("my_pipeline")
|
1675
|
-
final_vars = p.run()
|
1676
|
-
```
|
1677
|
-
"""
|
1678
|
-
with PipelineManager(
|
1679
|
-
base_dir=self._base_dir,
|
1680
|
-
fs=self._fs,
|
1681
|
-
) as pm:
|
1682
|
-
return pm.run(
|
1683
|
-
name=self.name,
|
1684
|
-
executor=executor,
|
1685
|
-
inputs=inputs,
|
1686
|
-
final_vars=final_vars,
|
1687
|
-
config=config,
|
1688
|
-
with_tracker=with_tracker,
|
1689
|
-
with_opentelemetry=with_opentelemetry,
|
1690
|
-
with_progressbar=with_progressbar,
|
1691
|
-
reload=reload,
|
1692
|
-
**kwargs,
|
1693
|
-
)
|
1694
|
-
|
1695
|
-
def run_job(
|
1696
|
-
self,
|
1697
|
-
inputs: dict | None = None,
|
1698
|
-
final_vars: list | None = None,
|
1699
|
-
config: dict | None = None,
|
1700
|
-
executor: str | None = None,
|
1701
|
-
with_tracker: bool | None = None,
|
1702
|
-
with_opentelemetry: bool | None = None,
|
1703
|
-
with_progressbar: bool | None = None,
|
1704
|
-
**kwargs,
|
1705
|
-
) -> dict[str, Any]:
|
1706
|
-
"""Run the pipeline as a job.
|
1707
|
-
|
1708
|
-
Args:
|
1709
|
-
inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
|
1710
|
-
final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
|
1711
|
-
config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
|
1712
|
-
Defaults to None.
|
1713
|
-
executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
|
1714
|
-
with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
|
1715
|
-
with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
|
1716
|
-
Defaults to None.
|
1717
|
-
with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
|
1718
|
-
Defaults to None.
|
1719
|
-
|
1720
|
-
Returns:
|
1721
|
-
dict[str, Any]: The final variables for the pipeline.
|
1722
|
-
|
1723
|
-
Examples:
|
1724
|
-
```python
|
1725
|
-
p = Pipeline("my_pipeline")
|
1726
|
-
final_vars = p.run_job()
|
1727
|
-
```
|
1728
|
-
"""
|
1729
|
-
with PipelineManager(
|
1730
|
-
base_dir=self._base_dir,
|
1731
|
-
fs=self._fs,
|
1732
|
-
) as pm:
|
1733
|
-
return pm.run_job(
|
1734
|
-
name=self.name,
|
1735
|
-
executor=executor,
|
1736
|
-
inputs=inputs,
|
1737
|
-
final_vars=final_vars,
|
1738
|
-
config=config,
|
1739
|
-
with_tracker=with_tracker,
|
1740
|
-
with_opentelemetry=with_opentelemetry,
|
1741
|
-
with_progressbar=with_progressbar,
|
1742
|
-
**kwargs,
|
1743
|
-
)
|
1744
|
-
|
1745
|
-
def add_job(
|
1746
|
-
self,
|
1747
|
-
inputs: dict | None = None,
|
1748
|
-
final_vars: list | None = None,
|
1749
|
-
config: dict | None = None,
|
1750
|
-
executor: str | None = None,
|
1751
|
-
with_tracker: bool | None = None,
|
1752
|
-
with_opentelemetry: bool | None = None,
|
1753
|
-
with_progressbar: bool | None = None,
|
1754
|
-
result_expiration_time: float | dt.timedelta = 0,
|
1755
|
-
**kwargs,
|
1756
|
-
) -> UUID:
|
1757
|
-
"""Add a job for the pipeline.
|
1758
|
-
|
1759
|
-
Args:
|
1760
|
-
inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
|
1761
|
-
final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
|
1762
|
-
config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
|
1763
|
-
Defaults to None.
|
1764
|
-
executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
|
1765
|
-
with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
|
1766
|
-
with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
|
1767
|
-
Defaults to None.
|
1768
|
-
with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
|
1769
|
-
result_expiration_time (float | dt.timedelta, optional): The result expiration time. Defaults to 0.
|
1770
|
-
|
1771
|
-
Returns:
|
1772
|
-
UUID: The job ID.
|
1773
|
-
|
1774
|
-
Examples:
|
1775
|
-
```python
|
1776
|
-
p = Pipeline("my_pipeline")
|
1777
|
-
job_id = p.add_job()
|
1778
|
-
```
|
1779
|
-
"""
|
1780
|
-
with PipelineManager(
|
1781
|
-
base_dir=self._base_dir,
|
1782
|
-
fs=self._fs,
|
1783
|
-
) as pm:
|
1784
|
-
return pm.add_job(
|
1785
|
-
name=self.name,
|
1786
|
-
executor=executor,
|
1787
|
-
inputs=inputs,
|
1788
|
-
final_vars=final_vars,
|
1789
|
-
config=config,
|
1790
|
-
with_tracker=with_tracker,
|
1791
|
-
with_opentelemetry=with_opentelemetry,
|
1792
|
-
with_progressbar=with_progressbar,
|
1793
|
-
result_expiration_time=result_expiration_time,
|
1794
|
-
**kwargs,
|
1795
|
-
)
|
1796
|
-
|
1797
|
-
def schedule(
|
1798
|
-
self,
|
1799
|
-
trigger_type: str | None = None,
|
1800
|
-
inputs: dict | None = None,
|
1801
|
-
final_vars: list | None = None,
|
1802
|
-
config: dict | None = None,
|
1803
|
-
executor: str | None = None,
|
1804
|
-
with_tracker: bool = False,
|
1805
|
-
with_opentelemetry: bool = False,
|
1806
|
-
with_progressbar: bool = False,
|
1807
|
-
paused: bool = False,
|
1808
|
-
coalesce: str = "latest",
|
1809
|
-
misfire_grace_time: float | dt.timedelta | None = None,
|
1810
|
-
max_jitter: float | dt.timedelta | None = None,
|
1811
|
-
max_running_jobs: int | None = None,
|
1812
|
-
conflict_policy: str = "do_nothing",
|
1813
|
-
**kwargs,
|
1814
|
-
) -> str:
|
1815
|
-
"""Schedule the pipeline.
|
1816
|
-
|
1817
|
-
Args:
|
1818
|
-
trigger_type (str | None, optional): The trigger type for the schedule. Defaults to None.
|
1819
|
-
inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
|
1820
|
-
final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
|
1821
|
-
config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
|
1822
|
-
Defaults to None.
|
1823
|
-
executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
|
1824
|
-
with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
|
1825
|
-
with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline. Defaults to False.
|
1826
|
-
with_progressbar (bool, optional): Whether to include a progress bar for the pipeline. Defaults to False.
|
1827
|
-
paused (bool, optional): Whether to pause the schedule. Defaults to False.
|
1828
|
-
coalesce (str, optional): The coalesce strategy. Defaults to "latest".
|
1829
|
-
misfire_grace_time (float | dt.timedelta | None, optional): The misfire grace time. Defaults to None.
|
1830
|
-
max_jitter (float | dt.timedelta | None, optional): The max jitter. Defaults to None.
|
1831
|
-
max_running_jobs (int | None, optional): The max running jobs. Defaults to None.
|
1832
|
-
conflict_policy (str, optional): The conflict policy. Defaults to "do_nothing".
|
1833
|
-
**kwargs: Additional keyword arguments.
|
1834
|
-
|
1835
|
-
Returns:
|
1836
|
-
str: The schedule ID.
|
1837
|
-
|
1838
|
-
Examples:
|
1839
|
-
```python
|
1840
|
-
p = Pipeline("my_pipeline")
|
1841
|
-
schedule_id = p.schedule()
|
1842
|
-
```
|
1843
|
-
|
1844
|
-
"""
|
1845
|
-
with PipelineManager(
|
1846
|
-
base_dir=self._base_dir,
|
1847
|
-
fs=self._fs,
|
1848
|
-
) as pm:
|
1849
|
-
return pm.schedule(
|
1850
|
-
name=self.name,
|
1851
|
-
executor=executor,
|
1852
|
-
trigger_type=trigger_type,
|
1853
|
-
inputs=inputs,
|
1854
|
-
final_vars=final_vars,
|
1855
|
-
with_tracker=with_tracker,
|
1856
|
-
with_opentelemetry=with_opentelemetry,
|
1857
|
-
with_progressbar=with_progressbar,
|
1858
|
-
paused=paused,
|
1859
|
-
coalesce=coalesce,
|
1860
|
-
misfire_grace_time=misfire_grace_time,
|
1861
|
-
max_jitter=max_jitter,
|
1862
|
-
max_running_jobs=max_running_jobs,
|
1863
|
-
conflict_policy=conflict_policy,
|
1864
|
-
**kwargs,
|
1865
|
-
)
|
1866
|
-
|
1867
|
-
def export(
|
1868
|
-
self,
|
1869
|
-
path: str,
|
1870
|
-
storage_options: dict | Munch | BaseStorageOptions | None = None,
|
1871
|
-
fs: AbstractFileSystem | None = None,
|
1872
|
-
overwrite: bool = False,
|
1873
|
-
):
|
1874
|
-
"""Export the pipeline to a given path.
|
1875
|
-
|
1876
|
-
The path could be a local path or a remote path like an S3 bucket or GitHub repository.
|
1877
|
-
Any writable fsspec filesystem is supported.
|
1878
|
-
|
1879
|
-
Args:
|
1880
|
-
path (str): The path to export the pipeline to.
|
1881
|
-
storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
|
1882
|
-
Defaults to None.
|
1883
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
1884
|
-
overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
|
1885
|
-
Defaults to False.
|
1886
|
-
|
1887
|
-
Examples:
|
1888
|
-
```python
|
1889
|
-
p = Pipeline("my_pipeline")
|
1890
|
-
p.export("s3://bucket/path")
|
1891
|
-
```
|
1892
|
-
"""
|
1893
|
-
with PipelineManager(
|
1894
|
-
base_dir=self._base_dir,
|
1895
|
-
fs=self._fs,
|
1896
|
-
) as pm:
|
1897
|
-
pm.export(
|
1898
|
-
name=self.name,
|
1899
|
-
path=path,
|
1900
|
-
storage_options=storage_options,
|
1901
|
-
fs=fs,
|
1902
|
-
overwrite=overwrite,
|
1903
|
-
)
|
1904
|
-
|
1905
|
-
def delete(self, cfg: bool = True, module: bool = False, hooks: bool = True):
|
1906
|
-
"""Delete the pipeline.
|
1907
|
-
|
1908
|
-
Args:
|
1909
|
-
cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
|
1910
|
-
module (bool, optional): Whether to delete the pipeline module file.
|
1911
|
-
Defaults to False.
|
1912
|
-
hooks (bool, optional): Whether to delete the pipeline's hooks. Defaults to True.
|
1913
|
-
|
1914
|
-
Examples:
|
1915
|
-
```python
|
1916
|
-
p = Pipeline("my_pipeline")
|
1917
|
-
p.delete()
|
1918
|
-
```
|
1919
|
-
"""
|
1920
|
-
with PipelineManager(
|
1921
|
-
base_dir=self._base_dir,
|
1922
|
-
fs=self._fs,
|
1923
|
-
) as pm:
|
1924
|
-
pm.delete(self.name, cfg=cfg, module=module, hooks=hooks)
|
1925
|
-
|
1926
|
-
def save_dag(self, format="png", config: dict | None = None):
|
1927
|
-
"""Save a image of the graph of functions for a given name.
|
1928
|
-
|
1929
|
-
Args:
|
1930
|
-
format (str, optional): The format of the graph file. Defaults to "png".
|
1931
|
-
|
1932
|
-
Examples:
|
1933
|
-
```python
|
1934
|
-
p = Pipeline("my_pipeline")
|
1935
|
-
p.save_dag()
|
1936
|
-
```
|
1937
|
-
"""
|
1938
|
-
with PipelineManager(
|
1939
|
-
base_dir=self._base_dir,
|
1940
|
-
fs=self._fs,
|
1941
|
-
) as pm:
|
1942
|
-
pm.save_dag(self.name, format, config=config)
|
1943
|
-
|
1944
|
-
def show_dag(
|
1945
|
-
self,
|
1946
|
-
config: dict | None = None,
|
1947
|
-
):
|
1948
|
-
"""Display the graph of functions for a given name.
|
1949
|
-
|
1950
|
-
Examples:
|
1951
|
-
```python
|
1952
|
-
p = Pipeline("my_pipeline")
|
1953
|
-
p.show_dag()
|
1954
|
-
```
|
1955
|
-
"""
|
1956
|
-
with PipelineManager(
|
1957
|
-
base_dir=self._base_dir,
|
1958
|
-
fs=self._fs,
|
1959
|
-
) as pm:
|
1960
|
-
return pm.show_dag(self.name, config=config)
|
1961
|
-
|
1962
|
-
def get_summary(
|
1963
|
-
self, cfg: bool = True, module: bool = True
|
1964
|
-
) -> dict[str, dict | str]:
|
1965
|
-
"""Get a summary of the pipeline.
|
1966
|
-
|
1967
|
-
Args:
|
1968
|
-
cfg (bool, optional): Whether to show the configuration. Defaults to True.
|
1969
|
-
module (bool, optional): Whether to show the module. Defaults to True.
|
1970
|
-
|
1971
|
-
Returns:
|
1972
|
-
dict[str, dict | str]: A dictionary containing the pipeline summary.
|
1973
|
-
|
1974
|
-
Examples:
|
1975
|
-
```python
|
1976
|
-
p = Pipeline("my_pipeline")
|
1977
|
-
summary = p.get_summary()
|
1978
|
-
```
|
1979
|
-
"""
|
1980
|
-
with PipelineManager(
|
1981
|
-
base_dir=self._base_dir,
|
1982
|
-
fs=self._fs,
|
1983
|
-
) as pm:
|
1984
|
-
return pm.get_summary(self.name, cfg=cfg, module=module)[self.name]
|
1985
|
-
|
1986
|
-
def show_summary(self, cfg: bool = True, module: bool = True):
|
1987
|
-
"""Show a summary of the pipeline.
|
1988
|
-
|
1989
|
-
Args:
|
1990
|
-
cfg (bool, optional): Whether to show the configuration. Defaults to True.
|
1991
|
-
module (bool, optional): Whether to show the module. Defaults to True.
|
1992
|
-
|
1993
|
-
Examples:
|
1994
|
-
```python
|
1995
|
-
p = Pipeline("my_pipeline")
|
1996
|
-
p.show_summary()
|
1997
|
-
```
|
1998
|
-
"""
|
1999
|
-
with PipelineManager(
|
2000
|
-
base_dir=self._base_dir,
|
2001
|
-
fs=self._fs,
|
2002
|
-
) as pm:
|
2003
|
-
pm.show_summary(self.name, cfg=cfg, module=module)
|
2004
|
-
|
2005
|
-
@property
|
2006
|
-
def summary(self) -> dict[str, dict | str]:
|
2007
|
-
"""Get a summary of the pipeline.
|
2008
|
-
|
2009
|
-
Returns:
|
2010
|
-
dict[str, dict | str]: A dictionary containing the pipeline summary.
|
2011
|
-
"""
|
2012
|
-
return self.get_summary()
|
2013
|
-
|
2014
|
-
|
2015
|
-
def run(
|
2016
|
-
name: str,
|
2017
|
-
base_dir: str | None = None,
|
2018
|
-
inputs: dict | None = None,
|
2019
|
-
final_vars: list | None = None,
|
2020
|
-
config: dict | None = None,
|
2021
|
-
executor: str | None = None,
|
2022
|
-
with_tracker: bool = False,
|
2023
|
-
with_opentelemetry: bool = False,
|
2024
|
-
with_progressbar: bool = False,
|
2025
|
-
storage_options: dict = {},
|
2026
|
-
fs: AbstractFileSystem | None = None,
|
2027
|
-
**kwargs,
|
2028
|
-
) -> dict[str, Any]:
|
2029
|
-
"""Run a pipeline with the given parameters.
|
2030
|
-
|
2031
|
-
Args:
|
2032
|
-
name (str): The name of the pipeline.
|
2033
|
-
base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
|
2034
|
-
inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
|
2035
|
-
final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
|
2036
|
-
config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
|
2037
|
-
Defaults to None.
|
2038
|
-
executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
|
2039
|
-
with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
|
2040
|
-
with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline.
|
2041
|
-
Defaults to False.
|
2042
|
-
with_progressbar (bool, optional): Whether to include a progress bar for the pipeline.
|
2043
|
-
storage_options (dict, optional): The fsspec storage options. Defaults to {}.
|
2044
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
2045
|
-
**kwargs: Additional keyword arguments.
|
2046
|
-
|
2047
|
-
Returns:
|
2048
|
-
dict[str, Any]: The final variables for the pipeline.
|
2049
|
-
|
2050
|
-
Examples:
|
2051
|
-
```python
|
2052
|
-
final_vars = run("my_pipeline", inputs={"param": 1}, base_dir="my_flowerpower_project")
|
2053
|
-
```
|
2054
|
-
"""
|
2055
|
-
with Pipeline(
|
2056
|
-
base_dir=base_dir, name=name, storage_options=storage_options, fs=fs
|
2057
|
-
) as p:
|
2058
|
-
return p.run(
|
2059
|
-
inputs=inputs,
|
2060
|
-
final_vars=final_vars,
|
2061
|
-
config=config,
|
2062
|
-
executor=executor,
|
2063
|
-
with_tracker=with_tracker,
|
2064
|
-
with_opentelemetry=with_opentelemetry,
|
2065
|
-
with_progressbar=with_progressbar,
|
2066
|
-
**kwargs,
|
2067
|
-
)
|
2068
|
-
|
2069
|
-
|
2070
|
-
def run_job(
|
2071
|
-
name: str,
|
2072
|
-
base_dir: str | None = None,
|
2073
|
-
inputs: dict | None = None,
|
2074
|
-
final_vars: list | None = None,
|
2075
|
-
config: dict | None = None,
|
2076
|
-
executor: str | None = None,
|
2077
|
-
with_tracker: bool | None = None,
|
2078
|
-
with_opentelemetry: bool | None = None,
|
2079
|
-
with_progressbar: bool | None = None,
|
2080
|
-
# result_expiration_time: float | dt.timedelta = 0,
|
2081
|
-
storage_options: dict | Munch | BaseStorageOptions = {},
|
2082
|
-
fs: AbstractFileSystem | None = None,
|
2083
|
-
**kwargs,
|
2084
|
-
) -> dict[str, Any]:
|
2085
|
-
"""Run a pipeline as a job with the given parameters.
|
2086
|
-
|
2087
|
-
Args:
|
2088
|
-
name (str): The name of the pipeline.
|
2089
|
-
base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
|
2090
|
-
inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
|
2091
|
-
final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
|
2092
|
-
config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
|
2093
|
-
Defaults to None.
|
2094
|
-
executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
|
2095
|
-
with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
|
2096
|
-
with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
|
2097
|
-
Defaults to None.
|
2098
|
-
with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
|
2099
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
2100
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
2101
|
-
**kwargs: Additional keyword arguments.
|
2102
|
-
|
2103
|
-
Returns:
|
2104
|
-
dict[str, Any]: The final variables for the pipeline.
|
2105
|
-
|
2106
|
-
Examples:
|
2107
|
-
```python
|
2108
|
-
final_vars = run_job("my_pipeline", inputs={"param": 1}, base_dir="my_flowerpower_project")
|
2109
|
-
```
|
2110
|
-
"""
|
2111
|
-
with Pipeline(
|
2112
|
-
base_dir=base_dir, name=name, storage_options=storage_options, fs=fs
|
2113
|
-
) as p:
|
2114
|
-
return p.run_job(
|
2115
|
-
inputs=inputs,
|
2116
|
-
final_vars=final_vars,
|
2117
|
-
config=config,
|
2118
|
-
executor=executor,
|
2119
|
-
with_tracker=with_tracker,
|
2120
|
-
with_opentelemetry=with_opentelemetry,
|
2121
|
-
with_progressbar=with_progressbar,
|
2122
|
-
# result_expiration_time=result_expiration_time,
|
2123
|
-
**kwargs,
|
2124
|
-
)
|
2125
|
-
|
2126
|
-
|
2127
|
-
def add_job(
|
2128
|
-
name: str,
|
2129
|
-
base_dir: str | None = None,
|
2130
|
-
inputs: dict | None = None,
|
2131
|
-
final_vars: list | None = None,
|
2132
|
-
config: dict | None = None,
|
2133
|
-
executor: str | None = None,
|
2134
|
-
with_tracker: bool | None = None,
|
2135
|
-
with_opentelemetry: bool | None = None,
|
2136
|
-
with_progressbar: bool | None = None,
|
2137
|
-
result_expiration_time: float | dt.timedelta = 0,
|
2138
|
-
storage_options: dict | Munch | BaseStorageOptions = {},
|
2139
|
-
fs: AbstractFileSystem | None = None,
|
2140
|
-
**kwargs,
|
2141
|
-
) -> UUID:
|
2142
|
-
"""
|
2143
|
-
Add a job to run the pipeline with the given parameters to the worker data store.
|
2144
|
-
Executes the job immediatly and returns the job id (UUID). The job result will be stored in
|
2145
|
-
the data store for the given `result_expiration_time` and can be fetched using the job id (UUID).
|
2146
|
-
|
2147
|
-
Args:
|
2148
|
-
name (str): The name of the job.
|
2149
|
-
executor (str | None, optional): The executor to use for the job. Defaults to None.
|
2150
|
-
inputs (dict | None, optional): The inputs for the job. Defaults to None.
|
2151
|
-
final_vars (list | None, optional): The final variables for the job. Defaults to None.
|
2152
|
-
config (dict | None, optional): The config for the hamilton driver that executes the job.
|
2153
|
-
Defaults to None.
|
2154
|
-
with_tracker (bool | None, optional): Whether to use a tracker for the job. Defaults to None.
|
2155
|
-
with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry for the job. Defaults to None.
|
2156
|
-
with_progressbar (bool | None, optional): Whether to use a progress bar for the job. Defaults to None.
|
2157
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
2158
|
-
result_expiration_time (float | dt.timedelta | None, optional): The expiration time for the job result.
|
2159
|
-
Defaults to None.
|
2160
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
2161
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
2162
|
-
|
2163
|
-
**kwargs: Additional keyword arguments.
|
2164
|
-
|
2165
|
-
Returns:
|
2166
|
-
UUID: The UUID of the added job.
|
2167
|
-
|
2168
|
-
Examples:
|
2169
|
-
```python
|
2170
|
-
job_id = add_job("my_job")
|
2171
|
-
```
|
2172
|
-
"""
|
2173
|
-
p = Pipeline(name=name, base_dir=base_dir, storage_options=storage_options, fs=fs)
|
2174
|
-
return p.add_job(
|
2175
|
-
executor=executor,
|
2176
|
-
inputs=inputs,
|
2177
|
-
final_vars=final_vars,
|
2178
|
-
config=config,
|
2179
|
-
with_tracker=with_tracker,
|
2180
|
-
with_opentelemetry=with_opentelemetry,
|
2181
|
-
with_progressbar=with_progressbar,
|
2182
|
-
result_expiration_time=result_expiration_time,
|
2183
|
-
**kwargs,
|
2184
|
-
)
|
2185
|
-
|
2186
|
-
|
2187
|
-
def schedule(
|
2188
|
-
name: str,
|
2189
|
-
base_dir: str | None = None,
|
2190
|
-
inputs: dict | None = None,
|
2191
|
-
final_vars: list | None = None,
|
2192
|
-
executor: str | None = None,
|
2193
|
-
config: dict | None = None,
|
2194
|
-
with_tracker: bool | None = None,
|
2195
|
-
with_opentelemetry: bool | None = None,
|
2196
|
-
with_progressbar: bool | None = None,
|
2197
|
-
trigger_type: str | None = None,
|
2198
|
-
id_: str | None = None,
|
2199
|
-
paused: bool = False,
|
2200
|
-
coalesce: str = "latest",
|
2201
|
-
misfire_grace_time: float | dt.timedelta | None = None,
|
2202
|
-
max_jitter: float | dt.timedelta | None = None,
|
2203
|
-
max_running_jobs: int | None = None,
|
2204
|
-
conflict_policy: str = "do_nothing",
|
2205
|
-
overwrite: bool = False,
|
2206
|
-
storage_options: dict | Munch | BaseStorageOptions = {},
|
2207
|
-
fs: AbstractFileSystem | None = None,
|
2208
|
-
**kwargs,
|
2209
|
-
) -> str:
|
2210
|
-
"""Schedule a pipeline with the given parameters.
|
2211
|
-
|
2212
|
-
Args:
|
2213
|
-
name (str): The name of the pipeline.
|
2214
|
-
base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
|
2215
|
-
inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
|
2216
|
-
final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
|
2217
|
-
config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
|
2218
|
-
Defaults to None.
|
2219
|
-
executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
|
2220
|
-
with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
|
2221
|
-
with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
|
2222
|
-
Defaults to None.
|
2223
|
-
with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
|
2224
|
-
trigger_type (str | None, optional): The trigger type for the schedule. Defaults to None.
|
2225
|
-
id_ (str | None, optional): The schedule ID. Defaults to None.
|
2226
|
-
paused (bool, optional): Whether to pause the schedule. Defaults to False.
|
2227
|
-
coalesce (str, optional): The coalesce strategy. Defaults to "latest".
|
2228
|
-
misfire_grace_time (float | dt.timedelta | None, optional): The misfire grace time. Defaults to None.
|
2229
|
-
max_jitter (float | dt.timedelta | None, optional): The max jitter. Defaults to None.
|
2230
|
-
max_running_jobs (int | None, optional): The max running jobs. Defaults to None.
|
2231
|
-
conflict_policy (str, optional): The conflict policy. Defaults to "do_nothing".
|
2232
|
-
overwrite (bool, optional): Whether to overwrite an existing schedule. Defaults to False.
|
2233
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
2234
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
2235
|
-
**kwargs: Additional keyword arguments.
|
2236
|
-
|
2237
|
-
Returns:
|
2238
|
-
str: The schedule ID.
|
2239
|
-
|
2240
|
-
Examples:
|
2241
|
-
```python
|
2242
|
-
schedule_id = schedule("my_pipeline", trigger_type="interval", seconds=60)
|
2243
|
-
```
|
2244
|
-
"""
|
2245
|
-
with Pipeline(
|
2246
|
-
base_dir=base_dir,
|
2247
|
-
name=name,
|
2248
|
-
storage_options=storage_options,
|
2249
|
-
fs=fs,
|
2250
|
-
) as p:
|
2251
|
-
return p.schedule(
|
2252
|
-
executor=executor,
|
2253
|
-
trigger_type=trigger_type,
|
2254
|
-
inputs=inputs,
|
2255
|
-
final_vars=final_vars,
|
2256
|
-
config=config,
|
2257
|
-
with_tracker=with_tracker,
|
2258
|
-
with_opentelemetry=with_opentelemetry,
|
2259
|
-
with_progressbar=with_progressbar,
|
2260
|
-
paused=paused,
|
2261
|
-
coalesce=coalesce,
|
2262
|
-
misfire_grace_time=misfire_grace_time,
|
2263
|
-
max_jitter=max_jitter,
|
2264
|
-
max_running_jobs=max_running_jobs,
|
2265
|
-
conflict_policy=conflict_policy,
|
2266
|
-
overwrite=overwrite,
|
2267
|
-
**kwargs,
|
2268
|
-
)
|
2269
|
-
|
2270
|
-
|
2271
|
-
def new(
|
2272
|
-
name: str,
|
2273
|
-
base_dir: str | None = None,
|
2274
|
-
overwrite: bool = False,
|
2275
|
-
storage_options: dict | Munch | BaseStorageOptions = {},
|
2276
|
-
fs: AbstractFileSystem | None = None,
|
2277
|
-
):
|
2278
|
-
"""Create a new pipeline with the given name.
|
2279
|
-
|
2280
|
-
Args:
|
2281
|
-
name (str): The name of the pipeline.
|
2282
|
-
base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
|
2283
|
-
overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name. Defaults to False.
|
2284
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
2285
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
2286
|
-
|
2287
|
-
Examples:
|
2288
|
-
```python
|
2289
|
-
new("my_pipeline")
|
2290
|
-
```
|
2291
|
-
"""
|
2292
|
-
with PipelineManager(
|
2293
|
-
base_dir=base_dir,
|
2294
|
-
fs=fs,
|
2295
|
-
) as pm:
|
2296
|
-
pm.new(name=name, overwrite=overwrite, storage_options=storage_options)
|
2297
|
-
|
2298
|
-
|
2299
|
-
def delete(
|
2300
|
-
name: str,
|
2301
|
-
base_dir: str | None = None,
|
2302
|
-
cfg: bool = True,
|
2303
|
-
module: bool = False,
|
2304
|
-
storage_options: dict | Munch | BaseStorageOptions = {},
|
2305
|
-
fs: AbstractFileSystem | None = None,
|
2306
|
-
):
|
2307
|
-
"""Delete a pipeline.
|
2308
|
-
|
2309
|
-
Args:
|
2310
|
-
name (str): The name of the pipeline to delete.
|
2311
|
-
base_dir (str | None, optional): The base path of the pipeline. Defaults to None.
|
2312
|
-
cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
|
2313
|
-
module (bool, optional): Whether to delete the pipeline module. Defaults to False.
|
2314
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
2315
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
2316
|
-
"""
|
2317
|
-
with Pipeline(
|
2318
|
-
name=name, base_dir=base_dir, storage_options=storage_options, fs=fs
|
2319
|
-
) as p:
|
2320
|
-
p.delete(cfg=cfg, module=module)
|
2321
|
-
|
2322
|
-
|
2323
|
-
def save_dag(
|
2324
|
-
name: str,
|
2325
|
-
base_dir: str | None = None,
|
2326
|
-
format: str = "png",
|
2327
|
-
storage_options: dict | Munch | BaseStorageOptions = {},
|
2328
|
-
fs: AbstractFileSystem | None = None,
|
2329
|
-
):
|
2330
|
-
"""Save a image of the graph of functions for a given name.
|
2331
|
-
|
2332
|
-
Args:
|
2333
|
-
name (str): The name of the pipeline.
|
2334
|
-
base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
|
2335
|
-
format (str, optional): The format of the graph file. Defaults to "png".
|
2336
|
-
storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
|
2337
|
-
fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
|
2338
|
-
|
2339
|
-
Examples:
|
2340
|
-
```python
|
2341
|
-
save_dag("my_pipeline")
|
2342
|
-
```
|
2343
|
-
"""
|
2344
|
-
with Pipeline(
|
2345
|
-
base_dir=base_dir,
|
2346
|
-
name=name,
|
2347
|
-
storage_options=storage_options,
|
2348
|
-
fs=fs,
|
2349
|
-
) as pm:
|
2350
|
-
pm.save_dag(format=format)
|
2351
|
-
|
2352
|
-
|
2353
|
-
def show_dag(
    name: str,
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Display the graph of functions for a given pipeline name.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage
            options. Defaults to None, which is treated as empty options.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Returns:
        Whatever ``Pipeline.show_dag`` returns (e.g. a rendered graph object).

    Examples:
        ```python
        show_dag("my_pipeline")
        ```
    """
    # Avoid the shared mutable-default-argument pitfall: normalize None to a
    # fresh dict per call instead of defaulting to a module-level {}.
    if storage_options is None:
        storage_options = {}
    with Pipeline(
        base_dir=base_dir,
        name=name,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        return pm.show_dag()
|
2379
|
-
|
2380
|
-
|
2381
|
-
def get_summary(
    name: str | None = None,
    base_dir: str | None = None,
    cfg: bool = True,
    module: bool = True,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
) -> dict[str, dict | str]:
    """Get a summary of one pipeline, or of all pipelines.

    Args:
        name (str | None, optional): The name of the pipeline. If None, the summary
            for all pipelines is returned. Defaults to None.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        cfg (bool, optional): Whether to include the configuration. Defaults to True.
        module (bool, optional): Whether to include the module. Defaults to True.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage
            options. Defaults to None, which is treated as empty options.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Returns:
        dict[str, dict | str]: The summary for the named pipeline when ``name`` is
            given, otherwise a mapping of pipeline name to summary.

    Examples:
        ```python
        summary = get_summary("my_pipeline")
        ```
    """
    # Avoid the shared mutable-default-argument pitfall: normalize None to a
    # fresh dict per call instead of defaulting to a module-level {}.
    if storage_options is None:
        storage_options = {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        summary = pm.get_summary(name=name, cfg=cfg, module=module)
        # When a specific pipeline was requested, unwrap its entry from the
        # full mapping so callers get the summary directly.
        if name:
            return summary[name]
        return summary
|
2416
|
-
|
2417
|
-
|
2418
|
-
def show_summary(
    name: str | None = None,
    base_dir: str | None = None,
    cfg: bool = True,
    module: bool = True,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Show a summary of one pipeline, or of all pipelines.

    Args:
        name (str | None, optional): The name of the pipeline. Defaults to None.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        cfg (bool, optional): Whether to show the configuration. Defaults to True.
        module (bool, optional): Whether to show the module. Defaults to True.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage
            options. Defaults to None, which is treated as empty options.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Examples:
        ```python
        show_summary("my_pipeline")
        ```
    """
    # Avoid the shared mutable-default-argument pitfall: normalize None to a
    # fresh dict per call instead of defaulting to a module-level {}.
    if storage_options is None:
        storage_options = {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        pm.show_summary(name=name, cfg=cfg, module=module)
|
2447
|
-
|
2448
|
-
|
2449
|
-
def show_pipelines(
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Display all available pipelines in a formatted table.

    Args:
        base_dir (str | None, optional): The base path of the pipelines. Defaults to None.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage
            options. Defaults to None, which is treated as empty options.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Examples:
        ```python
        show_pipelines()
        ```
    """
    # Avoid the shared mutable-default-argument pitfall: normalize None to a
    # fresh dict per call instead of defaulting to a module-level {}.
    if storage_options is None:
        storage_options = {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        pm.show_pipelines()
|
2472
|
-
|
2473
|
-
|
2474
|
-
def list_pipelines(
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
) -> list[str]:
    """Get a list of all available pipelines.

    Args:
        base_dir (str | None, optional): The base path of the pipelines. Defaults to None.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage
            options. Defaults to None, which is treated as empty options.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Returns:
        list[str]: A list of pipeline names.

    Examples:
        ```python
        pipelines = list_pipelines()
        ```
    """
    # Avoid the shared mutable-default-argument pitfall: normalize None to a
    # fresh dict per call instead of defaulting to a module-level {}.
    if storage_options is None:
        storage_options = {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        return pm.list_pipelines()
|
2499
|
-
|