FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -37
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +32 -1
  14. flowerpower/cli/pipeline.py +559 -406
  15. flowerpower/cli/utils.py +29 -18
  16. flowerpower/flowerpower.py +12 -8
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +2 -142
  59. flowerpower-1.0.0b1.dist-info/METADATA +324 -0
  60. flowerpower-1.0.0b1.dist-info/RECORD +94 -0
  61. flowerpower/_web/__init__.py +0 -61
  62. flowerpower/_web/routes/config.py +0 -103
  63. flowerpower/_web/routes/pipelines.py +0 -173
  64. flowerpower/_web/routes/scheduler.py +0 -136
  65. flowerpower/cfg/pipeline/tracker.py +0 -14
  66. flowerpower/cfg/project/open_telemetry.py +0 -8
  67. flowerpower/cfg/project/tracker.py +0 -11
  68. flowerpower/cfg/project/worker.py +0 -19
  69. flowerpower/cli/scheduler.py +0 -309
  70. flowerpower/cli/web.py +0 -44
  71. flowerpower/event_handler.py +0 -23
  72. flowerpower/mqtt.py +0 -609
  73. flowerpower/pipeline.py +0 -2499
  74. flowerpower/scheduler.py +0 -680
  75. flowerpower/tui.py +0 -79
  76. flowerpower/utils/datastore.py +0 -186
  77. flowerpower/utils/eventbroker.py +0 -127
  78. flowerpower/utils/executor.py +0 -58
  79. flowerpower/utils/trigger.py +0 -140
  80. flowerpower-0.9.13.1.dist-info/METADATA +0 -586
  81. flowerpower-0.9.13.1.dist-info/RECORD +0 -76
  82. /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  83. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +0 -0
  84. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
  85. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
flowerpower/pipeline.py DELETED
@@ -1,2499 +0,0 @@
1
- import datetime as dt
2
- import importlib
3
- import importlib.util
4
- import os
5
- import posixpath
6
- import sys
7
- from typing import Any, Callable
8
- from uuid import UUID
9
-
10
- from fsspec.spec import AbstractFileSystem
11
- from hamilton import driver
12
- from hamilton.execution import executors
13
- from hamilton.telemetry import disable_telemetry
14
-
15
- if importlib.util.find_spec("opentelemetry"):
16
- from hamilton.plugins import h_opentelemetry
17
-
18
- from .utils.open_telemetry import init_tracer
19
-
20
- else:
21
- h_opentelemetry = None
22
- init_tracer = None
23
- import rich
24
- from hamilton.plugins import h_tqdm
25
- from hamilton_sdk.adapters import HamiltonTracker
26
- from hamilton.plugins.h_threadpool import FutureAdapter
27
- from loguru import logger
28
- from rich.console import Console
29
- from rich.panel import Panel
30
- from rich.syntax import Syntax
31
- from rich.table import Table
32
- from rich.tree import Tree
33
-
34
- from .cfg import ( # PipelineRunConfig,; PipelineScheduleConfig,; PipelineTrackerConfig,
35
- Config,
36
- PipelineConfig,
37
- )
38
- from .fs import get_filesystem
39
- from .fs.storage_options import BaseStorageOptions
40
- from .utils.misc import view_img
41
- from .utils.templates import HOOK_TEMPLATE__MQTT_BUILD_CONFIG, PIPELINE_PY_TEMPLATE
42
-
43
-
44
-
45
- if importlib.util.find_spec("apscheduler"):
46
- from .scheduler import SchedulerManager
47
- else:
48
- SchedulerManager = None
49
- from pathlib import Path
50
- from types import TracebackType
51
-
52
- # if importlib.util.find_spec("paho"):
53
- # from .mqtt import MQTTClient
54
- # else:
55
- # MQTTClient = None
56
- from munch import Munch
57
-
58
- from .utils.executor import get_executor
59
- from .utils.trigger import get_trigger # , ALL_TRIGGER_KWARGS
60
-
61
- from enum import Enum
62
-
63
- class HookType(str, Enum):
64
- MQTT_BUILD_CONFIG = "mqtt-build-config"
65
-
66
- def default_function_name(self) -> str:
67
- match self.value:
68
- case HookType.MQTT_BUILD_CONFIG:
69
- return self.value.replace("-", "_")
70
-
71
- class PipelineManager:
72
- def __init__(
73
- self,
74
- base_dir: str | None = None,
75
- storage_options: dict | Munch | BaseStorageOptions = {},
76
- fs: AbstractFileSystem | None = None,
77
- cfg_dir: str = "conf",
78
- pipelines_dir: str = "pipelines",
79
- telemetry: bool = True,
80
- ):
81
- """
82
- Initializes the Pipeline object.
83
-
84
- Args:
85
- base_dir (str | None): The flowerpower base path. Defaults to None.
86
- storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
87
- fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
88
-
89
- Returns:
90
- None
91
- """
92
- self._telemetry = telemetry
93
- self._base_dir = base_dir or str(Path.cwd())
94
- self._storage_options = storage_options or {}
95
- if fs is None:
96
- fs = get_filesystem(self._base_dir, **self._storage_options)
97
- self._fs = fs
98
-
99
- self._cfg_dir = cfg_dir
100
- self._pipelines_dir = pipelines_dir
101
-
102
- try:
103
- self._fs.makedirs(f"{self._cfg_dir}/pipelines", exist_ok=True)
104
- self._fs.makedirs(self._pipelines_dir, exist_ok=True)
105
- except Exception as e:
106
- logger.error(f"Error creating directories: {e}")
107
-
108
- self._sync_fs()
109
- self.load_config()
110
-
111
- def __enter__(self) -> "PipelineManager":
112
- return self
113
-
114
- def __exit__(
115
- self,
116
- exc_type: type[BaseException] | None,
117
- exc_val: BaseException | None,
118
- exc_tb: TracebackType | None,
119
- ) -> None:
120
- # Add any cleanup code here if needed
121
- pass
122
-
123
- def _get_schedules(self):
124
- with SchedulerManager(
125
- fs=self._fs,
126
- role="scheduler",
127
- ) as sm:
128
- return sm.get_schedules()
129
-
130
- def _sync_fs(self):
131
- """
132
- Sync the filesystem.
133
-
134
- Returns:
135
- None
136
- """
137
- if self._fs.is_cache_fs:
138
- self._fs.sync()
139
-
140
- modules_path = posixpath.join(self._fs.path, self._pipelines_dir)
141
- if modules_path not in sys.path:
142
- sys.path.append(modules_path)
143
-
144
- def load_module(self, name: str, reload: bool = False):
145
- """
146
- Load a module dynamically.
147
-
148
- Args:
149
- name (str): The name of the module to load.
150
-
151
- Returns:
152
- None
153
- """
154
- sys.path.append(posixpath.join(self._fs.path, self._pipelines_dir))
155
-
156
- if not hasattr(self, "_module"):
157
- self._module = importlib.import_module(name)
158
-
159
- else:
160
- if reload:
161
- importlib.reload(self._module)
162
-
163
- def load_config(self, name: str | None = None, reload: bool = False):
164
- """
165
- Load the configuration file.
166
-
167
- This method loads the configuration file specified by the `_cfg_dir` attribute and
168
- assigns it to the `cfg` attribute.
169
-
170
- Args:
171
- name (str | None, optional): The name of the pipeline. Defaults to None.
172
-
173
- Returns:
174
- None
175
- """
176
- if reload:
177
- del self.cfg
178
- self.cfg = Config.load(base_dir=self._base_dir, pipeline_name=name, fs=self._fs)
179
-
180
- def _get_driver(
181
- self,
182
- name: str,
183
- executor: str | None = None,
184
- with_tracker: bool = False,
185
- with_opentelemetry: bool = False,
186
- with_progressbar: bool = False,
187
- config: dict = {},
188
- reload: bool = False,
189
- **kwargs,
190
- ) -> tuple[driver.Driver, Callable | None]:
191
- """
192
- Get the driver and shutdown function for a given pipeline.
193
-
194
- Args:
195
- name (str): The name of the pipeline.
196
- executor (str | None, optional): The executor to use. Defaults to None.
197
- with_tracker (bool, optional): Whether to use the tracker. Defaults to False.
198
- with_opentelemetry (bool, optional): Whether to use OpenTelemetry. Defaults to False.
199
- with_progressbar (bool, optional): Whether to use a progress bar. Defaults to False.
200
- config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
201
- Defaults to None.
202
- with_opentelemetry (bool, optional): Whether to use OpenTelemetry. Defaults to False.
203
- reload (bool, optional): Whether to reload the module. Defaults to False.
204
- **kwargs: Additional keyword arguments.
205
-
206
- Keyword Args:
207
- max_tasks (int, optional): The maximum number of tasks. Defaults to 20.
208
- num_cpus (int, optional): The number of CPUs. Defaults to 4.
209
- project_id (str, optional): The project ID for the tracker. Defaults to None.
210
- username (str, optional): The username for the tracker. Defaults to None.
211
- dag_name (str, optional): The DAG name for the tracker. Defaults to None.
212
- tags (str, optional): The tags for the tracker. Defaults to None.
213
- api_url (str, optional): The API URL for the tracker. Defaults to None.
214
- ui_url (str, optional): The UI URL for the tracker. Defaults to None.
215
-
216
- Returns:
217
- tuple[driver.Driver, Callable | None]: A tuple containing the driver and shutdown function.
218
- """
219
- if not self.cfg.pipeline.name == name or reload:
220
- self.load_config(name=name, reload=reload)
221
- if not hasattr(self, "_module") or reload:
222
- self.load_module(name=name, reload=reload)
223
- if self._telemetry:
224
- disable_telemetry()
225
-
226
- max_tasks = kwargs.pop("max_tasks", 20)
227
- num_cpus = kwargs.pop("num_cpus", 4)
228
- executor_, shutdown = get_executor(
229
- executor or "local", max_tasks=max_tasks, num_cpus=num_cpus
230
- )
231
- adapters = []
232
- if with_tracker:
233
- tracker_cfg = {
234
- **self.cfg.pipeline.tracker.to_dict(),
235
- **self.cfg.project.tracker.to_dict(),
236
- }
237
- tracker_kwargs = {
238
- key: kwargs.pop(key, None) or tracker_cfg.get(key, None)
239
- for key in tracker_cfg
240
- }
241
- tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
242
- tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)
243
-
244
- if tracker_kwargs.get("project_id", None) is None:
245
- raise ValueError(
246
- "Please provide a project_id if you want to use the tracker"
247
- )
248
-
249
- tracker = HamiltonTracker(**tracker_kwargs)
250
- adapters.append(tracker)
251
-
252
- if with_opentelemetry and h_opentelemetry is not None:
253
- trace = init_tracer(
254
- host=kwargs.pop("host", "localhost"),
255
- port=kwargs.pop("port", 6831),
256
- name=f"{self.cfg.project.name}.{name}",
257
- )
258
- tracer = trace.get_tracer(__name__)
259
- adapters.append(h_opentelemetry.OpenTelemetryTracer(tracer=tracer))
260
-
261
- if with_progressbar:
262
- adapters.append(h_tqdm.ProgressBar(desc=f"{self.cfg.project.name}.{name}"))
263
-
264
- if executor == "future_adapter":
265
- adapters.append(FutureAdapter())
266
-
267
- dr = (
268
- driver.Builder()
269
- .enable_dynamic_execution(allow_experimental_mode=True)
270
- .with_modules(self._module)
271
- .with_config(config)
272
- .with_local_executor(executors.SynchronousLocalTaskExecutor())
273
- )
274
-
275
- if executor_ is not None:
276
-
277
- dr = dr.with_remote_executor(executor_)
278
-
279
- if len(adapters):
280
- dr = dr.with_adapters(*adapters)
281
-
282
- dr = dr.build()
283
- return dr, shutdown
284
-
285
- def run(
286
- self,
287
- name: str,
288
- inputs: dict | None = None,
289
- final_vars: list | None = None,
290
- config: dict | None = None,
291
- executor: str | None = None,
292
- with_tracker: bool | None = None,
293
- with_opentelemetry: bool | None = None,
294
- with_progressbar: bool | None = None,
295
- reload: bool = False,
296
- **kwargs,
297
- ) -> dict[str, Any]:
298
- """
299
- Run the pipeline with the given parameters.
300
-
301
- Args:
302
- name (str): The name of the pipeline.
303
- executor (str | None, optional): The executor to use. Defaults to None.
304
- inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
305
- final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
306
- config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
307
- Defaults to None.
308
- with_tracker (bool | None, optional): Whether to use a tracker. Defaults to None.
309
- with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry. Defaults to None.
310
- with_progressbar (bool | None, optional): Whether to use a progress bar. Defaults to None.
311
- reload (bool, optional): Whether to reload the pipeline. Defaults to False.
312
- **kwargs: Additional keyword arguments.
313
-
314
- Returns:
315
- dict[str,Any]: The result of executing the pipeline.
316
-
317
- Examples:
318
- ```python
319
- pm = PipelineManager()
320
- final_vars = pm.run("my_pipeline")
321
- ```
322
- """
323
- if not self.cfg.pipeline.name == name or reload:
324
- self.load_config(name=name, reload=reload)
325
-
326
- if reload or not hasattr(self, "_module"):
327
- self.load_module(name=name, reload=reload)
328
-
329
- logger.info(
330
- f"Starting pipeline {self.cfg.project.name}.{name}"
331
- ) # in environment {environment}")
332
-
333
- run_params = self.cfg.pipeline.run
334
-
335
- final_vars = final_vars or run_params.final_vars
336
- inputs = {
337
- **(run_params.inputs or {}),
338
- **(inputs or {}),
339
- } # <-- inputs override and adds to run_params
340
- config = {
341
- **(run_params.config or {}),
342
- **(config or {}),
343
- }
344
- for arg in [
345
- "executor",
346
- "with_tracker",
347
- "with_opentelemetry",
348
- "with_progressbar",
349
- ]:
350
- if eval(arg) is not None:
351
- kwargs[arg] = eval(arg)
352
- else:
353
- kwargs[arg] = getattr(run_params, arg)
354
-
355
- kwargs["config"] = config
356
-
357
- dr, shutdown = self._get_driver(
358
- name=name,
359
- **kwargs,
360
- )
361
-
362
- res = dr.execute(final_vars=final_vars, inputs=inputs)
363
-
364
- logger.success(f"Finished pipeline {self.cfg.project.name}.{name}")
365
-
366
- if shutdown is not None:
367
- shutdown()
368
-
369
- return res
370
-
371
- def run_job(
372
- self,
373
- name: str,
374
- inputs: dict | None = None,
375
- final_vars: list | None = None,
376
- config: dict | None = None,
377
- executor: str | None = None,
378
- with_tracker: bool | None = None,
379
- with_opentelemetry: bool | None = None,
380
- with_progressbar: bool | None = None,
381
- reload: bool = False,
382
- **kwargs,
383
- ) -> dict[str, Any]:
384
- """
385
- Add a job to run the pipeline with the given parameters to the worker.
386
- Executes the job immediately and returns the result of the execution.
387
-
388
- Args:
389
- name (str): The name of the job.
390
- executor (str | None, optional): The executor to use for the job. Defaults to None.
391
- inputs (dict | None, optional): The inputs for the job. Defaults to None.
392
- final_vars (list | None, optional): The final variables for the job. Defaults to None.
393
- config (dict | None, optional): The configuration for the job. Defaults to None.
394
- with_tracker (bool | None, optional): Whether to use a tracker for the job. Defaults to None.
395
- with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry for the job. Defaults to None.
396
- with_progressbar (bool | None, optional): Whether to use a progress bar for the job. Defaults to None.
397
- reload (bool, optional): Whether to reload the job. Defaults to False.
398
- **kwargs: Additional keyword arguments.
399
-
400
- Returns:
401
- dict[str,Any]: The result of the job execution.
402
-
403
- Examples:
404
- ```python
405
- pm = PipelineManager()
406
- final_vars = pm.run_job("my_job")
407
- ```
408
- """
409
- if SchedulerManager is None:
410
- raise ValueError(
411
- "APScheduler4 not installed. Please install it first. "
412
- "Run `pip install 'flowerpower[scheduler]'`."
413
- )
414
-
415
- with SchedulerManager(
416
- name=f"{self.cfg.project.name}.{name}",
417
- fs=self._fs,
418
- role="scheduler",
419
- ) as sm:
420
- kwargs.update(
421
- {
422
- arg: eval(arg)
423
- for arg in [
424
- "name",
425
- "inputs",
426
- "final_vars",
427
- "config",
428
- "executor",
429
- "with_tracker",
430
- "with_opentelemetry",
431
- "with_progressbar",
432
- "reload",
433
- ]
434
- }
435
- )
436
- return sm.run_job(
437
- self.run,
438
- kwargs=kwargs,
439
- job_executor=(
440
- executor
441
- if executor in ["async", "threadpool", "processpool", ""]
442
- else "threadpool" if executor == "future_adapter" else "threadpool"
443
- ),
444
- )
445
-
446
- def add_job(
447
- self,
448
- name: str,
449
- inputs: dict | None = None,
450
- final_vars: list | None = None,
451
- config: dict | None = None,
452
- executor: str | None = None,
453
- with_tracker: bool | None = None,
454
- with_opentelemetry: bool | None = None,
455
- with_progressbar: bool | None = None,
456
- reload: bool = False,
457
- result_expiration_time: float | dt.timedelta = 0,
458
- **kwargs,
459
- ) -> UUID:
460
- """
461
- Add a job to run the pipeline with the given parameters to the worker data store.
462
- Executes the job immediately and returns the job id (UUID). The job result will be stored in the data store
463
- for the given `result_expiration_time` and can be fetched using the job id (UUID).
464
-
465
- Args:
466
- name (str): The name of the job.
467
- executor (str | None, optional): The executor for the job. Defaults to None.
468
- inputs (dict | None, optional): The inputs for the job. Defaults to None.
469
- final_vars (list | None, optional): The final variables for the job. Defaults to None.
470
- config (dict | None, optional): The configuration for the job. Defaults to None.
471
- with_tracker (bool | None, optional): Whether to use a tracker for the job. Defaults to None.
472
- with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry for the job. Defaults to None.
473
- with_progressbar (bool | None, optional): Whether to use a progress bar for the job. Defaults to None.
474
- reload (bool, optional): Whether to reload the job. Defaults to False.
475
- result_expiration_time (float | dt.timedelta, optional): The result expiration time for the job.
476
- Defaults to 0.
477
- **kwargs: Additional keyword arguments.
478
-
479
- Returns:
480
- UUID: The UUID of the added job.
481
-
482
- Examples:
483
- ```python
484
- pm = PipelineManager()
485
- job_id = pm.add_job("my_job")
486
- ```
487
- """
488
- if SchedulerManager is None:
489
- raise ValueError(
490
- "APScheduler4 not installed. Please install it first. "
491
- "Run `pip install 'flowerpower[scheduler]'`."
492
- )
493
-
494
- with SchedulerManager(
495
- name=f"{self.cfg.project.name}.{name}",
496
- fs=self._fs,
497
- role="scheduler",
498
- ) as sm:
499
- kwargs.update(
500
- {
501
- arg: eval(arg)
502
- for arg in [
503
- "name",
504
- "inputs",
505
- "final_vars",
506
- "config",
507
- "executor",
508
- "with_tracker",
509
- "with_opentelemetry",
510
- "with_progressbar",
511
- "reload",
512
- ]
513
- }
514
- )
515
- id_ = sm.add_job(
516
- self.run,
517
- kwargs=kwargs,
518
- job_executor=(
519
- executor
520
- if executor in ["async", "threadpool", "processpool", ""]
521
- else "threadpool" if executor == "future_adapter" else "threadpool"
522
- ),
523
- result_expiration_time=result_expiration_time,
524
- )
525
- rich.print(
526
- f"✅ Successfully added job for "
527
- f"[blue]{self.cfg.project.name}.{name}[/blue] with ID [green]{id_}[/green]"
528
- )
529
- return id_
530
-
531
- def schedule(
532
- self,
533
- name: str,
534
- inputs: dict | None = None,
535
- final_vars: list | None = None,
536
- config: dict | None = None,
537
- executor: str | None = None,
538
- with_tracker: bool | None = None,
539
- with_opentelemetry: bool | None = None,
540
- with_progressbar: bool | None = None,
541
- trigger_type: str | None = None,
542
- id_: str | None = None,
543
- paused: bool = False,
544
- coalesce: str = "latest",
545
- misfire_grace_time: float | dt.timedelta | None = None,
546
- max_jitter: float | dt.timedelta | None = None,
547
- max_running_jobs: int | None = None,
548
- conflict_policy: str = "do_nothing",
549
- overwrite: bool = False,
550
- **kwargs,
551
- ) -> str:
552
- """
553
- Schedule a pipeline for execution.
554
-
555
- Args:
556
- name (str): The name of the pipeline.
557
- executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
558
- trigger_type (str | None, optional): The type of trigger for the pipeline. Defaults to None.
559
- inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
560
- final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
561
- config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
562
- Defaults to None.
563
- with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
564
- with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
565
- Defaults to None.
566
- with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
567
- id_ (str | None, optional): The ID of the scheduled pipeline. Defaults to None.
568
- paused (bool, optional): Whether the pipeline should be initially paused. Defaults to False.
569
- coalesce (str, optional): The coalesce strategy for the pipeline. Defaults to "latest".
570
- misfire_grace_time (float | dt.timedelta | None, optional): The grace time for misfired jobs.
571
- Defaults to None.
572
- max_jitter (float | dt.timedelta | None, optional): The maximum number of seconds to randomly add to the
573
- scheduled. Defaults to None.
574
- max_running_jobs (int | None, optional): The maximum number of running jobs for the pipeline.
575
- Defaults to None.
576
- conflict_policy (str, optional): The conflict policy for the pipeline. Defaults to "do_nothing".
577
- job_result_expiration_time (float | dt.timedelta | None, optional): The result expiration time for the job.
578
- Defaults to None.
579
- overwrite (bool, optional): Whether to overwrite an existing schedule with the same name. Defaults to False.
580
- **kwargs: Additional keyword arguments for the trigger.
581
-
582
- Returns:
583
- str: The ID of the scheduled pipeline.
584
-
585
- Raises:
586
- ValueError: If APScheduler4 is not installed.
587
-
588
- Examples:
589
- ```python
590
- pm = PipelineManager()
591
- schedule_id = pm.schedule("my_pipeline")
592
- ```
593
- """
594
- if SchedulerManager is None:
595
- raise ValueError(
596
- "APScheduler4 not installed. Please install it first. "
597
- "Run `pip install 'flowerpower[scheduler]'`."
598
- )
599
-
600
- if not self.cfg.pipeline.name == name:
601
- self.load_config(name=name)
602
-
603
- schedule_cfg = self.cfg.pipeline.schedule # .copy()
604
- run_cfg = self.cfg.pipeline.run
605
-
606
- kwargs.update(
607
- {arg: eval(arg) or getattr(run_cfg, arg) for arg in run_cfg.to_dict()}
608
- )
609
- trigger_type = trigger_type or schedule_cfg.trigger.type_
610
-
611
- trigger_kwargs = {
612
- key: kwargs.pop(key, None)
613
- or getattr(getattr(schedule_cfg.trigger, trigger_type), key)
614
- for key in getattr(schedule_cfg.trigger, trigger_type).to_dict()
615
- }
616
-
617
- trigger_kwargs.pop("type_", None)
618
-
619
- schedule_kwargs = {
620
- arg: eval(arg) or getattr(schedule_cfg.run, arg)
621
- for arg in schedule_cfg.run.to_dict()
622
- }
623
- executor = executor or schedule_cfg.run.executor
624
- # id_ = id_ or schedule_cfg.run.id_
625
-
626
- def _get_id() -> str:
627
- if id_:
628
- return id_
629
-
630
- if overwrite:
631
- return f"{name}-1"
632
-
633
- ids = [schedule.id for schedule in self._get_schedules()]
634
- if any([name in id_ for id_ in ids]):
635
- id_num = sorted([id_ for id_ in ids if name in id_])[-1].split("-")[-1]
636
- return f"{name}-{int(id_num) + 1}"
637
- return f"{name}-1"
638
-
639
- id_ = _get_id()
640
-
641
- schedule_kwargs.pop("executor", None)
642
- schedule_kwargs.pop("id_", None)
643
-
644
- with SchedulerManager(
645
- name=f"{self.cfg.project.name}.{name}",
646
- fs=self._fs,
647
- role="scheduler",
648
- ) as sm:
649
- trigger = get_trigger(type_=trigger_type, **trigger_kwargs)
650
-
651
- if overwrite:
652
- sm.remove_schedule(id_)
653
-
654
- id_ = sm.add_schedule(
655
- func_or_task_id=self.run,
656
- trigger=trigger,
657
- id=id_,
658
- args=(name,), # inputs, final_vars, config, executor, with_tracker),
659
- kwargs=kwargs,
660
- job_executor=(
661
- executor
662
- if executor in ["async", "threadpool", "processpool", ""]
663
- else "threadpool" if executor == "future_adapter" else "threadpool"
664
- ),
665
- **schedule_kwargs,
666
- )
667
- rich.print(
668
- f"✅ Successfully added schedule for "
669
- f"[blue]{self.cfg.project.name}.{name}[/blue] with ID [green]{id_}[/green]"
670
- )
671
- return id_
672
-
673
- def schedule_all(
674
- self,
675
- inputs: dict | None = None,
676
- final_vars: list | None = None,
677
- config: dict | None = None,
678
- executor: str | None = None,
679
- with_tracker: bool | None = None,
680
- with_opentelemetry: bool | None = None,
681
- with_progressbar: bool | None = None,
682
- trigger_type: str | None = None,
683
- id_: str | None = None,
684
- paused: bool = False,
685
- coalesce: str = "latest",
686
- misfire_grace_time: float | dt.timedelta | None = None,
687
- max_jitter: float | dt.timedelta | None = None,
688
- max_running_jobs: int | None = None,
689
- conflict_policy: str = "do_nothing",
690
- overwrite: bool = False,
691
- **kwargs,
692
- ):
693
- pipelines = self._get_names()
694
- for name in pipelines:
695
- self.schedule(
696
- name=name,
697
- inputs=inputs,
698
- final_vars=final_vars,
699
- config=config,
700
- executor=executor,
701
- with_tracker=with_tracker,
702
- with_opentelemetry=with_opentelemetry,
703
- with_progressbar=with_progressbar,
704
- trigger_type=trigger_type,
705
- id_=id_,
706
- paused=paused,
707
- coalesce=coalesce,
708
- misfire_grace_time=misfire_grace_time,
709
- max_jitter=max_jitter,
710
- max_running_jobs=max_running_jobs,
711
- conflict_policy=conflict_policy,
712
- overwrite=overwrite,
713
- **kwargs,
714
- )
715
-
716
- def new(
717
- self,
718
- name: str,
719
- overwrite: bool = False,
720
- ):
721
- """
722
- Adds a pipeline with the given name.
723
-
724
- Args:
725
- name (str | None, optional): The name of the pipeline.
726
- Defaults to None.
727
- overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name. Defaults to False.
728
-
729
- Returns:
730
- None
731
-
732
- Raises:
733
- ValueError: If the configuration path or pipeline path does not exist.
734
-
735
- Examples:
736
- ```python
737
- pm = PipelineManager()
738
- pm.new("my_pipeline")
739
- ```
740
- """
741
- if not self._fs.exists(self._cfg_dir):
742
- raise ValueError(
743
- f"Configuration path {self._cfg_dir} does not exist. Please run flowerpower init first."
744
- )
745
- if not self._fs.exists(self._pipelines_dir):
746
- raise ValueError(
747
- f"Pipeline path {self._pipelines_dir} does not exist. Please run flowerpower init first."
748
- )
749
-
750
- if self._fs.exists(f"{self._pipelines_dir}/{name.replace('.', '/')}.py"):
751
- if overwrite:
752
- self._fs.rm(f"{self._pipelines_dir}/{name.replace('.', '/')}.py")
753
- else:
754
- raise ValueError(
755
- f"Pipeline {self.cfg.project.name}.{name.replace('.', '/')} already exists. "
756
- "Use `overwrite=True` to overwrite."
757
- )
758
- if self._fs.exists(f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml"):
759
- if overwrite:
760
- self._fs.rm(f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml")
761
- else:
762
- raise ValueError(
763
- f"Pipeline {self.cfg.project.name}.{name.replace('.', '/')} already exists. "
764
- "Use `overwrite=True` to overwrite."
765
- )
766
- if self._fs.exists(f"hooks/{name.replace(".", "/")}"):
767
- if overwrite:
768
- self._fs.rm(f"hooks/{name.replace(".", "/")}", recursive=True) #Delete all hooks in the folder
769
- else:
770
- raise ValueError(
771
- f"Pipeline {self.cfg.project.name}.{name.replace(".", "/")} alreads exists. "
772
- "Use `overwrite=True`to overwrite."
773
- )
774
-
775
- pipeline_path = f"{self._pipelines_dir}/{name.replace('.', '/')}.py"
776
- cfg_path = f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml"
777
- hook_path = f"hooks/{name.replace(".", "/")}"
778
-
779
- self._fs.makedirs(pipeline_path.rsplit("/", 1)[0], exist_ok=True)
780
- self._fs.makedirs(cfg_path.rsplit("/", 1)[0], exist_ok=True)
781
- self._fs.makedirs(hook_path, exist_ok=True)
782
-
783
- with self._fs.open(pipeline_path, "w") as f:
784
- f.write(
785
- PIPELINE_PY_TEMPLATE.format(
786
- name=name,
787
- date=dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
788
- )
789
- )
790
-
791
- self.cfg.pipeline = PipelineConfig(name=name)
792
- self.cfg.save()
793
-
794
- rich.print(
795
- f"🔧 Created new pipeline [bold blue]{self.cfg.project.name}.{name}[/bold blue]"
796
- )
797
-
798
def import_pipeline(
    self,
    name: str,
    path: str,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Import a pipeline (module + config) from a given path.

    The path can be local or remote (S3 bucket, GitHub repository, ...);
    any readable fsspec filesystem is supported.

    Args:
        name (str): The name of the pipeline.
        path (str): The path to import the pipeline from.
        cfg_dir (str, optional): The configuration directory. Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
            Defaults to False.

    Raises:
        ValueError: If the source config/pipeline path does not exist, or if the
            pipeline already exists locally and ``overwrite`` is False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.import_pipeline(
            "my_pipeline",
            "s3://bucket/path",
            storage_options={"key": "my_key", "secret": "my_secret"},
        )
        ```
    """
    if fs is not None:
        fs = get_filesystem(path, fs=fs)
    else:
        # Bug fix: storage_options defaults to None, so **storage_options
        # would raise TypeError; normalize to an empty dict first.
        fs = get_filesystem(path, **(storage_options or {}))

    conf_path = f"{fs.fs.protocol}://{fs.path}/{cfg_dir}"
    pipeline_path = f"{fs.fs.protocol}://{fs.path}/{pipelines_dir}"
    if not fs.exists(cfg_dir):
        raise ValueError(f"Configuration path {conf_path} does not exist.")
    if not fs.exists(pipelines_dir):
        raise ValueError(f"Pipeline path {pipeline_path} does not exist.")

    rel_name = name.replace(".", "/")

    # Refuse to clobber an existing local module/config unless overwrite=True.
    if self._fs.exists(f"{pipelines_dir}/{rel_name}.py"):
        if overwrite:
            self._fs.rm(f"{pipelines_dir}/{rel_name}.py")
        else:
            raise ValueError(
                f"Pipeline {name} already exists at {self._fs.fs.protocol}://{fs.path}. "
                "Use `overwrite=True` to overwrite."
            )
    if self._fs.exists(f"{cfg_dir}/pipelines/{rel_name}.yml"):
        if overwrite:
            self._fs.rm(f"{cfg_dir}/pipelines/{rel_name}.yml")
        else:
            raise ValueError(
                f"Pipeline {name} already exists at {self._fs.fs.protocol}://{fs.path}. "
                "Use `overwrite=True` to overwrite."
            )

    # Copy module and config byte-for-byte from the source filesystem.
    self._fs.write_bytes(
        f"{self._pipelines_dir}/{rel_name}.py",
        fs.read_bytes(f"{pipelines_dir}/{rel_name}.py"),
    )
    self._fs.write_bytes(
        f"{self._cfg_dir}/pipelines/{rel_name}.yml",
        fs.read_bytes(f"{cfg_dir}/pipelines/{rel_name}.yml"),
    )

    rich.print(
        f"🔧 Imported pipeline [bold blue]{name}[/bold blue] from {fs.fs.protocol}://{fs.path}"
    )
888
- def import_many(
889
- self,
890
- names: list[str],
891
- path: str,
892
- cfg_dir: str = "conf",
893
- pipelines_dir: str = "pipelines",
894
- storage_options: dict | Munch | BaseStorageOptions | None = None,
895
- fs: AbstractFileSystem | None = None,
896
- overwrite: bool = False,
897
- ):
898
- """Import many pipelines from a given path.
899
-
900
- The path could be a local path or a remote path like an S3 bucket or GitHub repository.
901
- Any readable fsspec filesystem is supported.
902
-
903
- Args:
904
- names (list[str]): The names of the pipelines.
905
- path (str): The path to import the pipelines from.
906
- cfg_dir (str, optional): The configuration directory. Defaults to "conf".
907
- pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
908
- storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
909
- Defaults to None.
910
- fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
911
- overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
912
- Defaults to False.
913
-
914
- Examples:
915
- ```python
916
- pm = PipelineManager()
917
- pm.import_many(
918
- "s3://bucket/path",
919
- ["my_pipeline", "my_pipeline2"],
920
- storage_options={
921
- "key": "my_key",
922
- "secret": "my_secret",
923
- "endpoint_url":"http://minio:9000"
924
- }
925
- )
926
- ```
927
- """
928
- for name in names:
929
- self.import_pipeline(
930
- path=path,
931
- name=name,
932
- cfg_dir=cfg_dir,
933
- pipelines_dir=pipelines_dir,
934
- storage_options=storage_options,
935
- fs=fs,
936
- overwrite=overwrite,
937
- )
938
-
939
- def import_all(
940
- self,
941
- path: str,
942
- cfg_dir: str = "conf",
943
- pipelines_dir: str = "pipelines",
944
- storage_options: dict | Munch | BaseStorageOptions | None = None,
945
- fs: AbstractFileSystem | None = None,
946
- overwrite: bool = False,
947
- ):
948
- """Import all pipelines from a given path.
949
-
950
- The path could be a local path or a remote path like an S3 bucket or GitHub repository.
951
- Any readable fsspec filesystem is supported.
952
-
953
- Args:
954
- path (str): The path to import the pipelines from.
955
- cfg_dir (str, optional): The configuration directory. Defaults to "conf".
956
- pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
957
- storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
958
- Defaults to None.
959
- fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
960
- overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
961
- Defaults to False.
962
-
963
- Examples:
964
- ```python
965
- pm = PipelineManager()
966
- pm.import_all(
967
- "s3://bucket/path",
968
- storage_options={
969
- "key": "my_key",
970
- "secret": "my_secret",
971
- endpoint_url="http://minio:9000"
972
- }
973
- )
974
- ```
975
- """
976
- names = [
977
- fn.replace(pipelines_dir, "").lstrip("/").rstric(".py").replace("/", ".")
978
- for fn in fs.glob(f"{pipelines_dir}/**/*.py")
979
- ]
980
- self.import_many(
981
- path=path,
982
- names=names,
983
- cfg_dir=cfg_dir,
984
- pipelines_dir=pipelines_dir,
985
- storage_options=storage_options,
986
- fs=fs,
987
- overwrite=overwrite,
988
- )
989
-
990
def export(
    self,
    name: str,
    path: str,
    cfg_dir: str = "conf",
    pipelines_dir: str = "pipelines",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    overwrite: bool = False,
):
    """Export a pipeline (module + config) to a given path.

    The path can be local or remote (S3 bucket, GitHub repository, ...);
    any writable fsspec filesystem is supported.

    Args:
        name (str): The name of the pipeline.
        path (str): The path to export the pipeline to.
        cfg_dir (str, optional): The configuration directory. Defaults to "conf".
        pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None.
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
            Defaults to False.

    Raises:
        ValueError: If the pipeline already exists at the destination and
            ``overwrite`` is False.

    Examples:
        ```python
        pm = PipelineManager()
        pm.export(
            "my_pipeline",
            "s3://bucket/path",
            storage_options={"key": "my_key", "secret": "my_secret"},
        )
        ```
    """
    # Bug fix: storage_options defaults to None, so **storage_options would
    # raise TypeError when no filesystem is supplied either.
    fs = fs or get_filesystem(path, **(storage_options or {}))

    module_dst = posixpath.join(pipelines_dir, name.replace(".", "/") + ".py")
    cfg_dst = posixpath.join(cfg_dir, "pipelines", name.replace(".", "/") + ".yml")

    if fs.exists(module_dst):
        if overwrite:
            fs.rm(module_dst)
        else:
            raise ValueError(
                f"Pipeline {name} already exists at {fs.fs.protocol}://{fs.path}. "
                "Use `overwrite=True` to overwrite."
            )
    if fs.exists(cfg_dst):
        if overwrite:
            fs.rm(cfg_dst)
        else:
            raise ValueError(
                f"Pipeline {name} already exists at {fs.fs.protocol}://{fs.path}. "
                "Use `overwrite=True` to overwrite."
            )

    # NOTE(review): put_file treats the first argument as a local source
    # path; assumes the project root is the working directory — TODO confirm.
    fs.put_file(
        posixpath.join(self._pipelines_dir, name.replace(".", "/") + ".py"),
        module_dst,
    )
    fs.put_file(
        posixpath.join(self._cfg_dir, "pipelines", name.replace(".", "/") + ".yml"),
        cfg_dst,
    )

    rich.print(
        f"🔧 Exported pipeline [bold blue]{name}[/bold blue] to {fs.fs.protocol}://{fs.path}"
    )
1069
- def export_many(
1070
- self,
1071
- path: str,
1072
- names: list[str],
1073
- cfg_dir: str = "conf",
1074
- pipelines_dir: str = "pipelines",
1075
- storage_options: dict | Munch | BaseStorageOptions | None = None,
1076
- fs: AbstractFileSystem | None = None,
1077
- overwrite: bool = False,
1078
- ):
1079
- """Export many pipelines to a given path.
1080
-
1081
- The path could be a local path or a remote path like an S3 bucket or GitHub repository.
1082
- Any writable fsspec filesystem is supported.
1083
-
1084
- Args:
1085
- path (str): The path to export the pipelines to.
1086
- names (list[str]): The names of the pipelines.
1087
- cfg_dir (str, optional): The configuration directory. Defaults to "conf".
1088
- pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines
1089
- storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
1090
- Defaults to None.
1091
- fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
1092
- overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
1093
- Defaults to False.
1094
-
1095
- Examples:
1096
- ```python
1097
- pm = PipelineManager()
1098
- pm.export_many(
1099
- "s3://bucket/path",
1100
- ["my_pipeline", "my_pipeline2"],
1101
- storage_options={
1102
- "key": "my_key",
1103
- "secret": "my_secret",
1104
- "endpoint_url":"http://minio:9000"
1105
- }
1106
- )
1107
- """
1108
- for name in names:
1109
- self.export(
1110
- path=path,
1111
- name=name,
1112
- cfg_dir=cfg_dir,
1113
- pipelines_dir=pipelines_dir,
1114
- storage_options=storage_options,
1115
- fs=fs,
1116
- overwrite=overwrite,
1117
- )
1118
-
1119
- def export_all(
1120
- self,
1121
- path: str,
1122
- cfg_dir: str = "conf",
1123
- pipelines_dir: str = "pipelines",
1124
- storage_options: dict | Munch | BaseStorageOptions | None = None,
1125
- fs: AbstractFileSystem | None = None,
1126
- overwrite: bool = False,
1127
- ):
1128
- """Export all pipelines to a given path.
1129
-
1130
- The path could be a local path or a remote path like an S3 bucket or GitHub repository.
1131
- Any writable fsspec filesystem is supported.
1132
-
1133
- Args:
1134
- path (str): The path to export the pipelines to.
1135
- storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
1136
- Defaults to None.
1137
- cfg_dir (str, optional): The configuration directory. Defaults to "conf".
1138
- pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
1139
- fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
1140
- overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
1141
- Defaults to False.
1142
-
1143
- Examples:
1144
- ```python
1145
- pm = PipelineManager()
1146
- pm.export_all(
1147
- "s3://bucket/path",
1148
- storage_options={
1149
- "key": "my_key",
1150
- "secret": "my_secret",
1151
- "endpoint_url":"http://minio:9000"
1152
- }
1153
- )
1154
- """
1155
- names = [
1156
- fn.replace(self._pipelines_dir, "")
1157
- .lstrip("/")
1158
- .rstric(".py")
1159
- .replace("/", ".")
1160
- for fn in self._fs.glob(f"{self._pipelines_dir}/**/*.py")
1161
- ]
1162
- self.export_many(
1163
- path=path,
1164
- names=names,
1165
- cfg_dir=cfg_dir,
1166
- pipelines_dir=pipelines_dir,
1167
- storage_options=storage_options,
1168
- fs=fs,
1169
- overwrite=overwrite,
1170
- )
1171
-
1172
def delete(self, name: str, cfg: bool = True, module: bool = False, hooks: bool = True):
    """
    Delete a pipeline's configuration, module file, and/or hooks.

    Args:
        name (str): The name of the pipeline to delete.
        cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
        module (bool, optional): Whether to delete the pipeline module file. Defaults to False.
        hooks (bool, optional): Whether to delete the pipeline's hooks. Defaults to True.

    Returns:
        None

    Examples:
        ```python
        pm = PipelineManager()
        pm.delete("my_pipeline")
        ```
    """
    cfg_path = f"{self._cfg_dir}/pipelines/{name}.yml"
    if cfg and self._fs.exists(cfg_path):
        self._fs.rm(cfg_path)
        rich.print(f"🗑️ Deleted pipeline config for {name}")

    module_path = f"{self._pipelines_dir}/{name}.py"
    if module and self._fs.exists(module_path):
        self._fs.rm(module_path)
        rich.print(
            f"🗑️ Deleted pipeline module for {self.cfg.project.name}.{name}"
        )

    hooks_path = f"hooks/{name}/"
    if hooks and self._fs.exists(hooks_path):
        self._fs.rm(hooks_path, recursive=True)
        rich.print(
            f"🗑️ Deleted pipeline hooks for {self.cfg.project.name}.{name}"
        )
1212
def _display_all_function(self, name: str, reload: bool = True, config: dict | None = None):
    """Build a driver for *name* (no executor, no tracker) and return its full function graph."""
    driver, _ = self._get_driver(
        name=name, executor=None, with_tracker=False, reload=reload, config=config
    )
    return driver.display_all_functions()
1218
def save_dag(
    self,
    name: str,
    format: str = "png",
    reload: bool = False,
    config: dict | None = None,
):
    """
    Render the function graph for a pipeline to an image file under ``graphs/``.

    Args:
        name (str): The name of the pipeline.
        format (str, optional): The format of the graph file. Defaults to "png".
        reload (bool, optional): Whether to reload the pipeline module first. Defaults to False.
        config (dict | None, optional): Driver config used when building the graph. Defaults to None.

    Returns:
        None

    Examples:
        ```python
        pm = PipelineManager()
        pm.save_dag("my_pipeline")
        ```
    """
    graph = self._display_all_function(name=name, reload=reload, config=config)

    self._fs.makedirs("graphs", exist_ok=True)
    output_path = posixpath.join(self._base_dir, f"graphs/{name}")
    # `cleanup=True` removes graphviz's intermediate dot source file.
    graph.render(output_path, format=format, cleanup=True)
    rich.print(
        f"📊 Saved graph for {name} to {self._base_dir}/graphs/{name}.{format}"
    )
1254
def show_dag(
    self,
    name: str,
    format: str = "png",
    reload: bool = False,
    raw: bool = False,
    config: dict | None = None,
):
    """
    Display the function graph for a pipeline, or return the raw graph object.

    The chosen format determines which application is used to display the graph.

    Args:
        name (str): The name of the pipeline.
        format (str, optional): The format of the rendered graph. Defaults to "png".
        reload (bool, optional): Whether to reload the pipeline module first. Defaults to False.
        raw (bool, optional): If True, return the graph object instead of displaying it.
            Defaults to False.
        config (dict | None, optional): Driver config used when building the graph. Defaults to None.

    Returns:
        The graph object when ``raw`` is True, otherwise None.

    Examples:
        ```python
        pm = PipelineManager()
        pm.show_dag("my_pipeline")
        ```
    """
    graph = self._display_all_function(name=name, reload=reload, config=config)
    if raw:
        return graph
    rendered = graph.pipe(format)
    view_img(rendered, format=format)
1287
def _get_files(self) -> list[str]:
    """
    List the python module files in the pipelines directory.

    Returns:
        list[str]: Paths of all ``.py`` files directly under the pipelines directory.
    """
    entries = self._fs.ls(self._pipelines_dir)
    return [entry for entry in entries if entry.endswith(".py")]
1296
def _get_names(self) -> list[str]:
    """
    List pipeline names derived from module file names.

    Note: uses the file's basename only, so nested pipelines lose their
    directory prefix here (unlike ``_all_pipelines``).

    Returns:
        list[str]: A list of pipeline names.
    """
    names = []
    for file_path in self._get_files():
        stem, _ = posixpath.splitext(posixpath.basename(file_path))
        names.append(stem)
    return names
1305
- def get_summary(
1306
- self, name: str | None = None, cfg: bool = True, module: bool = True
1307
- ) -> dict[str, dict | str]:
1308
- """
1309
- Get a summary of the pipelines.
1310
-
1311
- Args:
1312
- name (str | None, optional): The name of the pipeline. Defaults to None.
1313
- cfg (bool, optional): Whether to show the configuration. Defaults to True.
1314
- module (bool, optional): Whether to show the module. Defaults to True.
1315
- Returns:
1316
- dict[str, dict | str]: A dictionary containing the pipeline summary.
1317
-
1318
- Examples:
1319
- ```python
1320
- pm = PipelineManager()
1321
- summary=pm.get_summary()
1322
- ```
1323
- """
1324
- if name:
1325
- pipeline_names = [name]
1326
- else:
1327
- pipeline_names = self._get_names()
1328
-
1329
- pipeline_summary = {}
1330
- for name in pipeline_names:
1331
- self.load_config(name)
1332
- if cfg:
1333
- pipeline_summary[name] = {"cfg": self.cfg.pipeline.to_dict()}
1334
- if module:
1335
- pipeline_summary[name].update(
1336
- {
1337
- "module": self._fs.cat(
1338
- f"{self._pipelines_dir}/{name}.py"
1339
- ).decode(),
1340
- }
1341
- )
1342
- return pipeline_summary
1343
-
1344
def show_summary(
    self,
    name: str | None = None,
    cfg: bool = True,
    module: bool = True,
    to_html: bool = False,
    to_svg: bool = False,
) -> None | str:
    """
    Pretty-print a summary of one or all pipelines.

    Args:
        name (str | None, optional): The name of the pipeline. Defaults to None (all).
        cfg (bool, optional): Whether to show the configuration. Defaults to True.
        module (bool, optional): Whether to show the module source. Defaults to True.
        to_html (bool, optional): Whether to export the summary to HTML. Defaults to False.
        to_svg (bool, optional): Whether to export the summary to SVG. Defaults to False.

    Returns:
        None | str: The HTML string if `to_html` is True, the SVG string if
        `to_svg` is True, otherwise None.

    Examples:
        ```python
        pm = PipelineManager()
        pm.show_summary()
        ```
    """
    pipeline_summary = self.get_summary(name=name, cfg=cfg, module=module)

    def add_dict_to_tree(tree, dict_data, style="green"):
        # Recursively mirror nested dicts as tree branches.
        for key, value in dict_data.items():
            if isinstance(value, dict):
                branch = tree.add(f"[cyan]{key}:", style="bold cyan")
                add_dict_to_tree(branch, value, style)
            else:
                tree.add(f"[cyan]{key}:[/] [green]{value}[/]")

    # Bug fix: Console must record output to be able to export HTML/SVG.
    console = Console(record=True)
    for pipeline, info in pipeline_summary.items():
        # Bug fix: only build/print sections whose data was requested —
        # previously cfg=False or module=False raised KeyError here.
        if cfg and "cfg" in info:
            config_tree = Tree("📋 Configuration", style="bold magenta")
            add_dict_to_tree(config_tree, info["cfg"])
            console.print(
                Panel(
                    config_tree,
                    title=f"🔄 Pipeline: {pipeline}",
                    subtitle="Configuration",
                    border_style="blue",
                    padding=(2, 2),
                )
            )
            console.print("\n")

        if module and "module" in info:
            code_view = Syntax(
                info["module"],
                "python",
                theme="default",
                line_numbers=False,
                word_wrap=True,
                code_width=80,
                padding=2,
            )
            console.print(
                Panel(
                    code_view,
                    title=f"🔄 Pipeline: {pipeline}",
                    subtitle="Module",
                    border_style="blue",
                    padding=(2, 2),
                )
            )
            console.print("\n")
    if to_html:
        return console.export_html()
    elif to_svg:
        return console.export_svg()
1430
@property
def summary(self) -> dict[str, dict | str]:
    """
    Summary of all pipelines (configuration and module source).

    Returns:
        dict[str, dict | str]: A dictionary containing the pipeline summary,
        as produced by :meth:`get_summary` with default arguments.
    """
    return self.get_summary()
1440
def _all_pipelines(
    self, show: bool = True, to_html: bool = False, to_svg: bool = False
) -> list[str] | None:
    """
    Collect metadata for all pipelines and optionally print it as a table.

    Args:
        show (bool, optional): Whether to print the table. Defaults to True.
        to_html (bool, optional): Whether to export the table to HTML. Defaults to False.
        to_svg (bool, optional): Whether to export the table to SVG. Defaults to False.

    Returns:
        list[str] | None: A list of pipeline info dicts if `show` is False;
        the exported markup when `to_html`/`to_svg` is set; otherwise None.

    Examples:
        ```python
        pm = PipelineManager()
        all_pipelines = pm._all_pipelines(show=False)
        ```
    """
    # Exporting implies rendering the table first.
    if to_html or to_svg:
        show = True

    files = [f for f in self._fs.ls(self._pipelines_dir) if f.endswith(".py")]
    if not files:
        rich.print("[yellow]No pipelines found[/yellow]")
        return

    names = [
        posixpath.splitext(f)[0]
        .replace(self._pipelines_dir, "")
        .lstrip("/")
        .replace("/", ".")
        for f in files
    ]

    pipeline_info = []
    for file_path, pipeline_name in zip(files, names):
        try:
            mod_time = self._fs.modified(file_path).strftime("%Y-%m-%d %H:%M:%S")
        except NotImplementedError:
            # Some fsspec backends cannot report modification times.
            mod_time = "N/A"
        pipeline_info.append(
            {
                "name": pipeline_name,
                "path": file_path,
                "mod_time": mod_time,
                "size": f"{self._fs.size(file_path) / 1024:.1f} KB",
            }
        )

    if not show:
        return pipeline_info

    table = Table(title="Available Pipelines")
    for header, style in (
        ("Pipeline Name", "blue"),
        ("Path", "magenta"),
        ("Last Modified", "green"),
        ("Size", "cyan"),
    ):
        table.add_column(header, style=style)
    for info in pipeline_info:
        table.add_row(info["name"], info["path"], info["mod_time"], info["size"])

    console = Console(record=True)
    console.print(table)
    if to_html:
        return console.export_html()
    elif to_svg:
        return console.export_svg()
1512
def show_pipelines(self) -> None:
    """
    Print all available pipelines in a formatted table.

    Examples:
        ```python
        pm = PipelineManager()
        pm.show_pipelines()
        ```
    """
    self._all_pipelines(show=True)
1524
def list_pipelines(self) -> list[str]:
    """
    Get a list of all available pipelines.

    Returns:
        list[str] | None: Pipeline info entries as returned by
        ``_all_pipelines(show=False)``.

    Examples:
        ```python
        pm = PipelineManager()
        pipelines = pm.list_pipelines()
        ```
    """
    return self._all_pipelines(show=False)
1539
@property
def pipelines(self) -> list[str]:
    """
    All available pipelines.

    Returns:
        list[str] | None: Pipeline info entries as returned by
        ``_all_pipelines(show=False)``.

    Examples:
        ```python
        pm = PipelineManager()
        pipelines = pm.pipelines
        ```
    """
    return self._all_pipelines(show=False)
1555
- def add_hook(self, name: str, type: HookType, to: str | None = None, function_name: str|None = None):
1556
- """
1557
- Add a hook to the pipeline module.
1558
-
1559
- Args:
1560
- name (str): The name of the pipeline
1561
- type (HookType): The type of the hook.
1562
- to (str | None, optional): The name of the file to add the hook to. Defaults to the hook.py file in the pipelines hooks folder.
1563
- function_name (str | None, optional): The name of the function. If not provided uses default name of hook type.
1564
-
1565
- Returns:
1566
- None
1567
-
1568
- Examples:
1569
- ```python
1570
- pm = PipelineManager()
1571
- pm.add_hook(HookType.PRE_EXECUTE)
1572
- ```
1573
- """
1574
-
1575
-
1576
- if to is None:
1577
- to = f"hooks/{name}/hook.py"
1578
- else:
1579
- to = f"hooks/{name}/{to}"
1580
-
1581
- match type:
1582
- case HookType.MQTT_BUILD_CONFIG:
1583
- template = HOOK_TEMPLATE__MQTT_BUILD_CONFIG
1584
-
1585
- if function_name is None:
1586
- function_name = type.default_function_name()
1587
-
1588
- if not self._fs.exists(to):
1589
- self._fs.makedirs(os.path.dirname(to), exist_ok=True)
1590
-
1591
- with self._fs.open(to, "a") as f:
1592
- f.write(
1593
- template.format(
1594
- function_name=function_name
1595
- )
1596
- )
1597
-
1598
- rich.print(f"🔧 Added hook [bold blue]{type.value}[/bold blue] to {to} as {function_name} for {name}")
1599
-
1600
-
1601
- class Pipeline:
1602
def __init__(
    self,
    name: str,
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """
    Initializes the Pipeline object.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None): The flowerpower base path. Defaults to the
            current working directory.
        storage_options (dict | Munch | BaseStorageOptions | None, optional):
            The storage options. Defaults to None (treated as {}).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use.
            Defaults to None.

    Returns:
        None
    """
    self.name = name
    self._base_dir = base_dir or os.getcwd()
    # Bug fix: the default was a shared mutable dict ({}); use None and
    # normalize here instead. Passing {} explicitly still behaves the same.
    self._storage_options = storage_options or {}
    if fs is None:
        fs = get_filesystem(self._base_dir, **self._storage_options)
    self._fs = fs
1631
def __enter__(self) -> "Pipeline":
    # Bug fix: the annotation said "PipelineManager", but this method is on
    # Pipeline and returns the Pipeline instance itself.
    """Enter the context manager, returning this pipeline instance."""
    return self
1634
def __exit__(
    self,
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb: TracebackType | None,
) -> None:
    """Exit the context manager; no cleanup is currently required."""
    # Returning None means exceptions (if any) propagate to the caller.
    pass
1643
def run(
    self,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool = False,
    with_opentelemetry: bool = False,
    with_progressbar: bool = False,
    reload: bool = False,
    **kwargs,
) -> dict[str, Any]:
    """Run the pipeline synchronously via a temporary PipelineManager.

    Args:
        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
        config (dict | None, optional): The config for the hamilton driver that executes the
            pipeline. Defaults to None.
        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
        with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
        with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline.
            Defaults to False.
        with_progressbar (bool, optional): Whether to include a progress bar for the pipeline.
            Defaults to False.
        reload (bool, optional): Whether to reload the pipeline. Defaults to False.

    Returns:
        dict[str, Any]: The final variables for the pipeline.

    Examples:
        ```python
        p = Pipeline("my_pipeline")
        final_vars = p.run()
        ```
    """
    run_kwargs = dict(
        name=self.name,
        executor=executor,
        inputs=inputs,
        final_vars=final_vars,
        config=config,
        with_tracker=with_tracker,
        with_opentelemetry=with_opentelemetry,
        with_progressbar=with_progressbar,
        reload=reload,
    )
    run_kwargs.update(kwargs)
    with PipelineManager(base_dir=self._base_dir, fs=self._fs) as pm:
        return pm.run(**run_kwargs)
1695
def run_job(
    self,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    **kwargs,
) -> dict[str, Any]:
    """Run the pipeline as a job via a temporary PipelineManager.

    Args:
        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
        config (dict | None, optional): The config for the hamilton driver that executes the
            pipeline. Defaults to None.
        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
        with_tracker (bool | None, optional): Whether to include a tracker for the pipeline.
            Defaults to None.
        with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the
            pipeline. Defaults to None.
        with_progressbar (bool | None, optional): Whether to include a progress bar for the
            pipeline. Defaults to None.

    Returns:
        dict[str, Any]: The final variables for the pipeline.

    Examples:
        ```python
        p = Pipeline("my_pipeline")
        final_vars = p.run_job()
        ```
    """
    job_kwargs = dict(
        name=self.name,
        executor=executor,
        inputs=inputs,
        final_vars=final_vars,
        config=config,
        with_tracker=with_tracker,
        with_opentelemetry=with_opentelemetry,
        with_progressbar=with_progressbar,
    )
    job_kwargs.update(kwargs)
    with PipelineManager(base_dir=self._base_dir, fs=self._fs) as pm:
        return pm.run_job(**job_kwargs)
1745
def add_job(
    self,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    result_expiration_time: float | dt.timedelta = 0,
    **kwargs,
) -> UUID:
    """Enqueue a job for the pipeline via a temporary PipelineManager.

    Args:
        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
        config (dict | None, optional): The config for the hamilton driver that executes the
            pipeline. Defaults to None.
        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
        with_tracker (bool | None, optional): Whether to include a tracker for the pipeline.
            Defaults to None.
        with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the
            pipeline. Defaults to None.
        with_progressbar (bool | None, optional): Whether to include a progress bar for the
            pipeline. Defaults to None.
        result_expiration_time (float | dt.timedelta, optional): The result expiration time.
            Defaults to 0.

    Returns:
        UUID: The job ID.

    Examples:
        ```python
        p = Pipeline("my_pipeline")
        job_id = p.add_job()
        ```
    """
    job_kwargs = dict(
        name=self.name,
        executor=executor,
        inputs=inputs,
        final_vars=final_vars,
        config=config,
        with_tracker=with_tracker,
        with_opentelemetry=with_opentelemetry,
        with_progressbar=with_progressbar,
        result_expiration_time=result_expiration_time,
    )
    job_kwargs.update(kwargs)
    with PipelineManager(base_dir=self._base_dir, fs=self._fs) as pm:
        return pm.add_job(**job_kwargs)
1797
- def schedule(
1798
- self,
1799
- trigger_type: str | None = None,
1800
- inputs: dict | None = None,
1801
- final_vars: list | None = None,
1802
- config: dict | None = None,
1803
- executor: str | None = None,
1804
- with_tracker: bool = False,
1805
- with_opentelemetry: bool = False,
1806
- with_progressbar: bool = False,
1807
- paused: bool = False,
1808
- coalesce: str = "latest",
1809
- misfire_grace_time: float | dt.timedelta | None = None,
1810
- max_jitter: float | dt.timedelta | None = None,
1811
- max_running_jobs: int | None = None,
1812
- conflict_policy: str = "do_nothing",
1813
- **kwargs,
1814
- ) -> str:
1815
- """Schedule the pipeline.
1816
-
1817
- Args:
1818
- trigger_type (str | None, optional): The trigger type for the schedule. Defaults to None.
1819
- inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
1820
- final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
1821
- config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
1822
- Defaults to None.
1823
- executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
1824
- with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
1825
- with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline. Defaults to False.
1826
- with_progressbar (bool, optional): Whether to include a progress bar for the pipeline. Defaults to False.
1827
- paused (bool, optional): Whether to pause the schedule. Defaults to False.
1828
- coalesce (str, optional): The coalesce strategy. Defaults to "latest".
1829
- misfire_grace_time (float | dt.timedelta | None, optional): The misfire grace time. Defaults to None.
1830
- max_jitter (float | dt.timedelta | None, optional): The max jitter. Defaults to None.
1831
- max_running_jobs (int | None, optional): The max running jobs. Defaults to None.
1832
- conflict_policy (str, optional): The conflict policy. Defaults to "do_nothing".
1833
- **kwargs: Additional keyword arguments.
1834
-
1835
- Returns:
1836
- str: The schedule ID.
1837
-
1838
- Examples:
1839
- ```python
1840
- p = Pipeline("my_pipeline")
1841
- schedule_id = p.schedule()
1842
- ```
1843
-
1844
- """
1845
- with PipelineManager(
1846
- base_dir=self._base_dir,
1847
- fs=self._fs,
1848
- ) as pm:
1849
- return pm.schedule(
1850
- name=self.name,
1851
- executor=executor,
1852
- trigger_type=trigger_type,
1853
- inputs=inputs,
1854
- final_vars=final_vars,
1855
- with_tracker=with_tracker,
1856
- with_opentelemetry=with_opentelemetry,
1857
- with_progressbar=with_progressbar,
1858
- paused=paused,
1859
- coalesce=coalesce,
1860
- misfire_grace_time=misfire_grace_time,
1861
- max_jitter=max_jitter,
1862
- max_running_jobs=max_running_jobs,
1863
- conflict_policy=conflict_policy,
1864
- **kwargs,
1865
- )
1866
-
1867
- def export(
1868
- self,
1869
- path: str,
1870
- storage_options: dict | Munch | BaseStorageOptions | None = None,
1871
- fs: AbstractFileSystem | None = None,
1872
- overwrite: bool = False,
1873
- ):
1874
- """Export the pipeline to a given path.
1875
-
1876
- The path could be a local path or a remote path like an S3 bucket or GitHub repository.
1877
- Any writable fsspec filesystem is supported.
1878
-
1879
- Args:
1880
- path (str): The path to export the pipeline to.
1881
- storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
1882
- Defaults to None.
1883
- fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
1884
- overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
1885
- Defaults to False.
1886
-
1887
- Examples:
1888
- ```python
1889
- p = Pipeline("my_pipeline")
1890
- p.export("s3://bucket/path")
1891
- ```
1892
- """
1893
- with PipelineManager(
1894
- base_dir=self._base_dir,
1895
- fs=self._fs,
1896
- ) as pm:
1897
- pm.export(
1898
- name=self.name,
1899
- path=path,
1900
- storage_options=storage_options,
1901
- fs=fs,
1902
- overwrite=overwrite,
1903
- )
1904
-
1905
- def delete(self, cfg: bool = True, module: bool = False, hooks: bool = True):
1906
- """Delete the pipeline.
1907
-
1908
- Args:
1909
- cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
1910
- module (bool, optional): Whether to delete the pipeline module file.
1911
- Defaults to False.
1912
- hooks (bool, optional): Whether to delete the pipeline's hooks. Defaults to True.
1913
-
1914
- Examples:
1915
- ```python
1916
- p = Pipeline("my_pipeline")
1917
- p.delete()
1918
- ```
1919
- """
1920
- with PipelineManager(
1921
- base_dir=self._base_dir,
1922
- fs=self._fs,
1923
- ) as pm:
1924
- pm.delete(self.name, cfg=cfg, module=module, hooks=hooks)
1925
-
1926
- def save_dag(self, format="png", config: dict | None = None):
1927
- """Save a image of the graph of functions for a given name.
1928
-
1929
- Args:
1930
- format (str, optional): The format of the graph file. Defaults to "png".
1931
-
1932
- Examples:
1933
- ```python
1934
- p = Pipeline("my_pipeline")
1935
- p.save_dag()
1936
- ```
1937
- """
1938
- with PipelineManager(
1939
- base_dir=self._base_dir,
1940
- fs=self._fs,
1941
- ) as pm:
1942
- pm.save_dag(self.name, format, config=config)
1943
-
1944
- def show_dag(
1945
- self,
1946
- config: dict | None = None,
1947
- ):
1948
- """Display the graph of functions for a given name.
1949
-
1950
- Examples:
1951
- ```python
1952
- p = Pipeline("my_pipeline")
1953
- p.show_dag()
1954
- ```
1955
- """
1956
- with PipelineManager(
1957
- base_dir=self._base_dir,
1958
- fs=self._fs,
1959
- ) as pm:
1960
- return pm.show_dag(self.name, config=config)
1961
-
1962
- def get_summary(
1963
- self, cfg: bool = True, module: bool = True
1964
- ) -> dict[str, dict | str]:
1965
- """Get a summary of the pipeline.
1966
-
1967
- Args:
1968
- cfg (bool, optional): Whether to show the configuration. Defaults to True.
1969
- module (bool, optional): Whether to show the module. Defaults to True.
1970
-
1971
- Returns:
1972
- dict[str, dict | str]: A dictionary containing the pipeline summary.
1973
-
1974
- Examples:
1975
- ```python
1976
- p = Pipeline("my_pipeline")
1977
- summary = p.get_summary()
1978
- ```
1979
- """
1980
- with PipelineManager(
1981
- base_dir=self._base_dir,
1982
- fs=self._fs,
1983
- ) as pm:
1984
- return pm.get_summary(self.name, cfg=cfg, module=module)[self.name]
1985
-
1986
- def show_summary(self, cfg: bool = True, module: bool = True):
1987
- """Show a summary of the pipeline.
1988
-
1989
- Args:
1990
- cfg (bool, optional): Whether to show the configuration. Defaults to True.
1991
- module (bool, optional): Whether to show the module. Defaults to True.
1992
-
1993
- Examples:
1994
- ```python
1995
- p = Pipeline("my_pipeline")
1996
- p.show_summary()
1997
- ```
1998
- """
1999
- with PipelineManager(
2000
- base_dir=self._base_dir,
2001
- fs=self._fs,
2002
- ) as pm:
2003
- pm.show_summary(self.name, cfg=cfg, module=module)
2004
-
2005
- @property
2006
- def summary(self) -> dict[str, dict | str]:
2007
- """Get a summary of the pipeline.
2008
-
2009
- Returns:
2010
- dict[str, dict | str]: A dictionary containing the pipeline summary.
2011
- """
2012
- return self.get_summary()
2013
-
2014
-
2015
def run(
    name: str,
    base_dir: str | None = None,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool = False,
    with_opentelemetry: bool = False,
    with_progressbar: bool = False,
    storage_options: dict | None = None,
    fs: AbstractFileSystem | None = None,
    **kwargs,
) -> dict[str, Any]:
    """Run a pipeline with the given parameters.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
        config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
            Defaults to None.
        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
        with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
        with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline.
            Defaults to False.
        with_progressbar (bool, optional): Whether to include a progress bar for the pipeline.
            Defaults to False.
        storage_options (dict | None, optional): The fsspec storage options. Defaults to None
            (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
        **kwargs: Additional keyword arguments.

    Returns:
        dict[str, Any]: The final variables for the pipeline.

    Examples:
        ```python
        final_vars = run("my_pipeline", inputs={"param": 1}, base_dir="my_flowerpower_project")
        ```
    """
    # Avoid the shared-mutable-default pitfall (`storage_options={}`):
    # materialize a fresh dict per call instead.
    storage_options = storage_options if storage_options is not None else {}
    with Pipeline(
        base_dir=base_dir, name=name, storage_options=storage_options, fs=fs
    ) as p:
        return p.run(
            inputs=inputs,
            final_vars=final_vars,
            config=config,
            executor=executor,
            with_tracker=with_tracker,
            with_opentelemetry=with_opentelemetry,
            with_progressbar=with_progressbar,
            **kwargs,
        )
2068
-
2069
-
2070
def run_job(
    name: str,
    base_dir: str | None = None,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    # result_expiration_time: float | dt.timedelta = 0,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    **kwargs,
) -> dict[str, Any]:
    """Run a pipeline as a job with the given parameters.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
        config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
            Defaults to None.
        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
        with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
        with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
            Defaults to None.
        with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
            Defaults to None.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
        **kwargs: Additional keyword arguments.

    Returns:
        dict[str, Any]: The final variables for the pipeline.

    Examples:
        ```python
        final_vars = run_job("my_pipeline", inputs={"param": 1}, base_dir="my_flowerpower_project")
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with Pipeline(
        base_dir=base_dir, name=name, storage_options=storage_options, fs=fs
    ) as p:
        return p.run_job(
            inputs=inputs,
            final_vars=final_vars,
            config=config,
            executor=executor,
            with_tracker=with_tracker,
            with_opentelemetry=with_opentelemetry,
            with_progressbar=with_progressbar,
            # result_expiration_time=result_expiration_time,
            **kwargs,
        )
2125
-
2126
-
2127
def add_job(
    name: str,
    base_dir: str | None = None,
    inputs: dict | None = None,
    final_vars: list | None = None,
    config: dict | None = None,
    executor: str | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    result_expiration_time: float | dt.timedelta = 0,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    **kwargs,
) -> UUID:
    """
    Add a job to run the pipeline with the given parameters to the worker data store.

    Executes the job immediately and returns the job id (UUID). The job result will be
    stored in the data store for the given `result_expiration_time` and can be fetched
    using the job id (UUID).

    Args:
        name (str): The name of the job.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        inputs (dict | None, optional): The inputs for the job. Defaults to None.
        final_vars (list | None, optional): The final variables for the job. Defaults to None.
        config (dict | None, optional): The config for the hamilton driver that executes the job.
            Defaults to None.
        executor (str | None, optional): The executor to use for the job. Defaults to None.
        with_tracker (bool | None, optional): Whether to use a tracker for the job. Defaults to None.
        with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry for the job. Defaults to None.
        with_progressbar (bool | None, optional): Whether to use a progress bar for the job. Defaults to None.
        result_expiration_time (float | dt.timedelta, optional): The expiration time for the job result.
            Defaults to 0.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
        **kwargs: Additional keyword arguments.

    Returns:
        UUID: The UUID of the added job.

    Examples:
        ```python
        job_id = add_job("my_job")
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    # Use the context manager like the sibling functions so the pipeline's
    # resources are released deterministically.
    with Pipeline(
        name=name, base_dir=base_dir, storage_options=storage_options, fs=fs
    ) as p:
        return p.add_job(
            executor=executor,
            inputs=inputs,
            final_vars=final_vars,
            config=config,
            with_tracker=with_tracker,
            with_opentelemetry=with_opentelemetry,
            with_progressbar=with_progressbar,
            result_expiration_time=result_expiration_time,
            **kwargs,
        )
2185
-
2186
-
2187
def schedule(
    name: str,
    base_dir: str | None = None,
    inputs: dict | None = None,
    final_vars: list | None = None,
    executor: str | None = None,
    config: dict | None = None,
    with_tracker: bool | None = None,
    with_opentelemetry: bool | None = None,
    with_progressbar: bool | None = None,
    trigger_type: str | None = None,
    id_: str | None = None,
    paused: bool = False,
    coalesce: str = "latest",
    misfire_grace_time: float | dt.timedelta | None = None,
    max_jitter: float | dt.timedelta | None = None,
    max_running_jobs: int | None = None,
    conflict_policy: str = "do_nothing",
    overwrite: bool = False,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
    **kwargs,
) -> str:
    """Schedule a pipeline with the given parameters.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
        config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
            Defaults to None.
        with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
        with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
            Defaults to None.
        with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
            Defaults to None.
        trigger_type (str | None, optional): The trigger type for the schedule. Defaults to None.
        id_ (str | None, optional): The schedule ID. Defaults to None.
        paused (bool, optional): Whether to pause the schedule. Defaults to False.
        coalesce (str, optional): The coalesce strategy. Defaults to "latest".
        misfire_grace_time (float | dt.timedelta | None, optional): The misfire grace time. Defaults to None.
        max_jitter (float | dt.timedelta | None, optional): The max jitter. Defaults to None.
        max_running_jobs (int | None, optional): The max running jobs. Defaults to None.
        conflict_policy (str, optional): The conflict policy. Defaults to "do_nothing".
        overwrite (bool, optional): Whether to overwrite an existing schedule. Defaults to False.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
        **kwargs: Additional keyword arguments.

    Returns:
        str: The schedule ID.

    Examples:
        ```python
        schedule_id = schedule("my_pipeline", trigger_type="interval", seconds=60)
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    # NOTE(review): `id_` is accepted but not forwarded to Pipeline.schedule,
    # whose signature has no such parameter — confirm whether it should be
    # passed through (e.g. via **kwargs) or removed from this signature.
    with Pipeline(
        base_dir=base_dir,
        name=name,
        storage_options=storage_options,
        fs=fs,
    ) as p:
        return p.schedule(
            executor=executor,
            trigger_type=trigger_type,
            inputs=inputs,
            final_vars=final_vars,
            config=config,
            with_tracker=with_tracker,
            with_opentelemetry=with_opentelemetry,
            with_progressbar=with_progressbar,
            paused=paused,
            coalesce=coalesce,
            misfire_grace_time=misfire_grace_time,
            max_jitter=max_jitter,
            max_running_jobs=max_running_jobs,
            conflict_policy=conflict_policy,
            overwrite=overwrite,
            **kwargs,
        )
2269
-
2270
-
2271
def new(
    name: str,
    base_dir: str | None = None,
    overwrite: bool = False,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Create a new pipeline with the given name.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
            Defaults to False.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Examples:
        ```python
        new("my_pipeline")
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with PipelineManager(
        base_dir=base_dir,
        fs=fs,
    ) as pm:
        pm.new(name=name, overwrite=overwrite, storage_options=storage_options)
2297
-
2298
-
2299
def delete(
    name: str,
    base_dir: str | None = None,
    cfg: bool = True,
    module: bool = False,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Delete a pipeline.

    Args:
        name (str): The name of the pipeline to delete.
        base_dir (str | None, optional): The base path of the pipeline. Defaults to None.
        cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
        module (bool, optional): Whether to delete the pipeline module. Defaults to False.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with Pipeline(
        name=name, base_dir=base_dir, storage_options=storage_options, fs=fs
    ) as p:
        # NOTE(review): Pipeline.delete also accepts `hooks` (default True),
        # which is not exposed here — confirm whether it should be.
        p.delete(cfg=cfg, module=module)
2321
-
2322
-
2323
def save_dag(
    name: str,
    base_dir: str | None = None,
    format: str = "png",
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Save an image of the graph of functions for a given name.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        format (str, optional): The format of the graph file. Defaults to "png".
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Examples:
        ```python
        save_dag("my_pipeline")
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with Pipeline(
        base_dir=base_dir,
        name=name,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        pm.save_dag(format=format)
2351
-
2352
-
2353
def show_dag(
    name: str,
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Display the graph of functions for a given name.

    Args:
        name (str): The name of the pipeline.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Examples:
        ```python
        show_dag("my_pipeline")
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with Pipeline(
        base_dir=base_dir,
        name=name,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        return pm.show_dag()
2379
-
2380
-
2381
def get_summary(
    name: str | None = None,
    base_dir: str | None = None,
    cfg: bool = True,
    module: bool = True,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
) -> dict[str, dict | str]:
    """Get a summary of one pipeline or of all pipelines.

    Args:
        name (str | None, optional): The name of the pipeline. When None, summaries for
            all pipelines are returned. Defaults to None.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        cfg (bool, optional): Whether to show the configuration. Defaults to True.
        module (bool, optional): Whether to show the module. Defaults to True.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Returns:
        dict[str, dict | str]: The summary of the named pipeline, or a mapping of
        pipeline name to summary when `name` is None.

    Examples:
        ```python
        summary = get_summary("my_pipeline")
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        summary = pm.get_summary(name=name, cfg=cfg, module=module)
        # Unwrap the single-pipeline entry when a name was requested.
        if name:
            return summary[name]
        return summary
2416
-
2417
-
2418
def show_summary(
    name: str | None = None,
    base_dir: str | None = None,
    cfg: bool = True,
    module: bool = True,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Show a summary of one pipeline or of all pipelines.

    Args:
        name (str | None, optional): The name of the pipeline. When None, summaries for
            all pipelines are shown. Defaults to None.
        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
        cfg (bool, optional): Whether to show the configuration. Defaults to True.
        module (bool, optional): Whether to show the module. Defaults to True.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Examples:
        ```python
        show_summary("my_pipeline")
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        pm.show_summary(name=name, cfg=cfg, module=module)
2447
-
2448
-
2449
def show_pipelines(
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
):
    """Display all available pipelines in a formatted table.

    Args:
        base_dir (str | None, optional): The base path of the pipelines. Defaults to None.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Examples:
        ```python
        show_pipelines()
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        pm.show_pipelines()
2472
-
2473
-
2474
def list_pipelines(
    base_dir: str | None = None,
    storage_options: dict | Munch | BaseStorageOptions | None = None,
    fs: AbstractFileSystem | None = None,
) -> list[str]:
    """Get a list of all available pipelines.

    Args:
        base_dir (str | None, optional): The base path of the pipelines. Defaults to None.
        storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
            Defaults to None (treated as an empty dict).
        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.

    Returns:
        list[str]: A list of pipeline names.

    Examples:
        ```python
        pipelines = list_pipelines()
        ```
    """
    # Replace the mutable default argument (`storage_options={}`) with a
    # per-call fresh dict.
    storage_options = storage_options if storage_options is not None else {}
    with PipelineManager(
        base_dir=base_dir,
        storage_options=storage_options,
        fs=fs,
    ) as pm:
        return pm.list_pipelines()
2499
-