FlowerPower 0.9.12.4__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -35
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +49 -4
  14. flowerpower/cli/pipeline.py +576 -381
  15. flowerpower/cli/utils.py +55 -0
  16. flowerpower/flowerpower.py +12 -7
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +18 -142
  59. flowerpower/web/app.py +0 -0
  60. flowerpower-1.0.0b1.dist-info/METADATA +324 -0
  61. flowerpower-1.0.0b1.dist-info/RECORD +94 -0
  62. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +1 -1
  63. flowerpower/cfg/pipeline/tracker.py +0 -14
  64. flowerpower/cfg/project/open_telemetry.py +0 -8
  65. flowerpower/cfg/project/tracker.py +0 -11
  66. flowerpower/cfg/project/worker.py +0 -19
  67. flowerpower/cli/scheduler.py +0 -309
  68. flowerpower/event_handler.py +0 -23
  69. flowerpower/mqtt.py +0 -525
  70. flowerpower/pipeline.py +0 -2419
  71. flowerpower/scheduler.py +0 -680
  72. flowerpower/tui.py +0 -79
  73. flowerpower/utils/datastore.py +0 -186
  74. flowerpower/utils/eventbroker.py +0 -127
  75. flowerpower/utils/executor.py +0 -58
  76. flowerpower/utils/trigger.py +0 -140
  77. flowerpower-0.9.12.4.dist-info/METADATA +0 -575
  78. flowerpower-0.9.12.4.dist-info/RECORD +0 -70
  79. /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  80. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
  81. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
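The largest single removal below is the monolithic flowerpower/pipeline.py; its PipelineManager and Pipeline classes are superseded in 1.0.0b1 by the new flowerpower/pipeline/ package (manager, registry, runner, visualizer) and the job_queue/ backends listed above. For orientation, here is a minimal sketch of the 0.9.x usage that this release deletes, reconstructed from the docstrings in the diff below; it is illustrative only and does not run against 1.0.0b1:

```python
# Sketch of the removed 0.9.x API (reconstructed from the deleted module's
# docstrings; illustrative, not the new 1.0.0b1 API).
from flowerpower.pipeline import PipelineManager, Pipeline

pm = PipelineManager()                    # base_dir defaults to the current working directory
result = pm.run("my_pipeline")            # execute synchronously via the Hamilton driver
job_id = pm.add_job("my_pipeline")        # enqueue through APScheduler ('flowerpower[scheduler]' extra)
schedule_id = pm.schedule("my_pipeline")  # trigger settings come from conf/pipelines/my_pipeline.yml

# Per-pipeline convenience wrapper around PipelineManager
final_vars = Pipeline("my_pipeline").run()
```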
flowerpower/pipeline.py DELETED
@@ -1,2419 +0,0 @@
- import datetime as dt
- import importlib
- import importlib.util
- import os
- import posixpath
- import sys
- from typing import Any, Callable
- from uuid import UUID
-
- from fsspec.spec import AbstractFileSystem
- from hamilton import driver
- from hamilton.execution import executors
- from hamilton.telemetry import disable_telemetry
-
- if importlib.util.find_spec("opentelemetry"):
-     from hamilton.plugins import h_opentelemetry
-
-     from .utils.open_telemetry import init_tracer
-
- else:
-     h_opentelemetry = None
-     init_tracer = None
- import rich
- from hamilton.plugins import h_tqdm
- from hamilton_sdk.adapters import HamiltonTracker
- from hamilton.plugins.h_threadpool import FutureAdapter
- from loguru import logger
- from rich.console import Console
- from rich.panel import Panel
- from rich.syntax import Syntax
- from rich.table import Table
- from rich.tree import Tree
-
- from .cfg import (  # PipelineRunConfig,; PipelineScheduleConfig,; PipelineTrackerConfig,
-     Config,
-     PipelineConfig,
- )
- from .fs import get_filesystem
- from .fs.storage_options import BaseStorageOptions
- from .utils.misc import view_img
- from .utils.templates import PIPELINE_PY_TEMPLATE
-
- if importlib.util.find_spec("apscheduler"):
-     from .scheduler import SchedulerManager
- else:
-     SchedulerManager = None
- from pathlib import Path
- from types import TracebackType
-
- # if importlib.util.find_spec("paho"):
- #     from .mqtt import MQTTClient
- # else:
- #     MQTTClient = None
- from munch import Munch
-
- from .utils.executor import get_executor
- from .utils.trigger import get_trigger  # , ALL_TRIGGER_KWARGS
-
-
- class PipelineManager:
-     def __init__(
-         self,
-         base_dir: str | None = None,
-         storage_options: dict | Munch | BaseStorageOptions = {},
-         fs: AbstractFileSystem | None = None,
-         cfg_dir: str = "conf",
-         pipelines_dir: str = "pipelines",
-         telemetry: bool = True,
-     ):
-         """
-         Initializes the PipelineManager object.
-
-         Args:
-             base_dir (str | None): The flowerpower base path. Defaults to None.
-             storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-             fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-         Returns:
-             None
-         """
-         self._telemetry = telemetry
-         self._base_dir = base_dir or str(Path.cwd())
-         self._storage_options = storage_options or {}
-         if fs is None:
-             fs = get_filesystem(self._base_dir, **self._storage_options)
-         self._fs = fs
-
-         self._cfg_dir = cfg_dir
-         self._pipelines_dir = pipelines_dir
-
-         try:
-             self._fs.makedirs(f"{self._cfg_dir}/pipelines", exist_ok=True)
-             self._fs.makedirs(self._pipelines_dir, exist_ok=True)
-         except Exception as e:
-             logger.error(f"Error creating directories: {e}")
-
-         self._sync_fs()
-         self.load_config()
-
-     def __enter__(self) -> "PipelineManager":
-         return self
-
-     def __exit__(
-         self,
-         exc_type: type[BaseException] | None,
-         exc_val: BaseException | None,
-         exc_tb: TracebackType | None,
-     ) -> None:
-         # Add any cleanup code here if needed
-         pass
-
-     def _get_schedules(self):
-         with SchedulerManager(
-             fs=self._fs,
-             role="scheduler",
-         ) as sm:
-             return sm.get_schedules()
-
-     def _sync_fs(self):
-         """
-         Sync the filesystem.
-
-         Returns:
-             None
-         """
-         if self._fs.is_cache_fs:
-             self._fs.sync()
-
-         modules_path = posixpath.join(self._fs.path, self._pipelines_dir)
-         if modules_path not in sys.path:
-             sys.path.append(modules_path)
-
-     def load_module(self, name: str, reload: bool = False):
-         """
-         Load a module dynamically.
-
-         Args:
-             name (str): The name of the module to load.
-
-         Returns:
-             None
-         """
-         sys.path.append(posixpath.join(self._fs.path, self._pipelines_dir))
-
-         if not hasattr(self, "_module"):
-             self._module = importlib.import_module(name)
-
-         else:
-             if reload:
-                 importlib.reload(self._module)
-
-     def load_config(self, name: str | None = None, reload: bool = False):
-         """
-         Load the configuration file.
-
-         This method loads the configuration file specified by the `_cfg_dir` attribute and
-         assigns it to the `cfg` attribute.
-
-         Args:
-             name (str | None, optional): The name of the pipeline. Defaults to None.
-
-         Returns:
-             None
-         """
-         if reload:
-             del self.cfg
-         self.cfg = Config.load(base_dir=self._base_dir, pipeline_name=name, fs=self._fs)
-
-     def _get_driver(
-         self,
-         name: str,
-         executor: str | None = None,
-         with_tracker: bool = False,
-         with_opentelemetry: bool = False,
-         with_progressbar: bool = False,
-         config: dict = {},
-         reload: bool = False,
-         **kwargs,
-     ) -> tuple[driver.Driver, Callable | None]:
-         """
-         Get the driver and shutdown function for a given pipeline.
-
-         Args:
-             name (str): The name of the pipeline.
-             executor (str | None, optional): The executor to use. Defaults to None.
-             with_tracker (bool, optional): Whether to use the tracker. Defaults to False.
-             with_opentelemetry (bool, optional): Whether to use OpenTelemetry. Defaults to False.
-             with_progressbar (bool, optional): Whether to use a progress bar. Defaults to False.
-             config (dict, optional): The config for the hamilton driver that executes the pipeline.
-                 Defaults to {}.
-             reload (bool, optional): Whether to reload the module. Defaults to False.
-             **kwargs: Additional keyword arguments.
-
-         Keyword Args:
-             max_tasks (int, optional): The maximum number of tasks. Defaults to 20.
-             num_cpus (int, optional): The number of CPUs. Defaults to 4.
-             project_id (str, optional): The project ID for the tracker. Defaults to None.
-             username (str, optional): The username for the tracker. Defaults to None.
-             dag_name (str, optional): The DAG name for the tracker. Defaults to None.
-             tags (str, optional): The tags for the tracker. Defaults to None.
-             api_url (str, optional): The API URL for the tracker. Defaults to None.
-             ui_url (str, optional): The UI URL for the tracker. Defaults to None.
-
-         Returns:
-             tuple[driver.Driver, Callable | None]: A tuple containing the driver and shutdown function.
-         """
-         if not self.cfg.pipeline.name == name or reload:
-             self.load_config(name=name, reload=reload)
-         if not hasattr(self, "_module") or reload:
-             self.load_module(name=name, reload=reload)
-         if self._telemetry:
-             disable_telemetry()
-
-         max_tasks = kwargs.pop("max_tasks", 20)
-         num_cpus = kwargs.pop("num_cpus", 4)
-         executor_, shutdown = get_executor(
-             executor or "local", max_tasks=max_tasks, num_cpus=num_cpus
-         )
-         adapters = []
-         if with_tracker:
-             tracker_cfg = {
-                 **self.cfg.pipeline.tracker.to_dict(),
-                 **self.cfg.project.tracker.to_dict(),
-             }
-             tracker_kwargs = {
-                 key: kwargs.pop(key, None) or tracker_cfg.get(key, None)
-                 for key in tracker_cfg
-             }
-             tracker_kwargs["hamilton_api_url"] = tracker_kwargs.pop("api_url", None)
-             tracker_kwargs["hamilton_ui_url"] = tracker_kwargs.pop("ui_url", None)
-
-             if tracker_kwargs.get("project_id", None) is None:
-                 raise ValueError(
-                     "Please provide a project_id if you want to use the tracker"
-                 )
-
-             tracker = HamiltonTracker(**tracker_kwargs)
-             adapters.append(tracker)
-
-         if with_opentelemetry and h_opentelemetry is not None:
-             trace = init_tracer(
-                 host=kwargs.pop("host", "localhost"),
-                 port=kwargs.pop("port", 6831),
-                 name=f"{self.cfg.project.name}.{name}",
-             )
-             tracer = trace.get_tracer(__name__)
-             adapters.append(h_opentelemetry.OpenTelemetryTracer(tracer=tracer))
-
-         if with_progressbar:
-             adapters.append(h_tqdm.ProgressBar(desc=f"{self.cfg.project.name}.{name}"))
-
-         if executor == "future_adapter":
-             adapters.append(FutureAdapter())
-
-         dr = (
-             driver.Builder()
-             .enable_dynamic_execution(allow_experimental_mode=True)
-             .with_modules(self._module)
-             .with_config(config)
-             .with_local_executor(executors.SynchronousLocalTaskExecutor())
-         )
-
-         if executor_ is not None:
-             dr = dr.with_remote_executor(executor_)
-
-         if len(adapters):
-             dr = dr.with_adapters(*adapters)
-
-         dr = dr.build()
-         return dr, shutdown
-
-     def run(
-         self,
-         name: str,
-         inputs: dict | None = None,
-         final_vars: list | None = None,
-         config: dict | None = None,
-         executor: str | None = None,
-         with_tracker: bool | None = None,
-         with_opentelemetry: bool | None = None,
-         with_progressbar: bool | None = None,
-         reload: bool = False,
-         **kwargs,
-     ) -> dict[str, Any]:
-         """
-         Run the pipeline with the given parameters.
-
-         Args:
-             name (str): The name of the pipeline.
-             executor (str | None, optional): The executor to use. Defaults to None.
-             inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-             final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-             config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
-                 Defaults to None.
-             with_tracker (bool | None, optional): Whether to use a tracker. Defaults to None.
-             with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry. Defaults to None.
-             with_progressbar (bool | None, optional): Whether to use a progress bar. Defaults to None.
-             reload (bool, optional): Whether to reload the pipeline. Defaults to False.
-             **kwargs: Additional keyword arguments.
-
-         Returns:
-             dict[str, Any]: The result of executing the pipeline.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             final_vars = pm.run("my_pipeline")
-             ```
-         """
-         if not self.cfg.pipeline.name == name or reload:
-             self.load_config(name=name, reload=reload)
-
-         if reload or not hasattr(self, "_module"):
-             self.load_module(name=name, reload=reload)
-
-         logger.info(
-             f"Starting pipeline {self.cfg.project.name}.{name}"
-         )  # in environment {environment}")
-
-         run_params = self.cfg.pipeline.run
-
-         final_vars = final_vars or run_params.final_vars
-         inputs = {
-             **(run_params.inputs or {}),
-             **(inputs or {}),
-         }  # <-- inputs override and add to run_params
-         config = {
-             **(run_params.config or {}),
-             **(config or {}),
-         }
-         for arg in [
-             "executor",
-             "with_tracker",
-             "with_opentelemetry",
-             "with_progressbar",
-         ]:
-             if eval(arg) is not None:
-                 kwargs[arg] = eval(arg)
-             else:
-                 kwargs[arg] = getattr(run_params, arg)
-
-         kwargs["config"] = config
-
-         dr, shutdown = self._get_driver(
-             name=name,
-             **kwargs,
-         )
-
-         res = dr.execute(final_vars=final_vars, inputs=inputs)
-
-         logger.success(f"Finished pipeline {self.cfg.project.name}.{name}")
-
-         if shutdown is not None:
-             shutdown()
-
-         return res
-
-     def run_job(
-         self,
-         name: str,
-         inputs: dict | None = None,
-         final_vars: list | None = None,
-         config: dict | None = None,
-         executor: str | None = None,
-         with_tracker: bool | None = None,
-         with_opentelemetry: bool | None = None,
-         with_progressbar: bool | None = None,
-         reload: bool = False,
-         **kwargs,
-     ) -> dict[str, Any]:
-         """
-         Add a job that runs the pipeline with the given parameters to the worker.
-         Executes the job immediately and returns the result of the execution.
-
-         Args:
-             name (str): The name of the job.
-             executor (str | None, optional): The executor to use for the job. Defaults to None.
-             inputs (dict | None, optional): The inputs for the job. Defaults to None.
-             final_vars (list | None, optional): The final variables for the job. Defaults to None.
-             config (dict | None, optional): The configuration for the job. Defaults to None.
-             with_tracker (bool | None, optional): Whether to use a tracker for the job. Defaults to None.
-             with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry for the job. Defaults to None.
-             with_progressbar (bool | None, optional): Whether to use a progress bar for the job. Defaults to None.
-             reload (bool, optional): Whether to reload the job. Defaults to False.
-             **kwargs: Additional keyword arguments.
-
-         Returns:
-             dict[str, Any]: The result of the job execution.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             final_vars = pm.run_job("my_job")
-             ```
-         """
-         if SchedulerManager is None:
-             raise ValueError(
-                 "APScheduler4 not installed. Please install it first. "
-                 "Run `pip install 'flowerpower[scheduler]'`."
-             )
-
-         with SchedulerManager(
-             name=f"{self.cfg.project.name}.{name}",
-             fs=self._fs,
-             role="scheduler",
-         ) as sm:
-             kwargs.update(
-                 {
-                     arg: eval(arg)
-                     for arg in [
-                         "name",
-                         "inputs",
-                         "final_vars",
-                         "config",
-                         "executor",
-                         "with_tracker",
-                         "with_opentelemetry",
-                         "with_progressbar",
-                         "reload",
-                     ]
-                 }
-             )
-             return sm.run_job(
-                 self.run,
-                 kwargs=kwargs,
-                 job_executor=(
-                     executor
-                     if executor in ["async", "threadpool", "processpool", ""]
-                     else "threadpool" if executor == "future_adapter" else "threadpool"
-                 ),
-             )
-
-     def add_job(
-         self,
-         name: str,
-         inputs: dict | None = None,
-         final_vars: list | None = None,
-         config: dict | None = None,
-         executor: str | None = None,
-         with_tracker: bool | None = None,
-         with_opentelemetry: bool | None = None,
-         with_progressbar: bool | None = None,
-         reload: bool = False,
-         result_expiration_time: float | dt.timedelta = 0,
-         **kwargs,
-     ) -> UUID:
-         """
-         Add a job that runs the pipeline with the given parameters to the worker data store.
-         Executes the job immediately and returns the job id (UUID). The job result is stored in the data store
-         for the given `result_expiration_time` and can be fetched using the job id (UUID).
-
-         Args:
-             name (str): The name of the job.
-             executor (str | None, optional): The executor for the job. Defaults to None.
-             inputs (dict | None, optional): The inputs for the job. Defaults to None.
-             final_vars (list | None, optional): The final variables for the job. Defaults to None.
-             config (dict | None, optional): The configuration for the job. Defaults to None.
-             with_tracker (bool | None, optional): Whether to use a tracker for the job. Defaults to None.
-             with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry for the job. Defaults to None.
-             with_progressbar (bool | None, optional): Whether to use a progress bar for the job. Defaults to None.
-             reload (bool, optional): Whether to reload the job. Defaults to False.
-             result_expiration_time (float | dt.timedelta, optional): The result expiration time for the job.
-                 Defaults to 0.
-             **kwargs: Additional keyword arguments.
-
-         Returns:
-             UUID: The UUID of the added job.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             job_id = pm.add_job("my_job")
-             ```
-         """
-         if SchedulerManager is None:
-             raise ValueError(
-                 "APScheduler4 not installed. Please install it first. "
-                 "Run `pip install 'flowerpower[scheduler]'`."
-             )
-
-         with SchedulerManager(
-             name=f"{self.cfg.project.name}.{name}",
-             fs=self._fs,
-             role="scheduler",
-         ) as sm:
-             kwargs.update(
-                 {
-                     arg: eval(arg)
-                     for arg in [
-                         "name",
-                         "inputs",
-                         "final_vars",
-                         "config",
-                         "executor",
-                         "with_tracker",
-                         "with_opentelemetry",
-                         "with_progressbar",
-                         "reload",
-                     ]
-                 }
-             )
-             id_ = sm.add_job(
-                 self.run,
-                 kwargs=kwargs,
-                 job_executor=(
-                     executor
-                     if executor in ["async", "threadpool", "processpool", ""]
-                     else "threadpool" if executor == "future_adapter" else "threadpool"
-                 ),
-                 result_expiration_time=result_expiration_time,
-             )
-             rich.print(
-                 f"✅ Successfully added job for "
-                 f"[blue]{self.cfg.project.name}.{name}[/blue] with ID [green]{id_}[/green]"
-             )
-             return id_
-
-     def schedule(
-         self,
-         name: str,
-         inputs: dict | None = None,
-         final_vars: list | None = None,
-         config: dict | None = None,
-         executor: str | None = None,
-         with_tracker: bool | None = None,
-         with_opentelemetry: bool | None = None,
-         with_progressbar: bool | None = None,
-         trigger_type: str | None = None,
-         id_: str | None = None,
-         paused: bool = False,
-         coalesce: str = "latest",
-         misfire_grace_time: float | dt.timedelta | None = None,
-         max_jitter: float | dt.timedelta | None = None,
-         max_running_jobs: int | None = None,
-         conflict_policy: str = "do_nothing",
-         overwrite: bool = False,
-         **kwargs,
-     ) -> str:
-         """
-         Schedule a pipeline for execution.
-
-         Args:
-             name (str): The name of the pipeline.
-             executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-             trigger_type (str | None, optional): The type of trigger for the pipeline. Defaults to None.
-             inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-             final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-             config (dict | None, optional): The config for the hamilton driver that executes the pipeline.
-                 Defaults to None.
-             with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
-             with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
-                 Defaults to None.
-             with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
-                 Defaults to None.
-             id_ (str | None, optional): The ID of the scheduled pipeline. Defaults to None.
-             paused (bool, optional): Whether the pipeline should be initially paused. Defaults to False.
-             coalesce (str, optional): The coalesce strategy for the pipeline. Defaults to "latest".
-             misfire_grace_time (float | dt.timedelta | None, optional): The grace time for misfired jobs.
-                 Defaults to None.
-             max_jitter (float | dt.timedelta | None, optional): The maximum number of seconds to randomly add to the
-                 scheduled run time. Defaults to None.
-             max_running_jobs (int | None, optional): The maximum number of running jobs for the pipeline.
-                 Defaults to None.
-             conflict_policy (str, optional): The conflict policy for the pipeline. Defaults to "do_nothing".
-             overwrite (bool, optional): Whether to overwrite an existing schedule with the same name. Defaults to False.
-             **kwargs: Additional keyword arguments for the trigger.
-
-         Returns:
-             str: The ID of the scheduled pipeline.
-
-         Raises:
-             ValueError: If APScheduler4 is not installed.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             schedule_id = pm.schedule("my_pipeline")
-             ```
-         """
-         if SchedulerManager is None:
-             raise ValueError(
-                 "APScheduler4 not installed. Please install it first. "
-                 "Run `pip install 'flowerpower[scheduler]'`."
-             )
-
-         if not self.cfg.pipeline.name == name:
-             self.load_config(name=name)
-
-         schedule_cfg = self.cfg.pipeline.schedule  # .copy()
-         run_cfg = self.cfg.pipeline.run
-
-         kwargs.update(
-             {arg: eval(arg) or getattr(run_cfg, arg) for arg in run_cfg.to_dict()}
-         )
-         trigger_type = trigger_type or schedule_cfg.trigger.type_
-
-         trigger_kwargs = {
-             key: kwargs.pop(key, None)
-             or getattr(getattr(schedule_cfg.trigger, trigger_type), key)
-             for key in getattr(schedule_cfg.trigger, trigger_type).to_dict()
-         }
-
-         trigger_kwargs.pop("type_", None)
-
-         schedule_kwargs = {
-             arg: eval(arg) or getattr(schedule_cfg.run, arg)
-             for arg in schedule_cfg.run.to_dict()
-         }
-         executor = executor or schedule_cfg.run.executor
-         # id_ = id_ or schedule_cfg.run.id_
-
-         def _get_id() -> str:
-             if id_:
-                 return id_
-
-             if overwrite:
-                 return f"{name}-1"
-
-             ids = [schedule.id for schedule in self._get_schedules()]
-             if any([name in id_ for id_ in ids]):
-                 id_num = sorted([id_ for id_ in ids if name in id_])[-1].split("-")[-1]
-                 return f"{name}-{int(id_num) + 1}"
-             return f"{name}-1"
-
-         id_ = _get_id()
-
-         schedule_kwargs.pop("executor", None)
-         schedule_kwargs.pop("id_", None)
-
-         with SchedulerManager(
-             name=f"{self.cfg.project.name}.{name}",
-             fs=self._fs,
-             role="scheduler",
-         ) as sm:
-             trigger = get_trigger(type_=trigger_type, **trigger_kwargs)
-
-             if overwrite:
-                 sm.remove_schedule(id_)
-
-             id_ = sm.add_schedule(
-                 func_or_task_id=self.run,
-                 trigger=trigger,
-                 id=id_,
-                 args=(name,),  # inputs, final_vars, config, executor, with_tracker),
-                 kwargs=kwargs,
-                 job_executor=(
-                     executor
-                     if executor in ["async", "threadpool", "processpool", ""]
-                     else "threadpool" if executor == "future_adapter" else "threadpool"
-                 ),
-                 **schedule_kwargs,
-             )
-             rich.print(
-                 f"✅ Successfully added schedule for "
-                 f"[blue]{self.cfg.project.name}.{name}[/blue] with ID [green]{id_}[/green]"
-             )
-             return id_
-
-     def schedule_all(
-         self,
-         inputs: dict | None = None,
-         final_vars: list | None = None,
-         config: dict | None = None,
-         executor: str | None = None,
-         with_tracker: bool | None = None,
-         with_opentelemetry: bool | None = None,
-         with_progressbar: bool | None = None,
-         trigger_type: str | None = None,
-         id_: str | None = None,
-         paused: bool = False,
-         coalesce: str = "latest",
-         misfire_grace_time: float | dt.timedelta | None = None,
-         max_jitter: float | dt.timedelta | None = None,
-         max_running_jobs: int | None = None,
-         conflict_policy: str = "do_nothing",
-         overwrite: bool = False,
-         **kwargs,
-     ):
-         pipelines = self._get_names()
-         for name in pipelines:
-             self.schedule(
-                 name=name,
-                 inputs=inputs,
-                 final_vars=final_vars,
-                 config=config,
-                 executor=executor,
-                 with_tracker=with_tracker,
-                 with_opentelemetry=with_opentelemetry,
-                 with_progressbar=with_progressbar,
-                 trigger_type=trigger_type,
-                 id_=id_,
-                 paused=paused,
-                 coalesce=coalesce,
-                 misfire_grace_time=misfire_grace_time,
-                 max_jitter=max_jitter,
-                 max_running_jobs=max_running_jobs,
-                 conflict_policy=conflict_policy,
-                 overwrite=overwrite,
-                 **kwargs,
-             )
-
-     def new(
-         self,
-         name: str,
-         overwrite: bool = False,
-     ):
-         """
-         Adds a pipeline with the given name.
-
-         Args:
-             name (str): The name of the pipeline.
-             overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name. Defaults to False.
-
-         Returns:
-             None
-
-         Raises:
-             ValueError: If the configuration path or pipeline path does not exist.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.new("my_pipeline")
-             ```
-         """
-         if not self._fs.exists(self._cfg_dir):
-             raise ValueError(
-                 f"Configuration path {self._cfg_dir} does not exist. Please run flowerpower init first."
-             )
-         if not self._fs.exists(self._pipelines_dir):
-             raise ValueError(
-                 f"Pipeline path {self._pipelines_dir} does not exist. Please run flowerpower init first."
-             )
-
-         if self._fs.exists(f"{self._pipelines_dir}/{name.replace('.', '/')}.py"):
-             if overwrite:
-                 self._fs.rm(f"{self._pipelines_dir}/{name.replace('.', '/')}.py")
-             else:
-                 raise ValueError(
-                     f"Pipeline {self.cfg.project.name}.{name.replace('.', '/')} already exists. "
-                     "Use `overwrite=True` to overwrite."
-                 )
-         if self._fs.exists(f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml"):
-             if overwrite:
-                 self._fs.rm(f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml")
-             else:
-                 raise ValueError(
-                     f"Pipeline {self.cfg.project.name}.{name.replace('.', '/')} already exists. "
-                     "Use `overwrite=True` to overwrite."
-                 )
-
-         pipeline_path = f"{self._pipelines_dir}/{name.replace('.', '/')}.py"
-         cfg_path = f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml"
-
-         self._fs.makedirs(pipeline_path.rsplit("/", 1)[0], exist_ok=True)
-         self._fs.makedirs(cfg_path.rsplit("/", 1)[0], exist_ok=True)
-
-         with self._fs.open(pipeline_path, "w") as f:
-             f.write(
-                 PIPELINE_PY_TEMPLATE.format(
-                     name=name,
-                     date=dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                 )
-             )
-
-         self.cfg.pipeline = PipelineConfig(name=name)
-         self.cfg.save()
-
-         rich.print(
-             f"🔧 Created new pipeline [bold blue]{self.cfg.project.name}.{name}[/bold blue]"
-         )
-
-     def import_pipeline(
-         self,
-         name: str,
-         path: str,
-         cfg_dir: str = "conf",
-         pipelines_dir: str = "pipelines",
-         storage_options: dict | Munch | BaseStorageOptions | None = None,
-         fs: AbstractFileSystem | None = None,
-         overwrite: bool = False,
-     ):
-         """Import a pipeline from a given path.
-
-         The path can be a local path or a remote path like an S3 bucket or GitHub repository.
-         Any readable fsspec filesystem is supported.
-
-         Args:
-             name (str): The name of the pipeline.
-             path (str): The path to import the pipeline from.
-             cfg_dir (str, optional): The configuration directory. Defaults to "conf".
-             pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
-             storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
-                 Defaults to None.
-             fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-             overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
-                 Defaults to False.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.import_pipeline(
-                 "my_pipeline",
-                 "s3://bucket/path",
-                 storage_options={
-                     "key": "my_key",
-                     "secret": "my_secret",
-                     "endpoint_url": "http://minio:9000",
-                 },
-             )
-             ```
-         """
-         if fs is not None:
-             fs = get_filesystem(path, fs=fs)
-         else:
-             fs = get_filesystem(path, **storage_options)
-
-         conf_path = f"{fs.fs.protocol}://{fs.path}/{cfg_dir}"
-         pipeline_path = f"{fs.fs.protocol}://{fs.path}/{pipelines_dir}"
-         if not fs.exists(cfg_dir):
-             raise ValueError(f"Configuration path {conf_path} does not exist.")
-         if not fs.exists(pipelines_dir):
-             raise ValueError(f"Pipeline path {pipeline_path} does not exist.")
-
-         if self._fs.exists(f"{pipelines_dir}/{name.replace('.', '/')}.py"):
-             if overwrite:
-                 self._fs.rm(f"{pipelines_dir}/{name.replace('.', '/')}.py")
-             else:
-                 raise ValueError(
-                     f"Pipeline {name} already exists at {self._fs.fs.protocol}://{fs.path}. "
-                     "Use `overwrite=True` to overwrite."
-                 )
-         if self._fs.exists(f"{cfg_dir}/pipelines/{name.replace('.', '/')}.yml"):
-             if overwrite:
-                 self._fs.rm(f"{cfg_dir}/pipelines/{name.replace('.', '/')}.yml")
-             else:
-                 raise ValueError(
-                     f"Pipeline {name} already exists at {self._fs.fs.protocol}://{fs.path}. "
-                     "Use `overwrite=True` to overwrite."
-                 )
-
-         self._fs.write_bytes(
-             f"{self._pipelines_dir}/{name.replace('.', '/')}.py",
-             fs.read_bytes(f"{pipelines_dir}/{name.replace('.', '/')}.py"),
-         )
-         self._fs.write_bytes(
-             f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml",
-             fs.read_bytes(f"{cfg_dir}/pipelines/{name.replace('.', '/')}.yml"),
-         )
-         # fs.get(
-         #     f"{pipelines_dir}/{name.replace('.', '/')}.py",
-         #     f"{self._pipelines_dir}/{name.replace('.', '/')}.py",
-         # )
-         # fs.get(
-         #     f"{cfg_dir}/pipelines/{name.replace('.', '/')}.yml",
-         #     f"{self._cfg_dir}/pipelines/{name.replace('.', '/')}.yml",
-         # )
-
-         rich.print(
-             f"🔧 Imported pipeline [bold blue]{name}[/bold blue] from {fs.fs.protocol}://{fs.path}"
-         )
-
-     def import_many(
-         self,
-         names: list[str],
-         path: str,
-         cfg_dir: str = "conf",
-         pipelines_dir: str = "pipelines",
-         storage_options: dict | Munch | BaseStorageOptions | None = None,
-         fs: AbstractFileSystem | None = None,
-         overwrite: bool = False,
-     ):
-         """Import many pipelines from a given path.
-
-         The path can be a local path or a remote path like an S3 bucket or GitHub repository.
-         Any readable fsspec filesystem is supported.
-
-         Args:
-             names (list[str]): The names of the pipelines.
-             path (str): The path to import the pipelines from.
-             cfg_dir (str, optional): The configuration directory. Defaults to "conf".
-             pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
-             storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
-                 Defaults to None.
-             fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-             overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
-                 Defaults to False.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.import_many(
-                 ["my_pipeline", "my_pipeline2"],
-                 "s3://bucket/path",
-                 storage_options={
-                     "key": "my_key",
-                     "secret": "my_secret",
-                     "endpoint_url": "http://minio:9000",
-                 },
-             )
-             ```
-         """
-         for name in names:
-             self.import_pipeline(
-                 path=path,
-                 name=name,
-                 cfg_dir=cfg_dir,
-                 pipelines_dir=pipelines_dir,
-                 storage_options=storage_options,
-                 fs=fs,
-                 overwrite=overwrite,
-             )
-
-     def import_all(
-         self,
-         path: str,
-         cfg_dir: str = "conf",
-         pipelines_dir: str = "pipelines",
-         storage_options: dict | Munch | BaseStorageOptions | None = None,
-         fs: AbstractFileSystem | None = None,
-         overwrite: bool = False,
-     ):
-         """Import all pipelines from a given path.
-
-         The path can be a local path or a remote path like an S3 bucket or GitHub repository.
-         Any readable fsspec filesystem is supported.
-
-         Args:
-             path (str): The path to import the pipelines from.
-             cfg_dir (str, optional): The configuration directory. Defaults to "conf".
-             pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
-             storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
-                 Defaults to None.
-             fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-             overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
-                 Defaults to False.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.import_all(
-                 "s3://bucket/path",
-                 storage_options={
-                     "key": "my_key",
-                     "secret": "my_secret",
-                     "endpoint_url": "http://minio:9000",
-                 },
-             )
-             ```
-         """
-         names = [
-             fn.replace(pipelines_dir, "").lstrip("/").removesuffix(".py").replace("/", ".")
-             for fn in fs.glob(f"{pipelines_dir}/**/*.py")
-         ]
-         self.import_many(
-             path=path,
-             names=names,
-             cfg_dir=cfg_dir,
-             pipelines_dir=pipelines_dir,
-             storage_options=storage_options,
-             fs=fs,
-             overwrite=overwrite,
-         )
-
-     def export(
-         self,
-         name: str,
-         path: str,
-         cfg_dir: str = "conf",
-         pipelines_dir: str = "pipelines",
-         storage_options: dict | Munch | BaseStorageOptions | None = None,
-         fs: AbstractFileSystem | None = None,
-         overwrite: bool = False,
-     ):
-         """Export a pipeline to a given path.
-
-         The path can be a local path or a remote path like an S3 bucket or GitHub repository.
-         Any writable fsspec filesystem is supported.
-
-         Args:
-             name (str): The name of the pipeline.
-             path (str): The path to export the pipeline to.
-             cfg_dir (str, optional): The configuration directory. Defaults to "conf".
-             pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
-             storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
-                 Defaults to None.
-             fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-             overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
-                 Defaults to False.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.export(
-                 "my_pipeline",
-                 "s3://bucket/path",
-                 storage_options={
-                     "key": "my_key",
-                     "secret": "my_secret",
-                     "endpoint_url": "http://minio:9000",
-                 },
-             )
-             ```
-         """
-         fs = fs or get_filesystem(path, **storage_options)
-
-         if fs.exists(posixpath.join(pipelines_dir, name.replace(".", "/") + ".py")):
-             if overwrite:
-                 fs.rm(posixpath.join(pipelines_dir, name.replace(".", "/") + ".py"))
-             else:
-                 raise ValueError(
-                     f"Pipeline {name} already exists at {fs.fs.protocol}://{fs.path}. "
-                     "Use `overwrite=True` to overwrite."
-                 )
-         if fs.exists(
-             posixpath.join(cfg_dir, "pipelines", name.replace(".", "/") + ".yml")
-         ):
-             if overwrite:
-                 fs.rm(
-                     posixpath.join(
-                         cfg_dir, "pipelines", name.replace(".", "/") + ".yml"
-                     )
-                 )
-             else:
-                 raise ValueError(
-                     f"Pipeline {name} already exists at {fs.fs.protocol}://{fs.path}. "
-                     "Use `overwrite=True` to overwrite."
-                 )
-
-         fs.put_file(
-             posixpath.join(self._pipelines_dir, name.replace(".", "/") + ".py"),
-             posixpath.join(pipelines_dir, name.replace(".", "/") + ".py"),
-         )
-
-         fs.put_file(
-             posixpath.join(self._cfg_dir, "pipelines", name.replace(".", "/") + ".yml"),
-             posixpath.join(cfg_dir, "pipelines", name.replace(".", "/") + ".yml"),
-         )
-
-         rich.print(
-             f"🔧 Exported pipeline [bold blue]{name}[/bold blue] to {fs.fs.protocol}://{fs.path}"
-         )
-
1048
- def export_many(
1049
- self,
1050
- path: str,
1051
- names: list[str],
1052
- cfg_dir: str = "conf",
1053
- pipelines_dir: str = "pipelines",
1054
- storage_options: dict | Munch | BaseStorageOptions | None = None,
1055
- fs: AbstractFileSystem | None = None,
1056
- overwrite: bool = False,
1057
- ):
1058
- """Export many pipelines to a given path.
1059
-
1060
- The path could be a local path or a remote path like an S3 bucket or GitHub repository.
1061
- Any writable fsspec filesystem is supported.
1062
-
1063
- Args:
1064
- path (str): The path to export the pipelines to.
1065
- names (list[str]): The names of the pipelines.
1066
- cfg_dir (str, optional): The configuration directory. Defaults to "conf".
1067
- pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines
1068
- storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
1069
- Defaults to None.
1070
- fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
1071
- overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
1072
- Defaults to False.
1073
-
1074
- Examples:
1075
- ```python
1076
- pm = PipelineManager()
1077
- pm.export_many(
1078
- "s3://bucket/path",
1079
- ["my_pipeline", "my_pipeline2"],
1080
- storage_options={
1081
- "key": "my_key",
1082
- "secret": "my_secret",
1083
- "endpoint_url":"http://minio:9000"
1084
- }
1085
- )
1086
- """
1087
- for name in names:
1088
- self.export(
1089
- path=path,
1090
- name=name,
1091
- cfg_dir=cfg_dir,
1092
- pipelines_dir=pipelines_dir,
1093
- storage_options=storage_options,
1094
- fs=fs,
1095
- overwrite=overwrite,
1096
- )
1097
-
-     def export_all(
-         self,
-         path: str,
-         cfg_dir: str = "conf",
-         pipelines_dir: str = "pipelines",
-         storage_options: dict | Munch | BaseStorageOptions | None = None,
-         fs: AbstractFileSystem | None = None,
-         overwrite: bool = False,
-     ):
-         """Export all pipelines to a given path.
-
-         The path can be a local path or a remote path like an S3 bucket or GitHub repository.
-         Any writable fsspec filesystem is supported.
-
-         Args:
-             path (str): The path to export the pipelines to.
-             cfg_dir (str, optional): The configuration directory. Defaults to "conf".
-             pipelines_dir (str, optional): The pipeline directory. Defaults to "pipelines".
-             storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
-                 Defaults to None.
-             fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-             overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
-                 Defaults to False.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.export_all(
-                 "s3://bucket/path",
-                 storage_options={
-                     "key": "my_key",
-                     "secret": "my_secret",
-                     "endpoint_url": "http://minio:9000",
-                 },
-             )
-             ```
-         """
-         names = [
-             fn.replace(self._pipelines_dir, "")
-             .lstrip("/")
-             .removesuffix(".py")
-             .replace("/", ".")
-             for fn in self._fs.glob(f"{self._pipelines_dir}/**/*.py")
-         ]
-         self.export_many(
-             path=path,
-             names=names,
-             cfg_dir=cfg_dir,
-             pipelines_dir=pipelines_dir,
-             storage_options=storage_options,
-             fs=fs,
-             overwrite=overwrite,
-         )
-
-     def delete(self, name: str, cfg: bool = True, module: bool = False):
-         """
-         Delete a pipeline.
-
-         Args:
-             name (str): The name of the pipeline to delete.
-             cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
-             module (bool, optional): Whether to delete the pipeline module file. Defaults to False.
-
-         Returns:
-             None
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.delete("my_pipeline")
-             ```
-         """
-
-         if cfg:
-             if self._fs.exists(f"{self._cfg_dir}/pipelines/{name}.yml"):
-                 self._fs.rm(f"{self._cfg_dir}/pipelines/{name}.yml")
-                 rich.print(f"🗑️ Deleted pipeline config for {name}")
-
-         if module:
-             if self._fs.exists(f"{self._pipelines_dir}/{name}.py"):
-                 self._fs.rm(f"{self._pipelines_dir}/{name}.py")
-                 rich.print(
-                     f"🗑️ Deleted pipeline module for {self.cfg.project.name}.{name}"
-                 )
-
-     def _display_all_function(self, name: str, reload: bool = True):
-         dr, _ = self._get_driver(
-             name=name, executor=None, with_tracker=False, reload=reload
-         )
-         return dr.display_all_functions()
-
-     def save_dag(
-         self,
-         name: str,
-         format: str = "png",
-         reload: bool = False,
-     ):
-         """
-         Save an image of the function graph for a given pipeline.
-
-         Args:
-             name (str): The name of the graph.
-             format (str, optional): The format of the graph file. Defaults to "png".
-             reload (bool, optional): Whether to reload the graph data. Defaults to False.
-
-         Returns:
-             None
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.save_dag("my_pipeline")
-             ```
-         """
-         dag = self._display_all_function(name=name, reload=reload)
-
-         self._fs.makedirs("graphs", exist_ok=True)
-         dag.render(
-             posixpath.join(self._base_dir, f"graphs/{name}"),
-             format=format,
-             cleanup=True,
-         )
-         rich.print(
-             f"📊 Saved graph for {name} to {self._base_dir}/graphs/{name}.{format}"
-         )
-
-     def show_dag(
-         self,
-         name: str,
-         format: str = "png",
-         reload: bool = False,
-         raw: bool = False,
-     ):
-         """
-         Display the function graph for a given pipeline. If `raw` is set, the graph object is returned instead.
-         The chosen format determines which application is used to display the graph.
-
-         Args:
-             name (str): The name of the graph.
-             format (str, optional): The format of the graph file. Defaults to "png".
-             reload (bool, optional): Whether to reload the graph data. Defaults to False.
-             raw (bool, optional): Whether to return the graph object. Defaults to False.
-
-         Returns:
-             graph: The generated graph object.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.show_dag("my_pipeline")
-             ```
-         """
-         dag = self._display_all_function(name=name, reload=reload)
-         if raw:
-             return dag
-         view_img(dag.pipe(format), format=format)
-
-     def _get_files(self) -> list[str]:
-         """
-         Get the pipeline files.
-
-         Returns:
-             list[str]: A list of pipeline files.
-         """
-         return [f for f in self._fs.ls(self._pipelines_dir) if f.endswith(".py")]
-
-     def _get_names(self) -> list[str]:
-         """
-         Get the pipeline names.
-
-         Returns:
-             list[str]: A list of pipeline names.
-         """
-         return [posixpath.splitext(posixpath.basename(f))[0] for f in self._get_files()]
-
-     def get_summary(
-         self, name: str | None = None, cfg: bool = True, module: bool = True
-     ) -> dict[str, dict | str]:
-         """
-         Get a summary of the pipelines.
-
-         Args:
-             name (str | None, optional): The name of the pipeline. Defaults to None.
-             cfg (bool, optional): Whether to show the configuration. Defaults to True.
-             module (bool, optional): Whether to show the module. Defaults to True.
-
-         Returns:
-             dict[str, dict | str]: A dictionary containing the pipeline summary.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             summary = pm.get_summary()
-             ```
-         """
-         if name:
-             pipeline_names = [name]
-         else:
-             pipeline_names = self._get_names()
-
-         pipeline_summary = {}
-         for name in pipeline_names:
-             self.load_config(name)
-             if cfg:
-                 pipeline_summary[name] = {"cfg": self.cfg.pipeline.to_dict()}
-             if module:
-                 pipeline_summary[name].update(
-                     {
-                         "module": self._fs.cat(
-                             f"{self._pipelines_dir}/{name}.py"
-                         ).decode(),
-                     }
-                 )
-         return pipeline_summary
-
-     def show_summary(
-         self,
-         name: str | None = None,
-         cfg: bool = True,
-         module: bool = True,
-         to_html: bool = False,
-         to_svg: bool = False,
-     ) -> None | str:
-         """
-         Show a summary of the pipelines.
-
-         Args:
-             name (str | None, optional): The name of the pipeline. Defaults to None.
-             cfg (bool, optional): Whether to show the configuration. Defaults to True.
-             module (bool, optional): Whether to show the module. Defaults to True.
-             to_html (bool, optional): Whether to export the summary to HTML. Defaults to False.
-             to_svg (bool, optional): Whether to export the summary to SVG. Defaults to False.
-
-         Returns:
-             None | str: The summary of the pipelines. If `to_html` is True, returns the HTML string.
-                 If `to_svg` is True, returns the SVG string.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.show_summary()
-             ```
-         """
-
-         pipeline_summary = self.get_summary(name=name, cfg=cfg, module=module)
-
-         def add_dict_to_tree(tree, dict_data, style="green"):
-             for key, value in dict_data.items():
-                 if isinstance(value, dict):
-                     branch = tree.add(f"[cyan]{key}:", style="bold cyan")
-                     add_dict_to_tree(branch, value, style)
-                 else:
-                     tree.add(f"[cyan]{key}:[/] [green]{value}[/]")
-
-         console = Console(record=True)
-         for pipeline, info in pipeline_summary.items():
-             # Create tree for config
-             config_tree = Tree("📋 Configuration", style="bold magenta")
-             add_dict_to_tree(config_tree, info["cfg"])
-
-             # Create syntax-highlighted code view
-             code_view = Syntax(
-                 info["module"],
-                 "python",
-                 theme="default",
-                 line_numbers=False,
-                 word_wrap=True,
-                 code_width=80,
-                 padding=2,
-             )
-
-             if cfg:
-                 # console.print(f"🔄 Pipeline: {pipeline}", style="bold blue")
-                 console.print(
-                     Panel(
-                         config_tree,
-                         title=f"🔄 Pipeline: {pipeline}",
-                         subtitle="Configuration",
-                         border_style="blue",
-                         padding=(2, 2),
-                     )
-                 )
-                 console.print("\n")
-
-             if module:
-                 # console.print(f"🔄 Pipeline: {pipeline}", style="bold blue")
-                 console.print(
-                     Panel(
-                         code_view,
-                         title=f"🔄 Pipeline: {pipeline}",
-                         subtitle="Module",
-                         border_style="blue",
-                         padding=(2, 2),
-                     )
-                 )
-                 console.print("\n")
-         if to_html:
-             return console.export_html()
-         elif to_svg:
-             return console.export_svg()
-
-     @property
-     def summary(self) -> dict[str, dict | str]:
-         """
-         Get a summary of the pipelines.
-
-         Returns:
-             dict: A dictionary containing the pipeline summary.
-         """
-         return self.get_summary()
-
-     def _all_pipelines(
-         self, show: bool = True, to_html: bool = False, to_svg: bool = False
-     ) -> list[str] | None:
-         """
-         Print all available pipelines in a formatted table.
-
-         Args:
-             show (bool, optional): Whether to print the table. Defaults to True.
-             to_html (bool, optional): Whether to export the table to HTML. Defaults to False.
-             to_svg (bool, optional): Whether to export the table to SVG. Defaults to False.
-
-         Returns:
-             list[str] | None: A list of pipeline names if `show` is False.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             all_pipelines = pm._all_pipelines(show=False)
-             ```
-         """
-         if to_html or to_svg:
-             show = True
-
-         pipeline_files = [
-             f for f in self._fs.ls(self._pipelines_dir) if f.endswith(".py")
-         ]
-         pipeline_names = [
-             posixpath.splitext(f)[0]
-             .replace(self._pipelines_dir, "")
-             .lstrip("/")
-             .replace("/", ".")
-             for f in pipeline_files
-         ]
-
-         if not pipeline_files:
-             rich.print("[yellow]No pipelines found[/yellow]")
-             return
-
-         pipeline_info = []
-
-         for path, name in zip(pipeline_files, pipeline_names):
-             # path = posixpath.join( f)
-             try:
-                 mod_time = self._fs.modified(path).strftime("%Y-%m-%d %H:%M:%S")
-             except NotImplementedError:
-                 mod_time = "N/A"
-             size = f"{self._fs.size(path) / 1024:.1f} KB"
-             pipeline_info.append(
-                 {"name": name, "path": path, "mod_time": mod_time, "size": size}
-             )
-
-         if show:
-             table = Table(title="Available Pipelines")
-             table.add_column("Pipeline Name", style="blue")
-             table.add_column("Path", style="magenta")
-             table.add_column("Last Modified", style="green")
-             table.add_column("Size", style="cyan")
-
-             for info in pipeline_info:
-                 table.add_row(
-                     info["name"], info["path"], info["mod_time"], info["size"]
-                 )
-             console = Console(record=True)
-             console.print(table)
-             if to_html:
-                 return console.export_html()
-             elif to_svg:
-                 return console.export_svg()
-
-         else:
-             return pipeline_info
-
-     def show_pipelines(self) -> None:
-         """
-         Print all available pipelines in a formatted table.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pm.show_pipelines()
-             ```
-         """
-         self._all_pipelines(show=True)
-
-     def list_pipelines(self) -> list[str]:
-         """
-         Get a list of all available pipelines.
-
-         Returns:
-             list[str]: A list of pipeline names.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pipelines = pm.list_pipelines()
-             ```
-         """
-         return self._all_pipelines(show=False)
-
-     @property
-     def pipelines(self) -> list[str]:
-         """
-         Get a list of all available pipelines.
-
-         Returns:
-             list[str]: A list of pipeline names.
-
-         Examples:
-             ```python
-             pm = PipelineManager()
-             pipelines = pm.pipelines
-             ```
-         """
-         return self._all_pipelines(show=False)
-
-
- class Pipeline:
-     def __init__(
-         self,
-         name: str,
-         base_dir: str | None = None,
-         storage_options: dict | Munch | BaseStorageOptions = {},
-         fs: AbstractFileSystem | None = None,
-     ):
-         """
-         Initializes the Pipeline object.
-
-         Args:
-             name (str): The name of the pipeline.
-             base_dir (str | None): The flowerpower base path. Defaults to None.
-             storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-             fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-         Returns:
-             None
-         """
-         # super().__init__(base_dir=base_dir, storage_options=storage_options, fs=fs)
-         self.name = name
-         self._base_dir = base_dir or os.getcwd()
-         self._storage_options = storage_options or {}
-         if fs is None:
-             fs = get_filesystem(self._base_dir, **self._storage_options)
-         self._fs = fs
-         # self.load_module()
-         # self.load_config(name)
-
-     def __enter__(self) -> "Pipeline":
-         return self
-
-     def __exit__(
-         self,
-         exc_type: type[BaseException] | None,
-         exc_val: BaseException | None,
-         exc_tb: TracebackType | None,
-     ) -> None:
-         # Add any cleanup code here if needed
-         pass
-
1566
-    def run(
-        self,
-        inputs: dict | None = None,
-        final_vars: list | None = None,
-        config: dict | None = None,
-        executor: str | None = None,
-        with_tracker: bool = False,
-        with_opentelemetry: bool = False,
-        with_progressbar: bool = False,
-        reload: bool = False,
-        **kwargs,
-    ) -> dict[str, Any]:
-        """Run the pipeline.
-
-        Args:
-            inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-            final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-            config (dict | None, optional): The config for the Hamilton driver that executes the pipeline.
-                Defaults to None.
-            executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-            with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
-            with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline.
-                Defaults to False.
-            with_progressbar (bool, optional): Whether to include a progress bar for the pipeline.
-                Defaults to False.
-            reload (bool, optional): Whether to reload the pipeline. Defaults to False.
-            **kwargs: Additional keyword arguments forwarded to ``PipelineManager.run``.
-
-        Returns:
-            dict[str, Any]: The final variables for the pipeline.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            final_vars = p.run()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            return pm.run(
-                name=self.name,
-                executor=executor,
-                inputs=inputs,
-                final_vars=final_vars,
-                config=config,
-                with_tracker=with_tracker,
-                with_opentelemetry=with_opentelemetry,
-                with_progressbar=with_progressbar,
-                reload=reload,
-                **kwargs,
-            )
-
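`run` re-opens a `PipelineManager` scoped to the pipeline's base directory and forwards every argument unchanged. A hedged sketch of a parameterized run; `param` and `result` are hypothetical input and node names:

```python
from flowerpower.pipeline import Pipeline  # import path assumed

p = Pipeline("my_pipeline")
final_vars = p.run(
    inputs={"param": 1},      # "param" is a hypothetical pipeline input
    final_vars=["result"],    # "result" is a hypothetical output node
    with_progressbar=True,    # show execution progress
)
```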
-    def run_job(
-        self,
-        inputs: dict | None = None,
-        final_vars: list | None = None,
-        config: dict | None = None,
-        executor: str | None = None,
-        with_tracker: bool | None = None,
-        with_opentelemetry: bool | None = None,
-        with_progressbar: bool | None = None,
-        **kwargs,
-    ) -> dict[str, Any]:
-        """Run the pipeline as a job.
-
-        Args:
-            inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-            final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-            config (dict | None, optional): The config for the Hamilton driver that executes the pipeline.
-                Defaults to None.
-            executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-            with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
-            with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
-                Defaults to None.
-            with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
-                Defaults to None.
-            **kwargs: Additional keyword arguments forwarded to ``PipelineManager.run_job``.
-
-        Returns:
-            dict[str, Any]: The final variables for the pipeline.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            final_vars = p.run_job()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            return pm.run_job(
-                name=self.name,
-                executor=executor,
-                inputs=inputs,
-                final_vars=final_vars,
-                config=config,
-                with_tracker=with_tracker,
-                with_opentelemetry=with_opentelemetry,
-                with_progressbar=with_progressbar,
-                **kwargs,
-            )
-
-    def add_job(
-        self,
-        inputs: dict | None = None,
-        final_vars: list | None = None,
-        config: dict | None = None,
-        executor: str | None = None,
-        with_tracker: bool | None = None,
-        with_opentelemetry: bool | None = None,
-        with_progressbar: bool | None = None,
-        result_expiration_time: float | dt.timedelta = 0,
-        **kwargs,
-    ) -> UUID:
-        """Add a job for the pipeline.
-
-        Args:
-            inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-            final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-            config (dict | None, optional): The config for the Hamilton driver that executes the pipeline.
-                Defaults to None.
-            executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-            with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
-            with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
-                Defaults to None.
-            with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
-                Defaults to None.
-            result_expiration_time (float | dt.timedelta, optional): How long the job result is kept in the
-                data store. Defaults to 0.
-            **kwargs: Additional keyword arguments forwarded to ``PipelineManager.add_job``.
-
-        Returns:
-            UUID: The job ID.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            job_id = p.add_job()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            return pm.add_job(
-                name=self.name,
-                executor=executor,
-                inputs=inputs,
-                final_vars=final_vars,
-                config=config,
-                with_tracker=with_tracker,
-                with_opentelemetry=with_opentelemetry,
-                with_progressbar=with_progressbar,
-                result_expiration_time=result_expiration_time,
-                **kwargs,
-            )
-
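The returned UUID identifies the job in the worker data store, and a non-zero `result_expiration_time` keeps the result fetchable after completion. A sketch under the same import-path assumption:

```python
import datetime as dt

from flowerpower.pipeline import Pipeline  # import path assumed

p = Pipeline("my_pipeline")
# Keep the result around for one hour so it can be looked up by job id.
job_id = p.add_job(result_expiration_time=dt.timedelta(hours=1))
print(f"enqueued job {job_id}")
```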
-    def schedule(
-        self,
-        trigger_type: str | None = None,
-        inputs: dict | None = None,
-        final_vars: list | None = None,
-        config: dict | None = None,
-        executor: str | None = None,
-        with_tracker: bool = False,
-        with_opentelemetry: bool = False,
-        with_progressbar: bool = False,
-        paused: bool = False,
-        coalesce: str = "latest",
-        misfire_grace_time: float | dt.timedelta | None = None,
-        max_jitter: float | dt.timedelta | None = None,
-        max_running_jobs: int | None = None,
-        conflict_policy: str = "do_nothing",
-        **kwargs,
-    ) -> str:
-        """Schedule the pipeline.
-
-        Args:
-            trigger_type (str | None, optional): The trigger type for the schedule. Defaults to None.
-            inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-            final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-            config (dict | None, optional): The config for the Hamilton driver that executes the pipeline.
-                Defaults to None.
-            executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-            with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
-            with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline. Defaults to False.
-            with_progressbar (bool, optional): Whether to include a progress bar for the pipeline. Defaults to False.
-            paused (bool, optional): Whether to pause the schedule. Defaults to False.
-            coalesce (str, optional): The coalesce strategy. Defaults to "latest".
-            misfire_grace_time (float | dt.timedelta | None, optional): The misfire grace time. Defaults to None.
-            max_jitter (float | dt.timedelta | None, optional): The max jitter. Defaults to None.
-            max_running_jobs (int | None, optional): The max running jobs. Defaults to None.
-            conflict_policy (str, optional): The conflict policy. Defaults to "do_nothing".
-            **kwargs: Additional keyword arguments (e.g. trigger fields) forwarded to the scheduler.
-
-        Returns:
-            str: The schedule ID.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            schedule_id = p.schedule()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            return pm.schedule(
-                name=self.name,
-                executor=executor,
-                trigger_type=trigger_type,
-                inputs=inputs,
-                final_vars=final_vars,
-                config=config,  # was silently dropped; forwarded here as in the module-level schedule()
-                with_tracker=with_tracker,
-                with_opentelemetry=with_opentelemetry,
-                with_progressbar=with_progressbar,
-                paused=paused,
-                coalesce=coalesce,
-                misfire_grace_time=misfire_grace_time,
-                max_jitter=max_jitter,
-                max_running_jobs=max_running_jobs,
-                conflict_policy=conflict_policy,
-                **kwargs,
-            )
-
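Trigger fields travel through `**kwargs` to the scheduler; the `interval`/`seconds` combination below mirrors the module-level `schedule()` docstring example:

```python
from flowerpower.pipeline import Pipeline  # import path assumed

p = Pipeline("my_pipeline")
# Fire every 60 seconds; "seconds" is passed through **kwargs to the trigger.
schedule_id = p.schedule(trigger_type="interval", seconds=60)
```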
-    def export(
-        self,
-        path: str,
-        storage_options: dict | Munch | BaseStorageOptions | None = None,
-        fs: AbstractFileSystem | None = None,
-        overwrite: bool = False,
-    ):
-        """Export the pipeline to a given path.
-
-        The path can be local or remote (e.g. an S3 bucket or a GitHub repository);
-        any writable fsspec filesystem is supported.
-
-        Args:
-            path (str): The path to export the pipeline to.
-            storage_options (dict | Munch | BaseStorageOptions | None, optional): The storage options.
-                Defaults to None.
-            fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-            overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name.
-                Defaults to False.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            p.export("s3://bucket/path")
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            pm.export(
-                name=self.name,
-                path=path,
-                storage_options=storage_options,
-                fs=fs,
-                overwrite=overwrite,
-            )
-
-    def delete(self, cfg: bool = True, module: bool = False):
-        """Delete the pipeline.
-
-        Args:
-            cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
-            module (bool, optional): Whether to delete the pipeline module file. Defaults to False.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            p.delete()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            pm.delete(self.name, cfg=cfg, module=module)
-
-    def save_dag(self, format: str = "png"):
-        """Save an image of the pipeline's function graph.
-
-        Args:
-            format (str, optional): The format of the graph file. Defaults to "png".
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            p.save_dag()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            pm.save_dag(self.name, format)
-
-    def show_dag(self):
-        """Display the pipeline's function graph.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            p.show_dag()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            return pm.show_dag(self.name)
-
-    def get_summary(
-        self, cfg: bool = True, module: bool = True
-    ) -> dict[str, dict | str]:
-        """Get a summary of the pipeline.
-
-        Args:
-            cfg (bool, optional): Whether to include the configuration. Defaults to True.
-            module (bool, optional): Whether to include the module. Defaults to True.
-
-        Returns:
-            dict[str, dict | str]: A dictionary containing the pipeline summary.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            summary = p.get_summary()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            return pm.get_summary(self.name, cfg=cfg, module=module)[self.name]
-
-    def show_summary(self, cfg: bool = True, module: bool = True):
-        """Show a summary of the pipeline.
-
-        Args:
-            cfg (bool, optional): Whether to show the configuration. Defaults to True.
-            module (bool, optional): Whether to show the module. Defaults to True.
-
-        Examples:
-            ```python
-            p = Pipeline("my_pipeline")
-            p.show_summary()
-            ```
-        """
-        with PipelineManager(
-            base_dir=self._base_dir,
-            fs=self._fs,
-        ) as pm:
-            pm.show_summary(self.name, cfg=cfg, module=module)
-
-    @property
-    def summary(self) -> dict[str, dict | str]:
-        """Get a summary of the pipeline.
-
-        Returns:
-            dict[str, dict | str]: A dictionary containing the pipeline summary.
-        """
-        return self.get_summary()
-
-
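Taken together, `summary`, `show_summary`, and the DAG helpers cover introspection. A short sketch, import path assumed:

```python
from flowerpower.pipeline import Pipeline  # import path assumed

p = Pipeline("my_pipeline")
summary = p.summary            # shortcut for p.get_summary()
p.show_summary(module=False)   # print only the configuration part
p.save_dag(format="svg")       # render the function graph to an SVG file
```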
-def run(
-    name: str,
-    base_dir: str | None = None,
-    inputs: dict | None = None,
-    final_vars: list | None = None,
-    config: dict | None = None,
-    executor: str | None = None,
-    with_tracker: bool = False,
-    with_opentelemetry: bool = False,
-    with_progressbar: bool = False,
-    storage_options: dict = {},
-    fs: AbstractFileSystem | None = None,
-    **kwargs,
-) -> dict[str, Any]:
-    """Run a pipeline with the given parameters.
-
-    Args:
-        name (str): The name of the pipeline.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-        config (dict | None, optional): The config for the Hamilton driver that executes the pipeline.
-            Defaults to None.
-        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-        with_tracker (bool, optional): Whether to include a tracker for the pipeline. Defaults to False.
-        with_opentelemetry (bool, optional): Whether to include OpenTelemetry for the pipeline.
-            Defaults to False.
-        with_progressbar (bool, optional): Whether to include a progress bar for the pipeline.
-            Defaults to False.
-        storage_options (dict, optional): The fsspec storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        dict[str, Any]: The final variables for the pipeline.
-
-    Examples:
-        ```python
-        final_vars = run("my_pipeline", inputs={"param": 1}, base_dir="my_flowerpower_project")
-        ```
-    """
-    with Pipeline(
-        base_dir=base_dir, name=name, storage_options=storage_options, fs=fs
-    ) as p:
-        return p.run(
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            executor=executor,
-            with_tracker=with_tracker,
-            with_opentelemetry=with_opentelemetry,
-            with_progressbar=with_progressbar,
-            **kwargs,
-        )
-
-
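The module-level `run` wraps `Pipeline.run` for one-shot execution; since `base_dir` accepts fsspec URLs, a remote project can be targeted directly. A sketch where the bucket path and the `anon` option are illustrative s3fs values:

```python
from flowerpower.pipeline import run  # import path assumed

# One-shot execution; no Pipeline object is kept around.
final_vars = run(
    "my_pipeline",
    base_dir="s3://my-bucket/my_flowerpower_project",  # illustrative remote project
    storage_options={"anon": False},                   # illustrative s3fs option
    inputs={"param": 1},
)
```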
-def run_job(
-    name: str,
-    base_dir: str | None = None,
-    inputs: dict | None = None,
-    final_vars: list | None = None,
-    config: dict | None = None,
-    executor: str | None = None,
-    with_tracker: bool | None = None,
-    with_opentelemetry: bool | None = None,
-    with_progressbar: bool | None = None,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-    **kwargs,
-) -> dict[str, Any]:
-    """Run a pipeline as a job with the given parameters.
-
-    Args:
-        name (str): The name of the pipeline.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-        config (dict | None, optional): The config for the Hamilton driver that executes the pipeline.
-            Defaults to None.
-        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-        with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
-        with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
-            Defaults to None.
-        with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
-            Defaults to None.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        dict[str, Any]: The final variables for the pipeline.
-
-    Examples:
-        ```python
-        final_vars = run_job("my_pipeline", inputs={"param": 1}, base_dir="my_flowerpower_project")
-        ```
-    """
-    with Pipeline(
-        base_dir=base_dir, name=name, storage_options=storage_options, fs=fs
-    ) as p:
-        return p.run_job(
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            executor=executor,
-            with_tracker=with_tracker,
-            with_opentelemetry=with_opentelemetry,
-            with_progressbar=with_progressbar,
-            **kwargs,
-        )
-
-
-def add_job(
-    name: str,
-    base_dir: str | None = None,
-    inputs: dict | None = None,
-    final_vars: list | None = None,
-    config: dict | None = None,
-    executor: str | None = None,
-    with_tracker: bool | None = None,
-    with_opentelemetry: bool | None = None,
-    with_progressbar: bool | None = None,
-    result_expiration_time: float | dt.timedelta = 0,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-    **kwargs,
-) -> UUID:
-    """
-    Add a job that runs the pipeline with the given parameters to the worker data store.
-    Executes the job immediately and returns the job id (UUID). The job result is stored in
-    the data store for the given `result_expiration_time` and can be fetched using the job id (UUID).
-
-    Args:
-        name (str): The name of the pipeline.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        inputs (dict | None, optional): The inputs for the job. Defaults to None.
-        final_vars (list | None, optional): The final variables for the job. Defaults to None.
-        config (dict | None, optional): The config for the Hamilton driver that executes the job.
-            Defaults to None.
-        executor (str | None, optional): The executor to use for the job. Defaults to None.
-        with_tracker (bool | None, optional): Whether to use a tracker for the job. Defaults to None.
-        with_opentelemetry (bool | None, optional): Whether to use OpenTelemetry for the job. Defaults to None.
-        with_progressbar (bool | None, optional): Whether to use a progress bar for the job. Defaults to None.
-        result_expiration_time (float | dt.timedelta, optional): The expiration time for the job result.
-            Defaults to 0.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        UUID: The UUID of the added job.
-
-    Examples:
-        ```python
-        job_id = add_job("my_pipeline")
-        ```
-    """
-    with Pipeline(
-        name=name, base_dir=base_dir, storage_options=storage_options, fs=fs
-    ) as p:
-        return p.add_job(
-            executor=executor,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            with_tracker=with_tracker,
-            with_opentelemetry=with_opentelemetry,
-            with_progressbar=with_progressbar,
-            result_expiration_time=result_expiration_time,
-            **kwargs,
-        )
-
-
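The module-level `add_job` is a thin wrapper over `Pipeline.add_job`. A sketch with an expiring result; `param` is a hypothetical input:

```python
from flowerpower.pipeline import add_job  # import path assumed

# Enqueue immediately; keep the result fetchable (by UUID) for an hour.
job_id = add_job(
    "my_pipeline",
    inputs={"param": 1},           # "param" is a hypothetical input
    result_expiration_time=3600,   # seconds
)
```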
-def schedule(
-    name: str,
-    base_dir: str | None = None,
-    inputs: dict | None = None,
-    final_vars: list | None = None,
-    executor: str | None = None,
-    config: dict | None = None,
-    with_tracker: bool | None = None,
-    with_opentelemetry: bool | None = None,
-    with_progressbar: bool | None = None,
-    trigger_type: str | None = None,
-    id_: str | None = None,
-    paused: bool = False,
-    coalesce: str = "latest",
-    misfire_grace_time: float | dt.timedelta | None = None,
-    max_jitter: float | dt.timedelta | None = None,
-    max_running_jobs: int | None = None,
-    conflict_policy: str = "do_nothing",
-    overwrite: bool = False,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-    **kwargs,
-) -> str:
-    """Schedule a pipeline with the given parameters.
-
-    Args:
-        name (str): The name of the pipeline.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        inputs (dict | None, optional): The inputs for the pipeline. Defaults to None.
-        final_vars (list | None, optional): The final variables for the pipeline. Defaults to None.
-        config (dict | None, optional): The config for the Hamilton driver that executes the pipeline.
-            Defaults to None.
-        executor (str | None, optional): The executor to use for running the pipeline. Defaults to None.
-        with_tracker (bool | None, optional): Whether to include a tracker for the pipeline. Defaults to None.
-        with_opentelemetry (bool | None, optional): Whether to include OpenTelemetry for the pipeline.
-            Defaults to None.
-        with_progressbar (bool | None, optional): Whether to include a progress bar for the pipeline.
-            Defaults to None.
-        trigger_type (str | None, optional): The trigger type for the schedule. Defaults to None.
-        id_ (str | None, optional): The schedule ID. Defaults to None.
-        paused (bool, optional): Whether to pause the schedule. Defaults to False.
-        coalesce (str, optional): The coalesce strategy. Defaults to "latest".
-        misfire_grace_time (float | dt.timedelta | None, optional): The misfire grace time. Defaults to None.
-        max_jitter (float | dt.timedelta | None, optional): The max jitter. Defaults to None.
-        max_running_jobs (int | None, optional): The max running jobs. Defaults to None.
-        conflict_policy (str, optional): The conflict policy. Defaults to "do_nothing".
-        overwrite (bool, optional): Whether to overwrite an existing schedule. Defaults to False.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-        **kwargs: Additional keyword arguments.
-
-    Returns:
-        str: The schedule ID.
-
-    Examples:
-        ```python
-        schedule_id = schedule("my_pipeline", trigger_type="interval", seconds=60)
-        ```
-    """
-    with Pipeline(
-        base_dir=base_dir,
-        name=name,
-        storage_options=storage_options,
-        fs=fs,
-    ) as p:
-        return p.schedule(
-            executor=executor,
-            trigger_type=trigger_type,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            with_tracker=with_tracker,
-            with_opentelemetry=with_opentelemetry,
-            with_progressbar=with_progressbar,
-            # id_ was accepted but never forwarded; pass it through **kwargs
-            # (assumes the scheduler backend accepts an `id_` keyword).
-            id_=id_,
-            paused=paused,
-            coalesce=coalesce,
-            misfire_grace_time=misfire_grace_time,
-            max_jitter=max_jitter,
-            max_running_jobs=max_running_jobs,
-            conflict_policy=conflict_policy,
-            overwrite=overwrite,
-            **kwargs,
-        )
-
-
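Schedule-management flags such as `paused` and `overwrite` combine freely with the trigger fields. A sketch reusing the interval example from the docstring:

```python
from flowerpower.pipeline import schedule  # import path assumed

schedule_id = schedule(
    "my_pipeline",
    trigger_type="interval",
    seconds=60,        # trigger field passed through **kwargs
    paused=True,       # register the schedule without starting it
    overwrite=True,    # replace an existing schedule for this pipeline
)
```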
-def new(
-    name: str,
-    base_dir: str | None = None,
-    overwrite: bool = False,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-):
-    """Create a new pipeline with the given name.
-
-    Args:
-        name (str): The name of the pipeline.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        overwrite (bool, optional): Whether to overwrite an existing pipeline with the same name. Defaults to False.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-    Examples:
-        ```python
-        new("my_pipeline")
-        ```
-    """
-    with PipelineManager(
-        base_dir=base_dir,
-        fs=fs,
-    ) as pm:
-        pm.new(name=name, overwrite=overwrite, storage_options=storage_options)
-
-
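`new()` scaffolds a pipeline (configuration plus module) and `delete()` removes it again. A lifecycle sketch, import path assumed:

```python
from flowerpower.pipeline import delete, new  # import path assumed

new("my_pipeline")  # scaffold config and module in the current project
# ... edit the generated module, run or schedule it, then clean up:
delete("my_pipeline", cfg=True, module=True)  # remove config and module file
```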
-def delete(
-    name: str,
-    base_dir: str | None = None,
-    cfg: bool = True,
-    module: bool = False,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-):
-    """Delete a pipeline.
-
-    Args:
-        name (str): The name of the pipeline to delete.
-        base_dir (str | None, optional): The base path of the pipeline. Defaults to None.
-        cfg (bool, optional): Whether to delete the pipeline configuration. Defaults to True.
-        module (bool, optional): Whether to delete the pipeline module. Defaults to False.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-    """
-    with Pipeline(
-        name=name, base_dir=base_dir, storage_options=storage_options, fs=fs
-    ) as p:
-        p.delete(cfg=cfg, module=module)
-
-
-def save_dag(
-    name: str,
-    base_dir: str | None = None,
-    format: str = "png",
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-):
-    """Save an image of the pipeline's function graph.
-
-    Args:
-        name (str): The name of the pipeline.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        format (str, optional): The format of the graph file. Defaults to "png".
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-    Examples:
-        ```python
-        save_dag("my_pipeline")
-        ```
-    """
-    with Pipeline(
-        base_dir=base_dir,
-        name=name,
-        storage_options=storage_options,
-        fs=fs,
-    ) as p:
-        p.save_dag(format=format)
-
-
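`save_dag` persists the function graph while `show_dag` renders it; which formats are available ultimately depends on the visualization backend (graphviz is an assumption here):

```python
from flowerpower.pipeline import save_dag, show_dag  # import path assumed

save_dag("my_pipeline", format="svg")  # write the function graph as SVG
show_dag("my_pipeline")                # or render it for interactive viewing
```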
-def show_dag(
-    name: str,
-    base_dir: str | None = None,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-):
-    """Display the pipeline's function graph.
-
-    Args:
-        name (str): The name of the pipeline.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-    Examples:
-        ```python
-        show_dag("my_pipeline")
-        ```
-    """
-    with Pipeline(
-        base_dir=base_dir,
-        name=name,
-        storage_options=storage_options,
-        fs=fs,
-    ) as p:
-        return p.show_dag()
-
-
-def get_summary(
-    name: str | None = None,
-    base_dir: str | None = None,
-    cfg: bool = True,
-    module: bool = True,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-) -> dict[str, dict | str]:
-    """Get a summary of one pipeline, or of all pipelines if no name is given.
-
-    Args:
-        name (str | None, optional): The name of the pipeline. Defaults to None.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        cfg (bool, optional): Whether to include the configuration. Defaults to True.
-        module (bool, optional): Whether to include the module. Defaults to True.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-    Returns:
-        dict[str, dict | str]: The summary of the named pipeline, or a mapping of
-            pipeline names to summaries when `name` is None.
-
-    Examples:
-        ```python
-        summary = get_summary("my_pipeline")
-        ```
-    """
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=storage_options,
-        fs=fs,
-    ) as pm:
-        summary = pm.get_summary(name=name, cfg=cfg, module=module)
-        if name:
-            return summary[name]
-        return summary
-
-
-def show_summary(
-    name: str | None = None,
-    base_dir: str | None = None,
-    cfg: bool = True,
-    module: bool = True,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-):
-    """Show a summary of the pipeline.
-
-    Args:
-        name (str | None, optional): The name of the pipeline. Defaults to None.
-        base_dir (str | None, optional): The base path for the pipeline. Defaults to None.
-        cfg (bool, optional): Whether to show the configuration. Defaults to True.
-        module (bool, optional): Whether to show the module. Defaults to True.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-    Examples:
-        ```python
-        show_summary("my_pipeline")
-        ```
-    """
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=storage_options,
-        fs=fs,
-    ) as pm:
-        pm.show_summary(name=name, cfg=cfg, module=module)
-
-
-def show_pipelines(
-    base_dir: str | None = None,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-):
-    """Display all available pipelines in a formatted table.
-
-    Args:
-        base_dir (str | None, optional): The base path of the pipelines. Defaults to None.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-    Examples:
-        ```python
-        show_pipelines()
-        ```
-    """
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=storage_options,
-        fs=fs,
-    ) as pm:
-        pm.show_pipelines()
-
-
-def list_pipelines(
-    base_dir: str | None = None,
-    storage_options: dict | Munch | BaseStorageOptions = {},
-    fs: AbstractFileSystem | None = None,
-) -> list[str]:
-    """Get a list of all available pipelines.
-
-    Args:
-        base_dir (str | None, optional): The base path of the pipelines. Defaults to None.
-        storage_options (dict | Munch | BaseStorageOptions, optional): The storage options. Defaults to {}.
-        fs (AbstractFileSystem | None, optional): The fsspec filesystem to use. Defaults to None.
-
-    Returns:
-        list[str]: A list of pipeline names.
-
-    Examples:
-        ```python
-        pipelines = list_pipelines()
-        ```
-    """
-    with PipelineManager(
-        base_dir=base_dir,
-        storage_options=storage_options,
-        fs=fs,
-    ) as pm:
-        return pm.list_pipelines()
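The discovery helpers close out the module. A final sketch, import path assumed:

```python
from flowerpower.pipeline import list_pipelines, show_pipelines  # import path assumed

show_pipelines()                # rich table for interactive use
for name in list_pipelines():   # plain list for scripting
    print(name)
```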